Making a Weatherman Chatbot with Qdrant
A chatbot that tells you about real-time weather updates anywhere in the world
Why Weatherman Chatbot
Having quick and easy access to weather information is crucial for planning daily activities. Whether you’re deciding on what to wear, planning a trip, or managing outdoor events, staying informed about the weather is essential. However, with the vast amount of weather data available, it can be overwhelming and time-consuming to navigate through it all.
This is where a weatherman chatbot can come in handy. By leveraging the power of AI, we can create an intelligent chatbot that provides personalized weather information on demand. By integrating various technologies such as data retrieval from weather APIs, data formatting and chunking, storing the data in the vector database Qdrant, and connecting it all to the LLM Mistral, we will create a seamless experience for users seeking weather information.
Let’s Code
We will start off with a simple function that takes a location name (e.g., Eiffel Tower or Piccadilly Circus) as input and returns the latitude and longitude of that location. This information will be needed downstream when we send queries to the weather API. We’ll use the Nominatim API to do so, since it provides free and fast results.
import requests
from urllib.parse import quote_plus
def get_lat_lon(location_name):
    """Geocode a place name with the OpenStreetMap Nominatim search API.

    Args:
        location_name: Free-form place name, e.g. "Eiffel Tower".

    Returns:
        A ``(latitude, longitude)`` tuple of floats taken from the first
        search result.

    Raises:
        Exception: If the service does not answer with HTTP 200, or if it
            returns no results for ``location_name``.
    """
    # Let requests build and URL-encode the query string.
    params = {"format": "json", "q": location_name}
    # Nominatim's usage policy asks clients to identify themselves; the
    # generic python-requests User-Agent may be rejected.
    headers = {"User-Agent": "weatherman-chatbot/1.0"}
    # A timeout keeps the caller from hanging forever on a stalled connection.
    response = requests.get(
        "https://nominatim.openstreetmap.org/search",
        params=params,
        headers=headers,
        timeout=10,
    )
    if response.status_code != 200:
        raise Exception("Failed to connect to the geocoding service")

    data = response.json()
    if not data:
        raise Exception(f"No results found for location '{location_name}'")

    # Only the first (best-ranked) match is used.
    first_match = data[0]
    return float(first_match["lat"]), float(first_match["lon"])
# Usage example: geocode a sample city and print its coordinates.
location = "Delhi"
try:
    lat, lon = get_lat_lon(location)
    print(f"The latitude and longitude of {location} are {lat}, {lon}")
except Exception as e:
    # Any lookup failure is reported on stdout instead of aborting the script.
    print(e)
Now that we have the latitude and longitude available, we can send them to our weather API to retrieve all the relevant information about the upcoming weather forecast. In my research, I found Open-Meteo to be an excellent choice for retrieving weather data.
def weather_api(lat, long, forecast_days=16, timeout=10):
    """Fetch the daily weather forecast for a coordinate from Open-Meteo.

    Args:
        lat: Latitude of the location.
        long: Longitude of the location.
        forecast_days: Number of forecast days to request (defaults to 16,
            the value this function previously hard-coded).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON response body.

    Raises:
        Exception: If the API answers with a status code other than 200.
    """
    base_url = "https://api.open-meteo.com/v1/forecast"

    # Daily variables requested from the API, sent as one comma-separated value.
    daily_variables = [
        'temperature_2m_max',
        'temperature_2m_min',
        'precipitation_sum',
        'rain_sum',
        'showers_sum',
        'snowfall_sum',
        'precipitation_hours',
        'precipitation_probability_max',
        'precipitation_probability_min',
        'precipitation_probability_mean',
        'sunrise',
        'sunset',
        'sunshine_duration',
        'daylight_duration',
        'wind_speed_10m_max',
        'wind_gusts_10m_max',
        'wind_direction_10m_dominant',
        'shortwave_radiation_sum',
        'et0_fao_evapotranspiration',
        'uv_index_max',
        'uv_index_clear_sky_max',
    ]
    params = {
        'latitude': f'{lat}',
        'longitude': f'{long}',
        'forecast_days': f'{forecast_days}',
        'daily': ','.join(daily_variables),
    }

    # The timeout prevents a stalled connection from blocking the caller forever.
    response = requests.get(base_url, params=params, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Request failed with status code: {response.status_code}")
    return response.json()
Now we’ll write a function to format the weather data so that we have the information segregated according to dates.
def format_weather_data(weather_data):
    """Render the daily forecast as one text paragraph per date.

    Args:
        weather_data: Mapping with a ``'daily'`` entry that maps feature
            names to per-day value lists, including a ``'time'`` list.

    Returns:
        A string with one block per ``'time'`` entry, blocks separated by a
        blank line. Each block starts with ``Time: <entry>`` followed by one
        ``<feature>: <value>`` line per feature. Returns an empty string when
        there is no daily data.
    """
    # `or` also covers keys that are present but null.
    daily_data = weather_data.get('daily') or {}
    time_data = daily_data.get('time') or []

    formatted_entries = []
    for index, time_entry in enumerate(time_data):
        features = [f"Time: {time_entry}"]
        for feature, values in daily_data.items():
            if feature == 'time':  # Already emitted as the block header
                continue
            # Skip series that are null or shorter than the time axis rather
            # than failing on the whole forecast.
            if not isinstance(values, (list, tuple)) or index >= len(values):
                continue
            features.append(f"{feature}: {values[index]}")
        formatted_entries.append('\n'.join(features))

    # Blank line between days keeps each date's data visually grouped.
    return '\n\n'.join(formatted_entries)
Then we’ll convert it to document format so that it can be inserted into the vector database.
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema.document import Document
def get_text_chunks_langchain(text):
    """Split ``text`` into chunks and wrap each chunk in a ``Document``.

    The text is split with a ``CharacterTextSplitter`` configured with
    ``chunk_size=500`` and ``chunk_overlap=100``.

    Args:
        text: The text to split.

    Returns:
        A list of ``Document`` objects, one per chunk, with the chunk stored
        as ``page_content``.
    """
    splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    documents = []
    for chunk in splitter.split_text(text):
        documents.append(Document(page_content=chunk))
    return documents
Let’s write a function to load the Mistral LLM. We’ll be using a quantized version for better efficiency.
from transformers import (
AutoTokenizer,
AutoModelForCausalLM,
BitsAndBytesConfig,
pipeline
)
import torch
def load_llm():
    """Build a text-generation pipeline around 4-bit quantized Mistral-7B-Instruct.

    Returns:
        A ``transformers`` text-generation pipeline. Because
        ``return_full_text=True``, the generated text includes the prompt.
    """
    # Loading the Mistral model
    model_name = 'mistralai/Mistral-7B-Instruct-v0.2'

    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    # Reuse the end-of-sequence token as the padding token.
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    # 4-bit NF4 quantization with double quantization; compute in bfloat16.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
    )

    # Building an LLM text-generation pipeline
    text_generation_pipeline = pipeline(
        model=model,
        tokenizer=tokenizer,
        task="text-generation",
        # Sampling must be enabled for `temperature` to have any effect;
        # without it generation is greedy and the temperature is ignored.
        do_sample=True,
        temperature=0.2,
        repetition_penalty=1.1,
        return_full_text=True,
        max_new_tokens=1024,
    )
    return text_generation_pipeline
# Build the pipeline once at module level; answer_query calls this global.
llm = load_llm()
Let’s launch a local instance of Qdrant:
# Run Qdrant locally, publishing ports 6333 and 6334 and keeping its data in
# ./qdrant_storage. The volume argument is quoted so that a working directory
# containing spaces does not split it into several arguments.
docker run -p 6333:6333 -p 6334:6334 \
    -v "$(pwd)/qdrant_storage:/qdrant/storage:z" \
    qdrant/qdrant
Now we’ll write a function that takes in the location (input by the user) and adds all the relevant weather data for that location to the vector database. Note that this is also what makes the weather data real-time: every time the user enters a location, the existing collection is deleted and fresh weather data is fetched from the API.
from qdrant_client import QdrantClient
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Qdrant
def add_to_vectorstore(location):
    """Fetch the forecast for ``location`` and store it in Qdrant.

    The location is geocoded, its forecast is downloaded, formatted and
    chunked, and the chunks are written to the ``my_weather_data``
    collection. The resulting vector store is published as the module-level
    ``qdrant`` global.

    Args:
        location: Place name to look up.

    Returns:
        A status string naming the latitude and longitude that were loaded.
    """
    global qdrant

    lat, long = get_lat_lon(location)
    forecast = weather_api(lat, long)
    docs = get_text_chunks_langchain(format_weather_data(forecast))

    # Delete the existing collection before the new documents are inserted.
    QdrantClient("localhost", port=6333).delete_collection(
        collection_name="my_weather_data"
    )

    qdrant = Qdrant.from_documents(
        docs,
        HuggingFaceEmbeddings(),
        url="http://localhost:6333",
        prefer_grpc=True,
        collection_name="my_weather_data",
    )
    return f"Weather data for latitude: {lat} & longitude: {long} added"
Now let’s write the function that takes in the user query, performs a similarity search to retrieve the context, and runs it through the LLM to generate the appropriate answer to the user query.
def answer_query(message, chat_history):
    """Answer a user question from the weather data stored in the vector store.

    Args:
        message: The user's question.
        chat_history: List of ``(user_message, bot_answer)`` tuples; a new
            pair is appended to it. ``None`` is treated as an empty history.

    Returns:
        A ``("", chat_history)`` tuple: an empty string (used to clear the
        input textbox) and the updated history.
    """
    chat_history = [] if chat_history is None else chat_history

    # The `qdrant` global only exists after add_to_vectorstore has run; reply
    # with a hint instead of failing with a NameError when it is missing.
    vector_store = globals().get("qdrant")
    if vector_store is None:
        chat_history.append(
            (message, "Please enter a location first so that weather data can be loaded.")
        )
        return "", chat_history

    context_docs = vector_store.similarity_search(message, k=16)
    context = ' '.join(doc.page_content for doc in context_docs)

    template = (
        "Answer the question based only on the following context:\n"
        f"{context}\n"
        f"Question: {message}\n"
    )
    result = llm(template)
    # The pipeline output includes the prompt; strip it to keep only the answer.
    answer = result[0]["generated_text"].replace(template, '')

    chat_history.append((message, answer))
    return "", chat_history
With all the above done, we can design a simple Gradio UI to wrap everything together:
# `gr` is used below but never imported elsewhere in the file.
import gradio as gr

# Simple UI: a location box that loads weather data, plus a chat window.
with gr.Blocks() as demo:
    loc = gr.Textbox(label= "Enter the location for weather data, for e.g Eiffel Tower, Delhi")
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    # Submitting a location loads its forecast; the returned status string
    # replaces the text in the location box.
    loc.submit(add_to_vectorstore, loc, loc)
    # Submitting a question clears the message box and updates the chat.
    msg.submit(answer_query, [msg, chatbot], [msg, chatbot])
    # The Clear button resets the chat history.
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch(share=True)
Results
Here are a few screenshots from the UI:
GitHub
You can access the full code in the article in the following GitHub repo: https://github.com/vardhanam/weatherman_chatbot_qdrant