This repository was archived by the owner on Mar 13, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathllm_query.py
More file actions
99 lines (82 loc) · 3.44 KB
/
llm_query.py
File metadata and controls
99 lines (82 loc) · 3.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import RunnablePassthrough
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain.schema import format_document
from dotenv import load_dotenv
import os
# Populate the process environment from a .env file via python-dotenv.
load_dotenv()

# Fail fast with a readable message when the key is missing. Assigning
# os.getenv(...) straight back into os.environ raises an opaque
# "str expected, not NoneType" TypeError when the variable is unset.
_openai_api_key = os.getenv("OPENAI_API_KEY")
if _openai_api_key is None:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in the environment or in a .env file."
    )
os.environ["OPENAI_API_KEY"] = _openai_api_key
# Embedding model plus the Chroma store persisted on disk; the store is
# queried for context documents when building each prompt.
CHROMA_PERSIST_DIR = "./data/chroma_data"

embeddings = OpenAIEmbeddings()
vector_store = Chroma(
    embedding_function=embeddings,
    persist_directory=CHROMA_PERSIST_DIR,
)
# System instructions for the assistant. "{context}" is filled with the
# retrieved documents at invocation time.
_SYSTEM_TEMPLATE = (
    "You are ChatUTM, a witty AI assistant for UTM students and staff. "
    "Provide accurate and useful information related to Universiti Teknologi Malaysia (UTM), including courses, facilities, events, and academic support. "
    "If the user greets you with 'hi', '/start', or similar, introduce yourself and explain how you can assist them. "
    "Use the following context to answer the user's question:\n\nContext:\n{context}"
)

# Message order: system instructions, prior conversation turns, then the
# latest user message.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", _SYSTEM_TEMPLATE),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)
# Chat model configuration, kept in one mapping so the sampling settings are
# easy to review at a glance.
_LLM_SETTINGS = {
    "model": "gpt-4o-mini",
    "temperature": 0.8,  # fairly high: favours more creative replies
    "top_p": 0.9,  # fairly high: allows more diverse wording
    "max_tokens": 150,  # caps reply length to keep answers brief
}

llm = ChatOpenAI(**_LLM_SETTINGS)
def format_docs(docs):
    """Concatenate the ``page_content`` of each document, separated by a blank line."""
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)
# Build the retriever once at import time instead of calling
# vector_store.as_retriever() again on every chain invocation.
_retriever = vector_store.as_retriever()


def _retrieve_context(inputs):
    """Return the formatted documents retrieved for ``inputs["input"]``."""
    return format_docs(_retriever.invoke(inputs["input"]))


# Retrieval-augmented chain: add a "context" key to the incoming mapping,
# render the prompt with it, then call the chat model.
chain = RunnablePassthrough.assign(context=_retrieve_context) | prompt | llm
# In-memory chat histories, keyed by session ID.
session_store = {}


def get_session_history(session_id: str) -> ChatMessageHistory:
    """Return the history for ``session_id``, creating an empty one on first use."""
    history = session_store.get(session_id)
    if history is None:
        history = ChatMessageHistory()
        session_store[session_id] = history
    return history
# Wrap the retrieval chain with per-session message history, looked up
# through get_session_history.
chain_with_history = RunnableWithMessageHistory(
    chain,
    get_session_history,
    history_messages_key="chat_history",  # name of the MessagesPlaceholder in the prompt
    input_messages_key="input",  # key holding the user's message in the input mapping
)
def get_response(user_message, chat_history, session_id="user_session"):
    """
    Generate a response from the LLM for the latest user message.

    The conversation memory given to the model comes from the session store
    behind ``chain_with_history``, selected by ``session_id``. The
    ``chat_history`` list is not sent to the model; it is only a
    caller-side transcript that this function extends.

    Args:
        user_message (str): The latest message from the user.
        chat_history (list): List of ``(role, text)`` tuples with past
            interactions. It is extended in place; ``None`` is treated as
            an empty list.
        session_id (str): Key of the server-side message history to use.
            Defaults to ``"user_session"`` (the previously hard-coded
            value); pass a distinct ID per user or conversation so that
            histories are not shared.

    Returns:
        tuple: A tuple containing the response string and the updated chat history.
    """
    if chat_history is None:
        chat_history = []

    # Call the chain first so that a failed request does not leave a dangling
    # user message (with no assistant reply) in the caller's transcript.
    result = chain_with_history.invoke(
        {"input": user_message},
        config={"configurable": {"session_id": session_id}},
    )
    reply = result.content

    # Record both turns only after the model call succeeded.
    chat_history.append(("user", user_message))
    chat_history.append(("assistant", reply))
    return reply, chat_history