-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path19_rag_basic.py
More file actions
175 lines (149 loc) · 7.26 KB
/
19_rag_basic.py
File metadata and controls
175 lines (149 loc) · 7.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
"""
INTERVIEW STYLE Q&A:
Q: What is RAG (Retrieval-Augmented Generation) and why is it useful?
A: RAG combines information retrieval with LLM generation. Instead of relying solely
on the model's training data, RAG retrieves relevant documents, adds them as context,
and generates answers based on that context. This enables answering questions about
specific documents the model wasn't trained on.
Q: How does RAG work in practice?
A: (1) Load and split documents into chunks, (2) Create embeddings and store in a
vector database, (3) When asked a question, retrieve similar chunks, (4) Include
retrieved chunks as context in the LLM prompt, (5) Generate answer based on context.
Q: What is a vector store and why use it?
A: A vector store (like Chroma) stores document embeddings and enables similarity
search. When you query, it finds the most semantically similar document chunks
to your question, even if they don't contain exact keyword matches.
Q: What are the key components of a RAG system?
A: (1) Document loader (loads PDFs, text files, etc.), (2) Text splitter (chunks
documents), (3) Embeddings model (converts text to vectors), (4) Vector store
(stores and searches embeddings), (5) Retriever (finds relevant chunks), (6) LLM
(generates answers from context).
SAMPLE CODE:
"""
import os
from operator import itemgetter
from pathlib import Path
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Q: How do you build a vector store from documents?
# A: Load documents, split into chunks, create embeddings, and store in vector database
def build_vectorstore(persist_dir: str) -> Chroma:
# Q: How do you handle missing documents?
# A: Check if file exists, provide fallback (README) or placeholder text
pdf_path = Path(__file__).parent / "14_openresume-resume.pdf"
if not pdf_path.exists():
# Fallback to README if PDF not found
corpus_path = Path(__file__).parent / "README.md"
text = corpus_path.read_text(encoding="utf-8") if corpus_path.exists() else ""
if not text:
text = "Resume PDF not found and README missing; using placeholder text."
# Q: How do you split text into chunks?
# A: Use RecursiveCharacterTextSplitter with chunk_size and chunk_overlap
# Overlap ensures context isn't lost at chunk boundaries
splitter = RecursiveCharacterTextSplitter(chunk_size=1200, chunk_overlap=200)
docs = splitter.create_documents(
[text], metadatas=[{"source": str(corpus_path)}]
)
else:
# Q: How do you load PDF documents?
# A: Use PyPDFLoader to load PDF pages, then split into smaller chunks
# Load PDF pages, then split into chunks
loader = PyPDFLoader(str(pdf_path))
pages = loader.load()
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
docs = splitter.split_documents(pages)
print(f"Loaded {len(docs)} chunks for indexing")
# Q: How do you create embeddings?
# A: Use an embeddings model (AzureOpenAIEmbeddings) to convert text to vectors
embeddings = AzureOpenAIEmbeddings(
azure_deployment=os.environ["AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT"]
)
# Q: How do you store documents in a vector database?
# A: Use Chroma.from_documents() with documents, embeddings, and persist directory
# This creates embeddings and stores them for later retrieval
vs = Chroma.from_documents(
docs, embedding=embeddings, persist_directory=persist_dir
)
vs.persist() # Save to disk for reuse
return vs
def get_or_create_vectorstore(base_persist_dir: str) -> Chroma:
deployment = os.environ["AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT"]
persist_dir = f"{base_persist_dir}_{deployment}"
embeddings = AzureOpenAIEmbeddings(azure_deployment=deployment)
if Path(persist_dir).exists():
try:
return Chroma(embedding_function=embeddings, persist_directory=persist_dir)
except Exception:
pass
return build_vectorstore(persist_dir)
# Q: How do you create a RAG chain?
# A: Combine retriever (finds relevant docs) with LLM (generates answer from context)
def make_chain(vs: Chroma):
# Q: How do you create a retriever?
# A: Convert vector store to retriever with search_kwargs (like k=6 for top 6 results)
retriever = vs.as_retriever(search_kwargs={"k": 6})
# Q: How do you design a RAG prompt?
# A: Include placeholders for context and question - instruct model to use context
# and say "don't know" if answer isn't in context
system_template = (
"You are a concise assistant. Use the provided context to answer the user's question. "
"If the answer is not in the context, say you don't know. Keep answers under 8 sentences.\n\n"
"Context:\n{context}\n\nQuestion: {question}"
)
prompt = PromptTemplate.from_template(system_template)
# Q: How do you set up the LLM for RAG?
# A: Use your chat model with low temperature for consistent, factual responses
# Use AzureChatOpenAI per user's preference
llm = AzureChatOpenAI(
azure_deployment=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"],
temperature=0,
)
parser = StrOutputParser()
# Q: How do you format retrieved documents?
# A: Create a function that formats document chunks for inclusion in the prompt
def format_docs(docs):
return "\n\n".join(f"[{i+1}] {d.page_content}" for i, d in enumerate(docs))
# Q: How do you build the complete RAG chain?
# A: Use LCEL to chain: question → retrieve → format → prompt → llm → parse
# LCEL chain: take question -> retrieve -> format -> prompt -> llm -> string
return (
{
"context": itemgetter("question") | retriever | format_docs,
"question": itemgetter("question"),
}
| prompt
| llm
| parser
)
def main():
# Accept either OpenAI or Azure OpenAI env configuration
has_openai = bool(os.getenv("OPENAI_API_KEY"))
has_azure = bool(os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME"))
if not (has_openai or has_azure):
raise RuntimeError(
"Set OPENAI_API_KEY or configure Azure with AZURE_OPENAI_DEPLOYMENT_NAME (and related Azure env vars)."
)
persist_dir = str(Path(__file__).parent / ".chroma_rag_resume")
vs = get_or_create_vectorstore(persist_dir)
chain = make_chain(vs)
questions = [
"Summarize the candidate's experience.",
"What are the key skills listed?",
"Give 2 interview questions tailored to this resume.",
]
for q in questions:
print("\n=== Question ===\n", q)
# For quick visibility, show top retrieved docs
top_docs = vs.similarity_search(q, k=3)
print(
f"Retrieved {len(top_docs)} docs. First snippet: ",
(top_docs[0].page_content[:180] + "...") if top_docs else "<none>",
)
answer = chain.invoke({"question": q})
print("\n--- Answer ---\n", answer)
if __name__ == "__main__":
main()