Notes - by Kishor

Let's understand what happens when you run the application:

1. Document Processing:
   - The application loads all .txt files from your documents directory
   - It splits these documents into smaller chunks (default 1000 characters)
   - Each chunk is converted into a numerical vector using OpenAI's embedding model

2. Vector Storage:
   - The document vectors are stored in a Chroma database
   - This database is saved locally in a ./chroma_db directory
   - This allows for quick similarity searching when answering questions

3. Question Answering:
   - When you ask a question, the system:
     - Converts your question to a vector
     - Finds the most similar document chunks
     - Combines these chunks with your question
     - Generates an answer using the OpenAI model
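The "finds the most similar chunks" step above usually means ranking chunk vectors by cosine similarity to the question vector. Here is a minimal sketch using toy 3-dimensional vectors in place of real OpenAI embeddings (the chunk names and vector values are made up for illustration):

```python
import math

def cosine_similarity(a, b):
    """Cosine similarity between two equal-length vectors."""
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(x * x for x in b))
    return dot / (norm_a * norm_b)

# Toy "embeddings" standing in for real embedding-model output
chunk_vectors = {
    "chunk about cats": [0.9, 0.1, 0.0],
    "chunk about cooking": [0.5, 0.5, 0.5],
    "chunk about tax law": [0.0, 0.1, 0.9],
}
question_vector = [0.85, 0.2, 0.05]  # pretend embedding of a cat question

# Rank chunks by similarity to the question, most similar first
ranked = sorted(chunk_vectors,
                key=lambda name: cosine_similarity(question_vector, chunk_vectors[name]),
                reverse=True)
print(ranked[0])  # → chunk about cats
```

The top-k chunks from this ranking are what get combined with your question in the next step.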

Common Issues and Solutions:

1. Import Errors:
   - Make sure your virtual environment is activated
   - Verify all packages are installed correctly

2. API Key Errors:
   - Check your .env file exists and contains the correct API key
   - Or provide the API key via command line argument

3. Document Loading Issues:
   - Verify your documents are in the correct directory
   - Make sure they are .txt files
   - Check file permissions
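For item 2, the .env file is just a plain-text file in the project root with a single assignment (the key shown here is a placeholder, not a real key):

```
OPENAI_API_KEY=sk-your-key-here
```

There should be no quotes and no spaces around the equals sign; load_dotenv() picks this file up automatically when the application starts.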

```python
import os
import argparse
from typing import List, Dict

from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from dotenv import load_dotenv


class RAGApplication:
    def __init__(self, documents_dir: str, openai_api_key: str = None):
        """
        Initialize the RAG application.

        Parameters:
        - documents_dir: Directory containing the source documents
        - openai_api_key: Optional API key (can also be set via .env file)
        """
        # Load environment variables from .env file
        load_dotenv()

        # Set API key if provided, otherwise use from .env
        if openai_api_key:
            os.environ["OPENAI_API_KEY"] = openai_api_key

        self.documents_dir = documents_dir

        # Initialize OpenAI embeddings - this converts text to numerical vectors
        self.embeddings = OpenAIEmbeddings()

        # Initialize storage variables
        self.vector_store = None  # Will store document embeddings
        self.qa_chain = None      # Will handle the Q&A process

    def load_documents(self) -> List[str]:
        """
        Load all text documents from the specified directory.
        Supports recursive directory search for .txt files.
        """
        # Create a loader that will read all .txt files in the directory
        loader = DirectoryLoader(
            self.documents_dir,
            glob="**/*.txt",       # Pattern to match text files
            loader_cls=TextLoader  # Use basic text loader
        )
        # Load all documents
        documents = loader.load()
        return documents

    def split_documents(self, documents: List[str], chunk_size: int = 1000) -> List[str]:
        """
        Split documents into smaller chunks for better processing.

        Parameters:
        - documents: List of loaded documents
        - chunk_size: Size of each chunk in characters
        """
        # Create a text splitter that uses recursive character splitting
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,  # Number of characters per chunk
            chunk_overlap=200,      # Overlap between chunks to maintain context
            length_function=len     # Function to measure text length
        )
        # Split all documents into chunks
        splits = text_splitter.split_documents(documents)
        return splits

    def create_vector_store(self, splits: List[str]):
        """
        Create a vector store from document chunks.
        This converts text chunks to vectors and stores them for similarity search.
        """
        # Create a Chroma vector store from the document chunks
        self.vector_store = Chroma.from_documents(
            documents=splits,
            embedding=self.embeddings,
            persist_directory="./chroma_db"  # Store vectors on disk
        )
        # Save the vector store to disk
        self.vector_store.persist()

    def setup_qa_chain(self):
        """
        Set up the question-answering chain that combines retrieval and generation.
        """
        # Create a retriever that will fetch relevant documents
        retriever = self.vector_store.as_retriever(
            search_kwargs={"k": 3}  # Number of documents to retrieve
        )
        # Create the QA chain
        self.qa_chain = RetrievalQA.from_chain_type(
            llm=OpenAI(),                 # Language model for generation
            chain_type="stuff",           # Combine retrieved docs into prompt
            retriever=retriever,          # Document retriever
            return_source_documents=True  # Include source docs in response
        )

    def query(self, question: str) -> Dict:
        """
        Process a question and return an answer with source documents.

        Parameters:
        - question: The question to answer

        Returns:
        - Dictionary containing answer and source documents
        """
        # Check if QA chain is initialized
        if not self.qa_chain:
            raise ValueError("QA chain not initialized. Run setup_qa_chain first.")

        # Process the question
        response = self.qa_chain({"query": question})

        # Format the response
        return {
            "answer": response["result"],
            "source_documents": [doc.page_content for doc in response["source_documents"]]
        }


def main():
    # Set up command line argument parsing
    parser = argparse.ArgumentParser(description='RAG Application')
    parser.add_argument('--docs_dir', type=str, default='./documents',
                        help='Directory containing documents')
    parser.add_argument('--question', type=str,
                        help='Question to ask the RAG system')
    parser.add_argument('--api_key', type=str,
                        help='OpenAI API key (optional if set in .env file)')
    parser.add_argument('--chunk_size', type=int, default=1000,
                        help='Chunk size for splitting documents')

    # Parse command line arguments
    args = parser.parse_args()

    # Initialize the RAG application
    rag_app = RAGApplication(
        documents_dir=args.docs_dir,
        openai_api_key=args.api_key
    )

    # Process the documents
    print("Loading documents...")
    documents = rag_app.load_documents()
    print(f"Found {len(documents)} documents")

    print("Splitting documents...")
    splits = rag_app.split_documents(documents, chunk_size=args.chunk_size)
    print(f"Created {len(splits)} chunks")

    print("Creating vector store...")
    rag_app.create_vector_store(splits)
    rag_app.setup_qa_chain()

    # Handle questions either from command line or interactive mode
    if args.question:
        # Single question mode
        print("\nProcessing question:", args.question)
        response = rag_app.query(args.question)
        print("\nAnswer:", response['answer'])
        print("\nSource Documents:")
        for idx, doc in enumerate(response['source_documents'], 1):
            print(f"\nDocument {idx}:")
            print(doc[:200] + "...")
    else:
        # Interactive mode
        while True:
            question = input("\nEnter your question (or 'quit' to exit): ")
            if question.lower() == 'quit':
                break
            response = rag_app.query(question)
            print("\nAnswer:", response['answer'])
            print("\nSource Documents:")
            for idx, doc in enumerate(response['source_documents'], 1):
                print(f"\nDocument {idx}:")
                print(doc[:200] + "...")


if __name__ == "__main__":
    main()
```

Now comes the exciting part - actually running your RAG application! You have several ways to do
this:

1. Interactive Mode (recommended for first-time users):


```bash

python rag_app.py

```

When you run this, several things happen in sequence:

a) Document Processing:

- The system reads all your text files from the documents folder

- It prints "Loading documents..." and tells you how many it found

- Then it splits these documents into smaller, manageable chunks

- You'll see "Created X chunks" showing how many pieces it made

b) Vector Store Creation:

- The system converts your document chunks into numerical vectors

- These vectors are stored in a database for quick searching

- You'll see "Creating vector store..." during this process

c) Question-Answering:

- The system will prompt you to "Enter your question"

- Type your question and press Enter

- The system will:
  - Find relevant document chunks
  - Generate an answer using those chunks
  - Show you both the answer and the source documents it used

2. Single Question Mode:

If you want to ask just one question:

```bash

python rag_app.py --question "What is the main topic of these documents?"

```
3. Advanced Configuration:

For more control over how the system works:

```bash

python rag_app.py --docs_dir "./my_documents" --chunk_size 500

```
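Under the hood, the "stuff" chain type mentioned in the code simply concatenates the retrieved chunks and your question into one prompt for the language model. A minimal sketch of that assembly step (the template wording is illustrative, not LangChain's exact internal prompt):

```python
def build_stuff_prompt(question, retrieved_chunks):
    """Concatenate retrieved chunks and the question into a single prompt."""
    context = "\n\n".join(retrieved_chunks)
    return (
        "Use the following context to answer the question.\n\n"
        f"Context:\n{context}\n\n"
        f"Question: {question}\nAnswer:"
    )

chunks = ["Python was created by Guido van Rossum.",
          "Python 3.0 was released in 2008."]
prompt = build_stuff_prompt("Who created Python?", chunks)
print(prompt)
```

This is why the retriever's `k` setting matters: all `k` chunks must fit in the model's context window alongside your question.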

Troubleshooting Common Issues:

If you encounter problems, here's what to check:

1. "Module not found" errors:

- Make sure your virtual environment is activated

- Try running `pip install -r requirements.txt` again

2. API Key errors:

- Check your .env file exists and has the correct API key

- Make sure there are no spaces around the equals sign

3. Document loading issues:

- Verify your documents are .txt files

- Check if they're in the correct folder

- Make sure they're readable (try opening them in Notepad)

4. Memory or performance issues:

- Try processing fewer documents first

- Use a larger chunk size (e.g., --chunk_size 2000)
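As a rough sanity check for item 4, you can estimate how many chunks (and therefore how many embedding calls) a corpus will produce: with a 200-character overlap, each chunk advances about chunk_size - 200 characters. The helper below is hypothetical, and the real splitter breaks on separators, so actual counts will differ somewhat:

```python
def estimate_chunks(total_chars, chunk_size, overlap=200):
    """Rough estimate of chunk count for a corpus of total_chars characters."""
    step = chunk_size - overlap       # each new chunk advances this far
    return max(1, -(-total_chars // step))  # ceiling division

# A 100,000-character corpus:
print(estimate_chunks(100_000, 1000))  # → 125 chunks
print(estimate_chunks(100_000, 2000))  # → 56 chunks
```

Doubling the chunk size roughly halves the number of chunks to embed and store, which is why it helps with memory and performance.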

Understanding the Output:

When you ask a question, you'll see:

1. The answer to your question
2. Snippets from the source documents that were used

This helps you understand where the information came from.


Linux commands:
Intro 0:05
⏩ ssh 0:21
⏩ ls 0:30
⏩ pwd 0:35
⏩ cd 0:51
⏩ touch 1:23
⏩ echo 1:32
⏩ nano 1:42
⏩ vim 1:56
⏩ cat 2:02
⏩ shred 2:10
⏩ mkdir 2:15
⏩ cp 2:26
⏩ rm 2:28
⏩ rmdir 2:38
⏩ ln 2:45
⏩ clear 2:50
⏩ whoami 2:57
⏩ useradd 3:02
⏩ sudo 3:08
⏩ adduser 3:15
⏩ su 3:21
⏩ exit 3:29
⏩ passwd 3:50
⏩ apt 4:12
⏩ finger 4:20
⏩ man 4:33
⏩ whatis 4:55
⏩ curl 5:05
⏩ zip 5:13
⏩ unzip 5:20
⏩ less 5:29
⏩ head 5:32
⏩ tail 5:34
⏩ cmp 5:42
⏩ diff 5:50
⏩ sort 6:00
⏩ find 6:19
⏩ chmod 6:24
⏩ chown 6:34
⏩ ifconfig 6:40
⏩ ip address 6:47
⏩ grep 7:02
⏩ awk 7:26
⏩ resolvectl status 7:31
⏩ ping 7:57
⏩ netstat 8:08
⏩ ss 8:14
⏩ iptables 8:24
⏩ ufw 8:43
⏩ uname 8:52
⏩ neofetch 9:01
⏩ cal 9:14
⏩ free 9:21
⏩ df 9:28
⏩ ps 9:36
⏩ top 9:40
⏩ htop 9:44
⏩ kill 10:03
⏩ pkill 10:14
⏩ systemctl 10:29
⏩ history 10:35
⏩ reboot 10:37
⏩ shutdown
