feat: Add RAG service and modelfile
rag_service/Dockerfile (23 lines, Normal file)
@@ -0,0 +1,23 @@
# Use an official Python runtime as a parent image
FROM python:3.11-slim

# Set the working directory in the container
WORKDIR /app

# Copy the requirements file into the container at /app
COPY ./requirements.txt /app/

# Install any needed packages specified in requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application's code into the container at /app
COPY . /app/

# Make port 8000 available to the world outside this container
EXPOSE 8000

# Define environment variables
ENV OLLAMA_BASE_URL=http://192.168.1.2:30068

# Run the command to build the vector store and then start the API
CMD sh -c "python rag_builder.py && uvicorn rag_api:app --host 0.0.0.0 --port 8000"
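For reference, a container built from this image can be smoke-tested from the host once the vector store build finishes and the API is up. The sketch below is a minimal example, assuming the container was started with port 8000 published (e.g. `docker run -p 8000:8000 ...`) and a populated /data volume mounted; it uses only the Python standard library, so nothing extra is needed on the host.

# smoke_test.py - minimal sketch; assumes the service is reachable
# on localhost:8000 (i.e. the container's port 8000 is published).
import json
import urllib.request

payload = json.dumps({"query": "What is this project about?"}).encode("utf-8")
req = urllib.request.Request(
    "http://localhost:8000/retrieve",
    data=payload,
    headers={"Content-Type": "application/json"},
    method="POST",
)
with urllib.request.urlopen(req) as resp:
    # The API returns {"context": "..."} on success.
    print(json.loads(resp.read())["context"])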
rag_service/rag_api.py (37 lines, Normal file)
@@ -0,0 +1,37 @@
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings

# Configuration
PERSIST_DIRECTORY = "/data/db"

# Initialize FastAPI app
app = FastAPI()

# Load the vector store
embeddings = OllamaEmbeddings(model="nomic-embed-text")
db = Chroma(persist_directory=PERSIST_DIRECTORY, embedding_function=embeddings)
retriever = db.as_retriever()

class RetrieveRequest(BaseModel):
    query: str

class RetrieveResponse(BaseModel):
    context: str

@app.post("/retrieve", response_model=RetrieveResponse)
async def retrieve_context(request: RetrieveRequest):
    """
    Retrieves context from the vector store for a given query.
    """
    try:
        docs = retriever.get_relevant_documents(request.query)
        context = "\n\n".join([doc.page_content for doc in docs])
        return RetrieveResponse(context=context)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
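The endpoint can also be exercised in-process with FastAPI's TestClient, without running the server or Docker. A minimal sketch, with two assumptions worth flagging: httpx must be installed (TestClient depends on it), and the Ollama endpoint must be reachable, since importing rag_api loads the store and each request embeds the query.

# test_retrieve.py - minimal in-process sketch; assumes httpx is
# installed and a vector store already exists at /data/db.
from fastapi.testclient import TestClient

from rag_api import app

client = TestClient(app)

response = client.post("/retrieve", json={"query": "deployment steps"})
assert response.status_code == 200
print(response.json()["context"][:500])  # preview the stitched context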
rag_service/rag_builder.py (55 lines, Normal file)
@@ -0,0 +1,55 @@
import os
import shutil
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader, UnstructuredFileLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings

# Configuration
SOURCE_DIRECTORY = "/data/rag_source"
PERSIST_DIRECTORY = "/data/db"
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

def build_vector_store():
    """
    Builds the vector store from the documents in the source directory.
    """
    print("Starting to build vector store...")

    # Clean up old database
    if os.path.exists(PERSIST_DIRECTORY):
        print(f"Removing old database from {PERSIST_DIRECTORY}")
        shutil.rmtree(PERSIST_DIRECTORY)

    # Load the documents
    print(f"Loading documents from {SOURCE_DIRECTORY}...")
    loader = DirectoryLoader(SOURCE_DIRECTORY, glob="**/*.*", show_progress=True, use_multithreading=True)
    documents = loader.load()

    if not documents:
        print("No documents found. Exiting.")
        return

    print(f"Loaded {len(documents)} documents.")

    # Split the documents into chunks
    print("Splitting documents into chunks...")
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
    texts = text_splitter.split_documents(documents)
    print(f"Split documents into {len(texts)} chunks.")

    # Create the embeddings
    print("Creating embeddings...")
    embeddings = OllamaEmbeddings(model="nomic-embed-text", show_progress=True)

    # Create and persist the vector store
    print("Creating and persisting vector store...")
    db = Chroma.from_documents(texts, embeddings, persist_directory=PERSIST_DIRECTORY)
    print("Vector store created successfully.")
    db.persist()
    print("Vector store persisted.")


if __name__ == "__main__":
    build_vector_store()
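After a build, the persisted store can be sanity-checked directly, bypassing the API. A minimal sketch, assuming the same embedding model and persist directory as above; it prints the top matches with their distance scores (lower means closer under Chroma's default metric):

# check_store.py - minimal sketch for inspecting a freshly built store.
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma

embeddings = OllamaEmbeddings(model="nomic-embed-text")
db = Chroma(persist_directory="/data/db", embedding_function=embeddings)

# similarity_search_with_score returns (Document, distance) pairs.
for doc, score in db.similarity_search_with_score("example query", k=3):
    source = doc.metadata.get("source", "unknown")
    print(f"{score:.4f}  {source}  {doc.page_content[:80]!r}")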
rag_service/requirements.txt (8 lines, Normal file)
@@ -0,0 +1,8 @@
langchain
langchain-community
fastapi
uvicorn
chromadb
ollama
unstructured
pypdf