feat: Add RAG service and modelfile

Rushabh Gosar
2026-01-07 18:05:08 -08:00
parent 5d1a0ee72b
commit 8d272f1c08
5 changed files with 141 additions and 0 deletions

rag_service/Dockerfile Normal file

@@ -0,0 +1,23 @@
# Use an official Python runtime as a parent image
FROM python:3.11-slim
# Set the working directory in the container
WORKDIR /app
# Copy the requirements file into the container at /app
COPY ./requirements.txt /app/
# Install any needed packages specified in requirements.txt
RUN pip install --no-cache-dir -r requirements.txt
# Copy the rest of the application's code into the container at /app
COPY . /app/
# Make port 8000 available to the world outside this container
EXPOSE 8000
# Set the default Ollama endpoint (can be overridden at runtime)
ENV OLLAMA_BASE_URL=http://192.168.1.2:30068
# Run the command to build the vector store and then start the API
CMD sh -c "python rag_builder.py && uvicorn rag_api:app --host 0.0.0.0 --port 8000"
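Both scripts depend on the Ollama endpoint set by ENV above. A minimal pre-flight sketch that checks the endpoint is reachable before running a build, assuming the requests package and Ollama's standard GET /api/tags model-listing route (neither is part of this commit):

import os

import requests

base_url = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434")
# A 200 from /api/tags means Ollama is up; the response lists installed models.
resp = requests.get(f"{base_url}/api/tags", timeout=5)
resp.raise_for_status()
models = [m["name"] for m in resp.json().get("models", [])]
print("Ollama reachable; models:", models)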

rag_service/rag_api.py Normal file

@@ -0,0 +1,37 @@
import os

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings

# Configuration
PERSIST_DIRECTORY = "/data/db"
# Honor the endpoint configured in the Dockerfile; fall back to Ollama's default.
OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434")

# Initialize FastAPI app
app = FastAPI()

# Load the vector store
embeddings = OllamaEmbeddings(model="nomic-embed-text", base_url=OLLAMA_BASE_URL)
db = Chroma(persist_directory=PERSIST_DIRECTORY, embedding_function=embeddings)
retriever = db.as_retriever()


class RetrieveRequest(BaseModel):
    query: str


class RetrieveResponse(BaseModel):
    context: str


@app.post("/retrieve", response_model=RetrieveResponse)
async def retrieve_context(request: RetrieveRequest):
    """
    Retrieve context from the vector store for a given query.
    """
    try:
        docs = retriever.get_relevant_documents(request.query)
        context = "\n\n".join(doc.page_content for doc in docs)
        return RetrieveResponse(context=context)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
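A quick client-side check of the /retrieve endpoint. A minimal sketch using the requests package (not a dependency of this commit), assuming the service is reachable on localhost:8000; the query string is a placeholder:

import requests

resp = requests.post(
    "http://localhost:8000/retrieve",
    json={"query": "How is the vector store built?"},
    timeout=30,
)
resp.raise_for_status()
print(resp.json()["context"])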

rag_service/rag_builder.py Normal file

@@ -0,0 +1,55 @@
import os
import shutil

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings

# Configuration
SOURCE_DIRECTORY = "/data/rag_source"
PERSIST_DIRECTORY = "/data/db"
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200
# Honor the endpoint configured in the Dockerfile; fall back to Ollama's default.
OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434")


def build_vector_store():
    """
    Build the vector store from the documents in the source directory.
    """
    print("Starting to build vector store...")

    # Clean up the old database
    if os.path.exists(PERSIST_DIRECTORY):
        print(f"Removing old database from {PERSIST_DIRECTORY}")
        shutil.rmtree(PERSIST_DIRECTORY)

    # Load the documents
    print(f"Loading documents from {SOURCE_DIRECTORY}...")
    loader = DirectoryLoader(SOURCE_DIRECTORY, glob="**/*.*", show_progress=True, use_multithreading=True)
    documents = loader.load()
    if not documents:
        print("No documents found. Exiting.")
        return
    print(f"Loaded {len(documents)} documents.")

    # Split the documents into chunks
    print("Splitting documents into chunks...")
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
    texts = text_splitter.split_documents(documents)
    print(f"Split documents into {len(texts)} chunks.")

    # Create the embeddings
    print("Creating embeddings...")
    embeddings = OllamaEmbeddings(model="nomic-embed-text", base_url=OLLAMA_BASE_URL, show_progress=True)

    # Create and persist the vector store
    print("Creating and persisting vector store...")
    db = Chroma.from_documents(texts, embeddings, persist_directory=PERSIST_DIRECTORY)
    print("Vector store created successfully.")
    db.persist()
    print("Vector store persisted.")


if __name__ == "__main__":
    build_vector_store()
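To sanity-check a finished build, the persisted index can be reopened and queried directly, bypassing the API. A minimal sketch under the same configuration as above ("example query" is a placeholder):

import os

from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma

base_url = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434")
embeddings = OllamaEmbeddings(model="nomic-embed-text", base_url=base_url)
db = Chroma(persist_directory="/data/db", embedding_function=embeddings)

# Raw similarity search against the persisted index.
for doc in db.similarity_search("example query", k=3):
    print(doc.metadata.get("source"), doc.page_content[:80])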

rag_service/requirements.txt Normal file

@@ -0,0 +1,7 @@
langchain
langchain-community
fastapi
uvicorn
chromadb
ollama
unstructured
pypdf