feat: Add RAG service and modelfile

Rushabh Gosar
2026-01-07 18:05:08 -08:00
parent 5d1a0ee72b
commit 8d272f1c08
5 changed files with 141 additions and 0 deletions

Modelfile

@@ -0,0 +1,19 @@
FROM gpt-oss:20b
# Set the system prompt
SYSTEM """You are a specialized assistant for the rushg.me knowledge base. Your goal is to answer questions based *only* on the context provided to you. If the information needed to answer the question is not in the context, you must state that you do not have enough information to answer. Do not use any of your prior knowledge or external information."""
# Set the template for how the prompt will be structured
TEMPLATE """{{- if .System }}
### System:
{{ .System }}
{{- end }}
### Context:
{{ .Prompt }}
### User Question:
{{- /* This is a placeholder. The user's actual question should be appended here by the application. */}}
### Answer:
"""

rag_service/Dockerfile

@@ -0,0 +1,23 @@
# Use an official Python runtime as a parent image
FROM python:3.11-slim
# Set the working directory in the container
WORKDIR /app
# Copy the requirements file into the container at /app
COPY ./requirements.txt /app/
# Install any needed packages specified in requirements.txt
RUN pip install --no-cache-dir -r requirements.txt
# Copy the rest of the application's code into the container at /app
COPY . /app/
# Make port 8000 available to the world outside this container
EXPOSE 8000
# Define environment variables
ENV OLLAMA_BASE_URL=http://192.168.1.2:30068
# Run the command to build the vector store and then start the API
CMD sh -c "python rag_builder.py && uvicorn rag_api:app --host 0.0.0.0 --port 8000"

rag_service/rag_api.py

@@ -0,0 +1,37 @@
import os

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings

# Configuration
PERSIST_DIRECTORY = "/data/db"
# Use the Ollama endpoint configured in the Dockerfile, falling back to the local default
OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434")

# Initialize FastAPI app
app = FastAPI()

# Load the vector store
embeddings = OllamaEmbeddings(model="nomic-embed-text", base_url=OLLAMA_BASE_URL)
db = Chroma(persist_directory=PERSIST_DIRECTORY, embedding_function=embeddings)
retriever = db.as_retriever()
class RetrieveRequest(BaseModel):
    query: str

class RetrieveResponse(BaseModel):
    context: str
@app.post("/retrieve", response_model=RetrieveResponse)
async def retrieve_context(request: RetrieveRequest):
    """
    Retrieves context from the vector store for a given query.
    """
    try:
        docs = retriever.get_relevant_documents(request.query)
        context = "\n\n".join([doc.page_content for doc in docs])
        return RetrieveResponse(context=context)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
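
With the container running, the /retrieve endpoint can be exercised directly. A short sketch, assuming the service is reachable on the port exposed by the Dockerfile and that requests is installed on the client side (both are assumptions, not part of this commit):

import requests

# Ask the RAG service for context relevant to a query
resp = requests.post(
    "http://localhost:8000/retrieve",
    json={"query": "What projects are documented on rushg.me?"},
    timeout=30,
)
resp.raise_for_status()
# The response body is {"context": "..."} with the retrieved chunks joined by blank lines
print(resp.json()["context"])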

rag_service/rag_builder.py

@@ -0,0 +1,55 @@
import os
import shutil
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader, UnstructuredFileLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings
# Configuration
SOURCE_DIRECTORY = "/data/rag_source"
PERSIST_DIRECTORY = "/data/db"
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200
def build_vector_store():
    """
    Builds the vector store from the documents in the source directory.
    """
    print("Starting to build vector store...")

    # Clean up old database
    if os.path.exists(PERSIST_DIRECTORY):
        print(f"Removing old database from {PERSIST_DIRECTORY}")
        shutil.rmtree(PERSIST_DIRECTORY)

    # Load the documents
    print(f"Loading documents from {SOURCE_DIRECTORY}...")
    loader = DirectoryLoader(SOURCE_DIRECTORY, glob="**/*.*", show_progress=True, use_multithreading=True)
    documents = loader.load()
    if not documents:
        print("No documents found. Exiting.")
        return
    print(f"Loaded {len(documents)} documents.")

    # Split the documents into chunks
    print("Splitting documents into chunks...")
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
    texts = text_splitter.split_documents(documents)
    print(f"Split documents into {len(texts)} chunks.")

    # Create the embeddings, pointing at the Ollama endpoint configured in the Dockerfile
    print("Creating embeddings...")
    embeddings = OllamaEmbeddings(
        model="nomic-embed-text",
        base_url=os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434"),
        show_progress=True,
    )

    # Create and persist the vector store
    print("Creating and persisting vector store...")
    db = Chroma.from_documents(texts, embeddings, persist_directory=PERSIST_DIRECTORY)
    print("Vector store created successfully.")
    db.persist()
    print("Vector store persisted.")

if __name__ == "__main__":
    build_vector_store()
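
After a build finishes, the persisted store can be sanity-checked outside the API by reloading it with the same embedding model, mirroring what rag_api.py does at startup. A minimal sketch, assuming the same /data/db path and Ollama endpoint (the test query is illustrative):

import os
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma

# Reload the persisted store with the same embedding model used to build it
embeddings = OllamaEmbeddings(
    model="nomic-embed-text",
    base_url=os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434"),
)
db = Chroma(persist_directory="/data/db", embedding_function=embeddings)

# Print the source and a preview of the top matches for a test query
for doc in db.similarity_search("test query", k=3):
    print(doc.metadata.get("source"), doc.page_content[:80])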

rag_service/requirements.txt

@@ -0,0 +1,7 @@
langchain
langchain-community
fastapi
uvicorn
chromadb
ollama
unstructured
pypdf