feat: Add RAG service and modelfile
This commit is contained in:
19
rag_modelfiles/gpt-oss-rag.Modelfile
Normal file
19
rag_modelfiles/gpt-oss-rag.Modelfile
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
# Ollama model definition for the rushg.me RAG assistant.
# Base model: locally served gpt-oss 20B.
FROM gpt-oss:20b

# Constrain the assistant to answer strictly from retrieved context,
# never from the base model's own knowledge.
SYSTEM """You are a specialized assistant for the rushg.me knowledge base. Your goal is to answer questions based *only* on the context provided to you. If the information needed to answer the question is not in the context, you must state that you do not have enough information to answer. Do not use any of your prior knowledge or external information."""

# Prompt layout: optional system block, then retrieved context, then the
# user's question slot, then the answer cue.
TEMPLATE """{{- if .System }}
### System:
{{ .System }}
{{- end }}

### Context:
{{ .Prompt }}

### User Question:
{{- /* This is a placeholder. The user's actual question should be appended here by the application. */}}

### Answer:
"""
23
rag_service/Dockerfile
Normal file
23
rag_service/Dockerfile
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
# Slim Python base image for the RAG service container.
FROM python:3.11-slim

# All service files live under /app.
WORKDIR /app

# Install dependencies before copying the source so Docker's layer cache
# skips the pip install when only application code changes.
COPY ./requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt

# Bring in the rest of the application source.
COPY . /app/

# The FastAPI app is served by uvicorn on this port.
EXPOSE 8000

# Default Ollama endpoint; override at runtime with -e OLLAMA_BASE_URL=...
ENV OLLAMA_BASE_URL=http://192.168.1.2:30068

# On startup: (re)build the vector store, then launch the retrieval API.
# Shell form is required so the && chaining works.
CMD sh -c "python rag_builder.py && uvicorn rag_api:app --host 0.0.0.0 --port 8000"
37
rag_service/rag_api.py
Normal file
37
rag_service/rag_api.py
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings

# Location of the Chroma database produced by rag_builder.py.
PERSIST_DIRECTORY = "/data/db"

# FastAPI application object; uvicorn serves this (see Dockerfile CMD).
app = FastAPI()

# Open the persisted vector store once at import time. The embedding model
# here must match the one used at build time, or query vectors won't line
# up with the stored document vectors.
embeddings = OllamaEmbeddings(model="nomic-embed-text")
db = Chroma(persist_directory=PERSIST_DIRECTORY, embedding_function=embeddings)
retriever = db.as_retriever()
|
class RetrieveRequest(BaseModel):
    """Request body for POST /retrieve: the free-text search query."""

    # Natural-language query to run against the vector store.
    query: str
|
class RetrieveResponse(BaseModel):
    """Response body for POST /retrieve: the concatenated context chunks."""

    # Matching document chunks joined into one string, separated by blank lines.
    context: str
|
@app.post("/retrieve", response_model=RetrieveResponse)
async def retrieve_context(request: RetrieveRequest):
    """
    Retrieves context from the vector store for a given query.

    Runs a similarity search against the persisted Chroma store and joins
    the matching document chunks into a single newline-separated string.

    Raises:
        HTTPException: 500 with the underlying error message if retrieval fails.
    """
    try:
        # `invoke` is the current Retriever entry point; the older
        # `get_relevant_documents` is deprecated since LangChain 0.1 and
        # scheduled for removal, so it breaks with unpinned requirements.
        docs = retriever.invoke(request.query)
        context = "\n\n".join(doc.page_content for doc in docs)
        return RetrieveResponse(context=context)
    except Exception as e:
        # NOTE(review): str(e) can expose internal details (paths, stack
        # context) to API clients — consider logging the exception and
        # returning a generic message instead.
        raise HTTPException(status_code=500, detail=str(e))
|
if __name__ == "__main__":
    # Local/dev entry point; inside Docker, uvicorn is launched by the CMD.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
55
rag_service/rag_builder.py
Normal file
55
rag_service/rag_builder.py
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
import os
import shutil

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader, UnstructuredFileLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma

# Directory where the source documents are mounted inside the container.
SOURCE_DIRECTORY = "/data/rag_source"
# Directory where the Chroma database is persisted (read by rag_api.py).
PERSIST_DIRECTORY = "/data/db"
# Character-based chunking parameters for the recursive text splitter.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200
|
def build_vector_store():
    """
    Builds the vector store from the documents in the source directory.

    Wipes any existing database under PERSIST_DIRECTORY, loads every file
    from SOURCE_DIRECTORY, splits the text into overlapping chunks, embeds
    them with the Ollama embedding model, and persists the Chroma store.
    """
    print("Starting to build vector store...")

    # Start from a clean slate so stale chunks never survive a rebuild.
    if os.path.exists(PERSIST_DIRECTORY):
        print(f"Removing old database from {PERSIST_DIRECTORY}")
        shutil.rmtree(PERSIST_DIRECTORY)

    # Load every file with an extension under the source tree.
    # NOTE(review): glob="**/*.*" skips extensionless files — confirm that
    # is intended for this knowledge base.
    print(f"Loading documents from {SOURCE_DIRECTORY}...")
    loader = DirectoryLoader(SOURCE_DIRECTORY, glob="**/*.*", show_progress=True, use_multithreading=True)
    documents = loader.load()
    if not documents:
        print("No documents found. Exiting.")
        return
    print(f"Loaded {len(documents)} documents.")

    # Chunk the documents so each embedding covers a bounded span of text.
    print("Splitting documents into chunks...")
    splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
    texts = splitter.split_documents(documents)
    print(f"Split documents into {len(texts)} chunks.")

    # Must match the embedding model used by rag_api.py at query time.
    print("Creating embeddings...")
    embedder = OllamaEmbeddings(model="nomic-embed-text", show_progress=True)

    # Build the Chroma store on disk, then persist it explicitly.
    print("Creating and persisting vector store...")
    db = Chroma.from_documents(texts, embedder, persist_directory=PERSIST_DIRECTORY)
    print("Vector store created successfully.")
    db.persist()
    print("Vector store persisted.")


if __name__ == "__main__":
    build_vector_store()
7
rag_service/requirements.txt
Normal file
7
rag_service/requirements.txt
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
langchain
# Required: rag_api.py and rag_builder.py import `langchain_community.*`,
# which ships as a separate distribution since langchain 0.2.
langchain-community
fastapi
uvicorn
chromadb
ollama
unstructured
pypdf
Reference in New Issue
Block a user