feat: Add RAG service and modelfile

Rushabh Gosar
2026-01-07 18:05:08 -08:00
parent 5d1a0ee72b
commit 8d272f1c08
5 changed files with 141 additions and 0 deletions

Modelfile

@@ -0,0 +1,19 @@
FROM gpt-oss:20b
# Set the system prompt
SYSTEM """You are a specialized assistant for the rushg.me knowledge base. Your goal is to answer questions based *only* on the context provided to you. If the information needed to answer the question is not in the context, you must state that you do not have enough information to answer. Do not use any of your prior knowledge or external information."""
# Set the template for how the prompt will be structured
TEMPLATE """{{- if .System }}
### System:
{{ .System }}
{{- end }}
### Context:
{{ .Prompt }}
### User Question:
{{- /* This is a placeholder. The user's actual question should be appended here by the application. */}}
### Answer:
"""

rag_service/Dockerfile

@@ -0,0 +1,23 @@
# Use an official Python runtime as a parent image
FROM python:3.11-slim
# Set the working directory in the container
WORKDIR /app
# Copy the requirements file into the container at /app
COPY ./requirements.txt /app/
# Install any needed packages specified in requirements.txt
RUN pip install --no-cache-dir -r requirements.txt
# Copy the rest of the application's code into the container at /app
COPY . /app/
# Make port 8000 available to the world outside this container
EXPOSE 8000
# Define environment variables
ENV OLLAMA_BASE_URL=http://192.168.1.2:30068
# Run the command to build the vector store and then start the API
CMD sh -c "python rag_builder.py && uvicorn rag_api:app --host 0.0.0.0 --port 8000"

rag_service/rag_api.py

@@ -0,0 +1,37 @@
import os

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings

# Configuration
PERSIST_DIRECTORY = "/data/db"
# Use the Ollama endpoint configured in the Dockerfile, falling back to the local default
OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434")

# Initialize FastAPI app
app = FastAPI()

# Load the vector store
embeddings = OllamaEmbeddings(model="nomic-embed-text", base_url=OLLAMA_BASE_URL)
db = Chroma(persist_directory=PERSIST_DIRECTORY, embedding_function=embeddings)
retriever = db.as_retriever()
class RetrieveRequest(BaseModel):
    query: str

class RetrieveResponse(BaseModel):
    context: str
@app.post("/retrieve", response_model=RetrieveResponse)
async def retrieve_context(request: RetrieveRequest):
    """
    Retrieves context from the vector store for a given query.
    """
    try:
        docs = retriever.get_relevant_documents(request.query)
        context = "\n\n".join([doc.page_content for doc in docs])
        return RetrieveResponse(context=context)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
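
With the container running, the /retrieve endpoint can be exercised directly. A short sketch, assuming the service is reachable on the port exposed by the Dockerfile and that requests is installed on the client side (both are assumptions, not part of this commit):

import requests

# Ask the RAG service for context relevant to a query
resp = requests.post(
    "http://localhost:8000/retrieve",
    json={"query": "What projects are documented on rushg.me?"},
    timeout=30,
)
resp.raise_for_status()
# The response body is {"context": "..."} with the retrieved chunks joined by blank lines
print(resp.json()["context"])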

rag_service/rag_builder.py

@@ -0,0 +1,55 @@
import os
import shutil
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader, UnstructuredFileLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings
# Configuration
SOURCE_DIRECTORY = "/data/rag_source"
PERSIST_DIRECTORY = "/data/db"
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200
def build_vector_store():
    """
    Builds the vector store from the documents in the source directory.
    """
    print("Starting to build vector store...")

    # Clean up old database
    if os.path.exists(PERSIST_DIRECTORY):
        print(f"Removing old database from {PERSIST_DIRECTORY}")
        shutil.rmtree(PERSIST_DIRECTORY)

    # Load the documents
    print(f"Loading documents from {SOURCE_DIRECTORY}...")
    loader = DirectoryLoader(SOURCE_DIRECTORY, glob="**/*.*", show_progress=True, use_multithreading=True)
    documents = loader.load()
    if not documents:
        print("No documents found. Exiting.")
        return
    print(f"Loaded {len(documents)} documents.")

    # Split the documents into chunks
    print("Splitting documents into chunks...")
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
    texts = text_splitter.split_documents(documents)
    print(f"Split documents into {len(texts)} chunks.")

    # Create the embeddings, pointing at the Ollama endpoint configured in the Dockerfile
    print("Creating embeddings...")
    embeddings = OllamaEmbeddings(
        model="nomic-embed-text",
        base_url=os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434"),
        show_progress=True,
    )

    # Create and persist the vector store
    print("Creating and persisting vector store...")
    db = Chroma.from_documents(texts, embeddings, persist_directory=PERSIST_DIRECTORY)
    print("Vector store created successfully.")
    db.persist()
    print("Vector store persisted.")

if __name__ == "__main__":
    build_vector_store()
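
After a build finishes, the persisted store can be sanity-checked outside the API by reloading it with the same embedding model, mirroring what rag_api.py does at startup. A minimal sketch, assuming the same /data/db path and Ollama endpoint (the test query is illustrative):

import os
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma

# Reload the persisted store with the same embedding model used to build it
embeddings = OllamaEmbeddings(
    model="nomic-embed-text",
    base_url=os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434"),
)
db = Chroma(persist_directory="/data/db", embedding_function=embeddings)

# Print the source and a preview of the top matches for a test query
for doc in db.similarity_search("test query", k=3):
    print(doc.metadata.get("source"), doc.page_content[:80])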

rag_service/requirements.txt

@@ -0,0 +1,7 @@
langchain
langchain-community
fastapi
uvicorn
chromadb
ollama
unstructured
pypdf