Alexander Svan
2025-04-08 14:42:03 +02:00
commit 1ad9ec82f2
8 changed files with 240 additions and 0 deletions

.devcontainer/devcontainer.json Normal file

@@ -0,0 +1,14 @@
{
  "name": "Existing Docker Compose (Extend)",
  "dockerComposeFile": [
    "../docker-compose.yml",
    "docker-compose.yml"
  ],
  "service": "python-loader",
  "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}",
  "features": {
    "ghcr.io/devcontainers/features/python:1": {}
  },
  "remoteUser": "root"
}

.devcontainer/docker-compose.yml Normal file

@@ -0,0 +1,7 @@
version: '3.8'
services:
  python-loader:
    volumes:
      - ..:/workspaces:cached
    command: sleep infinity

.gitignore vendored Normal file

@@ -0,0 +1,3 @@
ollama_models
postgres_db
chroma_db

README.md Normal file

@@ -0,0 +1,50 @@
# Vector Database POC
## What it is
- Development environment using Devcontainers
- Stores document embeddings in ChromaDB
- Retrieves relevant documents for a given query
- Generates answers using Ollama (the core flow is sketched below)
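
Condensed from `script.py` in this commit, the core flow is three calls: embed the query with Ollama, fetch the nearest documents from ChromaDB, and prompt the LLM with the retrieved text. A minimal sketch (the `chroma` and `ollama` host names come from `docker-compose.yml`):
```python
import chromadb
import requests

OLLAMA = "http://ollama:11434/api"
collection = chromadb.HttpClient(host="chroma", port=8000).get_or_create_collection("rag_documents")

def answer(query: str) -> str:
    # 1. Embed the query with Ollama's embeddings endpoint.
    emb = requests.post(f"{OLLAMA}/embeddings",
                        json={"model": "mxbai-embed-large", "prompt": query}).json()["embedding"]
    # 2. Retrieve the closest documents from ChromaDB.
    docs = collection.query(query_embeddings=[emb], n_results=2)["metadatas"][0]
    # 3. Ask the LLM to answer using the retrieved documents as context.
    context = "\n".join(f"- {d['text']}" for d in docs)
    prompt = f"Use the following documents to answer the question:\n\n{context}\n\nQuestion: {query}\nAnswer:"
    return requests.post(f"{OLLAMA}/generate",
                         json={"model": "gemma3", "prompt": prompt, "stream": False}).json()["response"]
```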
## Getting started
Download some models
```bash
ollama pull mxbai-embed-large # Used for embeddings
ollama pull gemma3 # Used as LLM
```
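To confirm the pull worked, you can ask Ollama which models it has locally (a minimal check, assuming the compose stack is up and port 11434 is published on localhost):
```python
import requests

# GET /api/tags lists the models known to the local Ollama instance.
tags = requests.get("http://localhost:11434/api/tags").json()
print([m["name"] for m in tags.get("models", [])])
# Expect to see mxbai-embed-large and gemma3 in the list.
```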
Download the Python dependencies
```bash
pip install -r requirements.txt
```
## Run
First, seed the database with a few documents
```bash
python script.py seed
```
Then run a search
```bash
python script.py search --query "What is NAF and how can it help?"
```
## Roadmap
- Streaming responses (for real-time chat)
- Metadata filtering (e.g., retrieving only recent docs; see the sketch below)
- Improved ranking (reranking results for better accuracy)
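
The first two items map onto existing knobs: Chroma's `query` accepts a `where` metadata filter, and Ollama's generate endpoint emits newline-delimited JSON chunks when `stream` is true. A rough sketch, not wired into `script.py` yet; the `year` metadata field is hypothetical and would need to be added at seed time:
```python
import json

import chromadb
import requests

OLLAMA_URL = "http://ollama:11434/api"
collection = chromadb.HttpClient(host="chroma", port=8000).get_or_create_collection("rag_documents")

def get_embedding(text, model="mxbai-embed-large"):
    r = requests.post(f"{OLLAMA_URL}/embeddings", json={"model": model, "prompt": text})
    r.raise_for_status()
    return r.json()["embedding"]

query = "What is NAF and how can it help?"

# Metadata filtering: only consider documents whose (hypothetical) "year" field is recent.
results = collection.query(
    query_embeddings=[get_embedding(query)],
    n_results=2,
    where={"year": {"$gte": 2024}},
)

# Streaming: with stream=True, Ollama returns one JSON chunk per line as it generates.
with requests.post(
    f"{OLLAMA_URL}/generate",
    json={"model": "gemma3", "prompt": f"Answer briefly: {query}", "stream": True},
    stream=True,
) as resp:
    for line in resp.iter_lines():
        if line:
            print(json.loads(line).get("response", ""), end="", flush=True)
```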

docker-compose.nvidia.yml Normal file

@@ -0,0 +1,42 @@
services:
  chroma:
    image: chromadb/chroma:latest
    ports:
      - "8000:8000"
    volumes:
      - ./chroma_db_data:/db
    environment:
      - POSTGRES_HOST=chroma-db
      - POSTGRES_PORT=5432
      - POSTGRES_USER=chroma
      - POSTGRES_PASSWORD=chroma
      - POSTGRES_DB=chroma
    depends_on:
      - chroma-db
  chroma-db:
    image: postgres:15-alpine
    environment:
      POSTGRES_USER: chroma
      POSTGRES_PASSWORD: chroma
      POSTGRES_DB: chroma
    volumes:
      - ./chroma_db_data:/var/lib/postgresql/data
  ollama:
    image: ollama/ollama:latest
    ports:
      - "11434:11434"
    volumes:
      - ./ollama_models:/root/.ollama
    devices:
      - /dev/nvidia0:/dev/nvidia0
    environment:
      - OLLAMA_HOST=0.0.0.0
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]

docker-compose.yml Normal file

@@ -0,0 +1,37 @@
services:
  chroma:
    image: chromadb/chroma:latest
    environment:
      - POSTGRES_HOST=chroma-db
      - POSTGRES_PORT=5432
      - POSTGRES_USER=chroma
      - POSTGRES_PASSWORD=chroma
      - POSTGRES_DB=chroma
      - CHROMA_SERVER_HOST=0.0.0.0
      - CHROMA_SERVER_HTTP_PORT=8000
    volumes:
      - ./chroma_db:/data
    depends_on:
      - chroma-db
  chroma-db:
    image: postgres:15-alpine
    environment:
      POSTGRES_USER: chroma
      POSTGRES_PASSWORD: chroma
      POSTGRES_DB: chroma
      PGDATA: /var/lib/postgresql/data/pgdata
    volumes:
      - ./postgres_db:/var/lib/postgresql/data
  ollama:
    image: ollama/ollama:latest
    ports:
      - "11434:11434"
    volumes:
      - ./ollama_models:/root/.ollama
    environment:
      - OLLAMA_HOST=0.0.0.0
  python-loader:
    image: mcr.microsoft.com/devcontainers/base:jammy

requirements.txt Normal file

@@ -0,0 +1,2 @@
chromadb
requests

script.py Normal file

@@ -0,0 +1,85 @@
import argparse

import chromadb
import requests

OLLAMA_URL = "http://ollama:11434/api"
CHROMA_COLLECTION_NAME = "rag_documents"

# Connect to the ChromaDB server and get (or create) the collection
client = chromadb.HttpClient(host="chroma", port=8000)
collection = client.get_or_create_collection(name=CHROMA_COLLECTION_NAME)


def get_embedding(text, embedding_model):
    """Generate an embedding using Ollama."""
    response = requests.post(f"{OLLAMA_URL}/embeddings", json={"model": embedding_model, "prompt": text})
    if response.status_code == 200:
        return response.json().get("embedding")
    else:
        raise Exception(f"Failed to get embedding: {response.text}")


def seed_database(embedding_model):
    """Seed the ChromaDB collection with example documents."""
    documents = [
        "NAF is an architectural framework.",
        "C3 Taxonomy helps define capabilities.",
        "Architectural frameworks help organizations define architecture in a standardized way with different viewpoints.",
    ]
    for i, doc in enumerate(documents):
        embedding = get_embedding(doc, embedding_model)
        collection.add(ids=[str(i)], embeddings=[embedding], metadatas=[{"text": doc}])
        print(f"Added document {i}: {doc}")
    print("Database seeding complete.")


def search(query, embedding_model, llm_model, top_k=2):
    """Retrieve similar documents and generate an answer using an LLM."""
    query_embedding = get_embedding(query, embedding_model)
    results = collection.query(query_embeddings=[query_embedding], n_results=top_k)
    retrieved_docs = [doc["text"] for doc_list in results["metadatas"] for doc in doc_list]

    if not retrieved_docs:
        print("No relevant documents found.")
        return

    # Construct the LLM prompt from the retrieved documents
    prompt = "Use the following documents to answer the question:\n\n"
    for doc in retrieved_docs:
        prompt += f"- {doc}\n"
    prompt += f"\nQuestion: {query}\nAnswer:"

    response = requests.post(f"{OLLAMA_URL}/generate", json={"model": llm_model, "prompt": prompt, "stream": False})
    print("RAW RESPONSE:", response.text)

    if response.status_code == 200:
        try:
            data = response.json()
            answer = data.get("response", "No response field found.")
        except requests.exceptions.JSONDecodeError:
            answer = response.text  # Fall back to the raw text
        print("\nSearch Results:\n")
        for doc in retrieved_docs:
            print(f"Retrieved: {doc}")
        print("\nGenerated Answer:\n", answer)
    else:
        print(f"Failed to generate response: {response.status_code} - {response.text}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("command", choices=["seed", "search"], help="Command to run")
    parser.add_argument("--query", type=str, help="Query text for searching")
    parser.add_argument("--embedding_model", type=str, default="mxbai-embed-large", help="Embedding model")
    parser.add_argument("--llm_model", type=str, default="gemma3", help="LLM model for generating responses")
    args = parser.parse_args()

    if args.command == "seed":
        seed_database(args.embedding_model)
    elif args.command == "search":
        if not args.query:
            print("Please provide a query with --query")
        else:
            search(args.query, args.embedding_model, args.llm_model)