#!/usr/bin/env python3 """ Odoo 18 RAG Query API ====================== FastAPI service — embeds the question, retrieves top-K chunks from Qdrant, builds a prompt, and streams or returns the answer from Ollama. Endpoints: POST /ask blocking answer + sources POST /ask/stream Server-Sent Events token stream POST /agent/ask ActiveBlue AI agent integration GET /health connectivity check GET /modules list indexed modules GET /stats collection stats Run: uvicorn api.main:app --host 0.0.0.0 --port 8000 --reload """ import json import logging import os import httpx from fastapi import FastAPI, HTTPException from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import StreamingResponse from pydantic import BaseModel, Field from qdrant_client import QdrantClient from qdrant_client.models import Filter, FieldCondition, MatchValue from typing import AsyncIterator logging.basicConfig(level=logging.INFO) log = logging.getLogger("odoo18_rag") OLLAMA_URL = os.getenv("OLLAMA_URL", "http://miaai:11434") QDRANT_URL = os.getenv("QDRANT_URL", "http://qdrant:6333") EMBED_MODEL = os.getenv("EMBED_MODEL", "nomic-embed-text") GEN_MODEL = os.getenv("GEN_MODEL", "llama3.1") COLLECTION_NAME = os.getenv("COLLECTION_NAME", "odoo18_docs") TOP_K = 6 MAX_CONTEXT = 4000 SYSTEM_PROMPT = """\ You are an expert Odoo 18 consultant for ActiveBlue LLC, an MSP serving \ medical and dental practices. You have deep knowledge of all Odoo 18 modules: \ Finance, Accounting, Inventory, Manufacturing, Purchase, Sales, CRM, HR, \ Payroll, eCommerce, Helpdesk, Project, and more. Answer questions clearly and concisely using the provided documentation context. \ Use numbered steps when explaining procedures. Always mention the Odoo menu path \ when explaining navigation. If the context doesn't cover the question fully, say \ so and answer from general knowledge.\ """ # ── Models ──────────────────────────────────────────────────────────────────── class AskRequest(BaseModel): question: str = Field(..., min_length=5, max_length=2000) module: str | None = Field(None, description="Filter to one Odoo module") model: str | None = Field(None, description="Override the LLM model") top_k: int = Field(TOP_K, ge=1, le=20) temperature: float = Field(0.3, ge=0.0, le=1.0) class Source(BaseModel): url: str title: str module: str section: str class AskResponse(BaseModel): answer: str sources: list[Source] model: str question: str # ── App ─────────────────────────────────────────────────────────────────────── app = FastAPI( title="Odoo 18 RAG API", description="Retrieval-Augmented Generation over Odoo 18 documentation", version="1.0.0", ) app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"], ) qdrant = QdrantClient(url=QDRANT_URL) # ── Helpers ─────────────────────────────────────────────────────────────────── async def embed_query(text: str) -> list: async with httpx.AsyncClient(timeout=30) as client: resp = await client.post( f"{OLLAMA_URL}/api/embed", json={"model": EMBED_MODEL, "input": [text]}, ) resp.raise_for_status() embeddings = resp.json().get("embeddings", []) if not embeddings: raise HTTPException(500, "Empty embedding response from Ollama") return embeddings[0] def retrieve(vector: list, top_k: int, module: str | None) -> list: query_filter = None if module: query_filter = Filter( must=[FieldCondition(key="module", match=MatchValue(value=module))] ) results = qdrant.search( collection_name=COLLECTION_NAME, query_vector=vector, limit=top_k, query_filter=query_filter, with_payload=True, ) return [hit.payload for hit in results] def build_prompt(question: str, chunks: list) -> str: context_parts = [] char_count = 0 for i, chunk in enumerate(chunks, 1): block = ( f"[Source {i}: {chunk.get('title', '')} | {chunk.get('section', '')}]\n" f"{chunk.get('text', '')}\n" f"URL: {chunk.get('url', '')}\n" ) if char_count + len(block) > MAX_CONTEXT: break context_parts.append(block) char_count += len(block) return ( f"{SYSTEM_PROMPT}\n\n" f"## Relevant documentation\n\n" f"{'---'.join(context_parts)}\n\n" f"---\n\n" f"## Question\n\n{question}\n\n" f"## Answer\n" ) def dedupe_sources(chunks: list) -> list[Source]: seen = set() sources = [] for chunk in chunks: url = chunk.get("url", "") if url not in seen: seen.add(url) sources.append(Source( url=url, title=chunk.get("title", ""), module=chunk.get("module", ""), section=chunk.get("section", ""), )) return sources async def generate_blocking(prompt: str, model: str, temperature: float) -> str: async with httpx.AsyncClient(timeout=120) as client: resp = await client.post( f"{OLLAMA_URL}/api/generate", json={ "model": model, "prompt": prompt, "stream": False, "options": {"temperature": temperature, "num_ctx": 8192}, }, ) resp.raise_for_status() return resp.json().get("response", "").strip() async def generate_stream(prompt: str, model: str, temperature: float) -> AsyncIterator[str]: async with httpx.AsyncClient(timeout=120) as client: async with client.stream( "POST", f"{OLLAMA_URL}/api/generate", json={ "model": model, "prompt": prompt, "stream": True, "options": {"temperature": temperature, "num_ctx": 8192}, }, ) as resp: async for line in resp.aiter_lines(): if line.strip(): try: data = json.loads(line) token = data.get("response", "") if token: yield token if data.get("done"): break except json.JSONDecodeError: continue # ── Endpoints ───────────────────────────────────────────────────────────────── @app.get("/health") async def health(): status = {"api": "ok", "qdrant": "unknown", "ollama": "unknown"} try: info = qdrant.get_collection(COLLECTION_NAME) status["qdrant"] = f"ok ({info.points_count} vectors)" except Exception as e: status["qdrant"] = f"error: {e}" try: async with httpx.AsyncClient(timeout=5) as client: resp = await client.get(f"{OLLAMA_URL}/api/tags") models = [m["name"] for m in resp.json().get("models", [])] status["ollama"] = f"ok ({len(models)} models)" except Exception as e: status["ollama"] = f"error: {e}" return status @app.get("/modules") async def list_modules(): try: result = qdrant.scroll(collection_name=COLLECTION_NAME, limit=1000, with_payload=["module"]) modules = sorted(set(p.payload.get("module", "general") for p in result[0])) return {"modules": modules} except Exception as e: raise HTTPException(500, str(e)) @app.get("/stats") async def stats(): try: info = qdrant.get_collection(COLLECTION_NAME) return { "collection": COLLECTION_NAME, "vectors": info.points_count, "vector_size": 768, "embed_model": EMBED_MODEL, "gen_model": GEN_MODEL, } except Exception as e: raise HTTPException(500, str(e)) @app.post("/ask", response_model=AskResponse) async def ask(req: AskRequest): model = req.model or GEN_MODEL try: vector = await embed_query(req.question) except Exception as e: raise HTTPException(500, f"Embedding failed: {e}") chunks = retrieve(vector, req.top_k, req.module) if not chunks: raise HTTPException(404, "No relevant documentation found.") prompt = build_prompt(req.question, chunks) try: answer = await generate_blocking(prompt, model, req.temperature) except Exception as e: raise HTTPException(500, f"Generation failed: {e}") return AskResponse( answer=answer, sources=dedupe_sources(chunks), model=model, question=req.question, ) @app.post("/ask/stream") async def ask_stream(req: AskRequest): model = req.model or GEN_MODEL try: vector = await embed_query(req.question) except Exception as e: raise HTTPException(500, f"Embedding failed: {e}") chunks = retrieve(vector, req.top_k, req.module) if not chunks: raise HTTPException(404, "No relevant documentation found.") prompt = build_prompt(req.question, chunks) sources = [s.model_dump() for s in dedupe_sources(chunks)] async def sse(): async for token in generate_stream(prompt, model, req.temperature): yield f"data: {json.dumps({'type': 'token', 'content': token})}\n\n" yield f"data: {json.dumps({'type': 'sources', 'sources': sources})}\n\n" yield "data: [DONE]\n\n" return StreamingResponse(sse(), media_type="text/event-stream") @app.post("/agent/ask") async def agent_ask(req: AskRequest): """ActiveBlue AI agent endpoint — compatible with PeerBus message format.""" result = await ask(req) return { "answer": result.answer, "sources": [s.url for s in result.sources], "module_context": req.module, "model_used": result.model, }