diff --git a/indexer/indexer.py b/indexer/indexer.py
index d4b41c4..8f19f16 100644
--- a/indexer/indexer.py
+++ b/indexer/indexer.py
@@ -39,7 +39,7 @@ QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
 EMBED_MODEL = os.getenv("EMBED_MODEL", "nomic-embed-text")
 COLLECTION_NAME = os.getenv("COLLECTION_NAME", "odoo18_docs")
 VECTOR_SIZE = 768
-RAW_DATA_FILE = Path("../data/raw/odoo18_docs_raw.jsonl")
+RAW_DATA_FILE = Path("/app/data/raw/odoo18_docs_raw.jsonl")
 BATCH_SIZE = 32
 CHUNK_SIZE = 512
 CHUNK_OVERLAP = 64
diff --git a/scraper/scraper.py b/scraper/scraper.py
index fff77b5..6ed8bd9 100644
--- a/scraper/scraper.py
+++ b/scraper/scraper.py
@@ -31,7 +31,7 @@ log = logging.getLogger(__name__)
 
 BASE_URL = "https://www.odoo.com/documentation/18.0"
 SITEMAP_URL = f"{BASE_URL}/sitemap.xml"
-OUTPUT_DIR = Path("../data/raw")
+OUTPUT_DIR = Path("/app/data/raw")
 OUTPUT_FILE = OUTPUT_DIR / "odoo18_docs_raw.jsonl"
 DELAY_SECONDS = 1.2
 MAX_RETRIES = 3