diff --git a/config.example.yaml b/config.example.yaml
new file mode 100644
index 0000000..2b778ce
--- /dev/null
+++ b/config.example.yaml
@@ -0,0 +1,61 @@
+# Define your application settings here.
+
+chat_backend: local  # Primary chat backend: azure, openai, google_vertex, aws, or local
+emb_backend: local  # Primary embedding backend: azure, openai, google_vertex, aws, local, or huggingface
+
+use_conditional_graph: false  # Use a conditional RAG model with historical chat context, or a non-conditional model without access to the current conversation
+
+# --- Provider Specific Settings ---
+
+azure:
+  openai_api_key: "your_openai_api_key"
+  llm_endpoint: "https://example.openai.azure.com"
+  llm_deployment_name: "gpt-4o-mini"
+  llm_api_version: "2025-01-01-preview"
+  emb_endpoint: "https://example.openai.azure.com"  # Can be the same as the LLM endpoint
+  emb_deployment_name: "text-embedding-3-large"
+  emb_api_version: "2023-05-15"
+
+openai:
+  openai_api_key: "your_openai_api_key"
+  chat_model: "gpt-4o-mini"
+  emb_model: "text-embedding-3-large"
+
+google_vertex:
+  project_id: "your_gcp_project_id"
+  location: "europe-west4"
+  chat_model: "gemini-pro"
+  emb_model: "textembedding-gecko@001"
+
+aws:
+  region: "us-east-1"
+  credentials: "PATH_TO_YOUR_CREDENTIALS_FILE.json"
+
+local:  # Settings for local models (e.g., Ollama)
+  chat_model: "llama3.1:8b"
+  emb_model: "llama3.1:8b"
+
+huggingface:  # Settings specific to the HuggingFace embedding backend
+  emb_model: "sentence-transformers/paraphrase-MiniLM-L12-v2"
+
+# --- Data Processing Settings ---
+
+pdf:
+  # List of paths to PDF files or folders containing PDFs.
+  # Pydantic converts these strings to pathlib.Path objects.
+  data:
+    - "C:/path/folder"
+  unstructured: false  # Use the unstructured PDF loader?
+  chunk_size: 1000
+  chunk_overlap: 200
+  add_start_index: false
+
+web:
+  # List of URLs to scrape for data.
+  data:
+    - "https://www.example.nl/subdomain"
+  chunk_size: 200
+
+chroma_db:
+  location: "/app/data/vector_database"  # Override default DB path (default: '.chroma_db')
+  reset: true  # Reset the database on startup? (default: false)