---
# Define your application settings here.

# Select the primary chat backend (azure, openai, google_vertex, aws, local).
chat_backend: local
# Select the primary embedding backend
# (azure, openai, google_vertex, aws, local, huggingface).
emb_backend: local
# Use a conditional RAG model with historical chat context, or a
# non-conditional model without access to the current conversation.
use_conditional_graph: false

# --- Provider Specific Settings ---

azure:
  llm_endpoint: "https://example.openai.azure.com"
  llm_deployment_name: "gpt-4o-mini"
  llm_api_version: "2025-01-01-preview"
  emb_endpoint: "https://example.openai.azure.com"
  emb_deployment_name: "text-embedding-3-large"
  emb_api_version: "2023-05-15"

openai:
  chat_model: "gpt-4o-mini"
  emb_model: "text-embedding-3-large"

google_vertex:
  project_id: "your_gcp_project_id"
  location: "europe-west4"
  chat_model: "gemini-pro"
  emb_model: "textembedding-gecko@001"

aws:
  chat_model: "amazon.titan-llm-v1"
  emb_model: "amazon.titan-embed-text-v1"
  region: "us-east-1"

# Settings for local models (e.g., Ollama).
local:
  chat_model: "llama3.1:8b"
  emb_model: "llama3.1:8b"

huggingface:
  chat_model: "meta-llama/Llama-2-7b-chat-hf"
  emb_model: "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"

# --- Data Processing Settings ---

pdf:
  # List of paths to PDF files or folders containing PDFs.
  # Pydantic converts these strings to pathlib.Path objects.
  data:
    - "C:/path/folder"
  unstructured: false  # Use the unstructured PDF loader?
  chunk_size: 1000
  chunk_overlap: 200
  add_start_index: false

web:
  # List of URLs to scrape for data.
  data:
    - "https://www.example.nl/subdomain"
  chunk_size: 200

chroma_db:
  location: "/app/data/vector_database"  # Override default DB path (default: '.chroma_db')
  reset: false  # Reset the database on startup? (default: false)