# Define your application settings here.
chat_backend: local  # Select the primary chat backend (azure, openai, google_vertex, aws, local)
emb_backend: local   # Select the primary embedding backend (azure, openai, google_vertex, aws, local, huggingface)
use_conditional_graph: false  # If true, use a conditional RAG graph with historical chat context; if false, use a non-conditional graph without access to the current conversation
use_reranker: false  # Use an LLM to rerank the retrieved context documents

# --- Provider Specific Settings ---
azure:
  llm_endpoint: "https://example.openai.azure.com"
  llm_deployment_name: "gpt-4o-mini"
  llm_api_version: "2025-01-01-preview"
  emb_endpoint: "https://example.openai.azure.com"
  emb_deployment_name: "text-embedding-3-large"
  emb_api_version: "2023-05-15"

openai:
  chat_model: "gpt-4o-mini"
  emb_model: "text-embedding-3-large"

google_vertex:
  project_id: "your_gcp_project_id"
  location: "europe-west4"
  chat_model: "gemini-pro"
  emb_model: "textembedding-gecko@001"

aws:
  chat_model: "amazon.titan-llm-v1"
  emb_model: "amazon.titan-embed-text-v1"
  region: "us-east-1"

local:
  # Settings for local models (e.g., Ollama)
  chat_model: "llama3.1:8b"
  emb_model: "llama3.1:8b"

huggingface:
  chat_model: "meta-llama/Llama-2-7b-chat-hf"
  emb_model: "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"

# --- Data Processing Settings ---
#pdf:
#  # List of paths to PDF files or folders containing PDFs.
#  # Pydantic converts these strings to pathlib.Path objects.
#  data:
#    - "C:/path/folder"
#  unstructured: false  # Use the unstructured PDF loader?
#  chunk_size: 1000
#  chunk_overlap: 200
#  add_start_index: false
#
#web:
#  # List of URLs to scrape for data.
#  data:
#    - "https://www.example.nl/subdomain"
#  chunk_size: 200

text:
  data:
    - "../transcriptions"
  unstructured: true
  chunk_size: 500
  chunk_overlap: 100
  add_start_index: false

chroma_db:
  location: "/app/data/vector_database"  # Override default DB path (default: '.chroma_db')
  reset: false  # Reset the database on startup? (default: false)
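
# --- Example: mixing providers (commented out; illustrative only) ---
# The chat and embedding backends above are selected independently, so one
# provider can serve chat completions while another serves embeddings.
# A minimal sketch, assuming only the backend names listed at the top of
# this file are accepted:
#
#chat_backend: azure   # chat served by the Azure deployment configured above
#emb_backend: local    # embeddings still computed by the local Ollama model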