# Define your application settings here.

chat_backend: local # Select the primary chat backend (azure, openai, google_vertex, aws, local); see the commented example below
emb_backend: local # Select the primary embedding backend (azure, openai, google_vertex, aws, local, huggingface)

use_conditional_graph: false # Use a conditional RAG graph that includes the historical chat context; if false, use a non-conditional graph without access to the current conversation
use_reranker: false # Use an LLM to rerank the retrieved context documents
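# Illustrative switch (not the shipped defaults): to run both stages on OpenAI instead of
# local models, the two selectors above could be set as follows, using the model names
# from the `openai` block further down.
# chat_backend: openai
# emb_backend: openai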
# --- Provider Specific Settings ---

azure:
  llm_endpoint: "https://example.openai.azure.com"
  llm_deployment_name: "gpt-4o-mini"
  llm_api_version: "2025-01-01-preview"
  emb_endpoint: "https://example.openai.azure.com"
  emb_deployment_name: "text-embedding-3-large"
  emb_api_version: "2023-05-15"

openai:
  chat_model: "gpt-4o-mini"
  emb_model: "text-embedding-3-large"

google_vertex:
  project_id: "your_gcp_project_id"
  location: "europe-west4"
  chat_model: "gemini-pro"
  emb_model: "textembedding-gecko@001"

aws:
  chat_model: "amazon.titan-llm-v1"
  emb_model: "amazon.titan-embed-text-v1"
  region: "us-east-1"

local: # Settings for local models (e.g., Ollama)
  chat_model: "llama3.1:8b"
  emb_model: "llama3.1:8b"

huggingface:
  chat_model: "meta-llama/Llama-2-7b-chat-hf"
  emb_model: "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"

# --- Data Processing Settings ---

pdf:
  # List of paths to PDF files or folders containing PDFs.
  # Pydantic converts these strings to pathlib.Path objects.
  data:
    - "C:/path/folder"
  unstructured: false # Use the unstructured PDF loader?
  chunk_size: 1000
  chunk_overlap: 200
  add_start_index: false

web:
  # List of URLs to scrape for data.
  data:
    - "https://www.example.nl/subdomain"
  chunk_size: 200

chroma_db:
  location: "/app/data/vector_database" # Override default DB path (default: '.chroma_db')
  reset: false # Reset the database on startup? (default: false)
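  # Illustrative alternative (an assumption based on the default noted above):
  # leave `location` unset, or point it at the default path explicitly:
  # location: ".chroma_db"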
|