# Philosophy-RAG-demo/config.yaml
# 2025-04-18 15:33:35 +02:00
#
# 71 lines
# 2.0 KiB
# YAML

# Application-wide settings.

# Primary chat backend.
# Options: azure, openai, google_vertex, aws, local
chat_backend: local

# Primary embedding backend.
# Options: azure, openai, google_vertex, aws, local, huggingface
emb_backend: local

# true:  use a conditional RAG model with historical chat context.
# false: use a non-conditional model without access to the current conversation.
use_conditional_graph: false

# Use an LLM to rerank the retrieved context documents.
use_reranker: false
# --- Provider Specific Settings ---
# Azure OpenAI settings. The child keys must be nested under `azure:`;
# at column 0 they become unrelated top-level keys and `azure` parses as null.
azure:
  # Chat (LLM) deployment. The endpoint is an example placeholder.
  llm_endpoint: "https://example.openai.azure.com"
  llm_deployment_name: "gpt-4o-mini"
  llm_api_version: "2025-01-01-preview"
  # Embedding deployment. The endpoint is an example placeholder.
  emb_endpoint: "https://example.openai.azure.com"
  emb_deployment_name: "text-embedding-3-large"
  emb_api_version: "2023-05-15"
# OpenAI settings. Nested under `openai:` so that `chat_model` / `emb_model`
# do not collide with the same-named keys of the other providers.
openai:
  chat_model: "gpt-4o-mini"
  emb_model: "text-embedding-3-large"
# Google Vertex AI settings. Nested under `google_vertex:` so the keys are
# scoped to this provider.
google_vertex:
  # Placeholder value; replace with a real GCP project ID.
  project_id: "your_gcp_project_id"
  location: "europe-west4"
  chat_model: "gemini-pro"
  emb_model: "textembedding-gecko@001"
# AWS settings. Nested under `aws:` so the keys are scoped to this provider.
aws:
  # NOTE(review): verify this model ID against the provider's model catalog
  # before selecting the aws chat backend; it was not checked here.
  chat_model: "amazon.titan-llm-v1"
  emb_model: "amazon.titan-embed-text-v1"
  region: "us-east-1"
# Settings for local models (e.g., Ollama). Nested under `local:` so the keys
# are scoped to this provider.
local:
  chat_model: "llama3.1:8b"
  # NOTE(review): the embedding model is the same as the chat model; confirm
  # this is intentional rather than a dedicated embedding model.
  emb_model: "llama3.1:8b"
# Hugging Face settings. Nested under `huggingface:` so the keys are scoped to
# this provider.
huggingface:
  # NOTE(review): the options listed for `chat_backend` do not include
  # huggingface (only `emb_backend` lists it); confirm chat_model is consumed.
  chat_model: "meta-llama/Llama-2-7b-chat-hf"
  emb_model: "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
# --- Data Processing Settings ---
#pdf:
#  # List of paths to PDF files or folders containing PDFs.
#  # Pydantic converts these strings to pathlib.Path objects.
#  data:
#    - "C:/path/folder"
#  unstructured: false  # Use the unstructured PDF loader?
#  chunk_size: 1000
#  chunk_overlap: 200
#  add_start_index: false
#
#web:
#  # List of URLs to scrape for data.
#  data:
#    - "https://www.example.nl/subdomain"
#  chunk_size: 200
# Plain-text data source settings. The child keys must be nested under `text:`
# (and the list item under `data:`) for this to parse as one mapping.
text:
  # List of paths to load text from.
  # NOTE(review): presumably files or folders, as in the commented pdf example;
  # what a relative path resolves against is not visible here. Confirm.
  data:
    - "../transcriptions"
  unstructured: true
  chunk_size: 500
  chunk_overlap: 100
  add_start_index: false
# Chroma vector database settings. Nested under `chroma_db:` so the keys are
# scoped to this section.
chroma_db:
  # Override default DB path (default: '.chroma_db').
  location: "/app/data/vector_database"
  # Reset the database on startup? (default: false)
  # Written as canonical lowercase `false`, matching the other booleans here.
  reset: false