"""Application configuration models and a YAML-backed loader.

Defines the Pydantic models that describe the configuration file and
``load_settings``, which reads a YAML file and validates it against
``AppSettings``.
"""

import sys
from enum import Enum
from pathlib import Path
from typing import List, Optional, Union

import yaml
from pydantic import (
    BaseModel,
    Field,
    ValidationError,
    SecretStr,
)


class ChatBackend(str, Enum):
    """Names accepted for the ``chat_backend`` setting."""

    azure = "azure"
    openai = "openai"
    google_vertex = "google_vertex"
    aws = "aws"
    local = "local"

    def __str__(self):
        # Render as the plain value ("azure") instead of "ChatBackend.azure".
        return self.value


class EmbeddingBackend(str, Enum):
    """Names accepted for the ``emb_backend`` setting."""

    azure = "azure"
    openai = "openai"
    google_vertex = "google_vertex"
    aws = "aws"
    local = "local"
    huggingface = "huggingface"

    def __str__(self):
        # Render as the plain value ("huggingface") instead of the member name.
        return self.value


class AzureSettings(BaseModel):
    """Azure specific settings."""

    openai_api_key: Optional[SecretStr] = None
    llm_endpoint: Optional[str] = None
    llm_deployment_name: Optional[str] = None
    llm_api_version: Optional[str] = None
    emb_endpoint: Optional[str] = None
    emb_deployment_name: Optional[str] = None
    emb_api_version: Optional[str] = None


class OpenAISettings(BaseModel):
    """OpenAI specific settings."""

    api_key: Optional[SecretStr] = None


class GoogleVertexSettings(BaseModel):
    """Google Vertex specific settings."""

    project_id: Optional[str] = None
    location: Optional[str] = None
    chat_model: Optional[str] = None
    emb_model: Optional[str] = None


class AwsSettings(BaseModel):
    """AWS specific settings (e.g., for Bedrock)."""

    access_key_id: Optional[SecretStr] = None
    secret_access_key: Optional[SecretStr] = None
    region_name: Optional[str] = None


class LocalSettings(BaseModel):
    """Local backend specific settings (e.g., Ollama models)."""

    chat_model: Optional[str] = None
    emb_model: Optional[str] = None


class HuggingFaceSettings(BaseModel):
    """HuggingFace specific settings (if different from local embeddings)."""

    emb_model: Optional[str] = None
    api_token: Optional[SecretStr] = None


class PdfSettings(BaseModel):
    """PDF processing settings."""

    data: List[Path] = Field(default_factory=list)
    unstructured: bool = Field(default=False)
    chunk_size: int = Field(default=1000)
    chunk_overlap: int = Field(default=200)
    add_start_index: bool = Field(default=False)


class WebSettings(BaseModel):
    """Web data processing settings."""

    data: List[str] = Field(default_factory=list)
    chunk_size: int = Field(default=200)


class ChromaDbSettings(BaseModel):
    """Chroma DB settings."""

    location: Path = Field(default=Path(".chroma_db"))
    reset: bool = Field(default=False)


class AppSettings(BaseModel):
    """
    Main application settings model.

    Loads configuration from a YAML file using the structure defined
    by the nested models.
    """

    # --- Top-level settings ---
    chat_backend: ChatBackend = Field(default=ChatBackend.local)
    emb_backend: EmbeddingBackend = Field(default=EmbeddingBackend.huggingface)
    use_conditional_graph: bool = Field(default=False)

    # --- Provider-specific settings ---
    azure: Optional[AzureSettings] = None
    openai: Optional[OpenAISettings] = None
    google_vertex: Optional[GoogleVertexSettings] = None
    aws: Optional[AwsSettings] = None
    local: Optional[LocalSettings] = None
    huggingface: Optional[HuggingFaceSettings] = None  # Separate HF config if needed

    # --- Data processing settings ---
    pdf: PdfSettings = Field(default_factory=PdfSettings)
    web: WebSettings = Field(default_factory=WebSettings)
    chroma_db: ChromaDbSettings = Field(default_factory=ChromaDbSettings)


# --- Configuration Loading Function ---
def load_settings(config_path: Union[str, Path] = Path("config.yaml")) -> AppSettings:
    """
    Loads settings from a YAML file and validates them using Pydantic models.

    Progress messages are printed to stdout and error messages to stderr;
    every error is re-raised after being reported.

    Args:
        config_path: The path to the configuration YAML file. A plain string
            is accepted and converted to a ``Path``.

    Returns:
        An instance of AppSettings containing the loaded configuration.

    Raises:
        FileNotFoundError: If the config file does not exist.
        yaml.YAMLError: If the file is not valid YAML.
        ValidationError: If the data in the file doesn't match the AppSettings model.
        TypeError: If the top level of the YAML document is not a mapping.
    """
    # Normalise first so callers may pass either a str or a Path.
    config_path = Path(config_path)

    if not config_path.is_file():
        print(
            f"Error: Configuration file not found at '{config_path}'",
            file=sys.stderr,
        )
        raise FileNotFoundError(f"Configuration file not found: {config_path}")

    print(f"--- Loading settings from '{config_path}' ---")
    try:
        with open(config_path, "r", encoding="utf-8") as f:
            config_data = yaml.safe_load(f)

        # Nothing was parsed (e.g. an empty file): fall back to model defaults.
        if config_data is None:
            config_data = {}

        # AppSettings(**config_data) needs a mapping; fail with a clear message
        # instead of the opaque error that ** unpacking produces on a
        # list/scalar. TypeError is what the unpacking itself would raise.
        if not isinstance(config_data, dict):
            raise TypeError(
                "Top level of the configuration must be a mapping, "
                f"got {type(config_data).__name__}"
            )

        settings = AppSettings(**config_data)
        print("--- Settings loaded and validated successfully ---")
        return settings
    except yaml.YAMLError as e:
        print(f"Error parsing YAML file '{config_path}':\n {e}", file=sys.stderr)
        raise
    except ValidationError as e:
        print(
            f"Error validating configuration from '{config_path}':\n{e}",
            file=sys.stderr,
        )
        raise
    except Exception as e:
        # Boundary handler: report anything else, then let it propagate.
        print(
            f"An unexpected error occurred while loading settings from '{config_path}': {e}",
            file=sys.stderr,
        )
        raise