Philosophy-RAG-demo/generic_rag/parsers/config.py
2025-04-16 16:06:58 +02:00

181 lines
5.1 KiB
Python

import yaml
from pathlib import Path
from typing import List, Optional
from enum import Enum
from pydantic import (
BaseModel,
Field,
ValidationError,
SecretStr,
)
import sys
class ChatBackend(str, Enum):
    """Identifiers of the chat backends that can be selected in the config.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    azure = "azure"
    openai = "openai"
    google_vertex = "google_vertex"
    aws = "aws"
    local = "local"
    huggingface = "huggingface"

    def __str__(self) -> str:
        # Render as the bare value ("azure") rather than "ChatBackend.azure".
        return str(self.value)
class EmbeddingBackend(str, Enum):
    """Identifiers of the embedding backends that can be selected in the config.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    azure = "azure"
    openai = "openai"
    google_vertex = "google_vertex"
    aws = "aws"
    local = "local"
    huggingface = "huggingface"

    def __str__(self) -> str:
        # Render as the bare value ("aws") rather than "EmbeddingBackend.aws".
        return str(self.value)
class AzureSettings(BaseModel):
    """Settings for the Azure backend; every field is optional and defaults to None."""

    # API key, held as a SecretStr rather than a plain string.
    openai_api_key: Optional[SecretStr] = Field(default=None)

    # Fields prefixed ``llm_`` (presumably the chat model deployment).
    llm_endpoint: Optional[str] = Field(default=None)
    llm_deployment_name: Optional[str] = Field(default=None)
    llm_api_version: Optional[str] = Field(default=None)

    # Fields prefixed ``emb_`` (presumably the embedding model deployment).
    emb_endpoint: Optional[str] = Field(default=None)
    emb_deployment_name: Optional[str] = Field(default=None)
    emb_api_version: Optional[str] = Field(default=None)
class OpenAISettings(BaseModel):
    """Settings for the OpenAI backend; every field is optional and defaults to None."""

    # API key, held as a SecretStr rather than a plain string.
    api_key: Optional[SecretStr] = Field(default=None)
    # Model names for chat and for embeddings.
    chat_model: Optional[str] = Field(default=None)
    emb_model: Optional[str] = Field(default=None)
class GoogleVertexSettings(BaseModel):
    """Settings for the Google Vertex backend; every field is optional and defaults to None."""

    project_id: Optional[str] = Field(default=None)
    location: Optional[str] = Field(default=None)
    # Model names for chat and for embeddings.
    chat_model: Optional[str] = Field(default=None)
    emb_model: Optional[str] = Field(default=None)
class AwsSettings(BaseModel):
    """Settings for the AWS backend (e.g., Bedrock); every field defaults to None."""

    # Model names for chat and for embeddings.
    chat_model: Optional[str] = Field(default=None)
    emb_model: Optional[str] = Field(default=None)
    region: Optional[str] = Field(default=None)
class LocalSettings(BaseModel):
    """Settings for the local backend (e.g., Ollama models); every field defaults to None."""

    # Model names for chat and for embeddings.
    chat_model: Optional[str] = Field(default=None)
    emb_model: Optional[str] = Field(default=None)
class HuggingFaceSettings(BaseModel):
    """Settings for the HuggingFace backend (if different from local embeddings)."""

    # Model names for chat and for embeddings.
    chat_model: Optional[str] = Field(default=None)
    emb_model: Optional[str] = Field(default=None)
    # Access token, held as a SecretStr rather than a plain string.
    api_token: Optional[SecretStr] = Field(default=None)
class PdfSettings(BaseModel):
    """Options controlling how PDF sources are processed."""

    # PDF locations; a factory is used so each instance gets its own list.
    data: List[Path] = Field(default_factory=list)
    unstructured: bool = False
    # Chunking parameters (units are not defined in this module).
    chunk_size: int = 1000
    chunk_overlap: int = 200
    add_start_index: bool = False
class WebSettings(BaseModel):
    """Options controlling how web sources are processed."""

    # Web sources as strings; a factory is used so each instance gets its own list.
    data: List[str] = Field(default_factory=list)
    # Chunking parameter (units are not defined in this module).
    chunk_size: int = 200
class ChromaDbSettings(BaseModel):
    """Options for the Chroma DB store."""

    # Defaults to the relative path ".chroma_db".
    location: Path = Path(".chroma_db")
    reset: bool = False
class AppSettings(BaseModel):
    """
    Root model of the application configuration.

    Combines the backend selection, the optional per-provider sections and
    the data-processing sections into one validated object. Every field has
    a default, so the model can be built from an empty mapping.
    """

    # --- Backend selection and top-level switches ---
    chat_backend: ChatBackend = ChatBackend.local
    emb_backend: EmbeddingBackend = EmbeddingBackend.huggingface
    use_conditional_graph: bool = False

    # --- Per-provider sections (None when absent from the input) ---
    azure: Optional[AzureSettings] = Field(default=None)
    openai: Optional[OpenAISettings] = Field(default=None)
    google_vertex: Optional[GoogleVertexSettings] = Field(default=None)
    aws: Optional[AwsSettings] = Field(default=None)
    local: Optional[LocalSettings] = Field(default=None)
    # Separate HuggingFace section, if one is needed.
    huggingface: Optional[HuggingFaceSettings] = Field(default=None)

    # --- Data-processing sections (populated with defaults when absent) ---
    pdf: PdfSettings = Field(default_factory=PdfSettings)
    web: WebSettings = Field(default_factory=WebSettings)
    chroma_db: ChromaDbSettings = Field(default_factory=ChromaDbSettings)
# --- Configuration Loading Function ---
def load_settings(config_path: Path = Path("config.yaml")) -> AppSettings:
    """
    Load application settings from a YAML file and validate them.

    Progress messages are printed to stdout and error messages to stderr;
    every error is re-raised after being reported.

    Args:
        config_path: The path to the configuration YAML file. A plain string
            (or any other path-like object) is accepted and converted to
            a ``Path``.

    Returns:
        An instance of AppSettings containing the loaded configuration.
        An empty YAML document yields an AppSettings built from defaults.

    Raises:
        FileNotFoundError: If the config file does not exist.
        yaml.YAMLError: If the file is not valid YAML.
        TypeError: If the top-level YAML value is not a mapping (e.g. a list
            or a bare scalar), or if its keys are not strings.
        ValidationError: If the data in the file doesn't match the AppSettings model.
    """
    # Normalise first so string paths work with the Path API used below.
    config_path = Path(config_path)

    if not config_path.is_file():
        print(f"Error: Configuration file not found at '{config_path}'", file=sys.stderr)
        raise FileNotFoundError(f"Configuration file not found: {config_path}")

    print(f"--- Loading settings from '{config_path}' ---")
    try:
        with open(config_path, "r", encoding="utf-8") as f:
            config_data = yaml.safe_load(f)

        if config_data is None:
            # An empty document parses to None; fall back to model defaults.
            config_data = {}
        elif not isinstance(config_data, dict):
            # Fail with a clear message instead of the opaque error that
            # ``**config_data`` would raise for a list or scalar.
            raise TypeError(
                f"Top-level YAML content in '{config_path}' must be a mapping, "
                f"got {type(config_data).__name__}"
            )

        settings = AppSettings(**config_data)
    except yaml.YAMLError as e:
        print(f"Error parsing YAML file '{config_path}':\n {e}", file=sys.stderr)
        raise
    except (ValidationError, TypeError) as e:
        # TypeError covers structural problems (non-mapping root, non-string
        # keys) that surface before pydantic validation runs.
        print(f"Error validating configuration from '{config_path}':\n{e}", file=sys.stderr)
        raise
    except Exception as e:
        print(f"An unexpected error occurred while loading settings from '{config_path}': {e}", file=sys.stderr)
        raise
    else:
        print("--- Settings loaded and validated successfully ---")
        return settings