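"""Application configuration: Pydantic models and a YAML loader.

Defines models for selecting chat/embedding backends, holding provider-specific
credentials, and tuning PDF/web ingestion and Chroma DB storage, plus
load_settings() to read and validate a config.yaml file into AppSettings.
"""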
import sys
from enum import Enum
from pathlib import Path
from typing import List, Optional

import yaml
from pydantic import (
    BaseModel,
    Field,
    SecretStr,
    ValidationError,
)


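# Both backend enums mix in `str`, so members compare equal to (and format as)
# their plain string values, e.g. ChatBackend.azure == "azure".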
class ChatBackend(str, Enum):
    azure = "azure"
    openai = "openai"
    google_vertex = "google_vertex"
    aws = "aws"
    local = "local"

    def __str__(self):
        return self.value


class EmbeddingBackend(str, Enum):
    azure = "azure"
    openai = "openai"
    google_vertex = "google_vertex"
    aws = "aws"
    local = "local"
    huggingface = "huggingface"

    def __str__(self):
        return self.value


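# NOTE: SecretStr fields keep credentials masked in repr()/str() output; call
# .get_secret_value() when the raw key is needed.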
class AzureSettings(BaseModel):
    """Azure-specific settings."""

    openai_api_key: Optional[SecretStr] = None
    llm_endpoint: Optional[str] = None
    llm_deployment_name: Optional[str] = None
    llm_api_version: Optional[str] = None
    emb_endpoint: Optional[str] = None
    emb_deployment_name: Optional[str] = None
    emb_api_version: Optional[str] = None


class OpenAISettings(BaseModel):
    """OpenAI-specific settings."""

    api_key: Optional[SecretStr] = None


class GoogleVertexSettings(BaseModel):
    """Google Vertex-specific settings."""

    project_id: Optional[str] = None
    location: Optional[str] = None
    chat_model: Optional[str] = None
    emb_model: Optional[str] = None


class AwsSettings(BaseModel):
    """AWS-specific settings (e.g., for Bedrock)."""

    access_key_id: Optional[SecretStr] = None
    secret_access_key: Optional[SecretStr] = None
    region_name: Optional[str] = None


class LocalSettings(BaseModel):
    """Local-backend settings (e.g., Ollama models)."""

    chat_model: Optional[str] = None
    emb_model: Optional[str] = None


class HuggingFaceSettings(BaseModel):
    """HuggingFace-specific settings (if different from local embeddings)."""

    emb_model: Optional[str] = None
    api_token: Optional[SecretStr] = None


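# The list-valued fields below use default_factory so each instance gets its
# own empty list rather than a shared mutable default.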
class PdfSettings(BaseModel):
    """PDF processing settings."""

    data: List[Path] = Field(default_factory=list)
    unstructured: bool = Field(default=False)
    chunk_size: int = Field(default=1000)
    chunk_overlap: int = Field(default=200)
    add_start_index: bool = Field(default=False)


class WebSettings(BaseModel):
    """Web data processing settings."""

    data: List[str] = Field(default_factory=list)
    chunk_size: int = Field(default=200)


class ChromaDbSettings(BaseModel):
    """Chroma DB settings."""

    location: Path = Field(default=Path(".chroma_db"))
    reset: bool = Field(default=False)


class AppSettings(BaseModel):
    """
    Main application settings model.

    Populated from a YAML file whose structure mirrors the nested
    models defined above.
    """

    # --- Top-level settings ---
    chat_backend: ChatBackend = Field(default=ChatBackend.local)
    emb_backend: EmbeddingBackend = Field(default=EmbeddingBackend.huggingface)
    use_conditional_graph: bool = Field(default=False)

    # --- Provider-specific settings ---
    azure: Optional[AzureSettings] = None
    openai: Optional[OpenAISettings] = None
    google_vertex: Optional[GoogleVertexSettings] = None
    aws: Optional[AwsSettings] = None
    local: Optional[LocalSettings] = None
    huggingface: Optional[HuggingFaceSettings] = None  # Separate HF config if needed

    # --- Data processing settings ---
    pdf: PdfSettings = Field(default_factory=PdfSettings)
    web: WebSettings = Field(default_factory=WebSettings)
    chroma_db: ChromaDbSettings = Field(default_factory=ChromaDbSettings)


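# Illustrative config.yaml layout accepted by AppSettings. Keys mirror the model
# fields above; the values shown are placeholders, not the project's real config:
#
#   chat_backend: local
#   emb_backend: huggingface
#   use_conditional_graph: false
#   local:
#     chat_model: llama3
#     emb_model: nomic-embed-text
#   pdf:
#     data:
#       - data/example.pdf
#     chunk_size: 1000
#     chunk_overlap: 200
#   web:
#     data:
#       - https://example.com/article
#   chroma_db:
#     location: .chroma_db
#     reset: false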
# --- Configuration Loading Function ---
def load_settings(config_path: Path = Path("config.yaml")) -> AppSettings:
    """
    Loads settings from a YAML file and validates them using Pydantic models.

    Args:
        config_path: The path to the configuration YAML file.

    Returns:
        An instance of AppSettings containing the loaded configuration.

    Raises:
        FileNotFoundError: If the config file does not exist.
        yaml.YAMLError: If the file is not valid YAML.
        ValidationError: If the data in the file doesn't match the AppSettings model.
    """
    if not config_path.is_file():
        print(f"Error: Configuration file not found at '{config_path}'", file=sys.stderr)
        raise FileNotFoundError(f"Configuration file not found: {config_path}")

    print(f"--- Loading settings from '{config_path}' ---")
    try:
        with open(config_path, "r", encoding="utf-8") as f:
            config_data = yaml.safe_load(f)
        if config_data is None:
            config_data = {}

        settings = AppSettings(**config_data)
        print("--- Settings loaded and validated successfully ---")
        return settings

    except yaml.YAMLError as e:
        print(f"Error parsing YAML file '{config_path}':\n {e}", file=sys.stderr)
        raise
    except ValidationError as e:
        print(f"Error validating configuration from '{config_path}':\n{e}", file=sys.stderr)
        raise
    except Exception as e:
        print(f"An unexpected error occurred while loading settings from '{config_path}': {e}", file=sys.stderr)
        raise


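# Example usage sketch (illustrative only; assumes a config.yaml like the
# commented example above sits next to this module):
if __name__ == "__main__":
    try:
        settings = load_settings(Path("config.yaml"))
    except (FileNotFoundError, yaml.YAMLError, ValidationError):
        sys.exit(1)
    print(f"Chat backend: {settings.chat_backend}")
    print(f"Embedding backend: {settings.emb_backend}")
    print(f"Chroma DB location: {settings.chroma_db.location}")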