🎨 Add .env only for API keys

2025-04-18 11:42:40 +02:00 · 2025-04-18 11:42:40 +02:00 · 74dd3b6947
commit 74dd3b6947
parent 26b374cf41
5 changed files with 46 additions and 33 deletions
--- a/.gitignore
+++ b/.gitignore
@ -167,6 +167,3 @@ chainlit.md
 # Chroma DB
 .chroma_db/
 # Settings
 config.yaml
--- a/README.md
+++ b/README.md
@ -14,6 +14,7 @@ A generic Retrieval Augmented Generation (RAG) demo from Sogeti Netherlands buil
      - [Local LLM (optional)](#local-llm-optional)
    - [Running generic RAG demo](#running-generic-rag-demo)
    - [config.yaml file](#configyaml-file)
    - [.env file](#env-file)
    - [Chainlit starters](#chainlit-starters)
  - [Dev details](#dev-details)
    - [Linting](#linting)
@ -96,11 +97,11 @@ python generic_rag/app.py -p data  # will work and parsers all pdf files in ./da
 python generic_rag/app.py --help  # will work and prints command line options
 ```
-Please configure your `config.yaml` file with your cloud provider (backend) of choice. See the `config.example.yaml` file as a starting point that holds all possible options.
+Please configure your `config.yaml` and `.env` files for your cloud provider (backend) of choice. See the sections below for more details.
 ### config.yaml file
-A config.yaml file is required to specify your API endpoints, local backends, and environment variables. Use the provided config.yaml.example as a starting point. Update the file according to your backend settings and project requirements.
+A `config.yaml` file is required to specify your API endpoints and local backends. Use the provided `config.example.yaml` as a starting point. Update the file according to your backend settings and project requirements.
 Key configuration points include:
 - Chat Backend: Choose among azure, openai, google_vertex, aws, or local.
@ -116,6 +117,15 @@ For more information on configuring Langchain endpoints and models, please see:
 > for local models we currently use Ollama
 ### .env file
 Set the API keys for your chosen cloud provider (backend). This ensures that your application can authenticate and interact with the services.
 ```text
 AZURE_OPENAI_API_KEY=your_azure_api_key
 OPENAI_API_KEY=your_openai_api_key
 ```
 ### Chainlit starters
 Chainlit suggestions (starters) can be set with the `CHAINLIT_STARTERS` environment variable.
--- a/config.example.yaml
+++ b/config.example.yaml
@ -8,16 +8,14 @@ use_conditional_graph: false  # Use a conditional RAG model with historical chat
 # --- Provider Specific Settings ---
 azure:
  openai_api_key: "your_openai_api_key"
  llm_endpoint: "https://example.openai.azure.com"
  llm_deployment_name: "gpt-4o-mini"
  llm_api_version: "2025-01-01-preview"
-  emb_endpoint: "https://example.openai.azure.com" # Can be same as LLM endpoint
+  emb_endpoint: "https://example.openai.azure.com"
  emb_deployment_name: "text-embedding-3-large"
  emb_api_version: "2023-05-15"
 openai:
  openai_api_key: "your_openai_api_key"
  chat_model: "gpt-4o-mini"
  emb_model: "text-embedding-3-large"
@ -31,15 +29,14 @@ aws:
  chat_model: "amazon.titan-llm-v1"
  emb_model: "amazon.titan-embed-text-v1"
  region: "us-east-1"
  credentials: "PATH_TO_YOUR_CREDENTIALS_FILE.json"
 local: # Settings for local models (e.g., Ollama)
  chat_model: "llama3.1:8b"
  emb_model: "llama3.1:8b"
-huggingface: # Settings specific to HuggingFace embedding backend
+huggingface:
  chat_model: "meta-llama/Llama-2-7b-chat-hf"
-  emb_model: "sentence-transformers/paraphrase-MiniLM-L12-v2"
+  emb_model: "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
 # --- Data Processing Settings ---
@ -61,4 +58,4 @@ web:
 chroma_db:
  location: "/app/data/vector_database" # Override default DB path (default: '.chroma_db')
-  reset: True # Reset the database on startup? (default: false)
+  reset: False # Reset the database on startup? (default: false)
--- a/generic_rag/backend/models.py
+++ b/generic_rag/backend/models.py
@ -1,4 +1,5 @@
 import logging
 import os
 from langchain_core.embeddings import Embeddings
 from langchain_core.language_models.chat_models import BaseChatModel
@ -39,19 +40,26 @@ def get_chat_model(settings: AppSettings) -> BaseChatModel:
            raise ValueError(
                "Azure configuration requires 'llm_endpoint', 'llm_deployment_name', and 'llm_api_version'."
            )
        if "AZURE_OPENAI_API_KEY" not in os.environ:
            raise ValueError(
                "The environment variable 'AZURE_OPENAI_API_KEY' is missing. Please set the variable in your '.env' file before running the script."
            )
        return AzureChatOpenAI(
            azure_endpoint=settings.azure.llm_endpoint,
            azure_deployment=settings.azure.llm_deployment_name,
            openai_api_version=settings.azure.llm_api_version,
            openai_api_key=settings.azure.openai_api_key.get_secret_value() if settings.azure.openai_api_key else None,
        )
    if settings.chat_backend == ChatBackend.openai:
        if not settings.openai:
            raise ValueError("OpenAI chat backend selected, but 'openai' configuration section is missing.")
-        if not settings.openai.api_key or not settings.openai.chat_model:
+        if not settings.openai.chat_model:
-            raise ValueError("OpenAI configuration requires 'api_key' and 'chat_model'.")
+            raise ValueError("OpenAI configuration requires 'chat_model'.")
-        return ChatOpenAI(model=settings.openai.chat_model, openai_api_key=settings.openai.api_key.get_secret_value())
+        if "OPENAI_API_KEY" not in os.environ:
            raise ValueError(
                "The environment variable 'OPENAI_API_KEY' is missing. Please set the variable in your '.env' file before running the script."
            )
        return ChatOpenAI(model=settings.openai.chat_model)
    if settings.chat_backend == ChatBackend.google_vertex:
        if not settings.google_vertex:
@ -63,7 +71,7 @@ def get_chat_model(settings: AppSettings) -> BaseChatModel:
            or not settings.google_vertex.project_id
            or not settings.google_vertex.location
        ):
-            raise ValueError("Google Vertex configuration requires 'chat_model' and 'project_id'.")
+            raise ValueError("Google Vertex configuration requires 'chat_model', 'project_id' and 'location'.")
        return ChatVertexAI(
            model_name=settings.google_vertex.chat_model,
            project=settings.google_vertex.project_id,
@ -74,10 +82,10 @@ def get_chat_model(settings: AppSettings) -> BaseChatModel:
        if not settings.aws:
            raise ValueError("AWS Bedrock chat backend selected, but 'aws' configuration section is missing.")
        if not settings.aws.chat_model or not settings.aws.region_name:
-            raise ValueError("AWS Bedrock configuration requires 'chat_model' and 'region_name'")
+            raise ValueError("AWS Bedrock configuration requires 'chat_model' and 'region'")
        return ChatBedrock(
            model_id=settings.aws.chat_model,
-            region_name=settings.aws.region_name,
+            region_name=settings.aws.region,
        )
    if settings.chat_backend == ChatBackend.local:
@ -132,21 +140,26 @@ def get_embedding_model(settings: AppSettings) -> Embeddings:
            raise ValueError(
                "Azure configuration requires 'emb_endpoint', 'emb_deployment_name', and 'emb_api_version'."
            )
        if "AZURE_OPENAI_API_KEY" not in os.environ:
            raise ValueError(
                "The environment variable 'AZURE_OPENAI_API_KEY' is missing. Please set the variable in your '.env' file before running the script."
            )
        return AzureOpenAIEmbeddings(
            azure_endpoint=settings.azure.emb_endpoint,
            azure_deployment=settings.azure.emb_deployment_name,
            openai_api_version=settings.azure.emb_api_version,
            openai_api_key=settings.azure.openai_api_key.get_secret_value() if settings.azure.openai_api_key else None,
        )
    if settings.emb_backend == EmbeddingBackend.openai:
        if not settings.openai:
            raise ValueError("OpenAI embedding backend selected, but 'openai' configuration section is missing.")
-        if not settings.openai.api_key:
+        if not settings.openai.emb_model:
-            raise ValueError("OpenAI configuration requires 'api_key'.")
+            raise ValueError("OpenAI configuration requires 'emb_model'.")
-        return OpenAIEmbeddings(
+        if "OPENAI_API_KEY" not in os.environ:
-            model=settings.openai.emb_model, openai_api_key=settings.openai.api_key.get_secret_value()
+            raise ValueError(
                "The environment variable 'OPENAI_API_KEY' is missing. Please set the variable in your '.env' file before running the script."
            )
        return OpenAIEmbeddings(model=settings.openai.emb_model)
    if settings.emb_backend == EmbeddingBackend.google_vertex:
        if not settings.google_vertex:
@ -168,9 +181,9 @@ def get_embedding_model(settings: AppSettings) -> Embeddings:
    if settings.emb_backend == EmbeddingBackend.aws:
        if not settings.aws:
            raise ValueError("AWS Bedrock embedding backend selected, but 'aws' configuration section is missing.")
-        if not settings.aws.emb_model or not settings.aws.region_name:
+        if not settings.aws.emb_model or not settings.aws.region:
-            raise ValueError("AWS Bedrock configuration requires 'emb_model' and 'region_name'")
+            raise ValueError("AWS Bedrock configuration requires 'emb_model' and 'region'")
-        return BedrockEmbeddings(model_id=settings.aws.emb_model, region_name=settings.aws.region_name)
+        return BedrockEmbeddings(model_id=settings.aws.emb_model, region_name=settings.aws.region)
    if settings.emb_backend == EmbeddingBackend.local:
        if not settings.local:
--- a/generic_rag/parsers/config.py
+++ b/generic_rag/parsers/config.py
@ -6,7 +6,6 @@ from pydantic import (
    BaseModel,
    Field,
    ValidationError,
    SecretStr,
 )
 import sys
@ -38,7 +37,6 @@ class EmbeddingBackend(str, Enum):
 class AzureSettings(BaseModel):
    """Azure specific settings."""
    openai_api_key: Optional[SecretStr] = None
    llm_endpoint: Optional[str] = None
    llm_deployment_name: Optional[str] = None
    llm_api_version: Optional[str] = None
@ -50,7 +48,6 @@ class AzureSettings(BaseModel):
 class OpenAISettings(BaseModel):
    """OpenAI specific settings."""
    api_key: Optional[SecretStr] = None
    chat_model: Optional[str] = None
    emb_model: Optional[str] = None
@ -84,7 +81,6 @@ class HuggingFaceSettings(BaseModel):
    chat_model: Optional[str] = None
    emb_model: Optional[str] = None
    api_token: Optional[SecretStr] = None
 class PdfSettings(BaseModel):