From 74dd3b6947230e41f671956d1d53eedc17c44ee1 Mon Sep 17 00:00:00 2001 From: Ruben Lucas Date: Fri, 18 Apr 2025 11:42:40 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=8E=A8=20Add=20.env=20only=20for=20API=20?= =?UTF-8?q?keys?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 5 +--- README.md | 14 ++++++++-- config.example.yaml => config.yaml | 11 +++----- generic_rag/backend/models.py | 45 +++++++++++++++++++----------- generic_rag/parsers/config.py | 4 --- 5 files changed, 46 insertions(+), 33 deletions(-) rename config.example.yaml => config.yaml (80%) diff --git a/.gitignore b/.gitignore index 6578b8f..d7ad83d 100644 --- a/.gitignore +++ b/.gitignore @@ -166,7 +166,4 @@ chainlit.md .files/ # Chroma DB -.chroma_db/ - -# Settings -config.yaml \ No newline at end of file +.chroma_db/ \ No newline at end of file diff --git a/README.md b/README.md index 9980400..d560fde 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,7 @@ A generic Retrieval Augmented Generation (RAG) demo from Sogeti Netherlands buil - [Local LLM (optional)](#local-llm-optional) - [Running generic RAG demo](#running-generic-rag-demo) - [config.yaml file](#configyaml-file) + - [.env file](#env-file) - [Chainlit starters](#chainlit-starters) - [Dev details](#dev-details) - [Linting](#linting) @@ -96,11 +97,11 @@ python generic_rag/app.py -p data # will work and parsers all pdf files in ./da python generic_rag/app.py --help # will work and prints command line options ``` -Please configure your `config.yaml` file with your cloud provider (backend) of choice. See the `config.example.yaml` file as a starting point that holds all possible options. +Please configure your `config.yaml` and `.env` file with your cloud provider (backend) of choice. See the sections below for more details. ### config.yaml file -A config.yaml file is required to specify your API endpoints, local backends, and environment variables. 
Use the provided config.yaml.example as a starting point. Update the file according to your backend settings and project requirements. +A config.yaml file is required to specify your API endpoints and local backends. Use the provided `config.yaml` as a starting point. Update the file according to your backend settings and project requirements. Key configuration points include: - Chat Backend: Choose among azure, openai, google_vertex, aws, or local. @@ -116,6 +117,15 @@ For more information on configuring Langchain endpoints and models, please see: > for local models we currently use Ollama +### .env file + +Set the API keys for your chosen cloud provider (backend). This ensures that your application can authenticate and interact with the services. + +```text +AZURE_OPENAI_API_KEY=your_azure_api_key +OPENAI_API_KEY=your_openai_api_key +``` + ### Chainlit starters Chainlit suggestions (starters) can be set with the `CHAINLIT_STARTERS` environment variable. diff --git a/config.example.yaml b/config.yaml similarity index 80% rename from config.example.yaml rename to config.yaml index 12bd131..4c877d8 100644 --- a/config.example.yaml +++ b/config.yaml @@ -8,16 +8,14 @@ use_conditional_graph: false # Use a conditional RAG model with historical chat # --- Provider Specific Settings --- azure: - openai_api_key: "your_openai_api_key" llm_endpoint: "https://example.openai.azure.com" llm_deployment_name: "gpt-4o-mini" llm_api_version: "2025-01-01-preview" - emb_endpoint: "https://example.openai.azure.com" # Can be same as LLM endpoint + emb_endpoint: "https://example.openai.azure.com" emb_deployment_name: "text-embedding-3-large" emb_api_version: "2023-05-15" openai: - openai_api_key: "your_openai_api_key" chat_model: "gpt-4o-mini" emb_model: "text-embedding-3-large" @@ -31,15 +29,14 @@ aws: chat_model: "amazon.titan-llm-v1" emb_model: "amazon.titan-embed-text-v1" region: "us-east-1" - credentials: "PATH_TO_YOUR_CREDENTIALS_FILE.json" local: # Settings for local
models (e.g., Ollama) chat_model: "llama3.1:8b" emb_model: "llama3.1:8b" -huggingface: # Settings specific to HuggingFace embedding backend +huggingface: chat_model: "meta-llama/Llama-2-7b-chat-hf" - emb_model: "sentence-transformers/paraphrase-MiniLM-L12-v2" + emb_model: "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2" # --- Data Processing Settings --- @@ -61,4 +58,4 @@ web: chroma_db: location: "/app/data/vector_database" # Override default DB path (default: '.chroma_db') - reset: True # Reset the database on startup? (default: false) + reset: False # Reset the database on startup? (default: false) diff --git a/generic_rag/backend/models.py b/generic_rag/backend/models.py index cf21a3e..3f2d0d6 100644 --- a/generic_rag/backend/models.py +++ b/generic_rag/backend/models.py @@ -1,4 +1,5 @@ import logging +import os from langchain_core.embeddings import Embeddings from langchain_core.language_models.chat_models import BaseChatModel @@ -39,19 +40,26 @@ def get_chat_model(settings: AppSettings) -> BaseChatModel: raise ValueError( "Azure configuration requires 'llm_endpoint', 'llm_deployment_name', and 'llm_api_version'." ) + if "AZURE_OPENAI_API_KEY" not in os.environ: + raise ValueError( + "The environment variable 'AZURE_OPENAI_API_KEY' is missing. Please set the variable in your '.env' file before running the script." 
+ ) return AzureChatOpenAI( azure_endpoint=settings.azure.llm_endpoint, azure_deployment=settings.azure.llm_deployment_name, openai_api_version=settings.azure.llm_api_version, - openai_api_key=settings.azure.openai_api_key.get_secret_value() if settings.azure.openai_api_key else None, ) if settings.chat_backend == ChatBackend.openai: if not settings.openai: raise ValueError("OpenAI chat backend selected, but 'openai' configuration section is missing.") - if not settings.openai.api_key or not settings.openai.chat_model: - raise ValueError("OpenAI configuration requires 'api_key' and 'chat_model'.") - return ChatOpenAI(model=settings.openai.chat_model, openai_api_key=settings.openai.api_key.get_secret_value()) + if not settings.openai.chat_model: + raise ValueError("OpenAI configuration requires 'chat_model'.") + if "OPENAI_API_KEY" not in os.environ: + raise ValueError( + "The environment variable 'OPENAI_API_KEY' is missing. Please set the variable in your '.env' file before running the script." 
+ ) + return ChatOpenAI(model=settings.openai.chat_model) if settings.chat_backend == ChatBackend.google_vertex: if not settings.google_vertex: @@ -63,7 +71,7 @@ def get_chat_model(settings: AppSettings) -> BaseChatModel: or not settings.google_vertex.project_id or not settings.google_vertex.location ): - raise ValueError("Google Vertex configuration requires 'chat_model' and 'project_id'.") + raise ValueError("Google Vertex configuration requires 'chat_model', 'project_id' and 'location'.") return ChatVertexAI( model_name=settings.google_vertex.chat_model, project=settings.google_vertex.project_id, @@ -74,10 +82,10 @@ def get_chat_model(settings: AppSettings) -> BaseChatModel: if not settings.aws: raise ValueError("AWS Bedrock chat backend selected, but 'aws' configuration section is missing.") - if not settings.aws.chat_model or not settings.aws.region_name: - raise ValueError("AWS Bedrock configuration requires 'chat_model' and 'region_name'") + if not settings.aws.chat_model or not settings.aws.region: + raise ValueError("AWS Bedrock configuration requires 'chat_model' and 'region'") return ChatBedrock( model_id=settings.aws.chat_model, - region_name=settings.aws.region_name, + region_name=settings.aws.region, ) if settings.chat_backend == ChatBackend.local: @@ -132,21 +140,26 @@ def get_embedding_model(settings: AppSettings) -> Embeddings: raise ValueError( "Azure configuration requires 'emb_endpoint', 'emb_deployment_name', and 'emb_api_version'." ) + if "AZURE_OPENAI_API_KEY" not in os.environ: + raise ValueError( + "The environment variable 'AZURE_OPENAI_API_KEY' is missing. Please set the variable in your '.env' file before running the script."
+ ) return AzureOpenAIEmbeddings( azure_endpoint=settings.azure.emb_endpoint, azure_deployment=settings.azure.emb_deployment_name, openai_api_version=settings.azure.emb_api_version, - openai_api_key=settings.azure.openai_api_key.get_secret_value() if settings.azure.openai_api_key else None, ) if settings.emb_backend == EmbeddingBackend.openai: if not settings.openai: raise ValueError("OpenAI embedding backend selected, but 'openai' configuration section is missing.") - if not settings.openai.api_key: - raise ValueError("OpenAI configuration requires 'api_key'.") - return OpenAIEmbeddings( - model=settings.openai.emb_model, openai_api_key=settings.openai.api_key.get_secret_value() - ) + if not settings.openai.emb_model: + raise ValueError("OpenAI configuration requires 'emb_model'.") + if "OPENAI_API_KEY" not in os.environ: + raise ValueError( + "The environment variable 'OPENAI_API_KEY' is missing. Please set the variable in your '.env' file before running the script." + ) + return OpenAIEmbeddings(model=settings.openai.emb_model) if settings.emb_backend == EmbeddingBackend.google_vertex: if not settings.google_vertex: @@ -168,9 +181,9 @@ def get_embedding_model(settings: AppSettings) -> Embeddings: if settings.emb_backend == EmbeddingBackend.aws: if not settings.aws: raise ValueError("AWS Bedrock embedding backend selected, but 'aws' configuration section is missing.") - if not settings.aws.emb_model or not settings.aws.region_name: - raise ValueError("AWS Bedrock configuration requires 'emb_model' and 'region_name'") - return BedrockEmbeddings(model_id=settings.aws.emb_model, region_name=settings.aws.region_name) + if not settings.aws.emb_model or not settings.aws.region: + raise ValueError("AWS Bedrock configuration requires 'emb_model' and 'region'") + return BedrockEmbeddings(model_id=settings.aws.emb_model, region_name=settings.aws.region) if settings.emb_backend == EmbeddingBackend.local: if not settings.local: diff --git a/generic_rag/parsers/config.py 
b/generic_rag/parsers/config.py index 6ce2402..7ef2dee 100644 --- a/generic_rag/parsers/config.py +++ b/generic_rag/parsers/config.py @@ -6,7 +6,6 @@ from pydantic import ( BaseModel, Field, ValidationError, - SecretStr, ) import sys @@ -38,7 +37,6 @@ class EmbeddingBackend(str, Enum): class AzureSettings(BaseModel): """Azure specific settings.""" - openai_api_key: Optional[SecretStr] = None llm_endpoint: Optional[str] = None llm_deployment_name: Optional[str] = None llm_api_version: Optional[str] = None @@ -50,7 +48,6 @@ class AzureSettings(BaseModel): class OpenAISettings(BaseModel): """OpenAI specific settings.""" - api_key: Optional[SecretStr] = None chat_model: Optional[str] = None emb_model: Optional[str] = None @@ -84,7 +81,6 @@ class HuggingFaceSettings(BaseModel): chat_model: Optional[str] = None emb_model: Optional[str] = None - api_token: Optional[SecretStr] = None class PdfSettings(BaseModel):