From e99d26ed96318eb828a76efde5598547cdbb1c06 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nielson=20Jann=C3=A9?= <nielsonj@gmail.com>
Date: Fri, 14 Mar 2025 23:21:34 +0100
Subject: [PATCH] Apply Ruff formatting

---
 generic_rag/app.py | 68 +++++++++++++++++++++++++++++-----------------
 1 file changed, 43 insertions(+), 25 deletions(-)

diff --git a/generic_rag/app.py b/generic_rag/app.py
index 9432ebd..69938b1 100644
--- a/generic_rag/app.py
+++ b/generic_rag/app.py
@@ -18,25 +18,41 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
 parser = argparse.ArgumentParser(description="A Sogeti Nederland Generic RAG demo.")
-parser.add_argument("-b", "--back-end", type=BackendType, choices=list(BackendType), default=BackendType.azure,
-                    help="(Cloud) back-end to use. In the case of local, a locally installed ollama will be used.")
-parser.add_argument("-p", "--pdf-data", type=Path, required=True, nargs="+",
-                    help="One or multiple paths to folders or files to use for retrieval. "
-                         "If a path is a folder, all files in the folder will be used. "
-                         "If a path is a file, only that file will be used. "
-                         "If the path is relative it will be relative to the current working directory.")
-parser.add_argument("--pdf-chunk_size", type=int, default=1000,
-                    help="The size of the chunks to split the text into.")
-parser.add_argument("--pdf-chunk_overlap", type=int, default=200,
-                    help="The overlap between the chunks.")
-parser.add_argument("--pdf-add-start-index", action="store_true",
-                    help="Add the start index to the metadata of the chunks.")
-parser.add_argument("-w", "--web-data", type=str, nargs="*", default=[],
-                    help="One or multiple URLs to use for retrieval.")
-parser.add_argument("--web-chunk-size", type=int, default=200,
-                    help="The size of the chunks to split the text into.")
-parser.add_argument("-c", "--chroma-db-location", type=Path, default=Path(".chroma_db"),
-                    help="file path to store or load a Chroma DB from/to.")
+parser.add_argument(
+    "-b",
+    "--back-end",
+    type=BackendType,
+    choices=list(BackendType),
+    default=BackendType.azure,
+    help="(Cloud) back-end to use. In the case of local, a locally installed ollama will be used.",
+)
+parser.add_argument(
+    "-p",
+    "--pdf-data",
+    type=Path,
+    required=True,
+    nargs="+",
+    help="One or multiple paths to folders or files to use for retrieval. "
+    "If a path is a folder, all files in the folder will be used. "
+    "If a path is a file, only that file will be used. "
+    "If the path is relative it will be relative to the current working directory.",
+)
+parser.add_argument("--pdf-chunk_size", type=int, default=1000, help="The size of the chunks to split the text into.")
+parser.add_argument("--pdf-chunk_overlap", type=int, default=200, help="The overlap between the chunks.")
+parser.add_argument(
+    "--pdf-add-start-index", action="store_true", help="Add the start index to the metadata of the chunks."
+)
+parser.add_argument(
+    "-w", "--web-data", type=str, nargs="*", default=[], help="One or multiple URLs to use for retrieval."
+)
+parser.add_argument("--web-chunk-size", type=int, default=200, help="The size of the chunks to split the text into.")
+parser.add_argument(
+    "-c",
+    "--chroma-db-location",
+    type=Path,
+    default=Path(".chroma_db"),
+    help="File path to store or load a Chroma DB from/to.",
+)
 parser.add_argument("-r", "--reset-chrome-db", action="store_true", help="Reset the Chroma DB.")
 args = parser.parse_args()
 
@@ -60,15 +76,16 @@ def generate(state: State):
     docs_content = "\n\n".join(doc.page_content for doc in state["context"])
     messages = prompt.invoke({"question": state["question"], "context": docs_content})
     response = llm.invoke(messages)
-
     return {"answer": response.content}
 
 
 @cl.on_chat_start
 async def on_chat_start():
-    vector_store = Chroma(collection_name="generic_rag",
-                          embedding_function=get_embedding_model(args.back_end),
-                          persist_directory=str(args.chroma_db_location))
+    vector_store = Chroma(
+        collection_name="generic_rag",
+        embedding_function=get_embedding_model(args.back_end),
+        persist_directory=str(args.chroma_db_location),
+    )
 
     cl.user_session.set("vector_store", vector_store)
     cl.user_session.set("emb_model", get_embedding_model(args.back_end))
@@ -103,8 +120,9 @@ async def set_starters():
         try:
             starters.append(cl.Starter(label=starter["label"], message=starter["message"]))
         except KeyError:
-            logging.warning("CHAINLIT_STARTERS environment is not a list with "
-                            "dictionaries containing 'label' and 'message' keys.")
+            logging.warning(
+                "CHAINLIT_STARTERS environment is not a list with dictionaries containing 'label' and 'message' keys."
+            )
 
     return starters