From e99d26ed96318eb828a76efde5598547cdbb1c06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nielson=20Jann=C3=A9?= Date: Fri, 14 Mar 2025 23:21:34 +0100 Subject: [PATCH] Apply RUFF linting --- generic_rag/app.py | 68 +++++++++++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 25 deletions(-) diff --git a/generic_rag/app.py b/generic_rag/app.py index 9432ebd..69938b1 100644 --- a/generic_rag/app.py +++ b/generic_rag/app.py @@ -18,25 +18,41 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) parser = argparse.ArgumentParser(description="A Sogeti Nederland Generic RAG demo.") -parser.add_argument("-b", "--back-end", type=BackendType, choices=list(BackendType), default=BackendType.azure, - help="(Cloud) back-end to use. In the case of local, a locally installed ollama will be used.") -parser.add_argument("-p", "--pdf-data", type=Path, required=True, nargs="+", - help="One or multiple paths to folders or files to use for retrieval. " - "If a path is a folder, all files in the folder will be used. " - "If a path is a file, only that file will be used. " - "If the path is relative it will be relative to the current working directory.") -parser.add_argument("--pdf-chunk_size", type=int, default=1000, - help="The size of the chunks to split the text into.") -parser.add_argument("--pdf-chunk_overlap", type=int, default=200, - help="The overlap between the chunks.") -parser.add_argument("--pdf-add-start-index", action="store_true", - help="Add the start index to the metadata of the chunks.") -parser.add_argument("-w", "--web-data", type=str, nargs="*", default=[], - help="One or multiple URLs to use for retrieval.") -parser.add_argument("--web-chunk-size", type=int, default=200, - help="The size of the chunks to split the text into.") -parser.add_argument("-c", "--chroma-db-location", type=Path, default=Path(".chroma_db"), - help="file path to store or load a Chroma DB from/to.") +parser.add_argument( + "-b", + "--back-end", + type=BackendType, + choices=list(BackendType), + default=BackendType.azure, + help="(Cloud) back-end to use. In the case of local, a locally installed ollama will be used.", +) +parser.add_argument( + "-p", + "--pdf-data", + type=Path, + required=True, + nargs="+", + help="One or multiple paths to folders or files to use for retrieval. " + "If a path is a folder, all files in the folder will be used. " + "If a path is a file, only that file will be used. " + "If the path is relative it will be relative to the current working directory.", +) +parser.add_argument("--pdf-chunk_size", type=int, default=1000, help="The size of the chunks to split the text into.") +parser.add_argument("--pdf-chunk_overlap", type=int, default=200, help="The overlap between the chunks.") +parser.add_argument( + "--pdf-add-start-index", action="store_true", help="Add the start index to the metadata of the chunks." +) +parser.add_argument( + "-w", "--web-data", type=str, nargs="*", default=[], help="One or multiple URLs to use for retrieval." +) +parser.add_argument("--web-chunk-size", type=int, default=200, help="The size of the chunks to split the text into.") +parser.add_argument( + "-c", + "--chroma-db-location", + type=Path, + default=Path(".chroma_db"), + help="File path to store or load a Chroma DB from/to.", +) parser.add_argument("-r", "--reset-chrome-db", action="store_true", help="Reset the Chroma DB.") args = parser.parse_args() @@ -60,15 +76,16 @@ def generate(state: State): docs_content = "\n\n".join(doc.page_content for doc in state["context"]) messages = prompt.invoke({"question": state["question"], "context": docs_content}) response = llm.invoke(messages) - return {"answer": response.content} @cl.on_chat_start async def on_chat_start(): - vector_store = Chroma(collection_name="generic_rag", - embedding_function=get_embedding_model(args.back_end), - persist_directory=str(args.chroma_db_location)) + vector_store = Chroma( + collection_name="generic_rag", + embedding_function=get_embedding_model(args.back_end), + persist_directory=str(args.chroma_db_location), + ) cl.user_session.set("vector_store", vector_store) cl.user_session.set("emb_model", get_embedding_model(args.back_end)) @@ -103,8 +120,9 @@ async def set_starters(): try: starters.append(cl.Starter(label=starter["label"], message=starter["message"])) except KeyError: - logging.warning("CHAINLIT_STARTERS environment is not a list with " - "dictionaries containing 'label' and 'message' keys.") + logging.warning( + "CHAINLIT_STARTERS environment is not a list with dictionaries containing 'label' and 'message' keys." + ) return starters