✨ Create a single shared definition for source aggregation

2025-04-09 16:03:35 +02:00 · 2025-04-09 16:03:35 +02:00 · ab1235bd28
commit ab1235bd28
parent 3295bb8992
2 changed files with 15 additions and 26 deletions
--- a/generic_rag/app.py
+++ b/generic_rag/app.py
@@ -93,15 +93,7 @@ async def on_message(message: cl.Message):
        await process_response(message)


-async def process_response(message):
-    config = {"configurable": {"thread_id": cl.user_session.get("id")}}
-
-    chainlit_response = cl.Message(content="")
-
-    async for response in graph.stream(message.content, config=config):
-        await chainlit_response.stream_token(response)
-
-    pdf_sources = graph.get_last_pdf_sources()
+async def add_sources(chainlit_response: cl.Message, pdf_sources: dict, web_sources: set | list):
    if len(pdf_sources) > 0:
        await chainlit_response.stream_token("\nThe following PDF source were consulted:\n")
        for source, page_numbers in pdf_sources.items():
@@ -111,13 +103,24 @@ async def process_response(message):
            chainlit_response.elements.append(cl.Pdf(name="pdf", display="inline", path=source, page=page_numbers[0]))
            await chainlit_response.update()
            await chainlit_response.stream_token(f"- '{source}' on page(s): {page_numbers}\n")
-
-    web_sources = graph.get_last_web_sources()
    if len(web_sources) > 0:
        await chainlit_response.stream_token("\nThe following web sources were consulted:\n")
        for source in web_sources:
            await chainlit_response.stream_token(f"- {source}\n")

+
+async def process_response(message):
+    config = {"configurable": {"thread_id": cl.user_session.get("id")}}
+
+    chainlit_response = cl.Message(content="")
+
+    async for response in graph.stream(message.content, config=config):
+        await chainlit_response.stream_token(response)
+
+    pdf_sources = graph.get_last_pdf_sources()
+    web_sources = graph.get_last_web_sources()
+    await add_sources(chainlit_response, pdf_sources, web_sources)
+
    await chainlit_response.send()


@@ -129,20 +132,7 @@ async def process_cond_response(message):
    for response in graph.stream(message.content, config=config):
        await chainlit_response.stream_token(response)

-    if len(graph.last_retrieved_docs) > 0:
-        await chainlit_response.stream_token("\nThe following PDF source were consulted:\n")
-        for source, page_numbers in graph.last_retrieved_docs.items():
-            page_numbers = list(page_numbers)
-            page_numbers.sort()
-            # display="side" seems to be not supported by chainlit for PDF's, so we use "inline" instead.
-            chainlit_response.elements.append(cl.Pdf(name="pdf", display="inline", path=source, page=page_numbers[0]))
-            await chainlit_response.update()
-            await chainlit_response.stream_token(f"- '{source}' on page(s): {page_numbers}\n")
-
-    if len(graph.last_retrieved_sources) > 0:
-        await chainlit_response.stream_token("\nThe following web sources were consulted:\n")
-        for source in graph.last_retrieved_sources:
-            await chainlit_response.stream_token(f"- {source}\n")
+    await add_sources(chainlit_response, graph.last_retrieved_docs, graph.last_retrieved_sources)

    await chainlit_response.send()

--- a/generic_rag/graphs/cond_ret_gen.py
+++ b/generic_rag/graphs/cond_ret_gen.py
@@ -63,7 +63,6 @@ class CondRetGenLangGraph:
                )  # Use re.DOTALL if dicts might span newlines
                for dict_str in dictionary_strings:
                    parsed_dict = ast.literal_eval(dict_str)
-                    print(parsed_dict)
                    if "filetype" in parsed_dict and parsed_dict["filetype"] == "web":
                        self.last_retrieved_sources.add(parsed_dict["source"])
                    elif Path(parsed_dict["source"]).suffix == ".pdf":