From ab1235bd28942323d8ba55eaffcfd29f273848fa Mon Sep 17 00:00:00 2001
From: Ruben Lucas <ruben.lucas1997@gmail.com>
Date: Wed, 9 Apr 2025 16:03:35 +0200
Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Create=20single=20source=20aggregat?=
 =?UTF-8?q?ion=20definition?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

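Move the duplicated "sources consulted" output (PDF list with inline
cl.Pdf elements, followed by the web source list) out of
process_response and process_cond_response into a shared add_sources
helper that both now call.

Also drop a leftover debug print of parsed_dict in CondRetGenLangGraph.
---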
 generic_rag/app.py                 | 40 +++++++++++-------------------
 generic_rag/graphs/cond_ret_gen.py |  1 -
 2 files changed, 15 insertions(+), 26 deletions(-)

diff --git a/generic_rag/app.py b/generic_rag/app.py
index 64ec26d..4ecb79a 100644
--- a/generic_rag/app.py
+++ b/generic_rag/app.py
@@ -93,15 +93,7 @@ async def on_message(message: cl.Message):
         await process_response(message)
 
 
-async def process_response(message):
-    config = {"configurable": {"thread_id": cl.user_session.get("id")}}
-
-    chainlit_response = cl.Message(content="")
-
-    async for response in graph.stream(message.content, config=config):
-        await chainlit_response.stream_token(response)
-
-    pdf_sources = graph.get_last_pdf_sources()
+async def add_sources(chainlit_response: cl.Message, pdf_sources: dict, web_sources: set | list):
     if len(pdf_sources) > 0:
         await chainlit_response.stream_token("\nThe following PDF source were consulted:\n")
         for source, page_numbers in pdf_sources.items():
@@ -111,13 +103,24 @@ async def process_response(message):
             chainlit_response.elements.append(cl.Pdf(name="pdf", display="inline", path=source, page=page_numbers[0]))
             await chainlit_response.update()
             await chainlit_response.stream_token(f"- '{source}' on page(s): {page_numbers}\n")
-
-    web_sources = graph.get_last_web_sources()
     if len(web_sources) > 0:
         await chainlit_response.stream_token("\nThe following web sources were consulted:\n")
         for source in web_sources:
             await chainlit_response.stream_token(f"- {source}\n")
 
+
+async def process_response(message):
+    config = {"configurable": {"thread_id": cl.user_session.get("id")}}
+
+    chainlit_response = cl.Message(content="")
+
+    async for response in graph.stream(message.content, config=config):
+        await chainlit_response.stream_token(response)
+
+    pdf_sources = graph.get_last_pdf_sources()
+    web_sources = graph.get_last_web_sources()
+    await add_sources(chainlit_response, pdf_sources, web_sources)
+
     await chainlit_response.send()
 
 
@@ -129,20 +132,7 @@ async def process_cond_response(message):
     for response in graph.stream(message.content, config=config):
         await chainlit_response.stream_token(response)
 
-    if len(graph.last_retrieved_docs) > 0:
-        await chainlit_response.stream_token("\nThe following PDF source were consulted:\n")
-        for source, page_numbers in graph.last_retrieved_docs.items():
-            page_numbers = list(page_numbers)
-            page_numbers.sort()
-            # display="side" seems to be not supported by chainlit for PDF's, so we use "inline" instead.
-            chainlit_response.elements.append(cl.Pdf(name="pdf", display="inline", path=source, page=page_numbers[0]))
-            await chainlit_response.update()
-            await chainlit_response.stream_token(f"- '{source}' on page(s): {page_numbers}\n")
-
-    if len(graph.last_retrieved_sources) > 0:
-        await chainlit_response.stream_token("\nThe following web sources were consulted:\n")
-        for source in graph.last_retrieved_sources:
-            await chainlit_response.stream_token(f"- {source}\n")
+    await add_sources(chainlit_response, graph.last_retrieved_docs, graph.last_retrieved_sources)
 
     await chainlit_response.send()
 
diff --git a/generic_rag/graphs/cond_ret_gen.py b/generic_rag/graphs/cond_ret_gen.py
index 9b0cdd3..c884a8e 100644
--- a/generic_rag/graphs/cond_ret_gen.py
+++ b/generic_rag/graphs/cond_ret_gen.py
@@ -63,7 +63,6 @@ class CondRetGenLangGraph:
                 )  # Use re.DOTALL if dicts might span newlines
                 for dict_str in dictionary_strings:
                     parsed_dict = ast.literal_eval(dict_str)
-                    print(parsed_dict)
                     if "filetype" in parsed_dict and parsed_dict["filetype"] == "web":
                         self.last_retrieved_sources.add(parsed_dict["source"])
                     elif Path(parsed_dict["source"]).suffix == ".pdf":