From ab1235bd28942323d8ba55eaffcfd29f273848fa Mon Sep 17 00:00:00 2001 From: Ruben Lucas Date: Wed, 9 Apr 2025 16:03:35 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Create=20single=20source=20aggregat?= =?UTF-8?q?ion=20definition?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- generic_rag/app.py | 40 +++++++++++------------------- generic_rag/graphs/cond_ret_gen.py | 1 - 2 files changed, 15 insertions(+), 26 deletions(-) diff --git a/generic_rag/app.py b/generic_rag/app.py index 64ec26d..4ecb79a 100644 --- a/generic_rag/app.py +++ b/generic_rag/app.py @@ -93,15 +93,7 @@ async def on_message(message: cl.Message): await process_response(message) -async def process_response(message): - config = {"configurable": {"thread_id": cl.user_session.get("id")}} - - chainlit_response = cl.Message(content="") - - async for response in graph.stream(message.content, config=config): - await chainlit_response.stream_token(response) - - pdf_sources = graph.get_last_pdf_sources() +async def add_sources(chainlit_response: cl.Message, pdf_sources: dict, web_sources: set | list): if len(pdf_sources) > 0: await chainlit_response.stream_token("\nThe following PDF source were consulted:\n") for source, page_numbers in pdf_sources.items(): @@ -111,13 +103,24 @@ async def process_response(message): chainlit_response.elements.append(cl.Pdf(name="pdf", display="inline", path=source, page=page_numbers[0])) await chainlit_response.update() await chainlit_response.stream_token(f"- '{source}' on page(s): {page_numbers}\n") - - web_sources = graph.get_last_web_sources() if len(web_sources) > 0: await chainlit_response.stream_token("\nThe following web sources were consulted:\n") for source in web_sources: await chainlit_response.stream_token(f"- {source}\n") + +async def process_response(message): + config = {"configurable": {"thread_id": cl.user_session.get("id")}} + + chainlit_response = cl.Message(content="") + + async for response in graph.stream(message.content, config=config): + await chainlit_response.stream_token(response) + + pdf_sources = graph.get_last_pdf_sources() + web_sources = graph.get_last_web_sources() + await add_sources(chainlit_response, pdf_sources, web_sources) + await chainlit_response.send() @@ -129,20 +132,7 @@ async def process_cond_response(message): for response in graph.stream(message.content, config=config): await chainlit_response.stream_token(response) - if len(graph.last_retrieved_docs) > 0: - await chainlit_response.stream_token("\nThe following PDF source were consulted:\n") - for source, page_numbers in graph.last_retrieved_docs.items(): - page_numbers = list(page_numbers) - page_numbers.sort() - # display="side" seems to be not supported by chainlit for PDF's, so we use "inline" instead. - chainlit_response.elements.append(cl.Pdf(name="pdf", display="inline", path=source, page=page_numbers[0])) - await chainlit_response.update() - await chainlit_response.stream_token(f"- '{source}' on page(s): {page_numbers}\n") - - if len(graph.last_retrieved_sources) > 0: - await chainlit_response.stream_token("\nThe following web sources were consulted:\n") - for source in graph.last_retrieved_sources: - await chainlit_response.stream_token(f"- {source}\n") + await add_sources(chainlit_response, graph.last_retrieved_docs, graph.last_retrieved_sources) await chainlit_response.send() diff --git a/generic_rag/graphs/cond_ret_gen.py b/generic_rag/graphs/cond_ret_gen.py index 9b0cdd3..c884a8e 100644 --- a/generic_rag/graphs/cond_ret_gen.py +++ b/generic_rag/graphs/cond_ret_gen.py @@ -63,7 +63,6 @@ class CondRetGenLangGraph: ) # Use re.DOTALL if dicts might span newlines for dict_str in dictionary_strings: parsed_dict = ast.literal_eval(dict_str) - print(parsed_dict) if "filetype" in parsed_dict and parsed_dict["filetype"] == "web": self.last_retrieved_sources.add(parsed_dict["source"]) elif Path(parsed_dict["source"]).suffix == ".pdf":