Add pdf/web source and viewer to chainlit

This commit is contained in:
Nielson Janné 2025-03-15 15:35:29 +01:00
parent 37aa171924
commit cfbfe5f609

View File

@ -11,6 +11,7 @@ from langchain import hub
from langchain_chroma import Chroma from langchain_chroma import Chroma
from langchain_core.documents import Document from langchain_core.documents import Document
from langgraph.graph import START, StateGraph from langgraph.graph import START, StateGraph
from langgraph.pregel.io import AddableValuesDict
from parsers.parser import add_pdf_files, add_urls from parsers.parser import add_pdf_files, add_urls
from typing_extensions import List, TypedDict from typing_extensions import List, TypedDict
@ -106,7 +107,60 @@ async def on_chat_start():
async def on_message(message: cl.Message):
    """
    Answer an incoming chat message and list the sources that were consulted.

    The graph stored in the user session is invoked with the message text as
    the question. The answer is sent back to the user, followed by the PDF
    sources (with their page numbers and one PDF viewer element per file) and
    the web sources found in the response context.
    """
    graph = cl.user_session.get("graph")
    # graph.invoke is a blocking call; run it through cl.make_async so the
    # event loop is not stalled while the answer is being generated.
    response = await cl.make_async(graph.invoke)({"question": message.content})

    pdf_sources = get_pdf_sources(response)
    web_sources = get_web_sources(response)

    # Collect the message in pieces and join once at the end.
    parts = [response["answer"], "\n\n"]
    elements = []
    if pdf_sources:
        parts.append("The following PDF sources were consulted:\n")
        for source, page_numbers in pdf_sources.items():
            pages = sorted(page_numbers)
            # display="side" seems to be not supported by chainlit for PDF's, so we use "inline" instead
            # NOTE(review): every viewer element shares the name "pdf" - confirm
            # Chainlit does not require unique names for inline elements.
            # The viewer is opened on the lowest page number that was consulted.
            elements.append(cl.Pdf(name="pdf", display="inline", path=source, page=pages[0]))
            parts.append(f"'{source}' on page(s): {pages}\n")
    if web_sources:
        # Sort so the listing is stable between runs (set order is arbitrary).
        parts.append(f"The following web sources were consulted: {sorted(web_sources, key=str)}\n")

    await cl.Message(content="".join(parts), elements=elements).send()
def get_pdf_sources(response: AddableValuesDict) -> dict[str, set[int]]:
    """
    Function that retrieves the PDF sources with page numbers from a response.

    Args:
        response: Mapping whose "context" entry is an iterable of documents,
            each exposing a ``metadata`` mapping.

    Returns:
        A mapping from each PDF source to the set of page numbers found for
        it. Documents whose "filetype" is not "application/pdf", or that lack
        the "source" or "page_number" metadata, are skipped.

    Raises:
        KeyError: If ``response`` has no "context" entry.
    """
    pdf_sources: dict[str, set[int]] = {}
    for context in response["context"]:
        metadata = context.metadata
        if metadata.get("filetype") != "application/pdf":
            continue
        try:
            source = metadata["source"]
            page_number = metadata["page_number"]
        except KeyError:
            # Best effort: a PDF chunk without a source or page cannot be cited.
            continue
        pdf_sources.setdefault(source, set()).add(page_number)
    return pdf_sources
def get_web_sources(response: AddableValuesDict) -> set[str]:
    """
    Function that retrieves the web sources from a response.

    Args:
        response: Mapping whose "context" entry is an iterable of documents,
            each exposing a ``metadata`` mapping.

    Returns:
        The distinct "source" values of all documents whose "filetype" is
        "web". Documents lacking either metadata key are skipped.

    Raises:
        KeyError: If ``response`` has no "context" entry.
    """
    return {
        context.metadata["source"]
        for context in response["context"]
        if context.metadata.get("filetype") == "web" and "source" in context.metadata
    }
@cl.set_starters @cl.set_starters