forked from AI_team/Philosophy-RAG-demo
Add pdf/web source and viewer to chainlit
This commit is contained in:
parent
37aa171924
commit
cfbfe5f609
@ -11,6 +11,7 @@ from langchain import hub
|
||||
from langchain_chroma import Chroma
|
||||
from langchain_core.documents import Document
|
||||
from langgraph.graph import START, StateGraph
|
||||
from langgraph.pregel.io import AddableValuesDict
|
||||
from parsers.parser import add_pdf_files, add_urls
|
||||
from typing_extensions import List, TypedDict
|
||||
|
||||
@ -106,7 +107,60 @@ async def on_chat_start():
|
||||
async def on_message(message: cl.Message):
    """
    Answer an incoming chat message and list the sources that were consulted.

    The question is run through the graph stored in the user session. The reply
    holds the generated answer, followed by the PDF sources (with their page
    numbers) and the web sources found in the response context. Each PDF source
    is also attached to the reply as an inline viewer element.
    """
    graph = cl.user_session.get("graph")
    response = graph.invoke({"question": message.content})

    # Only the formatted answer is sent; the raw response mapping is not a
    # displayable message and must not be sent on its own.
    answer = response["answer"]
    answer += "\n\n"

    pdf_sources = get_pdf_sources(response)
    web_sources = get_web_sources(response)

    elements = []
    if pdf_sources:
        answer += "The following PDF source were consulted:\n"
        for source, page_numbers in pdf_sources.items():
            # Sort so the viewer opens on the lowest referenced page.
            page_numbers = sorted(page_numbers)
            # display="side" seems to be not supported by chainlit for PDF's, so we use "inline" instead
            elements.append(cl.Pdf(name="pdf", display="inline", path=source, page=page_numbers[0]))
            answer += f"'{source}' on page(s): {page_numbers}\n"

    if web_sources:
        answer += f"The following web sources were consulted: {web_sources}\n"

    await cl.Message(content=answer, elements=elements).send()
|
||||
|
||||
|
||||
def get_pdf_sources(response: AddableValuesDict) -> dict[str, set[int]]:
    """
    Collect the PDF sources, with the pages that were used, from a response.

    Every entry of ``response["context"]`` whose metadata marks it as
    ``application/pdf`` contributes its page number to the set kept for its
    source. Entries whose metadata lacks the ``filetype``, ``source`` or
    ``page_number`` key are skipped.

    Returns a mapping from PDF source to the set of page numbers (unordered;
    callers that need an order have to sort the pages themselves).
    """
    pdf_sources: dict[str, set[int]] = {}
    for context in response["context"]:
        try:
            if context.metadata["filetype"] == "application/pdf":
                source = context.metadata["source"]
                page_number = context.metadata["page_number"]
                # setdefault creates the page set on first sight of a source.
                pdf_sources.setdefault(source, set()).add(page_number)
        except KeyError:
            # Best effort: a document without the expected metadata is ignored.
            continue
    return pdf_sources
|
||||
|
||||
|
||||
def get_web_sources(response: AddableValuesDict) -> set:
    """
    Collect the distinct web sources referenced by a response.

    Walks ``response["context"]`` and gathers the ``source`` metadata value of
    every entry whose ``filetype`` metadata equals ``"web"``. Entries that miss
    either metadata key are skipped.
    """
    found = set()
    for document in response["context"]:
        try:
            metadata = document.metadata
            if metadata["filetype"] != "web":
                continue
            found.add(metadata["source"])
        except KeyError:
            # Incomplete metadata: leave this entry out.
            continue
    return found
|
||||
|
||||
|
||||
@cl.set_starters
|
||||
|
||||
Loading…
Reference in New Issue
Block a user