forked from AI_team/Philosophy-RAG-demo
Add pdf/web source and viewer to chainlit
This commit is contained in:
parent
37aa171924
commit
cfbfe5f609
@ -11,6 +11,7 @@ from langchain import hub
|
|||||||
from langchain_chroma import Chroma
|
from langchain_chroma import Chroma
|
||||||
from langchain_core.documents import Document
|
from langchain_core.documents import Document
|
||||||
from langgraph.graph import START, StateGraph
|
from langgraph.graph import START, StateGraph
|
||||||
|
from langgraph.pregel.io import AddableValuesDict
|
||||||
from parsers.parser import add_pdf_files, add_urls
|
from parsers.parser import add_pdf_files, add_urls
|
||||||
from typing_extensions import List, TypedDict
|
from typing_extensions import List, TypedDict
|
||||||
|
|
||||||
@ -106,7 +107,60 @@ async def on_chat_start():
|
|||||||
async def on_message(message: cl.Message):
    """
    Function that answers an incoming chat message and lists the consulted sources.

    The question is passed to the graph stored in the user session. The reply
    contains the "answer" of the graph response, followed by the PDF sources
    (with page numbers) and the web sources found in the response context.
    Every PDF source is also attached to the reply as an inline PDF element.
    """
    graph = cl.user_session.get("graph")
    # graph.invoke is a blocking call; run it in a worker thread so the event
    # loop is not stalled while the response is being produced.
    response = await cl.make_async(graph.invoke)({"question": message.content})

    answer = response["answer"]
    answer += "\n\n"

    pdf_sources = get_pdf_sources(response)
    web_sources = get_web_sources(response)

    elements = []
    if pdf_sources:
        answer += "The following PDF sources were consulted:\n"
        for source, page_numbers in pdf_sources.items():
            page_numbers = sorted(page_numbers)
            # display="side" does not seem to be supported by chainlit for PDFs, so we use "inline" instead.
            # The viewer is opened on the lowest page number that was consulted.
            elements.append(cl.Pdf(name="pdf", display="inline", path=source, page=page_numbers[0]))
            answer += f"'{source}' on page(s): {page_numbers}\n"

    if web_sources:
        answer += f"The following web sources were consulted: {web_sources}\n"

    await cl.Message(content=answer, elements=elements).send()
|
||||||
|
|
||||||
|
|
||||||
|
def get_pdf_sources(response: AddableValuesDict) -> dict[str, set[int]]:
    """
    Function that retrieves the PDF sources with page numbers from a response.

    Every entry of response["context"] whose metadata has the filetype
    "application/pdf" contributes its "page_number" to the set stored under
    its "source". Entries whose metadata lacks one of the keys "filetype",
    "source" or "page_number" are skipped.

    Returns a dict that maps each PDF source to the set of its page numbers.
    """
    pdf_sources: dict[str, set[int]] = {}
    for context in response["context"]:
        metadata = context.metadata
        try:
            if metadata["filetype"] != "application/pdf":
                continue
            source = metadata["source"]
            page_number = metadata["page_number"]
        except KeyError:
            # Best effort: context without complete metadata cannot be listed as a source.
            continue
        pdf_sources.setdefault(source, set()).add(page_number)
    return pdf_sources
|
||||||
|
|
||||||
|
|
||||||
|
def get_web_sources(response: AddableValuesDict) -> set:
    """
    Function that collects the web sources from a response.

    Looks at every entry of response["context"] and gathers the "source" of
    each entry whose metadata has the filetype "web". Entries whose metadata
    lacks the "filetype" or "source" key are left out.
    """
    found = set()
    for document in response["context"]:
        metadata = document.metadata
        try:
            if metadata["filetype"] != "web":
                continue
            found.add(metadata["source"])
        except KeyError:
            # Incomplete metadata: nothing to report for this entry.
            continue
    return found
|
||||||
|
|
||||||
|
|
||||||
@cl.set_starters
|
@cl.set_starters
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user