forked from AI_team/Philosophy-RAG-demo
Fix pdf source retrieval information
This commit is contained in:
parent
d1e9b3d8cf
commit
cd14c8add2
@ -1,3 +1,4 @@
|
||||
from pathlib import Path
|
||||
from typing import Any, Union
|
||||
|
||||
from langchain import hub
|
||||
@ -53,15 +54,29 @@ class RetGenLangGraph:
|
||||
pdf_sources = {}
|
||||
for context in self.last_invoke["context"]:
|
||||
try:
|
||||
if context.metadata["filetype"] == "application/pdf":
|
||||
source = context.metadata["source"]
|
||||
page_number = context.metadata["page_number"]
|
||||
if source in pdf_sources:
|
||||
pdf_sources[source].add(page_number)
|
||||
else:
|
||||
pdf_sources[source] = {page_number}
|
||||
Path(context.metadata["source"]).suffix == ".pdf"
|
||||
except KeyError:
|
||||
continue
|
||||
else:
|
||||
source = context.metadata["source"]
|
||||
|
||||
if source not in pdf_sources:
|
||||
pdf_sources[source] = set()
|
||||
|
||||
# The page number may be in the `page_number` field, the `page` field, or both.
|
||||
try:
|
||||
page_number = context.metadata["page_number"]
|
||||
except KeyError:
|
||||
pass
|
||||
else:
|
||||
pdf_sources[source].add(page_number)
|
||||
|
||||
try:
|
||||
page_number = context.metadata["page"]
|
||||
except KeyError:
|
||||
pass
|
||||
else:
|
||||
pdf_sources[source].add(page_number)
|
||||
|
||||
return pdf_sources
|
||||
|
||||
@ -75,9 +90,10 @@ class RetGenLangGraph:
|
||||
web_sources = set()
|
||||
for context in self.last_invoke["context"]:
|
||||
try:
|
||||
if context.metadata["filetype"] == "web":
|
||||
web_sources.add(context.metadata["source"])
|
||||
context.metadata["filetype"] == "web"
|
||||
except KeyError:
|
||||
pass
|
||||
continue
|
||||
else:
|
||||
web_sources.add(context.metadata["source"])
|
||||
|
||||
return web_sources
|
||||
|
||||
Loading…
Reference in New Issue
Block a user