forked from AI_team/Philosophy-RAG-demo
Merge pull request 'Small fixes' (#22) from small_fixes into main
Reviewed-on: AI_team/generic-RAG-demo#22
This commit is contained in:
commit
a980568e24
@@ -8,7 +8,7 @@ from langchain_chroma import Chroma
|
|||||||
from langchain_core.documents import Document
|
from langchain_core.documents import Document
|
||||||
from langchain_core.embeddings import Embeddings
|
from langchain_core.embeddings import Embeddings
|
||||||
from langchain_core.language_models.chat_models import BaseChatModel
|
from langchain_core.language_models.chat_models import BaseChatModel
|
||||||
from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
|
from langchain_core.messages import BaseMessage, SystemMessage
|
||||||
from langchain_core.tools import tool
|
from langchain_core.tools import tool
|
||||||
from langchain_core.runnables.config import RunnableConfig
|
from langchain_core.runnables.config import RunnableConfig
|
||||||
from langgraph.checkpoint.memory import MemorySaver
|
from langgraph.checkpoint.memory import MemorySaver
|
||||||
|
|||||||
@@ -61,7 +61,6 @@ class RetGenLangGraph:
|
|||||||
return pdf_sources
|
return pdf_sources
|
||||||
|
|
||||||
for doc in self.last_retrieved_docs:
|
for doc in self.last_retrieved_docs:
|
||||||
source_candidate = doc.metadata["source"]
|
|
||||||
if "source" in doc.metadata and Path(doc.metadata["source"]).suffix.lower() == ".pdf":
|
if "source" in doc.metadata and Path(doc.metadata["source"]).suffix.lower() == ".pdf":
|
||||||
source = doc.metadata["source"]
|
source = doc.metadata["source"]
|
||||||
else:
|
else:
|
||||||
@@ -71,19 +70,11 @@ class RetGenLangGraph:
|
|||||||
pdf_sources[source] = set()
|
pdf_sources[source] = set()
|
||||||
|
|
||||||
# The page numbers are in the `page_number` and `page` fields.
|
# The page numbers are in the `page_number` and `page` fields.
|
||||||
try:
|
if "page_number" in doc.metadata:
|
||||||
page_number = doc.metadata["page_number"]
|
pdf_sources[source].add(doc.metadata["page_number"])
|
||||||
except KeyError:
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
pdf_sources[source].add(page_number)
|
|
||||||
|
|
||||||
try:
|
if "page" in doc.metadata:
|
||||||
page_number = doc.metadata["page"]
|
pdf_sources[source].add(doc.metadata["page"])
|
||||||
except KeyError:
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
pdf_sources[source].add(page_number)
|
|
||||||
|
|
||||||
if len(pdf_sources[source]) == 0:
|
if len(pdf_sources[source]) == 0:
|
||||||
logging.warning(f"PDF source {source} has no page number. Please check the metadata of the document.")
|
logging.warning(f"PDF source {source} has no page number. Please check the metadata of the document.")
|
||||||
@@ -100,10 +91,7 @@ class RetGenLangGraph:
|
|||||||
return web_sources
|
return web_sources
|
||||||
|
|
||||||
for doc in self.last_retrieved_docs:
|
for doc in self.last_retrieved_docs:
|
||||||
try:
|
if "filetype" in doc.metadata and doc.metadata["filetype"] == "web":
|
||||||
if doc.metadata["filetype"] == "web":
|
web_sources.add(doc.metadata["source"])
|
||||||
web_sources.add(doc.metadata["source"])
|
|
||||||
except KeyError:
|
|
||||||
continue
|
|
||||||
|
|
||||||
return web_sources
|
return web_sources
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user