Merge pull request 'Small fixes' (#22) from small_fixes into main

Reviewed-on: AI_team/generic-RAG-demo#22
This commit is contained in:
rubenl 2025-04-11 13:01:51 +02:00
commit a980568e24
2 changed files with 7 additions and 19 deletions

View File

@@ -8,7 +8,7 @@ from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
from langchain_core.messages import BaseMessage, SystemMessage
from langchain_core.tools import tool
from langchain_core.runnables.config import RunnableConfig
from langgraph.checkpoint.memory import MemorySaver

View File

@@ -61,7 +61,6 @@ class RetGenLangGraph:
return pdf_sources
for doc in self.last_retrieved_docs:
source_candidate = doc.metadata["source"]
if "source" in doc.metadata and Path(doc.metadata["source"]).suffix.lower() == ".pdf":
source = doc.metadata["source"]
else:
@@ -71,19 +70,11 @@ class RetGenLangGraph:
pdf_sources[source] = set()
# The page numbers are in the `page_number` and `page` fields.
try:
page_number = doc.metadata["page_number"]
except KeyError:
pass
else:
pdf_sources[source].add(page_number)
if "page_number" in doc.metadata:
pdf_sources[source].add(doc.metadata["page_number"])
try:
page_number = doc.metadata["page"]
except KeyError:
pass
else:
pdf_sources[source].add(page_number)
if "page" in doc.metadata:
pdf_sources[source].add(doc.metadata["page"])
if len(pdf_sources[source]) == 0:
logging.warning(f"PDF source {source} has no page number. Please check the metadata of the document.")
@@ -100,10 +91,7 @@ class RetGenLangGraph:
return web_sources
for doc in self.last_retrieved_docs:
try:
if doc.metadata["filetype"] == "web":
web_sources.add(doc.metadata["source"])
except KeyError:
continue
if "filetype" in doc.metadata and doc.metadata["filetype"] == "web":
web_sources.add(doc.metadata["source"])
return web_sources