diff --git a/generic_rag/graphs/cond_ret_gen.py b/generic_rag/graphs/cond_ret_gen.py index 5d3a6ed..8b6d788 100644 --- a/generic_rag/graphs/cond_ret_gen.py +++ b/generic_rag/graphs/cond_ret_gen.py @@ -8,7 +8,7 @@ from langchain_chroma import Chroma from langchain_core.documents import Document from langchain_core.embeddings import Embeddings from langchain_core.language_models.chat_models import BaseChatModel -from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage +from langchain_core.messages import BaseMessage, SystemMessage from langchain_core.tools import tool from langchain_core.runnables.config import RunnableConfig from langgraph.checkpoint.memory import MemorySaver diff --git a/generic_rag/graphs/ret_gen.py b/generic_rag/graphs/ret_gen.py index 9a05349..ba19f0f 100644 --- a/generic_rag/graphs/ret_gen.py +++ b/generic_rag/graphs/ret_gen.py @@ -61,7 +61,6 @@ class RetGenLangGraph: return pdf_sources for doc in self.last_retrieved_docs: - source_candidate = doc.metadata["source"] if "source" in doc.metadata and Path(doc.metadata["source"]).suffix.lower() == ".pdf": source = doc.metadata["source"] else: @@ -71,19 +70,11 @@ class RetGenLangGraph: pdf_sources[source] = set() # The page numbers are in the `page_numer` and `page` fields. - try: - page_number = doc.metadata["page_number"] - except KeyError: - pass - else: - pdf_sources[source].add(page_number) + if "page_number" in doc.metadata: + pdf_sources[source].add(doc.metadata["page_number"]) - try: - page_number = doc.metadata["page"] - except KeyError: - pass - else: - pdf_sources[source].add(page_number) + if "page" in doc.metadata: + pdf_sources[source].add(doc.metadata["page"]) if len(pdf_sources[source]) == 0: logging.warning(f"PDF source {source} has no page number. Please check the metadata of the document.") @@ -100,10 +91,7 @@ class RetGenLangGraph: return web_sources for doc in self.last_retrieved_docs: - try: - if doc.metadata["filetype"] == "web": - web_sources.add(doc.metadata["source"]) - except KeyError: - continue + if "filetype" in doc.metadata and doc.metadata["filetype"] == "web": + web_sources.add(doc.metadata["source"]) return web_sources