diff --git a/generic_rag/graphs/ret_gen.py b/generic_rag/graphs/ret_gen.py index 280f8cb..ba19f0f 100644 --- a/generic_rag/graphs/ret_gen.py +++ b/generic_rag/graphs/ret_gen.py @@ -70,19 +70,11 @@ class RetGenLangGraph: pdf_sources[source] = set() # The page numbers are in the `page_number` and `page` fields. - try: - page_number = doc.metadata["page_number"] - except KeyError: - pass - else: - pdf_sources[source].add(page_number) + if "page_number" in doc.metadata: + pdf_sources[source].add(doc.metadata["page_number"]) - try: - page_number = doc.metadata["page"] - except KeyError: - pass - else: - pdf_sources[source].add(page_number) + if "page" in doc.metadata: + pdf_sources[source].add(doc.metadata["page"]) if len(pdf_sources[source]) == 0: logging.warning(f"PDF source {source} has no page number. Please check the metadata of the document.") @@ -99,10 +91,7 @@ class RetGenLangGraph: return web_sources for doc in self.last_retrieved_docs: - try: - if doc.metadata["filetype"] == "web": - web_sources.add(doc.metadata["source"]) - except KeyError: - continue + if "filetype" in doc.metadata and doc.metadata["filetype"] == "web": + web_sources.add(doc.metadata["source"]) return web_sources