From 450a00e6fe5b302e5f8a0877629fc5107346351d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nielson=20Jann=C3=A9?= Date: Sat, 15 Mar 2025 13:41:56 +0100 Subject: [PATCH] Add logging info when skipping file or website --- generic_rag/parsers/parser.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/generic_rag/parsers/parser.py b/generic_rag/parsers/parser.py index ceca313..11d9bef 100644 --- a/generic_rag/parsers/parser.py +++ b/generic_rag/parsers/parser.py @@ -29,6 +29,7 @@ def add_urls(vector_store: Chroma, urls: list[str], chunk_size: int) -> None: all_splits = [] for url in urls: if len(vector_store.get(where={"source": url}, limit=1)["ids"]) > 0: + logging.info(f"Skipping URL {url}, as it is already in the database.") continue response = requests.get(url) @@ -70,6 +71,8 @@ def add_pdf_files( for pdf_file in pdf_files: if len(vector_store.get(where={"source": str(pdf_file)}, limit=1)["ids"]) == 0: new_pdfs.append(pdf_file) + else: + logging.info(f"Skipping PDF {pdf_file}, as it is already in the database.") if len(new_pdfs) == 0: return