Add logging info when skipping file or website

This commit is contained in:
Nielson Janné 2025-03-15 13:41:56 +01:00
parent a6e9c95a6d
commit 450a00e6fe

View File

@@ -29,6 +29,7 @@ def add_urls(vector_store: Chroma, urls: list[str], chunk_size: int) -> None:
all_splits = [] all_splits = []
for url in urls: for url in urls:
if len(vector_store.get(where={"source": url}, limit=1)["ids"]) > 0: if len(vector_store.get(where={"source": url}, limit=1)["ids"]) > 0:
logging.info(f"Skipping URL {url}, as it is already in the database.")
continue continue
response = requests.get(url) response = requests.get(url)
@@ -70,6 +71,8 @@ def add_pdf_files(
for pdf_file in pdf_files: for pdf_file in pdf_files:
if len(vector_store.get(where={"source": str(pdf_file)}, limit=1)["ids"]) == 0: if len(vector_store.get(where={"source": str(pdf_file)}, limit=1)["ids"]) == 0:
new_pdfs.append(pdf_file) new_pdfs.append(pdf_file)
else:
logging.info(f"Skipping PDF {pdf_file}, as it is already in the database.")
if len(new_pdfs) == 0: if len(new_pdfs) == 0:
return return