Fixes some logging API mistakes

This commit is contained in:
Nielson Janné 2025-04-11 19:58:11 +02:00
parent fc59aa0d2f
commit b1e8f19f00
4 changed files with 18 additions and 18 deletions
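The changes follow the standard library-logging convention: each module obtains a named logger via `logging.getLogger(__name__)` and leaves configuration (`logging.basicConfig`) to the application entry point, instead of calling functions on the root `logging` module directly. A minimal, self-contained sketch of that convention (the `add_sources` helper is illustrative, not code from this repository):

```python
import logging

# Library-style module code: get a logger named after the module; do not call basicConfig here.
logger = logging.getLogger(__name__)


def add_sources(count: int) -> None:
    # Log through the module logger rather than the root "logging" module, as the diff does.
    logger.info("Adding %d sources to the vector store.", count)
    logger.debug("Detail that only appears when the configured level allows it.")


if __name__ == "__main__":
    # Entry-point code: configure logging exactly once, before any records are emitted.
    logging.basicConfig(level=logging.DEBUG)
    add_sources(3)
```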

View File

@@ -12,7 +12,7 @@ from graphs.ret_gen import RetGenLangGraph
 from langchain_chroma import Chroma
 from parsers.parser import add_pdf_files, add_urls
-logging.basicConfig(level=logging.INFO)
+logging.basicConfig(level=logging.DEBUG)
 logger = logging.getLogger(__name__)
 parser = argparse.ArgumentParser(description="A Sogeti Nederland Generic RAG demo.")
@@ -162,7 +162,7 @@ async def set_starters():
         try:
             starters.append(cl.Starter(label=starter["label"], message=starter["message"]))
         except KeyError:
-            logging.warning(
+            logger.warning(
                 "CHAINLIT_STARTERS environment is not a list with dictionaries containing 'label' and 'message' keys."
             )
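The first hunk also raises the entry point's root configuration from INFO to DEBUG, presumably so the `logger.debug` calls added in the other files actually appear. If root-level DEBUG is too noisy, a narrower option (sketch only; the dotted module name is a guess at this repository's layout, not taken from the diff) is to keep the root at INFO and raise a single module's logger:

```python
import logging

# Root stays at INFO; only the hypothetical "graphs.cond_ret_gen" module is turned up to DEBUG.
# Its DEBUG records still propagate to the root handler installed by basicConfig.
logging.basicConfig(level=logging.INFO)
logging.getLogger("graphs.cond_ret_gen").setLevel(logging.DEBUG)
```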

View File

@@ -16,6 +16,7 @@ from langgraph.graph import END, MessagesState, StateGraph
 from langgraph.prebuilt import InjectedStore, ToolNode, tools_condition
 from typing_extensions import Annotated
+logger = logging.getLogger(__name__)
 class CondRetGenLangGraph:
     def __init__(self, vector_store: Chroma, chat_model: BaseChatModel, embedding_model: Embeddings):
@@ -83,8 +84,8 @@ class CondRetGenLangGraph:
         # Furthermore, it can not and should not have the `self` parameter.
         # If you want to pass on state, please refer to:
         # https://python.langchain.com/docs/concepts/tools/#special-type-annotations
-        logging.info(f"Query: {query}")
-        logging.info(f"user content: {full_user_content}")
+        logger.debug(f"query: {query}")
+        logger.debug(f"user content: {full_user_content}")
         retrieved_docs = []
         retrieved_docs = vector_store.similarity_search(query, k=4)

View File

@@ -11,7 +11,6 @@ from langgraph.checkpoint.memory import MemorySaver
 from langgraph.graph import END, START, StateGraph
 from typing_extensions import List, TypedDict
-logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -42,6 +41,7 @@ class RetGenLangGraph:
         yield response.content

     def _retrieve(self, state: State) -> dict:
+        logger.debug(f"querying VS for: {state["question"]}")
         self.last_retrieved_docs = self.vector_store.similarity_search(state["question"])
         return {"context": self.last_retrieved_docs}
@@ -77,7 +77,7 @@ class RetGenLangGraph:
                 pdf_sources[source].add(doc.metadata["page"])
             if len(pdf_sources[source]) == 0:
-                logging.warning(f"PDF source {source} has no page number. Please check the metadata of the document.")
+                logger.warning(f"PDF source {source} has no page number. Please check the metadata of the document.")
         return pdf_sources

View File

@@ -9,7 +9,6 @@ from langchain_community.vectorstores.utils import filter_complex_metadata
 from langchain_text_splitters import HTMLSemanticPreservingSplitter, RecursiveCharacterTextSplitter
 from langchain_unstructured import UnstructuredLoader
-logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
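Dropping `logging.basicConfig(...)` from the library modules matters because `basicConfig` is a no-op once the root logger already has handlers: whichever call runs first (often an import-time call in a library module) silently wins and pins the level. A standalone sketch of that behavior, not repository code:

```python
import logging

logging.basicConfig(level=logging.INFO)   # e.g. an import-time call in a library module
logging.basicConfig(level=logging.DEBUG)  # the entry point's later call is now a no-op

logging.getLogger(__name__).debug("not shown: the root logger is still at INFO")

logging.basicConfig(level=logging.DEBUG, force=True)  # Python 3.8+: force reconfiguration
logging.getLogger(__name__).debug("shown once force=True has reconfigured the root logger")
```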
@@ -32,12 +31,12 @@ def add_urls(vector_store: Chroma, urls: list[str], chunk_size: int) -> None:
     The URL's will be fetched and split into chunks of text with the provided chunk size.
     """
-    logging.info("Web sources to the vector store.")
+    logger.info("Web sources to the vector store.")
     all_splits = []
     for url in urls:
         if len(vector_store.get(where={"source": url}, limit=1)["ids"]) > 0:
-            logging.info(f"Skipping URL {url}, as it is already in the database.")
+            logger.info(f"Skipping URL {url}, as it is already in the database.")
             continue
         response = requests.get(url)
@@ -67,8 +66,8 @@ def add_urls(vector_store: Chroma, urls: list[str], chunk_size: int) -> None:
     if len(all_splits) == 0:
         return
-    logging.info(f"{len(urls)} web sources split in {len(all_splits)} vector store documents")
-    logging.info(f"Adding {len(all_splits)} vector store documents to vector store.")
+    logger.info(f"{len(urls)} web sources split in {len(all_splits)} vector store documents")
+    logger.info(f"Adding {len(all_splits)} vector store documents to vector store.")
     filtered_splits = filter_complex_metadata(all_splits)
     vector_store.add_documents(documents=filtered_splits)
@@ -87,22 +86,22 @@ def add_pdf_files(
     The PDF file will be parsed per page and split into chunks of text with the provided chunk size and overlap.
     """
-    logging.info("Adding PDF files to the vector store.")
+    logger.info("Adding PDF files to the vector store.")
     pdf_files = get_all_local_pdf_files(file_paths)
-    logging.info(f"Found {len(pdf_files)} PDF files to add to the vector store.")
+    logger.info(f"Found {len(pdf_files)} PDF files to add to the vector store.")
     new_pdfs = []
     for pdf_file in pdf_files:
         if len(vector_store.get(where={"source": str(pdf_file)}, limit=1)["ids"]) == 0:
             new_pdfs.append(pdf_file)
         else:
-            logging.info(f"Skipping PDF {pdf_file}, as it is already in the database.")
+            logger.info(f"Skipping PDF {pdf_file}, as it is already in the database.")
     if len(new_pdfs) == 0:
         return
-    logging.info(f"{len(new_pdfs)} PDF's to add to the vector store.")
+    logger.info(f"{len(new_pdfs)} PDF's to add to the vector store.")
     loaded_document = []
     for file in new_pdfs:
@@ -116,8 +115,8 @@ def add_pdf_files(
     pdf_splits = text_splitter.split_documents(loaded_document)
-    logging.info(f"{len(new_pdfs)} PDF's split in {len(pdf_splits)} vector store documents")
-    logging.info(f"Adding {len(pdf_splits)} vector store documents to vector store.")
+    logger.info(f"{len(new_pdfs)} PDF's split in {len(pdf_splits)} vector store documents")
+    logger.info(f"Adding {len(pdf_splits)} vector store documents to vector store.")
     vector_store.add_documents(documents=filter_complex_metadata(pdf_splits))
@@ -136,6 +135,6 @@ def get_all_local_pdf_files(local_paths: list[Path]) -> list[Path]:
         elif path.suffix == ".pdf":
             all_pdf_files.append(path)
         else:
-            logging.warning(f"Ignoring path {path} as it is not a folder or pdf file.")
+            logger.warning(f"Ignoring path {path} as it is not a folder or pdf file.")
     return all_pdf_files