Fixes some logging API mistakes

Nielson Janné 2025-04-11 19:58:11 +02:00
parent fc59aa0d2f
commit b1e8f19f00
4 changed files with 18 additions and 18 deletions
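
Most of the changes below replace calls on the root logging module (logging.info, logging.warning) with calls on a per-module logger obtained from logging.getLogger(__name__). A minimal sketch of that pattern, with hypothetical module and function names not taken from this repository:

import logging

# One logger per module, named after the module. Records emitted through it
# carry the module name and propagate to whatever handlers the application
# configured on the root logger.
logger = logging.getLogger(__name__)


def load_source(url: str) -> None:
    # Preferred: the module logger. The log record shows where it came from,
    # and its level can be tuned per module.
    logger.info(f"Loading source {url}")

    # Discouraged: logging.info() logs straight on the root logger and, if no
    # handlers are configured yet, silently calls basicConfig() itself.
    # logging.info(f"Loading source {url}")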

View File

@@ -12,7 +12,7 @@ from graphs.ret_gen import RetGenLangGraph
from langchain_chroma import Chroma
from parsers.parser import add_pdf_files, add_urls
-logging.basicConfig(level=logging.INFO)
+logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
parser = argparse.ArgumentParser(description="A Sogeti Nederland Generic RAG demo.")
@@ -162,7 +162,7 @@ async def set_starters():
try:
starters.append(cl.Starter(label=starter["label"], message=starter["message"]))
except KeyError:
-logging.warning(
+logger.warning(
"CHAINLIT_STARTERS environment is not a list with dictionaries containing 'label' and 'message' keys."
)
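
The level change at the top of this file pairs with the new logger.debug(...) calls elsewhere in the commit: library modules drop their own logging.basicConfig(level=logging.INFO) lines and leave configuration to this entry point, and the configured level moves from INFO to DEBUG so debug records are not filtered out. A small illustration of that standard-library behaviour (the logger name here is illustrative):

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("graphs.cond_ret_gen")

logger.debug("query: ...")            # filtered out: DEBUG is below the configured INFO level
logger.info("retrieved 4 documents")  # emitted

# Raising the root level, as this commit does via basicConfig(level=logging.DEBUG)
# in the entry point, is what lets the debug records through. A second
# basicConfig() call is a no-op once handlers exist, which is why library
# modules should not call it at all.
logging.getLogger().setLevel(logging.DEBUG)
logger.debug("query: ...")            # now emitted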

View File

@@ -16,6 +16,7 @@ from langgraph.graph import END, MessagesState, StateGraph
from langgraph.prebuilt import InjectedStore, ToolNode, tools_condition
from typing_extensions import Annotated
+logger = logging.getLogger(__name__)
class CondRetGenLangGraph:
def __init__(self, vector_store: Chroma, chat_model: BaseChatModel, embedding_model: Embeddings):
@@ -83,8 +84,8 @@ class CondRetGenLangGraph:
# Furthermore, it can not and should not have the `self` parameter.
# If you want to pass on state, please refer to:
# https://python.langchain.com/docs/concepts/tools/#special-type-annotations
-logging.info(f"Query: {query}")
-logging.info(f"user content: {full_user_content}")
+logger.debug(f"query: {query}")
+logger.debug(f"user content: {full_user_content}")
retrieved_docs = []
retrieved_docs = vector_store.similarity_search(query, k=4)
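
The comment block in the hunk above describes the shape required of the retrieval tool: a plain module-level function without self, receiving shared state through LangChain's special type annotations. A heavily simplified sketch of such a tool using the InjectedStore annotation imported above; the function body and store layout are assumptions, not the repository's actual implementation:

from typing_extensions import Annotated

from langchain_core.tools import tool
from langgraph.prebuilt import InjectedStore
from langgraph.store.base import BaseStore


@tool
def retrieve(query: str, store: Annotated[BaseStore, InjectedStore()]) -> str:
    """Look up documents related to the query."""
    # Arguments annotated with InjectedStore() are hidden from the model and
    # filled in by the runtime (e.g. ToolNode), so the model only supplies `query`.
    results = store.search(("docs",), query=query, limit=4)
    return "\n\n".join(str(item.value) for item in results)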

View File

@@ -11,7 +11,6 @@ from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import END, START, StateGraph
from typing_extensions import List, TypedDict
-logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
@@ -42,6 +41,7 @@ class RetGenLangGraph:
yield response.content
def _retrieve(self, state: State) -> dict:
+logger.debug(f"querying VS for: {state["question"]}")
self.last_retrieved_docs = self.vector_store.similarity_search(state["question"])
return {"context": self.last_retrieved_docs}
@@ -77,7 +77,7 @@ class RetGenLangGraph:
pdf_sources[source].add(doc.metadata["page"])
if len(pdf_sources[source]) == 0:
-logging.warning(f"PDF source {source} has no page number. Please check the metadata of the document.")
+logger.warning(f"PDF source {source} has no page number. Please check the metadata of the document.")
return pdf_sources

View File

@@ -9,7 +9,6 @@ from langchain_community.vectorstores.utils import filter_complex_metadata
from langchain_text_splitters import HTMLSemanticPreservingSplitter, RecursiveCharacterTextSplitter
from langchain_unstructured import UnstructuredLoader
-logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
@@ -32,12 +31,12 @@ def add_urls(vector_store: Chroma, urls: list[str], chunk_size: int) -> None:
The URL's will be fetched and split into chunks of text with the provided chunk size.
"""
-logging.info("Web sources to the vector store.")
+logger.info("Web sources to the vector store.")
all_splits = []
for url in urls:
if len(vector_store.get(where={"source": url}, limit=1)["ids"]) > 0:
-logging.info(f"Skipping URL {url}, as it is already in the database.")
+logger.info(f"Skipping URL {url}, as it is already in the database.")
continue
response = requests.get(url)
@@ -67,8 +66,8 @@ def add_urls(vector_store: Chroma, urls: list[str], chunk_size: int) -> None:
if len(all_splits) == 0:
return
-logging.info(f"{len(urls)} web sources split in {len(all_splits)} vector store documents")
-logging.info(f"Adding {len(all_splits)} vector store documents to vector store.")
+logger.info(f"{len(urls)} web sources split in {len(all_splits)} vector store documents")
+logger.info(f"Adding {len(all_splits)} vector store documents to vector store.")
filtered_splits = filter_complex_metadata(all_splits)
vector_store.add_documents(documents=filtered_splits)
@@ -87,22 +86,22 @@ def add_pdf_files(
The PDF file will be parsed per page and split into chunks of text with the provided chunk size and overlap.
"""
-logging.info("Adding PDF files to the vector store.")
+logger.info("Adding PDF files to the vector store.")
pdf_files = get_all_local_pdf_files(file_paths)
-logging.info(f"Found {len(pdf_files)} PDF files to add to the vector store.")
+logger.info(f"Found {len(pdf_files)} PDF files to add to the vector store.")
new_pdfs = []
for pdf_file in pdf_files:
if len(vector_store.get(where={"source": str(pdf_file)}, limit=1)["ids"]) == 0:
new_pdfs.append(pdf_file)
else:
-logging.info(f"Skipping PDF {pdf_file}, as it is already in the database.")
+logger.info(f"Skipping PDF {pdf_file}, as it is already in the database.")
if len(new_pdfs) == 0:
return
-logging.info(f"{len(new_pdfs)} PDF's to add to the vector store.")
+logger.info(f"{len(new_pdfs)} PDF's to add to the vector store.")
loaded_document = []
for file in new_pdfs:
@@ -116,8 +115,8 @@ def add_pdf_files(
pdf_splits = text_splitter.split_documents(loaded_document)
-logging.info(f"{len(new_pdfs)} PDF's split in {len(pdf_splits)} vector store documents")
-logging.info(f"Adding {len(pdf_splits)} vector store documents to vector store.")
+logger.info(f"{len(new_pdfs)} PDF's split in {len(pdf_splits)} vector store documents")
+logger.info(f"Adding {len(pdf_splits)} vector store documents to vector store.")
vector_store.add_documents(documents=filter_complex_metadata(pdf_splits))
@@ -136,6 +135,6 @@ def get_all_local_pdf_files(local_paths: list[Path]) -> list[Path]:
elif path.suffix == ".pdf":
all_pdf_files.append(path)
else:
-logging.warning(f"Ignoring path {path} as it is not a folder or pdf file.")
+logger.warning(f"Ignoring path {path} as it is not a folder or pdf file.")
return all_pdf_files