forked from AI_team/Philosophy-RAG-demo
Fixes some logging API mistakes
parent fc59aa0d2f
commit b1e8f19f00
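
The hunks below all apply the same standard-library logging hygiene: each module creates one named logger via logging.getLogger(__name__), and logging.basicConfig() is called only in the application entry point. A minimal sketch of that pattern, standard library only (the do_work function is illustrative, not from this repo):

import logging

logger = logging.getLogger(__name__)  # one named logger per module

def do_work() -> None:
    # Emitted through the module's logger, so the application controls
    # level, format, and handlers in one place.
    logger.debug("doing work")

if __name__ == "__main__":
    # basicConfig() only takes effect on the first call, so leaving it in
    # library modules makes the effective config depend on import order;
    # the hunks below delete those stray calls and keep a single one here.
    logging.basicConfig(level=logging.DEBUG)
    do_work()
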
@@ -12,7 +12,7 @@ from graphs.ret_gen import RetGenLangGraph
 from langchain_chroma import Chroma
 from parsers.parser import add_pdf_files, add_urls
 
-logging.basicConfig(level=logging.INFO)
+logging.basicConfig(level=logging.DEBUG)
 logger = logging.getLogger(__name__)
 
 parser = argparse.ArgumentParser(description="A Sogeti Nederland Generic RAG demo.")
@@ -162,7 +162,7 @@ async def set_starters():
         try:
             starters.append(cl.Starter(label=starter["label"], message=starter["message"]))
         except KeyError:
-            logging.warning(
+            logger.warning(
                 "CHAINLIT_STARTERS environment is not a list with dictionaries containing 'label' and 'message' keys."
             )
 
@@ -16,6 +16,7 @@ from langgraph.graph import END, MessagesState, StateGraph
 from langgraph.prebuilt import InjectedStore, ToolNode, tools_condition
 from typing_extensions import Annotated
 
+logger = logging.getLogger(__name__)
 
 class CondRetGenLangGraph:
     def __init__(self, vector_store: Chroma, chat_model: BaseChatModel, embedding_model: Embeddings):
@@ -83,8 +84,8 @@ class CondRetGenLangGraph:
         # Furthermore, it can not and should not have the `self` parameter.
         # If you want to pass on state, please refer to:
         # https://python.langchain.com/docs/concepts/tools/#special-type-annotations
-        logging.info(f"Query: {query}")
-        logging.info(f"user content: {full_user_content}")
+        logger.debug(f"query: {query}")
+        logger.debug(f"user content: {full_user_content}")
 
-        retrieved_docs = []
+        retrieved_docs = vector_store.similarity_search(query, k=4)
 
@@ -11,7 +11,6 @@ from langgraph.checkpoint.memory import MemorySaver
 from langgraph.graph import END, START, StateGraph
 from typing_extensions import List, TypedDict
 
-logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
 
@@ -42,6 +41,7 @@ class RetGenLangGraph:
         yield response.content
 
     def _retrieve(self, state: State) -> dict:
+        logger.debug(f"querying VS for: {state["question"]}")
         self.last_retrieved_docs = self.vector_store.similarity_search(state["question"])
         return {"context": self.last_retrieved_docs}
 
@@ -77,7 +77,7 @@ class RetGenLangGraph:
                 pdf_sources[source].add(doc.metadata["page"])
 
             if len(pdf_sources[source]) == 0:
-                logging.warning(f"PDF source {source} has no page number. Please check the metadata of the document.")
+                logger.warning(f"PDF source {source} has no page number. Please check the metadata of the document.")
 
         return pdf_sources
 
@@ -9,7 +9,6 @@ from langchain_community.vectorstores.utils import filter_complex_metadata
 from langchain_text_splitters import HTMLSemanticPreservingSplitter, RecursiveCharacterTextSplitter
 from langchain_unstructured import UnstructuredLoader
 
-logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
 
@@ -32,12 +31,12 @@ def add_urls(vector_store: Chroma, urls: list[str], chunk_size: int) -> None:
 
     The URL's will be fetched and split into chunks of text with the provided chunk size.
     """
-    logging.info("Web sources to the vector store.")
+    logger.info("Web sources to the vector store.")
 
     all_splits = []
     for url in urls:
         if len(vector_store.get(where={"source": url}, limit=1)["ids"]) > 0:
-            logging.info(f"Skipping URL {url}, as it is already in the database.")
+            logger.info(f"Skipping URL {url}, as it is already in the database.")
             continue
 
         response = requests.get(url)
@@ -67,8 +66,8 @@ def add_urls(vector_store: Chroma, urls: list[str], chunk_size: int) -> None:
     if len(all_splits) == 0:
         return
 
-    logging.info(f"{len(urls)} web sources split in {len(all_splits)} vector store documents")
-    logging.info(f"Adding {len(all_splits)} vector store documents to vector store.")
+    logger.info(f"{len(urls)} web sources split in {len(all_splits)} vector store documents")
+    logger.info(f"Adding {len(all_splits)} vector store documents to vector store.")
 
     filtered_splits = filter_complex_metadata(all_splits)
     vector_store.add_documents(documents=filtered_splits)
@@ -87,22 +86,22 @@ def add_pdf_files(
 
     The PDF file will be parsed per page and split into chunks of text with the provided chunk size and overlap.
     """
-    logging.info("Adding PDF files to the vector store.")
+    logger.info("Adding PDF files to the vector store.")
 
     pdf_files = get_all_local_pdf_files(file_paths)
-    logging.info(f"Found {len(pdf_files)} PDF files to add to the vector store.")
+    logger.info(f"Found {len(pdf_files)} PDF files to add to the vector store.")
 
     new_pdfs = []
     for pdf_file in pdf_files:
         if len(vector_store.get(where={"source": str(pdf_file)}, limit=1)["ids"]) == 0:
             new_pdfs.append(pdf_file)
         else:
-            logging.info(f"Skipping PDF {pdf_file}, as it is already in the database.")
+            logger.info(f"Skipping PDF {pdf_file}, as it is already in the database.")
 
     if len(new_pdfs) == 0:
         return
 
-    logging.info(f"{len(new_pdfs)} PDF's to add to the vector store.")
+    logger.info(f"{len(new_pdfs)} PDF's to add to the vector store.")
 
     loaded_document = []
     for file in new_pdfs:
@@ -116,8 +115,8 @@ def add_pdf_files(
 
     pdf_splits = text_splitter.split_documents(loaded_document)
 
-    logging.info(f"{len(new_pdfs)} PDF's split in {len(pdf_splits)} vector store documents")
-    logging.info(f"Adding {len(pdf_splits)} vector store documents to vector store.")
+    logger.info(f"{len(new_pdfs)} PDF's split in {len(pdf_splits)} vector store documents")
+    logger.info(f"Adding {len(pdf_splits)} vector store documents to vector store.")
 
     vector_store.add_documents(documents=filter_complex_metadata(pdf_splits))
 
@@ -136,6 +135,6 @@ def get_all_local_pdf_files(local_paths: list[Path]) -> list[Path]:
         elif path.suffix == ".pdf":
             all_pdf_files.append(path)
         else:
-            logging.warning(f"Ignoring path {path} as it is not a folder or pdf file.")
+            logger.warning(f"Ignoring path {path} as it is not a folder or pdf file.")
 
     return all_pdf_files
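
Why the logging.*(...) to logger.*(...) swaps above matter: the module-level convenience functions (logging.info, logging.warning, and friends) emit through the root logger and, when the root logger has no handlers yet, call basicConfig() themselves, silently locking in a default configuration. A quick way to see the two paths side by side (standard library only, not part of this commit):

import logging

logging.warning("via the root logger")  # implicitly calls basicConfig() if the root has no handlers
logging.getLogger("demo").warning("via a named logger")  # leaves configuration to the application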