From b9d2f348a2f563624bbcbacae1d80d71b0e945be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nielson=20Jann=C3=A9?= Date: Mon, 17 Mar 2025 12:46:52 +0100 Subject: [PATCH] Add option to configure the use of (un)structured PDFs --- generic_rag/app.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/generic_rag/app.py b/generic_rag/app.py index 8e8a7c8..e0d8abd 100644 --- a/generic_rag/app.py +++ b/generic_rag/app.py @@ -41,9 +41,9 @@ parser.add_argument( parser.add_argument( "--unstructured-pdf", action="store_true", - help="Use an unstructered PDF parser. " - "An unstructured PDF parser might be usefull for PDF files " - "that contain a lot of images, tables or text as images. " + help="Use an unstructered PDF loader. " + "An unstructured PDF loader might be usefull for PDF files " + "that contain a lot of images with text, tables or (scanned) text as images. " "Please use '-r' when switching parsers on already indexed data.", ) parser.add_argument("--pdf-chunk_size", type=int, default=1000, help="The size of the chunks to split the text into.")