Add option to configure the use of (un)structured PDFs

This commit is contained in:
Nielson Janné 2025-03-17 12:46:52 +01:00
parent e79d7b9867
commit b9d2f348a2

View File

@@ -41,9 +41,9 @@ parser.add_argument(
parser.add_argument( parser.add_argument(
"--unstructured-pdf", "--unstructured-pdf",
action="store_true", action="store_true",
help="Use an unstructered PDF parser. " help="Use an unstructered PDF loader. "
"An unstructured PDF parser might be usefull for PDF files " "An unstructured PDF loader might be usefull for PDF files "
"that contain a lot of images, tables or text as images. " "that contain a lot of images with text, tables or (scanned) text as images. "
"Please use '-r' when switching parsers on already indexed data.", "Please use '-r' when switching parsers on already indexed data.",
) )
parser.add_argument("--pdf-chunk_size", type=int, default=1000, help="The size of the chunks to split the text into.") parser.add_argument("--pdf-chunk_size", type=int, default=1000, help="The size of the chunks to split the text into.")