---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[2], line 1
----> 1 elements = partition_pdf(
2 filename="2024发注方针企划书(移动互联)10.31.pdf",
3 # strategy="hi_res",
4 extract_images_in_pdf=True,
5 # extract_image_block_types=["Image", "Table"],
6 infer_table_strategy=True,
7 hi_res_model_name='yolox',
8 languages=['chi_sim']
9 )
File ~/anaconda3/envs/py3-11-7/lib/python3.11/site-packages/unstructured/documents/elements.py:585, in process_metadata.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
583 @functools.wraps(func)
584 def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> list[Element]:
--> 585 elements = func(*args, **kwargs)
586 call_args = get_call_args_applying_defaults(func, *args, **kwargs)
588 unique_element_ids: bool = call_args.get("unique_element_ids", False)
File ~/anaconda3/envs/py3-11-7/lib/python3.11/site-packages/unstructured/file_utils/filetype.py:811, in add_filetype.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
809 @functools.wraps(func)
810 def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> list[Element]:
--> 811 elements = func(*args, **kwargs)
813 for element in elements:
814 # NOTE(robinson) - Attached files have already run through this logic
815 # in their own partitioning function
816 if element.metadata.attached_to_filename is None:
File ~/anaconda3/envs/py3-11-7/lib/python3.11/site-packages/unstructured/file_utils/filetype.py:769, in add_metadata.<locals>.wrapper(*args, **kwargs)
767 @functools.wraps(func)
768 def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> list[Element]:
--> 769 elements = func(*args, **kwargs)
770 call_args = get_call_args_applying_defaults(func, *args, **kwargs)
772 if call_args.get("metadata_filename"):
File ~/anaconda3/envs/py3-11-7/lib/python3.11/site-packages/unstructured/chunking/dispatch.py:74, in add_chunking_strategy.<locals>.wrapper(*args, **kwargs)
71 """The decorated function is replaced with this one."""
73 # -- call the partitioning function to get the elements --
---> 74 elements = func(*args, **kwargs)
76 # -- look for a chunking-strategy argument --
77 call_args = get_call_args_applying_defaults(func, *args, **kwargs)
File ~/anaconda3/envs/py3-11-7/lib/python3.11/site-packages/unstructured/partition/pdf.py:228, in partition_pdf(filename, file, include_page_breaks, strategy, infer_table_structure, ocr_languages, languages, metadata_filename, metadata_last_modified, chunking_strategy, hi_res_model_name, extract_images_in_pdf, extract_image_block_types, extract_image_block_output_dir, extract_image_block_to_payload, starting_page_number, extract_forms, form_extraction_skip_tables, password, pdfminer_line_margin, pdfminer_char_margin, pdfminer_line_overlap, pdfminer_word_margin, **kwargs)
225 exactly_one(filename=filename, file=file)
227 languages = check_language_args(languages or [], ocr_languages)
--> 228 return partition_pdf_or_image(
229 filename=filename,
230 file=file,
231 include_page_breaks=include_page_breaks,
232 strategy=strategy,
233 infer_table_structure=infer_table_structure,
234 languages=languages,
235 metadata_last_modified=metadata_last_modified,
236 hi_res_model_name=hi_res_model_name,
237 extract_images_in_pdf=extract_images_in_pdf,
238 extract_image_block_types=extract_image_block_types,
239 extract_image_block_output_dir=extract_image_block_output_dir,
240 extract_image_block_to_payload=extract_image_block_to_payload,
241 starting_page_number=starting_page_number,
242 extract_forms=extract_forms,
243 form_extraction_skip_tables=form_extraction_skip_tables,
244 password=password,
245 pdfminer_line_margin=pdfminer_line_margin,
246 pdfminer_char_margin=pdfminer_char_margin,
247 pdfminer_line_overlap=pdfminer_line_overlap,
248 pdfminer_word_margin=pdfminer_word_margin,
249 **kwargs,
250 )
File ~/anaconda3/envs/py3-11-7/lib/python3.11/site-packages/unstructured/partition/pdf.py:341, in partition_pdf_or_image(filename, file, is_image, include_page_breaks, strategy, infer_table_structure, languages, metadata_last_modified, hi_res_model_name, extract_images_in_pdf, extract_image_block_types, extract_image_block_output_dir, extract_image_block_to_payload, starting_page_number, extract_forms, form_extraction_skip_tables, password, pdfminer_line_margin, pdfminer_char_margin, pdfminer_line_overlap, pdfminer_word_margin, ocr_agent, table_ocr_agent, **kwargs)
339 with warnings.catch_warnings():
340 warnings.simplefilter("ignore")
--> 341 elements = _partition_pdf_or_image_local(
342 filename=filename,
343 file=spooled_to_bytes_io_if_needed(file),
344 is_image=is_image,
345 infer_table_structure=infer_table_structure,
346 include_page_breaks=include_page_breaks,
347 languages=languages,
348 ocr_languages=ocr_languages,
349 metadata_last_modified=metadata_last_modified or last_modified,
350 hi_res_model_name=hi_res_model_name,
351 pdf_text_extractable=pdf_text_extractable,
352 extract_images_in_pdf=extract_images_in_pdf,
353 extract_image_block_types=extract_image_block_types,
354 extract_image_block_output_dir=extract_image_block_output_dir,
355 extract_image_block_to_payload=extract_image_block_to_payload,
356 starting_page_number=starting_page_number,
357 extract_forms=extract_forms,
358 form_extraction_skip_tables=form_extraction_skip_tables,
359 password=password,
360 pdfminer_config=pdfminer_config,
361 ocr_agent=ocr_agent,
362 table_ocr_agent=table_ocr_agent,
363 **kwargs,
364 )
365 # NOTE(crag): do not call _process_uncategorized_text_elements here, because
366 # extracted elements (which are text blocks outside of OD-determined blocks)
367 # are likely not Titles and should not be identified as such.
368 return elements
File ~/anaconda3/envs/py3-11-7/lib/python3.11/site-packages/unstructured/utils.py:216, in requires_dependencies.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
213 @wraps(func)
214 def wrapper(*args: _P.args, **kwargs: _P.kwargs):
215 run_check()
--> 216 return func(*args, **kwargs)
File ~/anaconda3/envs/py3-11-7/lib/python3.11/site-packages/unstructured/partition/pdf.py:649, in _partition_pdf_or_image_local(filename, file, is_image, infer_table_structure, include_page_breaks, languages, ocr_languages, ocr_mode, model_name, hi_res_model_name, pdf_image_dpi, metadata_last_modified, pdf_text_extractable, extract_images_in_pdf, extract_image_block_types, extract_image_block_output_dir, extract_image_block_to_payload, analysis, analyzed_image_output_dir_path, starting_page_number, extract_forms, form_extraction_skip_tables, pdf_hi_res_max_pages, password, pdfminer_config, ocr_agent, table_ocr_agent, **kwargs)
646 skip_analysis_dump = env_config.ANALYSIS_DUMP_OD_SKIP
648 if file is None:
--> 649 inferred_document_layout = process_file_with_model(
650 filename,
651 is_image=is_image,
652 model_name=hi_res_model_name,
653 pdf_image_dpi=pdf_image_dpi,
654 password=password,
655 )
657 extracted_layout, layouts_links = (
658 process_file_with_pdfminer(
659 filename=filename,
(...) 665 else ([], [])
666 )
668 if analysis:
File ~/anaconda3/envs/py3-11-7/lib/python3.11/site-packages/unstructured_inference/inference/layout.py:386, in process_file_with_model(filename, model_name, is_image, fixed_layouts, extract_tables, pdf_image_dpi, **kwargs)
374 def process_file_with_model(
375 filename: str,
376 model_name: Optional[str],
(...) 381 **kwargs,
382 ) -> DocumentLayout:
383 """Processes pdf file with name filename into a DocumentLayout by using a model identified by
384 model_name."""
--> 386 model = get_model(model_name, **kwargs)
387 if isinstance(model, UnstructuredObjectDetectionModel):
388 detection_model = model
TypeError: get_model() got an unexpected keyword argument 'password'