Handle PDF parse failures gracefully (#599)

This commit is contained in:
Yuhong Sun 2023-10-19 17:46:13 -07:00 committed by GitHub
parent 76f1f17710
commit f6982b03b6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -33,7 +33,11 @@ def read_pdf_file(file: IO[Any], file_name: str, pdf_pass: str | None = None) ->
# can be discoverable by title.
return ""
return "\n".join(page.extract_text() for page in pdf_reader.pages)
try:
return "\n".join(page.extract_text() for page in pdf_reader.pages)
except Exception:
logger.exception(f"Failed to read PDF {file_name}")
return ""
def is_macos_resource_fork_file(file_name: str) -> bool: