mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-06-02 11:09:20 +02:00
Return empty string for encrypted PDF (#369)
This commit is contained in:
parent
d6e87df548
commit
80a08bbf0c
@ -308,9 +308,13 @@ def extract_text(file: dict[str, str], service: discovery.Resource) -> str:
|
|||||||
response = service.files().get_media(fileId=file["id"]).execute()
|
response = service.files().get_media(fileId=file["id"]).execute()
|
||||||
pdf_stream = io.BytesIO(response)
|
pdf_stream = io.BytesIO(response)
|
||||||
pdf_reader = PdfReader(pdf_stream)
|
pdf_reader = PdfReader(pdf_stream)
|
||||||
|
|
||||||
if pdf_reader.is_encrypted:
|
if pdf_reader.is_encrypted:
|
||||||
logger.warning(f"Google drive file: {file['name']} is encrypted danswer will ignore it's content")
|
logger.warning(
|
||||||
else:
|
f"Google drive file: {file['name']} is encrypted - Danswer will ignore it's content"
|
||||||
|
)
|
||||||
|
return ""
|
||||||
|
|
||||||
return "\n".join(page.extract_text() for page in pdf_reader.pages)
|
return "\n".join(page.extract_text() for page in pdf_reader.pages)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user