Gdrive connector: ignore encrypted PDF files (#353) (#362)

Thanks for your contribution!
This commit is contained in:
Yohann Fabri 2023-09-01 01:57:08 +02:00 committed by GitHub
parent ac2a4f9051
commit d6e87df548
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -308,7 +308,10 @@ def extract_text(file: dict[str, str], service: discovery.Resource) -> str:
response = service.files().get_media(fileId=file["id"]).execute()
pdf_stream = io.BytesIO(response)
pdf_reader = PdfReader(pdf_stream)
return "\n".join(page.extract_text() for page in pdf_reader.pages)
if pdf_reader.is_encrypted:
logger.warning(f"Google drive file: {file['name']} is encrypted danswer will ignore it's content")
else:
return "\n".join(page.extract_text() for page in pdf_reader.pages)
class GoogleDriveConnector(LoadConnector, PollConnector):