From 0b5e3e5ee44424136788c38520d0859718736e7c Mon Sep 17 00:00:00 2001
From: Evan Lohn
Date: Thu, 29 May 2025 14:09:46 -0400
Subject: [PATCH] skip excel files that openpyxl fails on (#4787)

---
 backend/onyx/file_processing/extract_file_text.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/backend/onyx/file_processing/extract_file_text.py b/backend/onyx/file_processing/extract_file_text.py
index 3a0b626b555..b0711641865 100644
--- a/backend/onyx/file_processing/extract_file_text.py
+++ b/backend/onyx/file_processing/extract_file_text.py
@@ -360,6 +360,13 @@ def xlsx_to_text(file: IO[Any], file_name: str = "") -> str:
         else:
             logger.warning(error_str)
         return ""
+    except Exception as e:
+        if "File contains no valid workbook part" in str(e):
+            logger.error(
+                f"Failed to extract text from {file_name or 'xlsx file'}. This happens due to a bug in openpyxl. {e}"
+            )
+            return ""
+        raise e
 
     text_content = []
     for sheet in workbook.worksheets: