mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-04-07 11:28:09 +02:00
parent
74a472ece7
commit
2ad86aa9a6
@ -358,7 +358,13 @@ def extract_file_text(
|
||||
|
||||
try:
|
||||
if get_unstructured_api_key():
|
||||
return unstructured_to_text(file, file_name)
|
||||
try:
|
||||
return unstructured_to_text(file, file_name)
|
||||
except Exception as unstructured_error:
|
||||
logger.error(
|
||||
f"Failed to process with Unstructured: {str(unstructured_error)}. Falling back to normal processing."
|
||||
)
|
||||
# Fall through to normal processing
|
||||
|
||||
if file_name or extension:
|
||||
if extension is not None:
|
||||
|
@ -52,7 +52,7 @@ def _sdk_partition_request(
|
||||
|
||||
def unstructured_to_text(file: IO[Any], file_name: str) -> str:
|
||||
logger.debug(f"Starting to read file: {file_name}")
|
||||
req = _sdk_partition_request(file, file_name, strategy="auto")
|
||||
req = _sdk_partition_request(file, file_name, strategy="fast")
|
||||
|
||||
unstructured_client = UnstructuredClient(api_key_auth=get_unstructured_api_key())
|
||||
|
||||
|
@ -672,23 +672,25 @@ def upload_files_for_chat(
|
||||
else ChatFileType.PLAIN_TEXT
|
||||
)
|
||||
|
||||
file_content = file.file.read() # Read the file content
|
||||
|
||||
if file_type == ChatFileType.IMAGE:
|
||||
file_content = file.file
|
||||
file_content_io = file.file
|
||||
# NOTE: Image conversion to JPEG used to be enforced here.
|
||||
# This was removed to:
|
||||
# 1. Preserve original file content for downloads
|
||||
# 2. Maintain transparency in formats like PNG
|
||||
# 3. Ameliorate issue with file conversion
|
||||
else:
|
||||
file_content = io.BytesIO(file.file.read())
|
||||
file_content_io = io.BytesIO(file_content)
|
||||
|
||||
new_content_type = file.content_type
|
||||
|
||||
# store the file (now JPEG for images)
|
||||
# Store the file normally
|
||||
file_id = str(uuid.uuid4())
|
||||
file_store.save_file(
|
||||
file_name=file_id,
|
||||
content=file_content,
|
||||
content=file_content_io,
|
||||
display_name=file.filename,
|
||||
file_origin=FileOrigin.CHAT_UPLOAD,
|
||||
file_type=new_content_type or file_type.value,
|
||||
@ -698,7 +700,7 @@ def upload_files_for_chat(
|
||||
# to re-extract it every time we send a message
|
||||
if file_type == ChatFileType.DOC:
|
||||
extracted_text = extract_file_text(
|
||||
file=file.file,
|
||||
file=io.BytesIO(file_content), # use the bytes we already read
|
||||
file_name=file.filename or "",
|
||||
)
|
||||
text_file_id = str(uuid.uuid4())
|
||||
|
Loading…
x
Reference in New Issue
Block a user