mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-09-19 12:03:54 +02:00
Add more metadata options for File connector (#797)
This commit is contained in:
@@ -12,6 +12,7 @@ from danswer.connectors.cross_connector_utils.file_utils import detect_encoding
|
||||
from danswer.connectors.cross_connector_utils.file_utils import load_files_from_zip
|
||||
from danswer.connectors.cross_connector_utils.file_utils import read_file
|
||||
from danswer.connectors.cross_connector_utils.file_utils import read_pdf_file
|
||||
from danswer.connectors.cross_connector_utils.time_utils import time_str_to_utc
|
||||
from danswer.connectors.file.utils import check_file_ext_is_valid
|
||||
from danswer.connectors.file.utils import get_file_ext
|
||||
from danswer.connectors.interfaces import GenerateDocumentsOutput
|
||||
@@ -63,15 +64,20 @@ def _process_file(
|
||||
else:
|
||||
file_content_raw, metadata = read_file(file)
|
||||
|
||||
dt_str = metadata.get("doc_updated_at")
|
||||
final_time_updated = time_str_to_utc(dt_str) if dt_str else time_updated
|
||||
|
||||
return [
|
||||
Document(
|
||||
id=file_name,
|
||||
sections=[
|
||||
Section(link=metadata.get("link", None), text=file_content_raw.strip())
|
||||
Section(link=metadata.get("link"), text=file_content_raw.strip())
|
||||
],
|
||||
source=DocumentSource.FILE,
|
||||
semantic_identifier=file_name,
|
||||
doc_updated_at=time_updated,
|
||||
doc_updated_at=final_time_updated,
|
||||
primary_owners=metadata.get("primary_owners"),
|
||||
secondary_owners=metadata.get("secondary_owners"),
|
||||
metadata={},
|
||||
)
|
||||
]
|
||||
|
Reference in New Issue
Block a user