mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-06-05 04:31:03 +02:00
Added HTML support to file text extractor (#1611)
This commit is contained in:
parent
e8306b0fa5
commit
180b592afe
@@ -47,6 +47,7 @@ VALID_FILE_EXTENSIONS = PLAIN_TEXT_FILE_EXTENSIONS + [
|
||||
".xlsx",
|
||||
".eml",
|
||||
".epub",
|
||||
".html",
|
||||
]
|
||||
|
||||
|
||||
@@ -287,5 +288,8 @@ def extract_file_text(
|
||||
elif extension == ".epub":
|
||||
return epub_to_text(file)
|
||||
|
||||
elif extension == ".html":
|
||||
return parse_html_page_basic(file)
|
||||
|
||||
else:
|
||||
return file_io_to_text(file)
|
||||
|
Loading…
x
Reference in New Issue
Block a user