mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-06-06 13:09:39 +02:00
possible fix for confluence query filter (#4280)
* possible fix for confluence query filter
* nuke the attachment filter query ... it doesn't work!

---------

Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
This commit is contained in:
parent
2aa4697ac8
commit
036648146d
@ -65,20 +65,6 @@ _RESTRICTIONS_EXPANSION_FIELDS = [
|
|||||||
|
|
||||||
_SLIM_DOC_BATCH_SIZE = 5000
|
_SLIM_DOC_BATCH_SIZE = 5000
|
||||||
|
|
||||||
_ATTACHMENT_EXTENSIONS_TO_FILTER_OUT = [
|
|
||||||
"gif",
|
|
||||||
"mp4",
|
|
||||||
"mov",
|
|
||||||
"mp3",
|
|
||||||
"wav",
|
|
||||||
]
|
|
||||||
_FULL_EXTENSION_FILTER_STRING = "".join(
|
|
||||||
[
|
|
||||||
f" and title!~'*.{extension}'"
|
|
||||||
for extension in _ATTACHMENT_EXTENSIONS_TO_FILTER_OUT
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
ONE_HOUR = 3600
|
ONE_HOUR = 3600
|
||||||
|
|
||||||
|
|
||||||
@ -209,7 +195,6 @@ class ConfluenceConnector(
|
|||||||
def _construct_attachment_query(self, confluence_page_id: str) -> str:
|
def _construct_attachment_query(self, confluence_page_id: str) -> str:
|
||||||
attachment_query = f"type=attachment and container='{confluence_page_id}'"
|
attachment_query = f"type=attachment and container='{confluence_page_id}'"
|
||||||
attachment_query += self.cql_label_filter
|
attachment_query += self.cql_label_filter
|
||||||
attachment_query += _FULL_EXTENSION_FILTER_STRING
|
|
||||||
return attachment_query
|
return attachment_query
|
||||||
|
|
||||||
def _get_comment_string_for_page_id(self, page_id: str) -> str:
|
def _get_comment_string_for_page_id(self, page_id: str) -> str:
|
||||||
@ -374,11 +359,13 @@ class ConfluenceConnector(
|
|||||||
if not validate_attachment_filetype(
|
if not validate_attachment_filetype(
|
||||||
attachment,
|
attachment,
|
||||||
):
|
):
|
||||||
|
logger.info(f"Skipping attachment: {attachment['title']}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
logger.info(f"Processing attachment: {attachment['title']}")
|
||||||
|
|
||||||
# Attempt to get textual content or image summarization:
|
# Attempt to get textual content or image summarization:
|
||||||
try:
|
try:
|
||||||
logger.info(f"Processing attachment: {attachment['title']}")
|
|
||||||
response = convert_attachment_to_content(
|
response = convert_attachment_to_content(
|
||||||
confluence_client=self.confluence_client,
|
confluence_client=self.confluence_client,
|
||||||
attachment=attachment,
|
attachment=attachment,
|
||||||
|
@ -15,6 +15,7 @@ EXCLUDED_IMAGE_TYPES = [
|
|||||||
"image/tiff",
|
"image/tiff",
|
||||||
"image/gif",
|
"image/gif",
|
||||||
"image/svg+xml",
|
"image/svg+xml",
|
||||||
|
"image/avif",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user