diff --git a/backend/onyx/connectors/onyx_jira/connector.py b/backend/onyx/connectors/onyx_jira/connector.py index 99cde93e866..3e29d3acd4f 100644 --- a/backend/onyx/connectors/onyx_jira/connector.py +++ b/backend/onyx/connectors/onyx_jira/connector.py @@ -44,6 +44,18 @@ JIRA_API_VERSION = os.environ.get("JIRA_API_VERSION") or "2" _JIRA_SLIM_PAGE_SIZE = 500 _JIRA_FULL_PAGE_SIZE = 50 +# Constants for Jira field names +_FIELD_REPORTER = "reporter" +_FIELD_ASSIGNEE = "assignee" +_FIELD_PRIORITY = "priority" +_FIELD_STATUS = "status" +_FIELD_RESOLUTION = "resolution" +_FIELD_LABELS = "labels" +_FIELD_KEY = "key" +_FIELD_CREATED = "created" +_FIELD_DUEDATE = "duedate" +_FIELD_ISSUETYPE = "issuetype" + def _perform_jql_search( jira_client: JIRA, @@ -107,32 +119,40 @@ def process_jira_issue( page_url = build_jira_url(jira_client, issue.key) + metadata_dict: dict[str, str | list[str]] = {} people = set() try: - creator = best_effort_get_field_from_issue(issue, "creator") + creator = best_effort_get_field_from_issue(issue, _FIELD_REPORTER) if basic_expert_info := best_effort_basic_expert_info(creator): people.add(basic_expert_info) + metadata_dict[_FIELD_REPORTER] = basic_expert_info.get_semantic_name() except Exception: # Author should exist but if not, doesn't matter pass try: - assignee = best_effort_get_field_from_issue(issue, "assignee") + assignee = best_effort_get_field_from_issue(issue, _FIELD_ASSIGNEE) if basic_expert_info := best_effort_basic_expert_info(assignee): people.add(basic_expert_info) + metadata_dict[_FIELD_ASSIGNEE] = basic_expert_info.get_semantic_name() except Exception: # Author should exist but if not, doesn't matter pass - metadata_dict = {} - if priority := best_effort_get_field_from_issue(issue, "priority"): - metadata_dict["priority"] = priority.name - if status := best_effort_get_field_from_issue(issue, "status"): - metadata_dict["status"] = status.name - if resolution := best_effort_get_field_from_issue(issue, "resolution"): - metadata_dict["resolution"] = resolution.name - if labels := best_effort_get_field_from_issue(issue, "labels"): - metadata_dict["labels"] = labels + if priority := best_effort_get_field_from_issue(issue, _FIELD_PRIORITY): + metadata_dict[_FIELD_PRIORITY] = priority.name + if status := best_effort_get_field_from_issue(issue, _FIELD_STATUS): + metadata_dict[_FIELD_STATUS] = status.name + if resolution := best_effort_get_field_from_issue(issue, _FIELD_RESOLUTION): + metadata_dict[_FIELD_RESOLUTION] = resolution.name + if labels := best_effort_get_field_from_issue(issue, _FIELD_LABELS): + metadata_dict[_FIELD_LABELS] = labels + if created := best_effort_get_field_from_issue(issue, _FIELD_CREATED): + metadata_dict[_FIELD_CREATED] = created + if duedate := best_effort_get_field_from_issue(issue, _FIELD_DUEDATE): + metadata_dict[_FIELD_DUEDATE] = duedate + if issuetype := best_effort_get_field_from_issue(issue, _FIELD_ISSUETYPE): + metadata_dict[_FIELD_ISSUETYPE] = issuetype.name return Document( id=page_url, @@ -277,7 +297,7 @@ class JiraConnector(CheckpointedConnector[JiraConnectorCheckpoint], SlimConnecto max_results=_JIRA_SLIM_PAGE_SIZE, fields="key", ): - issue_key = best_effort_get_field_from_issue(issue, "key") + issue_key = best_effort_get_field_from_issue(issue, _FIELD_KEY) id = build_jira_url(self.jira_client, issue_key) slim_doc_batch.append( SlimDocument( diff --git a/backend/onyx/connectors/onyx_jira/utils.py b/backend/onyx/connectors/onyx_jira/utils.py index b5ec5f0a01d..96fb9276ee9 100644 --- a/backend/onyx/connectors/onyx_jira/utils.py +++ b/backend/onyx/connectors/onyx_jira/utils.py @@ -23,8 +23,8 @@ JIRA_API_VERSION = os.environ.get("JIRA_API_VERSION") or "2" def best_effort_basic_expert_info(obj: Any) -> BasicExpertInfo | None: display_name = None email = None - if hasattr(obj, "display_name"): - display_name = obj.display_name + if hasattr(obj, "displayName"): + display_name = obj.displayName else: display_name = obj.get("displayName") diff --git a/backend/tests/daily/connectors/blob/test_blob_connector.py b/backend/tests/daily/connectors/blob/test_blob_connector.py index b2819a2d8c3..a699c55560b 100644 --- a/backend/tests/daily/connectors/blob/test_blob_connector.py +++ b/backend/tests/daily/connectors/blob/test_blob_connector.py @@ -9,7 +9,6 @@ from onyx.connectors.blob.connector import BlobStorageConnector from onyx.connectors.models import Document from onyx.connectors.models import TextSection from onyx.file_processing.extract_file_text import ACCEPTED_DOCUMENT_FILE_EXTENSIONS -from onyx.file_processing.extract_file_text import ACCEPTED_IMAGE_FILE_EXTENSIONS from onyx.file_processing.extract_file_text import ACCEPTED_PLAIN_TEXT_FILE_EXTENSIONS from onyx.file_processing.extract_file_text import get_file_ext @@ -42,7 +41,8 @@ def test_blob_s3_connector( """ Plain and document file types should be fully indexed. - Multimedia and unknown file types will be indexed by title only with one empty section. + Multimedia and unknown file types will be indexed be skipped unless `set_allow_images` + is called with `True`. This is intentional in order to allow searching by just the title even if we can't index the file content. @@ -53,8 +53,7 @@ def test_blob_s3_connector( for doc in doc_batch: all_docs.append(doc) - # - assert len(all_docs) == 19 + assert len(all_docs) == 15 for doc in all_docs: section = doc.sections[0] @@ -69,9 +68,5 @@ def test_blob_s3_connector( assert len(section.text) > 0 continue - if file_extension in ACCEPTED_IMAGE_FILE_EXTENSIONS: - assert len(section.text) == 0 - continue - # unknown extension assert len(section.text) == 0 diff --git a/backend/tests/daily/connectors/confluence/test_confluence_permissions_basic.py b/backend/tests/daily/connectors/confluence/test_confluence_permissions_basic.py index 9b0c15b0a6e..2338483fc58 100644 --- a/backend/tests/daily/connectors/confluence/test_confluence_permissions_basic.py +++ b/backend/tests/daily/connectors/confluence/test_confluence_permissions_basic.py @@ -34,7 +34,12 @@ def confluence_connector() -> ConfluenceConnector: # This should never fail because even if the docs in the cloud change, # the full doc ids retrieved should always be a subset of the slim doc ids +@patch( + "onyx.file_processing.extract_file_text.get_unstructured_api_key", + return_value=None, +) def test_confluence_connector_permissions( + mock_get_api_key: MagicMock, confluence_connector: ConfluenceConnector, ) -> None: # Get all doc IDs from the full connector @@ -76,6 +81,8 @@ def test_confluence_connector_restriction_handling( "confluence_username": os.environ["CONFLUENCE_USER_NAME"], "confluence_access_token": os.environ["CONFLUENCE_ACCESS_TOKEN"], } + # this prevents redis calls inside of OnyxConfluence + mock_provider_instance.is_dynamic.return_value = False # Make the class return our configured instance when called mock_db_provider_class.return_value = mock_provider_instance diff --git a/backend/tests/daily/connectors/jira/test_jira_basic.py b/backend/tests/daily/connectors/jira/test_jira_basic.py index 885d4f2cacf..f4cb25a7948 100644 --- a/backend/tests/daily/connectors/jira/test_jira_basic.py +++ b/backend/tests/daily/connectors/jira/test_jira_basic.py @@ -1,9 +1,12 @@ import os import time +from unittest.mock import MagicMock +from unittest.mock import patch import pytest from onyx.configs.constants import DocumentSource +from onyx.connectors.models import Document from onyx.connectors.onyx_jira.connector import JiraConnector from tests.daily.connectors.utils import load_all_docs_from_checkpoint_connector @@ -24,25 +27,72 @@ def jira_connector() -> JiraConnector: return connector -def test_jira_connector_basic(jira_connector: JiraConnector) -> None: +@patch( + "onyx.file_processing.extract_file_text.get_unstructured_api_key", + return_value=None, +) +def test_jira_connector_basic( + mock_get_api_key: MagicMock, jira_connector: JiraConnector +) -> None: docs = load_all_docs_from_checkpoint_connector( connector=jira_connector, start=0, end=time.time(), ) - assert len(docs) == 1 - doc = docs[0] + assert len(docs) == 2 - assert doc.id == "https://danswerai.atlassian.net/browse/AS-2" - assert doc.semantic_identifier == "AS-2: test123small" - assert doc.source == DocumentSource.JIRA - assert doc.metadata == {"priority": "Medium", "status": "Backlog"} - assert doc.secondary_owners is None - assert doc.title == "AS-2 test123small" - assert doc.from_ingestion_api is False - assert doc.additional_info is None + # Find story and epic + story: Document | None = None + epic: Document | None = None + for doc in docs: + if doc.metadata["issuetype"] == "Story": + story = doc + elif doc.metadata["issuetype"] == "Epic": + epic = doc - assert len(doc.sections) == 1 - section = doc.sections[0] + assert story is not None + assert epic is not None + + # Check task + assert story.id == "https://danswerai.atlassian.net/browse/AS-3" + assert story.semantic_identifier == "AS-3: test123small" + assert story.source == DocumentSource.JIRA + assert story.metadata == { + "priority": "Medium", + "status": "Backlog", + "reporter": "Chris Weaver", + "assignee": "Chris Weaver", + "issuetype": "Story", + "created": "2025-04-16T16:44:06.716-0700", + } + assert story.secondary_owners is None + assert story.title == "AS-3 test123small" + assert story.from_ingestion_api is False + assert story.additional_info is None + + assert len(story.sections) == 1 + section = story.sections[0] assert section.text == "example_text\n" - assert section.link == "https://danswerai.atlassian.net/browse/AS-2" + assert section.link == "https://danswerai.atlassian.net/browse/AS-3" + + # Check epic + assert epic.id == "https://danswerai.atlassian.net/browse/AS-4" + assert epic.semantic_identifier == "AS-4: EPIC" + assert epic.source == DocumentSource.JIRA + assert epic.metadata == { + "priority": "Medium", + "status": "Backlog", + "reporter": "Founder Onyx", + "assignee": "Chris Weaver", + "issuetype": "Epic", + "created": "2025-04-16T16:55:53.068-0700", + } + assert epic.secondary_owners is None + assert epic.title == "AS-4 EPIC" + assert epic.from_ingestion_api is False + assert epic.additional_info is None + + assert len(epic.sections) == 1 + section = epic.sections[0] + assert section.text == "example_text\n" + assert section.link == "https://danswerai.atlassian.net/browse/AS-4"