mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-06-02 19:19:30 +02:00
Pull in more fields for Jira (#4547)
* Pull in more fields for Jira * Fix tests * Fix * more fix * Fix * Fix S3 test * fix
This commit is contained in:
parent
fe94bdf936
commit
6df1c6c72f
@ -44,6 +44,18 @@ JIRA_API_VERSION = os.environ.get("JIRA_API_VERSION") or "2"
|
||||
_JIRA_SLIM_PAGE_SIZE = 500
|
||||
_JIRA_FULL_PAGE_SIZE = 50
|
||||
|
||||
# Constants for Jira field names
|
||||
_FIELD_REPORTER = "reporter"
|
||||
_FIELD_ASSIGNEE = "assignee"
|
||||
_FIELD_PRIORITY = "priority"
|
||||
_FIELD_STATUS = "status"
|
||||
_FIELD_RESOLUTION = "resolution"
|
||||
_FIELD_LABELS = "labels"
|
||||
_FIELD_KEY = "key"
|
||||
_FIELD_CREATED = "created"
|
||||
_FIELD_DUEDATE = "duedate"
|
||||
_FIELD_ISSUETYPE = "issuetype"
|
||||
|
||||
|
||||
def _perform_jql_search(
|
||||
jira_client: JIRA,
|
||||
@ -107,32 +119,40 @@ def process_jira_issue(
|
||||
|
||||
page_url = build_jira_url(jira_client, issue.key)
|
||||
|
||||
metadata_dict: dict[str, str | list[str]] = {}
|
||||
people = set()
|
||||
try:
|
||||
creator = best_effort_get_field_from_issue(issue, "creator")
|
||||
creator = best_effort_get_field_from_issue(issue, _FIELD_REPORTER)
|
||||
if basic_expert_info := best_effort_basic_expert_info(creator):
|
||||
people.add(basic_expert_info)
|
||||
metadata_dict[_FIELD_REPORTER] = basic_expert_info.get_semantic_name()
|
||||
except Exception:
|
||||
# Author should exist but if not, doesn't matter
|
||||
pass
|
||||
|
||||
try:
|
||||
assignee = best_effort_get_field_from_issue(issue, "assignee")
|
||||
assignee = best_effort_get_field_from_issue(issue, _FIELD_ASSIGNEE)
|
||||
if basic_expert_info := best_effort_basic_expert_info(assignee):
|
||||
people.add(basic_expert_info)
|
||||
metadata_dict[_FIELD_ASSIGNEE] = basic_expert_info.get_semantic_name()
|
||||
except Exception:
|
||||
# Author should exist but if not, doesn't matter
|
||||
pass
|
||||
|
||||
metadata_dict = {}
|
||||
if priority := best_effort_get_field_from_issue(issue, "priority"):
|
||||
metadata_dict["priority"] = priority.name
|
||||
if status := best_effort_get_field_from_issue(issue, "status"):
|
||||
metadata_dict["status"] = status.name
|
||||
if resolution := best_effort_get_field_from_issue(issue, "resolution"):
|
||||
metadata_dict["resolution"] = resolution.name
|
||||
if labels := best_effort_get_field_from_issue(issue, "labels"):
|
||||
metadata_dict["labels"] = labels
|
||||
if priority := best_effort_get_field_from_issue(issue, _FIELD_PRIORITY):
|
||||
metadata_dict[_FIELD_PRIORITY] = priority.name
|
||||
if status := best_effort_get_field_from_issue(issue, _FIELD_STATUS):
|
||||
metadata_dict[_FIELD_STATUS] = status.name
|
||||
if resolution := best_effort_get_field_from_issue(issue, _FIELD_RESOLUTION):
|
||||
metadata_dict[_FIELD_RESOLUTION] = resolution.name
|
||||
if labels := best_effort_get_field_from_issue(issue, _FIELD_LABELS):
|
||||
metadata_dict[_FIELD_LABELS] = labels
|
||||
if created := best_effort_get_field_from_issue(issue, _FIELD_CREATED):
|
||||
metadata_dict[_FIELD_CREATED] = created
|
||||
if duedate := best_effort_get_field_from_issue(issue, _FIELD_DUEDATE):
|
||||
metadata_dict[_FIELD_DUEDATE] = duedate
|
||||
if issuetype := best_effort_get_field_from_issue(issue, _FIELD_ISSUETYPE):
|
||||
metadata_dict[_FIELD_ISSUETYPE] = issuetype.name
|
||||
|
||||
return Document(
|
||||
id=page_url,
|
||||
@ -277,7 +297,7 @@ class JiraConnector(CheckpointedConnector[JiraConnectorCheckpoint], SlimConnecto
|
||||
max_results=_JIRA_SLIM_PAGE_SIZE,
|
||||
fields="key",
|
||||
):
|
||||
issue_key = best_effort_get_field_from_issue(issue, "key")
|
||||
issue_key = best_effort_get_field_from_issue(issue, _FIELD_KEY)
|
||||
id = build_jira_url(self.jira_client, issue_key)
|
||||
slim_doc_batch.append(
|
||||
SlimDocument(
|
||||
|
@ -23,8 +23,8 @@ JIRA_API_VERSION = os.environ.get("JIRA_API_VERSION") or "2"
|
||||
def best_effort_basic_expert_info(obj: Any) -> BasicExpertInfo | None:
|
||||
display_name = None
|
||||
email = None
|
||||
if hasattr(obj, "display_name"):
|
||||
display_name = obj.display_name
|
||||
if hasattr(obj, "displayName"):
|
||||
display_name = obj.displayName
|
||||
else:
|
||||
display_name = obj.get("displayName")
|
||||
|
||||
|
@ -9,7 +9,6 @@ from onyx.connectors.blob.connector import BlobStorageConnector
|
||||
from onyx.connectors.models import Document
|
||||
from onyx.connectors.models import TextSection
|
||||
from onyx.file_processing.extract_file_text import ACCEPTED_DOCUMENT_FILE_EXTENSIONS
|
||||
from onyx.file_processing.extract_file_text import ACCEPTED_IMAGE_FILE_EXTENSIONS
|
||||
from onyx.file_processing.extract_file_text import ACCEPTED_PLAIN_TEXT_FILE_EXTENSIONS
|
||||
from onyx.file_processing.extract_file_text import get_file_ext
|
||||
|
||||
@ -42,7 +41,8 @@ def test_blob_s3_connector(
|
||||
"""
|
||||
Plain and document file types should be fully indexed.
|
||||
|
||||
Multimedia and unknown file types will be indexed by title only with one empty section.
|
||||
Multimedia and unknown file types will be indexed be skipped unless `set_allow_images`
|
||||
is called with `True`.
|
||||
|
||||
This is intentional in order to allow searching by just the title even if we can't
|
||||
index the file content.
|
||||
@ -53,8 +53,7 @@ def test_blob_s3_connector(
|
||||
for doc in doc_batch:
|
||||
all_docs.append(doc)
|
||||
|
||||
#
|
||||
assert len(all_docs) == 19
|
||||
assert len(all_docs) == 15
|
||||
|
||||
for doc in all_docs:
|
||||
section = doc.sections[0]
|
||||
@ -69,9 +68,5 @@ def test_blob_s3_connector(
|
||||
assert len(section.text) > 0
|
||||
continue
|
||||
|
||||
if file_extension in ACCEPTED_IMAGE_FILE_EXTENSIONS:
|
||||
assert len(section.text) == 0
|
||||
continue
|
||||
|
||||
# unknown extension
|
||||
assert len(section.text) == 0
|
||||
|
@ -34,7 +34,12 @@ def confluence_connector() -> ConfluenceConnector:
|
||||
|
||||
# This should never fail because even if the docs in the cloud change,
|
||||
# the full doc ids retrieved should always be a subset of the slim doc ids
|
||||
@patch(
|
||||
"onyx.file_processing.extract_file_text.get_unstructured_api_key",
|
||||
return_value=None,
|
||||
)
|
||||
def test_confluence_connector_permissions(
|
||||
mock_get_api_key: MagicMock,
|
||||
confluence_connector: ConfluenceConnector,
|
||||
) -> None:
|
||||
# Get all doc IDs from the full connector
|
||||
@ -76,6 +81,8 @@ def test_confluence_connector_restriction_handling(
|
||||
"confluence_username": os.environ["CONFLUENCE_USER_NAME"],
|
||||
"confluence_access_token": os.environ["CONFLUENCE_ACCESS_TOKEN"],
|
||||
}
|
||||
# this prevents redis calls inside of OnyxConfluence
|
||||
mock_provider_instance.is_dynamic.return_value = False
|
||||
# Make the class return our configured instance when called
|
||||
mock_db_provider_class.return_value = mock_provider_instance
|
||||
|
||||
|
@ -1,9 +1,12 @@
|
||||
import os
|
||||
import time
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.connectors.models import Document
|
||||
from onyx.connectors.onyx_jira.connector import JiraConnector
|
||||
from tests.daily.connectors.utils import load_all_docs_from_checkpoint_connector
|
||||
|
||||
@ -24,25 +27,72 @@ def jira_connector() -> JiraConnector:
|
||||
return connector
|
||||
|
||||
|
||||
def test_jira_connector_basic(jira_connector: JiraConnector) -> None:
|
||||
@patch(
|
||||
"onyx.file_processing.extract_file_text.get_unstructured_api_key",
|
||||
return_value=None,
|
||||
)
|
||||
def test_jira_connector_basic(
|
||||
mock_get_api_key: MagicMock, jira_connector: JiraConnector
|
||||
) -> None:
|
||||
docs = load_all_docs_from_checkpoint_connector(
|
||||
connector=jira_connector,
|
||||
start=0,
|
||||
end=time.time(),
|
||||
)
|
||||
assert len(docs) == 1
|
||||
doc = docs[0]
|
||||
assert len(docs) == 2
|
||||
|
||||
assert doc.id == "https://danswerai.atlassian.net/browse/AS-2"
|
||||
assert doc.semantic_identifier == "AS-2: test123small"
|
||||
assert doc.source == DocumentSource.JIRA
|
||||
assert doc.metadata == {"priority": "Medium", "status": "Backlog"}
|
||||
assert doc.secondary_owners is None
|
||||
assert doc.title == "AS-2 test123small"
|
||||
assert doc.from_ingestion_api is False
|
||||
assert doc.additional_info is None
|
||||
# Find story and epic
|
||||
story: Document | None = None
|
||||
epic: Document | None = None
|
||||
for doc in docs:
|
||||
if doc.metadata["issuetype"] == "Story":
|
||||
story = doc
|
||||
elif doc.metadata["issuetype"] == "Epic":
|
||||
epic = doc
|
||||
|
||||
assert len(doc.sections) == 1
|
||||
section = doc.sections[0]
|
||||
assert story is not None
|
||||
assert epic is not None
|
||||
|
||||
# Check task
|
||||
assert story.id == "https://danswerai.atlassian.net/browse/AS-3"
|
||||
assert story.semantic_identifier == "AS-3: test123small"
|
||||
assert story.source == DocumentSource.JIRA
|
||||
assert story.metadata == {
|
||||
"priority": "Medium",
|
||||
"status": "Backlog",
|
||||
"reporter": "Chris Weaver",
|
||||
"assignee": "Chris Weaver",
|
||||
"issuetype": "Story",
|
||||
"created": "2025-04-16T16:44:06.716-0700",
|
||||
}
|
||||
assert story.secondary_owners is None
|
||||
assert story.title == "AS-3 test123small"
|
||||
assert story.from_ingestion_api is False
|
||||
assert story.additional_info is None
|
||||
|
||||
assert len(story.sections) == 1
|
||||
section = story.sections[0]
|
||||
assert section.text == "example_text\n"
|
||||
assert section.link == "https://danswerai.atlassian.net/browse/AS-2"
|
||||
assert section.link == "https://danswerai.atlassian.net/browse/AS-3"
|
||||
|
||||
# Check epic
|
||||
assert epic.id == "https://danswerai.atlassian.net/browse/AS-4"
|
||||
assert epic.semantic_identifier == "AS-4: EPIC"
|
||||
assert epic.source == DocumentSource.JIRA
|
||||
assert epic.metadata == {
|
||||
"priority": "Medium",
|
||||
"status": "Backlog",
|
||||
"reporter": "Founder Onyx",
|
||||
"assignee": "Chris Weaver",
|
||||
"issuetype": "Epic",
|
||||
"created": "2025-04-16T16:55:53.068-0700",
|
||||
}
|
||||
assert epic.secondary_owners is None
|
||||
assert epic.title == "AS-4 EPIC"
|
||||
assert epic.from_ingestion_api is False
|
||||
assert epic.additional_info is None
|
||||
|
||||
assert len(epic.sections) == 1
|
||||
section = epic.sections[0]
|
||||
assert section.text == "example_text\n"
|
||||
assert section.link == "https://danswerai.atlassian.net/browse/AS-4"
|
||||
|
Loading…
x
Reference in New Issue
Block a user