mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-03-26 17:51:54 +01:00
add size limit to jira tickets
This commit is contained in:
parent
fc45354aea
commit
f4d7e34fa3
@ -247,6 +247,10 @@ JIRA_CONNECTOR_LABELS_TO_SKIP = [
|
||||
for ignored_tag in os.environ.get("JIRA_CONNECTOR_LABELS_TO_SKIP", "").split(",")
|
||||
if ignored_tag
|
||||
]
|
||||
# Maximum size for Jira tickets in bytes (default: 100KB)
|
||||
JIRA_CONNECTOR_MAX_TICKET_SIZE = int(
|
||||
os.environ.get("JIRA_CONNECTOR_MAX_TICKET_SIZE", 100 * 1024)
|
||||
)
|
||||
|
||||
GONG_CONNECTOR_START_TIME = os.environ.get("GONG_CONNECTOR_START_TIME")
|
||||
|
||||
|
@ -9,6 +9,7 @@ from jira.resources import Issue
|
||||
|
||||
from danswer.configs.app_configs import INDEX_BATCH_SIZE
|
||||
from danswer.configs.app_configs import JIRA_CONNECTOR_LABELS_TO_SKIP
|
||||
from danswer.configs.app_configs import JIRA_CONNECTOR_MAX_TICKET_SIZE
|
||||
from danswer.configs.constants import DocumentSource
|
||||
from danswer.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
|
||||
from danswer.connectors.interfaces import GenerateDocumentsOutput
|
||||
@ -134,10 +135,18 @@ def fetch_jira_issues_batch(
|
||||
else extract_text_from_adf(jira.raw["fields"]["description"])
|
||||
)
|
||||
comments = _get_comment_strs(jira, comment_email_blacklist)
|
||||
semantic_rep = f"{description}\n" + "\n".join(
|
||||
ticket_content = f"{description}\n" + "\n".join(
|
||||
[f"Comment: {comment}" for comment in comments if comment]
|
||||
)
|
||||
|
||||
# Check ticket size
|
||||
if len(ticket_content.encode("utf-8")) > JIRA_CONNECTOR_MAX_TICKET_SIZE:
|
||||
logger.info(
|
||||
f"Skipping {jira.key} because it exceeds the maximum size of "
|
||||
f"{JIRA_CONNECTOR_MAX_TICKET_SIZE} bytes."
|
||||
)
|
||||
continue
|
||||
|
||||
page_url = f"{jira_client.client_info()}/browse/{jira.key}"
|
||||
|
||||
people = set()
|
||||
@ -180,7 +189,7 @@ def fetch_jira_issues_batch(
|
||||
doc_batch.append(
|
||||
Document(
|
||||
id=page_url,
|
||||
sections=[Section(link=page_url, text=semantic_rep)],
|
||||
sections=[Section(link=page_url, text=ticket_content)],
|
||||
source=DocumentSource.JIRA,
|
||||
semantic_identifier=jira.fields.summary,
|
||||
doc_updated_at=time_str_to_utc(jira.fields.updated),
|
||||
|
@ -0,0 +1,136 @@
|
||||
from collections.abc import Callable
|
||||
from collections.abc import Generator
|
||||
from typing import Any
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from jira.resources import Issue
|
||||
from pytest_mock import MockFixture
|
||||
|
||||
from danswer.connectors.danswer_jira.connector import fetch_jira_issues_batch
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_jira_client() -> MagicMock:
|
||||
return MagicMock()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_issue_small() -> MagicMock:
|
||||
issue = MagicMock()
|
||||
issue.key = "SMALL-1"
|
||||
issue.fields.description = "Small description"
|
||||
issue.fields.comment.comments = [
|
||||
MagicMock(body="Small comment 1"),
|
||||
MagicMock(body="Small comment 2"),
|
||||
]
|
||||
issue.fields.creator.displayName = "John Doe"
|
||||
issue.fields.creator.emailAddress = "john@example.com"
|
||||
issue.fields.summary = "Small Issue"
|
||||
issue.fields.updated = "2023-01-01T00:00:00+0000"
|
||||
issue.fields.labels = []
|
||||
return issue
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_issue_large() -> MagicMock:
|
||||
# This will be larger than 100KB
|
||||
issue = MagicMock()
|
||||
issue.key = "LARGE-1"
|
||||
issue.fields.description = "a" * 99_000
|
||||
issue.fields.comment.comments = [
|
||||
MagicMock(body="Large comment " * 1000),
|
||||
MagicMock(body="Another large comment " * 1000),
|
||||
]
|
||||
issue.fields.creator.displayName = "Jane Doe"
|
||||
issue.fields.creator.emailAddress = "jane@example.com"
|
||||
issue.fields.summary = "Large Issue"
|
||||
issue.fields.updated = "2023-01-02T00:00:00+0000"
|
||||
issue.fields.labels = []
|
||||
return issue
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def patched_type() -> Callable[[Any], type]:
|
||||
def _patched_type(obj: Any) -> type:
|
||||
if isinstance(obj, MagicMock):
|
||||
return Issue
|
||||
return type(obj)
|
||||
|
||||
return _patched_type
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_jira_api_version() -> Generator[Any, Any, Any]:
|
||||
with patch("danswer.connectors.danswer_jira.connector.JIRA_API_VERSION", "2"):
|
||||
yield
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def patched_environment(
|
||||
patched_type: type,
|
||||
mock_jira_api_version: MockFixture,
|
||||
) -> Generator[Any, Any, Any]:
|
||||
with patch("danswer.connectors.danswer_jira.connector.type", patched_type):
|
||||
yield
|
||||
|
||||
|
||||
def test_fetch_jira_issues_batch_small_ticket(
|
||||
mock_jira_client: MagicMock,
|
||||
mock_issue_small: MagicMock,
|
||||
patched_environment: MockFixture,
|
||||
) -> None:
|
||||
mock_jira_client.search_issues.return_value = [mock_issue_small]
|
||||
|
||||
docs, count = fetch_jira_issues_batch("project = TEST", 0, mock_jira_client)
|
||||
|
||||
assert count == 1
|
||||
assert len(docs) == 1
|
||||
assert docs[0].id.endswith("/SMALL-1")
|
||||
assert "Small description" in docs[0].sections[0].text
|
||||
assert "Small comment 1" in docs[0].sections[0].text
|
||||
assert "Small comment 2" in docs[0].sections[0].text
|
||||
|
||||
|
||||
def test_fetch_jira_issues_batch_large_ticket(
|
||||
mock_jira_client: MagicMock,
|
||||
mock_issue_large: MagicMock,
|
||||
patched_environment: MockFixture,
|
||||
) -> None:
|
||||
mock_jira_client.search_issues.return_value = [mock_issue_large]
|
||||
|
||||
docs, count = fetch_jira_issues_batch("project = TEST", 0, mock_jira_client)
|
||||
|
||||
assert count == 1
|
||||
assert len(docs) == 0 # The large ticket should be skipped
|
||||
|
||||
|
||||
def test_fetch_jira_issues_batch_mixed_tickets(
|
||||
mock_jira_client: MagicMock,
|
||||
mock_issue_small: MagicMock,
|
||||
mock_issue_large: MagicMock,
|
||||
patched_environment: MockFixture,
|
||||
) -> None:
|
||||
mock_jira_client.search_issues.return_value = [mock_issue_small, mock_issue_large]
|
||||
|
||||
docs, count = fetch_jira_issues_batch("project = TEST", 0, mock_jira_client)
|
||||
|
||||
assert count == 2
|
||||
assert len(docs) == 1 # Only the small ticket should be included
|
||||
assert docs[0].id.endswith("/SMALL-1")
|
||||
|
||||
|
||||
@patch("danswer.connectors.danswer_jira.connector.JIRA_CONNECTOR_MAX_TICKET_SIZE", 50)
|
||||
def test_fetch_jira_issues_batch_custom_size_limit(
|
||||
mock_jira_client: MagicMock,
|
||||
mock_issue_small: MagicMock,
|
||||
mock_issue_large: MagicMock,
|
||||
patched_environment: MockFixture,
|
||||
) -> None:
|
||||
mock_jira_client.search_issues.return_value = [mock_issue_small, mock_issue_large]
|
||||
|
||||
docs, count = fetch_jira_issues_batch("project = TEST", 0, mock_jira_client)
|
||||
|
||||
assert count == 2
|
||||
assert len(docs) == 0 # Both tickets should be skipped due to the low size limit
|
Loading…
x
Reference in New Issue
Block a user