From 03006743ab1f1333ba9efb75621c50b8b50e36c9 Mon Sep 17 00:00:00 2001 From: Yuhong Sun Date: Sat, 24 Jun 2023 17:48:38 -0700 Subject: [PATCH] DAN-118 Jira connector (#102) * Small confluence page QoL changes * Prevent getting into a bad state with orphan connectors for Jira / Confluence * Jira connector + admin page --------- Co-authored-by: Weves --- backend/danswer/configs/constants.py | 1 + .../connectors/danswer_jira/__init__.py | 0 .../connectors/danswer_jira/connector.py | 155 ++++++++++ backend/danswer/connectors/factory.py | 2 + backend/requirements/default.txt | 1 + backend/scripts/list_typesense_docs.py | 27 ++ backend/scripts/reset_postgres.py | 5 + .../app/admin/connectors/confluence/page.tsx | 28 +- .../admin/connectors/google-drive/page.tsx | 30 +- web/src/app/admin/connectors/jira/page.tsx | 273 ++++++++++++++++++ web/src/app/admin/indexing/status/page.tsx | 7 + web/src/app/admin/layout.tsx | 10 + .../admin/connectors/ConnectorForm.tsx | 25 +- web/src/components/admin/connectors/Popup.tsx | 17 ++ .../connectors/table/ConnectorsTable.tsx | 4 +- web/src/components/icons/icons.tsx | 15 +- web/src/components/search/Filters.tsx | 1 + web/src/components/source.tsx | 7 + web/src/lib/connector.ts | 26 +- web/src/lib/types.ts | 10 + 20 files changed, 595 insertions(+), 49 deletions(-) create mode 100644 backend/danswer/connectors/danswer_jira/__init__.py create mode 100644 backend/danswer/connectors/danswer_jira/connector.py create mode 100644 backend/scripts/list_typesense_docs.py create mode 100644 web/src/app/admin/connectors/jira/page.tsx diff --git a/backend/danswer/configs/constants.py b/backend/danswer/configs/constants.py index 60540be55..016eb1d0d 100644 --- a/backend/danswer/configs/constants.py +++ b/backend/danswer/configs/constants.py @@ -22,4 +22,5 @@ class DocumentSource(str, Enum): GOOGLE_DRIVE = "google_drive" GITHUB = "github" CONFLUENCE = "confluence" + JIRA = "jira" FILE = "file" diff --git 
a/backend/danswer/connectors/danswer_jira/__init__.py b/backend/danswer/connectors/danswer_jira/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/danswer/connectors/danswer_jira/connector.py b/backend/danswer/connectors/danswer_jira/connector.py new file mode 100644 index 000000000..d4799fd5c --- /dev/null +++ b/backend/danswer/connectors/danswer_jira/connector.py @@ -0,0 +1,155 @@ +from datetime import datetime +from datetime import timezone +from typing import Any +from urllib.parse import urlparse + +from danswer.configs.app_configs import INDEX_BATCH_SIZE +from danswer.configs.constants import DocumentSource +from danswer.connectors.interfaces import GenerateDocumentsOutput +from danswer.connectors.interfaces import LoadConnector +from danswer.connectors.interfaces import PollConnector +from danswer.connectors.interfaces import SecondsSinceUnixEpoch +from danswer.connectors.models import Document +from danswer.connectors.models import Section +from danswer.utils.logging import setup_logger +from jira import JIRA +from jira.resources import Issue + + +logger = setup_logger() +PROJECT_URL_PAT = "projects" + + +def extract_jira_project(url: str) -> tuple[str, str]: + parsed_url = urlparse(url) + jira_base = parsed_url.scheme + "://" + parsed_url.netloc + + # Split the path by '/' and find the position of 'projects' to get the project name + split_path = parsed_url.path.split("/") + if PROJECT_URL_PAT in split_path: + project_pos = split_path.index(PROJECT_URL_PAT) + if len(split_path) > project_pos + 1: + jira_project = split_path[project_pos + 1] + else: + raise ValueError("No project name found in the URL") + else: + raise ValueError("'projects' not found in the URL") + + return jira_base, jira_project + + +def fetch_jira_issues_batch( + jql: str, + start_index: int, + jira_client: JIRA, + batch_size: int = INDEX_BATCH_SIZE, +) -> tuple[list[Document], int]: + doc_batch = [] + + batch = jira_client.search_issues( + jql, + 
startAt=start_index, + maxResults=batch_size, + ) + + for jira in batch: + if type(jira) != Issue: + logger.warning(f"Found Jira object not of type Issue {jira}") + continue + + semantic_rep = ( + f"Jira Ticket Summary: {jira.fields.summary}\n" + f"Description: {jira.fields.description}\n" + + "\n".join( + [f"Comment: {comment.body}" for comment in jira.fields.comment.comments] + ) + ) + + page_url = f"{jira_client.client_info()}/browse/{jira.key}" + + doc_batch.append( + Document( + id=page_url, + sections=[Section(link=page_url, text=semantic_rep)], + source=DocumentSource.JIRA, + semantic_identifier=jira.fields.summary, + metadata={}, + ) + ) + return doc_batch, len(batch) + + +class JiraConnector(LoadConnector, PollConnector): + def __init__( + self, + jira_project_url: str, + batch_size: int = INDEX_BATCH_SIZE, + ) -> None: + self.batch_size = batch_size + self.jira_base, self.jira_project = extract_jira_project(jira_project_url) + self.jira_client: JIRA | None = None + + def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None: + email = credentials["jira_user_email"] + api_token = credentials["jira_api_token"] + self.jira_client = JIRA(basic_auth=(email, api_token), server=self.jira_base) + return None + + def load_from_state(self) -> GenerateDocumentsOutput: + if self.jira_client is None: + raise PermissionError( + "Jira Client is not set up, was load_credentials called?" + ) + + start_ind = 0 + while True: + doc_batch, fetched_batch_size = fetch_jira_issues_batch( + f"project = {self.jira_project}", + start_ind, + self.jira_client, + self.batch_size, + ) + + if doc_batch: + yield doc_batch + + start_ind += fetched_batch_size + if fetched_batch_size < self.batch_size: + break + + def poll_source( + self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch + ) -> GenerateDocumentsOutput: + if self.jira_client is None: + raise PermissionError( + "Jira Client is not set up, was load_credentials called?" 
+ ) + + start_date_str = datetime.fromtimestamp(start, tz=timezone.utc).strftime( + "%Y-%m-%d %H:%M" + ) + end_date_str = datetime.fromtimestamp(end, tz=timezone.utc).strftime( + "%Y-%m-%d %H:%M" + ) + + jql = ( + f"project = {self.jira_project} AND " + f"updated >= '{start_date_str}' AND " + f"updated <= '{end_date_str}'" + ) + + start_ind = 0 + while True: + doc_batch, fetched_batch_size = fetch_jira_issues_batch( + jql, + start_ind, + self.jira_client, + self.batch_size, + ) + + if doc_batch: + yield doc_batch + + start_ind += fetched_batch_size + if fetched_batch_size < self.batch_size: + break diff --git a/backend/danswer/connectors/factory.py b/backend/danswer/connectors/factory.py index f6d30a5db..d742a1e49 100644 --- a/backend/danswer/connectors/factory.py +++ b/backend/danswer/connectors/factory.py @@ -3,6 +3,7 @@ from typing import Type from danswer.configs.constants import DocumentSource from danswer.connectors.confluence.connector import ConfluenceConnector +from danswer.connectors.danswer_jira.connector import JiraConnector from danswer.connectors.file.connector import LocalFileConnector from danswer.connectors.github.connector import GithubConnector from danswer.connectors.google_drive.connector import GoogleDriveConnector @@ -36,6 +37,7 @@ def identify_connector_class( DocumentSource.GITHUB: GithubConnector, DocumentSource.GOOGLE_DRIVE: GoogleDriveConnector, DocumentSource.CONFLUENCE: ConfluenceConnector, + DocumentSource.JIRA: JiraConnector, } connector_by_source = connector_map.get(source, {}) diff --git a/backend/requirements/default.txt b/backend/requirements/default.txt index 92f33cc30..23470f1c6 100644 --- a/backend/requirements/default.txt +++ b/backend/requirements/default.txt @@ -12,6 +12,7 @@ google-auth-oauthlib==1.0.0 httpcore==0.16.3 httpx==0.23.3 httpx-oauth==0.11.2 +jira==3.5.1 Mako==1.2.4 nltk==3.8.1 openai==0.27.6 diff --git a/backend/scripts/list_typesense_docs.py b/backend/scripts/list_typesense_docs.py new file mode 100644 index 
000000000..429030e4d --- /dev/null +++ b/backend/scripts/list_typesense_docs.py @@ -0,0 +1,27 @@ +from danswer.configs.app_configs import TYPESENSE_DEFAULT_COLLECTION +from danswer.utils.clients import get_typesense_client + + +if __name__ == "__main__": + ts_client = get_typesense_client() + + page_number = 1 + per_page = 100 # number of documents to retrieve per page + while True: + params = { + "q": "", + "query_by": "content", + "page": page_number, + "per_page": per_page, + } + response = ts_client.collections[TYPESENSE_DEFAULT_COLLECTION].documents.search( + params + ) + documents = response.get("hits") + if not documents: + break # if there are no more documents, break out of the loop + + for document in documents: + print(document) + + page_number += 1 # move on to the next page diff --git a/backend/scripts/reset_postgres.py b/backend/scripts/reset_postgres.py index 364c4792d..0b4bc9a1e 100644 --- a/backend/scripts/reset_postgres.py +++ b/backend/scripts/reset_postgres.py @@ -27,6 +27,11 @@ def wipe_all_rows(database: str) -> None: table_names = cur.fetchall() + # have to delete from these first to not run into psycopg2.errors.ForeignKeyViolation + cur.execute(f"DELETE FROM connector_credential_pair") + cur.execute(f"DELETE FROM index_attempt") + conn.commit() + for table_name in table_names: if table_name[0] == "alembic_version": continue diff --git a/web/src/app/admin/connectors/confluence/page.tsx b/web/src/app/admin/connectors/confluence/page.tsx index f6a2668b6..ff1311161 100644 --- a/web/src/app/admin/connectors/confluence/page.tsx +++ b/web/src/app/admin/connectors/confluence/page.tsx @@ -17,8 +17,11 @@ import { LoadingAnimation } from "@/components/Loading"; import { deleteCredential, linkCredential } from "@/lib/credential"; import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; +import { usePopup } from 
"@/components/admin/connectors/Popup"; const Main = () => { + const { popup, setPopup } = usePopup(); + const { mutate } = useSWRConfig(); const { data: connectorIndexingStatuses, @@ -62,6 +65,7 @@ const Main = () => { return ( <> + {popup}

Step 1: Provide your access token

@@ -82,6 +86,14 @@ const Main = () => { + + + ) : ( + <> +

+ To use the Jira connector, first follow the guide{" "} + + here + {" "} + to generate an Access Token. +

+
+ + formBody={ + <> + + + + } + validationSchema={Yup.object().shape({ + jira_user_email: Yup.string().required( + "Please enter your username on Jira" + ), + jira_api_token: Yup.string().required( + "Please enter your Jira access token" + ), + })} + initialValues={{ + jira_user_email: "", + jira_api_token: "", + }} + onSubmit={(isSuccess) => { + if (isSuccess) { + mutate("/api/manage/credential"); + } + }} + /> +
+ + )} + + {/* TODO: make this periodic */} +

+ Step 2: Which projects do you want to make searchable? 

+ {jiraCredential ? ( + <> + {" "} +

+ Specify any link to a Jira page below and click "Index" to + Index. Based on the provided link, we will index the ENTIRE PROJECT, + not just the specified page. For example, entering{" "} + + https://danswer.atlassian.net/jira/software/projects/DAN/boards/1 + {" "} + and clicking the Index button will index the whole{" "} + DAN Jira project. 

+ {jiraConnectorIndexingStatuses.length > 0 && ( + <> +

+ We pull the latest issues and comments from each project listed + below every 10 minutes. 

+
+ + connectorIndexingStatuses={jiraConnectorIndexingStatuses} + liveCredential={jiraCredential} + getCredential={(credential) => { + return ( +
+

{credential.credential_json.jira_api_token}

+
+ ); + }} + onCredentialLink={async (connectorId) => { + if (jiraCredential) { + await linkCredential(connectorId, jiraCredential.id); + mutate("/api/manage/admin/connector/indexing-status"); + } + }} + specialColumns={[ + { + header: "Url", + key: "url", + getValue: (connector) => ( + + {connector.connector_specific_config.jira_project_url} + + ), + }, + ]} + onUpdate={() => + mutate("/api/manage/admin/connector/indexing-status") + } + /> +
+ + )} +
+

Add a New Project

+ + nameBuilder={(values) => + `JiraConnector-${values.jira_project_url}` + } + source="jira" + inputType="poll" + formBody={ + <> + + + } + validationSchema={Yup.object().shape({ + jira_project_url: Yup.string().required( + "Please enter any link to your jira project e.g. https://danswer.atlassian.net/jira/software/projects/DAN/boards/1" + ), + })} + initialValues={{ + jira_project_url: "", + }} + refreshFreq={10 * 60} // 10 minutes + onSubmit={async (isSuccess, responseJson) => { + if (isSuccess && responseJson) { + await linkCredential(responseJson.id, jiraCredential.id); + mutate("/api/manage/admin/connector/indexing-status"); + } + }} + /> +
+ + ) : ( + <> +

+ Please provide your access token in Step 1 first! Once done with + that, you can then specify which Jira projects you want to make + searchable. +

+ + )} + + ); +}; + +export default function Page() { + return ( +
+
+ +
+
+ +

Jira

+
+
+
+ ); +} diff --git a/web/src/app/admin/indexing/status/page.tsx b/web/src/app/admin/indexing/status/page.tsx index 150837c7f..64799dc58 100644 --- a/web/src/app/admin/indexing/status/page.tsx +++ b/web/src/app/admin/indexing/status/page.tsx @@ -39,6 +39,13 @@ const getSourceDisplay = ( ); } + if (connector.source === "jira") { + return ( + sourceMetadata.displayName + + ` [${connector.connector_specific_config?.jira_project_url}]` + ); + } + if ( connector.source === "google_drive" && !connectorIndexingStatus.public_doc diff --git a/web/src/app/admin/layout.tsx b/web/src/app/admin/layout.tsx index 658665a7a..4c38eea3b 100644 --- a/web/src/app/admin/layout.tsx +++ b/web/src/app/admin/layout.tsx @@ -9,6 +9,7 @@ import { KeyIcon, ConfluenceIcon, FileIcon, + JiraIcon, } from "@/components/icons/icons"; import { DISABLE_AUTH } from "@/lib/constants"; import { getCurrentUserSS } from "@/lib/userSS"; @@ -90,6 +91,15 @@ export default async function AdminLayout({ ), link: "/admin/connectors/confluence", }, + { + name: ( +
+ +
Jira
+
+ ), + link: "/admin/connectors/jira", + }, { name: (
diff --git a/web/src/components/admin/connectors/ConnectorForm.tsx b/web/src/components/admin/connectors/ConnectorForm.tsx index b6aad78b2..24fa9a4a4 100644 --- a/web/src/components/admin/connectors/ConnectorForm.tsx +++ b/web/src/components/admin/connectors/ConnectorForm.tsx @@ -68,26 +68,27 @@ export function ConnectorForm({ { + onSubmit={async (values, formikHelpers) => { formikHelpers.setSubmitting(true); - submitConnector({ + + const { message, isSuccess, response } = await submitConnector({ name: nameBuilder(values), source, input_type: inputType, connector_specific_config: values, refresh_freq: refreshFreq || 0, disabled: false, - }).then(({ message, isSuccess, response }) => { - setPopup({ message, type: isSuccess ? "success" : "error" }); - formikHelpers.setSubmitting(false); - if (isSuccess) { - formikHelpers.resetForm(); - } - setTimeout(() => { - setPopup(null); - }, 4000); - onSubmit(isSuccess, response); }); + + setPopup({ message, type: isSuccess ? "success" : "error" }); + formikHelpers.setSubmitting(false); + if (isSuccess) { + formikHelpers.resetForm(); + } + setTimeout(() => { + setPopup(null); + }, 4000); + onSubmit(isSuccess, response); }} > {({ isSubmitting }) => ( diff --git a/web/src/components/admin/connectors/Popup.tsx b/web/src/components/admin/connectors/Popup.tsx index dff82243c..26e7ce004 100644 --- a/web/src/components/admin/connectors/Popup.tsx +++ b/web/src/components/admin/connectors/Popup.tsx @@ -1,3 +1,5 @@ +import { useState } from "react"; + export interface PopupSpec { message: string; type: "success" | "error"; @@ -12,3 +14,18 @@ export const Popup: React.FC = ({ message, type }) => ( {message}
); + +export const usePopup = () => { + const [popup, setPopup] = useState(null); + const setPopupWithExpiration = (popupSpec: PopupSpec | null) => { + setPopup(popupSpec); + setTimeout(() => { + setPopup(null); + }, 4000); + }; + + return { + popup: popup && , + setPopup: setPopupWithExpiration, + }; +}; diff --git a/web/src/components/admin/connectors/table/ConnectorsTable.tsx b/web/src/components/admin/connectors/table/ConnectorsTable.tsx index e11396846..ff7467138 100644 --- a/web/src/components/admin/connectors/table/ConnectorsTable.tsx +++ b/web/src/components/admin/connectors/table/ConnectorsTable.tsx @@ -146,9 +146,9 @@ export function ConnectorsTable({ const credential = connectorIncludesCredential ? { credential: hasValidCredentials ? ( -

+

{getCredential(liveCredential)} -

+
) : liveCredential ? ( onCredentialLink(connector.id)} diff --git a/web/src/components/icons/icons.tsx b/web/src/components/icons/icons.tsx index 0edc699ed..0f6b8e2c8 100644 --- a/web/src/components/icons/icons.tsx +++ b/web/src/components/icons/icons.tsx @@ -12,7 +12,13 @@ import { Bird, Brain, } from "@phosphor-icons/react"; -import { SiConfluence, SiGithub, SiGoogledrive, SiSlack } from "react-icons/si"; +import { + SiConfluence, + SiGithub, + SiGoogledrive, + SiJira, + SiSlack, +} from "react-icons/si"; import { FaFile, FaGlobe } from "react-icons/fa"; interface IconProps { @@ -113,6 +119,13 @@ export const ConfluenceIcon = ({ return ; }; +export const JiraIcon = ({ + size = "16", + className = defaultTailwindCSS, +}: IconProps) => { + return ; +}; + export const InfoIcon = ({ size = "16", className = defaultTailwindCSS, diff --git a/web/src/components/search/Filters.tsx b/web/src/components/search/Filters.tsx index 448a2c4b6..f8beb74c2 100644 --- a/web/src/components/search/Filters.tsx +++ b/web/src/components/search/Filters.tsx @@ -8,6 +8,7 @@ const sources: Source[] = [ { displayName: "Google Drive", internalName: "google_drive" }, { displayName: "Slack", internalName: "slack" }, { displayName: "Confluence", internalName: "confluence" }, + { displayName: "Jira", internalName: "jira" }, { displayName: "Github PRs", internalName: "github" }, { displayName: "Web", internalName: "web" }, { displayName: "File", internalName: "file" }, diff --git a/web/src/components/source.tsx b/web/src/components/source.tsx index 1b377d85c..d61dc92cf 100644 --- a/web/src/components/source.tsx +++ b/web/src/components/source.tsx @@ -5,6 +5,7 @@ import { GithubIcon, GlobeIcon, GoogleDriveIcon, + JiraIcon, SlackIcon, } from "./icons/icons"; @@ -52,6 +53,12 @@ export const getSourceMetadata = (sourceType: ValidSources): SourceMetadata => { displayName: "Confluence", adminPageLink: "/admin/connectors/confluence", }; + case "jira": + return { + icon: JiraIcon, + displayName: "Jira", + 
adminPageLink: "/admin/connectors/jira", + }; default: throw new Error("Invalid source type"); } diff --git a/web/src/lib/connector.ts b/web/src/lib/connector.ts index 2a79503a0..732eb7c13 100644 --- a/web/src/lib/connector.ts +++ b/web/src/lib/connector.ts @@ -1,4 +1,4 @@ -import { Connector, ConnectorBase } from "./types"; +import { Connector, ConnectorBase, ValidSources } from "./types"; async function handleResponse( response: Response @@ -65,3 +65,27 @@ export async function runConnector( } return null; } + +export async function deleteConnectorIfExists({ + source, + name, +}: { + source: ValidSources; + name?: string; +}): Promise { + const connectorsResponse = await fetch("/api/manage/connector"); + if (connectorsResponse.ok) { + const connectors = (await connectorsResponse.json()) as Connector[]; + const googleDriveConnectors = connectors.filter( + (connector) => + connector.source === source && (!name || connector.name === name) + ); + if (googleDriveConnectors.length > 0) { + const errorMsg = await deleteConnector(googleDriveConnectors[0].id); + if (errorMsg) { + return errorMsg; + } + } + } + return null; +} diff --git a/web/src/lib/types.ts b/web/src/lib/types.ts index e9503005d..9ca395045 100644 --- a/web/src/lib/types.ts +++ b/web/src/lib/types.ts @@ -13,6 +13,7 @@ export type ValidSources = | "slack" | "google_drive" | "confluence" + | "jira" | "file"; export type ValidInputTypes = "load_state" | "poll" | "event"; @@ -46,6 +47,10 @@ export interface ConfluenceConfig { wiki_page_url: string; } +export interface JiraConfig { + jira_project_url: string; +} + export interface SlackConfig { workspace: string; } @@ -85,6 +90,11 @@ export interface ConfluenceCredentialJson { confluence_access_token: string; } +export interface JiraCredentialJson { + jira_user_email: string; + jira_api_token: string; +} + export interface SlackCredentialJson { slack_bot_token: string; }