DAN-118 Jira connector (#102)

* Small Confluence page QoL changes

* Prevent getting into a bad state with orphan connectors for Jira / Confluence

* Jira connector + admin page
---------

Co-authored-by: Weves <chrisweaver101@gmail.com>
This commit is contained in:
Yuhong Sun 2023-06-24 17:48:38 -07:00 committed by GitHub
parent 3701239283
commit 03006743ab
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 595 additions and 49 deletions

View File

@ -22,4 +22,5 @@ class DocumentSource(str, Enum):
GOOGLE_DRIVE = "google_drive"
GITHUB = "github"
CONFLUENCE = "confluence"
JIRA = "jira"
FILE = "file"

View File

@ -0,0 +1,155 @@
from datetime import datetime
from datetime import timezone
from typing import Any
from urllib.parse import urlparse
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.interfaces import PollConnector
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
from danswer.connectors.models import Document
from danswer.connectors.models import Section
from danswer.utils.logging import setup_logger
from jira import JIRA
from jira.resources import Issue
logger = setup_logger()
# Path segment that immediately precedes the project key in a Jira project URL.
PROJECT_URL_PAT = "projects"


def extract_jira_project(url: str) -> tuple[str, str]:
    """Split a Jira project URL into the server base URL and the project key.

    The project key is the path segment that directly follows the
    ``projects`` segment, e.g.
    ``https://danswer.atlassian.net/jira/software/projects/DAN/boards/1``
    yields ``("https://danswer.atlassian.net", "DAN")``.

    Args:
        url: Any absolute URL that points inside a Jira project.

    Returns:
        A ``(jira_base, jira_project)`` tuple.

    Raises:
        ValueError: If the URL has no scheme/host, contains no ``projects``
            path segment, or has no non-empty segment after ``projects``.
    """
    parsed_url = urlparse(url)
    # Without a scheme and host the base would degrade to "://", which can
    # never be a usable server address.
    if not parsed_url.scheme or not parsed_url.netloc:
        raise ValueError("Jira URL must include a scheme and host")
    jira_base = f"{parsed_url.scheme}://{parsed_url.netloc}"

    split_path = parsed_url.path.split("/")
    if PROJECT_URL_PAT not in split_path:
        raise ValueError("'projects' not found in the URL")

    project_pos = split_path.index(PROJECT_URL_PAT)
    following_segments = split_path[project_pos + 1 :]
    # A trailing slash ("/projects/") produces an empty segment, which is
    # just as unusable as a missing one.
    if not following_segments or not following_segments[0]:
        raise ValueError("No project name found in the URL")

    return jira_base, following_segments[0]
def fetch_jira_issues_batch(
    jql: str,
    start_index: int,
    jira_client: JIRA,
    batch_size: int = INDEX_BATCH_SIZE,
) -> tuple[list[Document], int]:
    """Fetch one page of Jira search results and convert it to Documents.

    Args:
        jql: JQL query selecting the issues to fetch.
        start_index: Offset of the first result to request (``startAt``).
        jira_client: Authenticated Jira client used to run the search.
        batch_size: Maximum number of results to request (``maxResults``).

    Returns:
        A tuple of the Documents built from the page and the number of
        items the search returned. The count includes items that were
        skipped for not being an ``Issue``, so callers can use it to
        advance ``start_index``.
    """
    # The server address is identical for every issue in the page.
    server_url = jira_client.client_info()

    doc_batch: list[Document] = []
    batch = jira_client.search_issues(
        jql,
        startAt=start_index,
        maxResults=batch_size,
    )

    for issue in batch:
        if not isinstance(issue, Issue):
            logger.warning(f"Found Jira object not of type Issue {issue}")
            continue

        # Issues without a description report None; keep the literal text
        # "None" out of the indexed content.
        description = issue.fields.description or ""
        semantic_rep = (
            f"Jira Ticket Summary: {issue.fields.summary}\n"
            f"Description: {description}\n"
            + "\n".join(
                f"Comment: {comment.body}" for comment in issue.fields.comment.comments
            )
        )

        page_url = f"{server_url}/browse/{issue.key}"

        doc_batch.append(
            Document(
                id=page_url,
                sections=[Section(link=page_url, text=semantic_rep)],
                source=DocumentSource.JIRA,
                semantic_identifier=issue.fields.summary,
                metadata={},
            )
        )
    return doc_batch, len(batch)
class JiraConnector(LoadConnector, PollConnector):
    """Connector that indexes the issues of a single Jira project.

    Supports a full load (``load_from_state``) and a time-bounded poll
    (``poll_source``). ``load_credentials`` must be called before either,
    otherwise iterating their output raises ``PermissionError``.
    """

    def __init__(
        self,
        jira_project_url: str,
        batch_size: int = INDEX_BATCH_SIZE,
    ) -> None:
        """Record the target project and batch size.

        Args:
            jira_project_url: URL inside the Jira project to index; parsed by
                ``extract_jira_project`` into server base and project key.
            batch_size: Number of issues requested per search call.
        """
        self.batch_size = batch_size
        self.jira_base, self.jira_project = extract_jira_project(jira_project_url)
        # Set by load_credentials; None until then.
        self.jira_client: JIRA | None = None

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        """Create the Jira client from ``jira_user_email`` / ``jira_api_token``.

        Always returns None.

        Raises:
            KeyError: If either credential key is missing.
        """
        email = credentials["jira_user_email"]
        api_token = credentials["jira_api_token"]
        self.jira_client = JIRA(basic_auth=(email, api_token), server=self.jira_base)
        return None

    def _require_client(self) -> JIRA:
        """Return the Jira client, raising if credentials were never loaded."""
        if self.jira_client is None:
            raise PermissionError(
                "Jira Client is not set up, was load_credentials called?"
            )
        return self.jira_client

    def _yield_all_batches(
        self, jql: str, jira_client: JIRA
    ) -> GenerateDocumentsOutput:
        """Page through every result of ``jql``, yielding non-empty batches."""
        start_ind = 0
        while True:
            doc_batch, fetched_batch_size = fetch_jira_issues_batch(
                jql,
                start_ind,
                jira_client,
                self.batch_size,
            )

            if doc_batch:
                yield doc_batch

            # Advance by what the server returned, not by what was kept, so
            # skipped non-Issue items do not cause re-fetching.
            start_ind += fetched_batch_size
            # A short page means the result set is exhausted.
            if fetched_batch_size < self.batch_size:
                break

    def load_from_state(self) -> GenerateDocumentsOutput:
        """Yield batches of Documents for every issue in the project.

        Raises:
            PermissionError: On first iteration, if no client is set up.
        """
        jira_client = self._require_client()
        yield from self._yield_all_batches(
            f"project = {self.jira_project}", jira_client
        )

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        """Yield batches of Documents for issues updated between start and end.

        Both bounds are epoch seconds and are rendered with minute precision.

        Raises:
            PermissionError: On first iteration, if no client is set up.
        """
        jira_client = self._require_client()

        # NOTE(review): the bounds are formatted in UTC, but JQL date literals
        # may be interpreted in the authenticated user's timezone - confirm.
        start_date_str = datetime.fromtimestamp(start, tz=timezone.utc).strftime(
            "%Y-%m-%d %H:%M"
        )
        end_date_str = datetime.fromtimestamp(end, tz=timezone.utc).strftime(
            "%Y-%m-%d %H:%M"
        )

        jql = (
            f"project = {self.jira_project} AND "
            f"updated >= '{start_date_str}' AND "
            f"updated <= '{end_date_str}'"
        )

        yield from self._yield_all_batches(jql, jira_client)

View File

@ -3,6 +3,7 @@ from typing import Type
from danswer.configs.constants import DocumentSource
from danswer.connectors.confluence.connector import ConfluenceConnector
from danswer.connectors.danswer_jira.connector import JiraConnector
from danswer.connectors.file.connector import LocalFileConnector
from danswer.connectors.github.connector import GithubConnector
from danswer.connectors.google_drive.connector import GoogleDriveConnector
@ -36,6 +37,7 @@ def identify_connector_class(
DocumentSource.GITHUB: GithubConnector,
DocumentSource.GOOGLE_DRIVE: GoogleDriveConnector,
DocumentSource.CONFLUENCE: ConfluenceConnector,
DocumentSource.JIRA: JiraConnector,
}
connector_by_source = connector_map.get(source, {})

View File

@ -12,6 +12,7 @@ google-auth-oauthlib==1.0.0
httpcore==0.16.3
httpx==0.23.3
httpx-oauth==0.11.2
jira==3.5.1
Mako==1.2.4
nltk==3.8.1
openai==0.27.6

View File

@ -0,0 +1,27 @@
from danswer.configs.app_configs import TYPESENSE_DEFAULT_COLLECTION
from danswer.utils.clients import get_typesense_client
if __name__ == "__main__":
    # Debug helper: page through the default Typesense collection and print
    # every search hit until a page comes back empty.
    typesense = get_typesense_client()
    documents_api = typesense.collections[TYPESENSE_DEFAULT_COLLECTION].documents

    hits_per_page = 100
    current_page = 1
    while True:
        search_result = documents_api.search(
            {
                "q": "",
                "query_by": "content",
                "page": current_page,
                "per_page": hits_per_page,
            }
        )

        hits = search_result.get("hits")
        if not hits:
            # An empty (or missing) hit list marks the end of the collection.
            break

        for hit in hits:
            print(hit)

        current_page += 1

View File

@ -27,6 +27,11 @@ def wipe_all_rows(database: str) -> None:
table_names = cur.fetchall()
# have to delete from these first to not run into psycopg2.errors.ForeignKeyViolation
cur.execute(f"DELETE FROM connector_credential_pair")
cur.execute(f"DELETE FROM index_attempt")
conn.commit()
for table_name in table_names:
if table_name[0] == "alembic_version":
continue

View File

@ -17,8 +17,11 @@ import { LoadingAnimation } from "@/components/Loading";
import { deleteCredential, linkCredential } from "@/lib/credential";
import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm";
import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable";
import { usePopup } from "@/components/admin/connectors/Popup";
const Main = () => {
const { popup, setPopup } = usePopup();
const { mutate } = useSWRConfig();
const {
data: connectorIndexingStatuses,
@ -62,6 +65,7 @@ const Main = () => {
return (
<>
{popup}
<h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
Step 1: Provide your access token
</h2>
@ -82,6 +86,14 @@ const Main = () => {
<button
className="ml-1 hover:bg-gray-700 rounded-full p-1"
onClick={async () => {
if (confluenceConnectorIndexingStatuses.length > 0) {
setPopup({
type: "error",
message:
"Must delete all connectors before deleting credentials",
});
return;
}
await deleteCredential(confluenceCredential.id);
mutate("/api/manage/credential");
}}
@ -93,11 +105,10 @@ const Main = () => {
) : (
<>
<p className="text-sm">
To use the Confluence connector, you must first follow the guide
described{" "}
To use the Confluence connector, first follow the guide{" "}
<a
className="text-blue-500"
href="https://docs.danswer.dev/connectors/slack#setting-up"
href="https://docs.danswer.dev/connectors/confluence#setting-up"
>
here
</a>{" "}
@ -143,16 +154,7 @@ const Main = () => {
{confluenceCredential ? (
<>
<p className="text-sm mb-4">
To use the Confluence connector, you must first follow the guide
described{" "}
<a
className="text-blue-500"
href="https://docs.danswer.dev/connectors/slack#setting-up"
>
here
</a>{" "}
to give the Danswer backend read access to your documents. Once that
is setup, specify any link to a Confluence page below and click
Specify any link to a Confluence page below and click
&quot;Index&quot; to Index. Based on the provided link, we will
index the ENTIRE SPACE, not just the specified page. For example,
entering{" "}

View File

@ -16,7 +16,7 @@ import {
Credential,
GoogleDriveCredentialJson,
} from "@/lib/types";
import { deleteConnector } from "@/lib/connector";
import { deleteConnector, deleteConnectorIfExists } from "@/lib/connector";
import { StatusRow } from "@/components/admin/connectors/table/ConnectorsTable";
import { setupGoogleDriveOAuth } from "@/lib/googleDrive";
import Cookies from "js-cookie";
@ -133,25 +133,15 @@ const GoogleDriveConnectorManagement = ({
// best effort check to see if existing connector exists
// delete it if it does, the current assumption is that only
// one google drive connector will exist at a time
const connectorsResponse = await fetch("/api/manage/connector");
if (connectorsResponse.ok) {
const connectors =
(await connectorsResponse.json()) as Connector<any>[];
const googleDriveConnectors = connectors.filter(
(connector) => connector.source === "google_drive"
);
if (googleDriveConnectors.length > 0) {
const errorMsg = await deleteConnector(
googleDriveConnectors[0].id
);
if (errorMsg) {
setPopup({
message: `Unable to delete existing connector - ${errorMsg}`,
type: "error",
});
return;
}
}
const errorMsg = await deleteConnectorIfExists({
source: "google_drive",
});
if (errorMsg) {
setPopup({
message: `Unable to delete existing connector - ${errorMsg}`,
type: "error",
});
return;
}
const connectorBase: ConnectorBase<{}> = {

View File

@ -0,0 +1,273 @@
"use client";
import * as Yup from "yup";
import { JiraIcon, TrashIcon } from "@/components/icons/icons";
import { TextFormField } from "@/components/admin/connectors/Field";
import { HealthCheckBanner } from "@/components/health/healthcheck";
import { CredentialForm } from "@/components/admin/connectors/CredentialForm";
import {
Credential,
JiraConfig,
JiraCredentialJson,
ConnectorIndexingStatus,
} from "@/lib/types";
import useSWR, { useSWRConfig } from "swr";
import { fetcher } from "@/lib/fetcher";
import { LoadingAnimation } from "@/components/Loading";
import { deleteCredential, linkCredential } from "@/lib/credential";
import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm";
import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable";
import { usePopup } from "@/components/admin/connectors/Popup";
/**
 * Body of the Jira connector admin page.
 *
 * Step 1 shows the stored Jira credential (or a form to create one).
 * Step 2 lists the existing Jira connectors and offers a form to add a new
 * project, which is linked to the stored credential on creation.
 */
const Main = () => {
  const { popup, setPopup } = usePopup();

  const { mutate } = useSWRConfig();
  const {
    data: connectorIndexingStatuses,
    isLoading: isConnectorIndexingStatusesLoading,
    error: isConnectorIndexingStatusesError,
  } = useSWR<ConnectorIndexingStatus<any>[]>(
    "/api/manage/admin/connector/indexing-status",
    fetcher
  );
  const {
    data: credentialsData,
    isLoading: isCredentialsLoading,
    isValidating: isCredentialsValidating,
    error: isCredentialsError,
  } = useSWR<Credential<any>[]>("/api/manage/credential", fetcher);

  if (
    isConnectorIndexingStatusesLoading ||
    isCredentialsLoading ||
    isCredentialsValidating
  ) {
    return <LoadingAnimation text="Loading" />;
  }

  if (isConnectorIndexingStatusesError || !connectorIndexingStatuses) {
    return <div>Failed to load connectors</div>;
  }

  if (isCredentialsError || !credentialsData) {
    return <div>Failed to load credentials</div>;
  }

  const jiraConnectorIndexingStatuses = connectorIndexingStatuses.filter(
    (connectorIndexingStatus) =>
      connectorIndexingStatus.connector.source === "jira"
  );
  // A credential counts as a Jira credential when it carries a Jira API
  // token; only the first such credential is used.
  const jiraCredential = credentialsData.filter(
    (credential) => credential.credential_json?.jira_api_token
  )[0];

  return (
    <>
      {popup}
      <h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
        Step 1: Provide your Credentials
      </h2>

      {jiraCredential ? (
        <>
          <div className="flex mb-1 text-sm">
            {/* NOTE(review): the API token is rendered in plaintext here. */}
            <p className="my-auto">Existing Access Token: </p>
            <p className="ml-1 italic my-auto max-w-md truncate">
              {jiraCredential.credential_json?.jira_api_token}
            </p>
            <button
              className="ml-1 hover:bg-gray-700 rounded-full p-1"
              onClick={async () => {
                // Refuse to delete while connectors exist, so none is left
                // without a credential.
                if (jiraConnectorIndexingStatuses.length > 0) {
                  setPopup({
                    type: "error",
                    message:
                      "Must delete all connectors before deleting credentials",
                  });
                  return;
                }
                await deleteCredential(jiraCredential.id);
                mutate("/api/manage/credential");
              }}
            >
              <TrashIcon />
            </button>
          </div>
        </>
      ) : (
        <>
          <p className="text-sm">
            To use the Jira connector, first follow the guide{" "}
            <a
              className="text-blue-500"
              href="https://docs.danswer.dev/connectors/jira#setting-up"
            >
              here
            </a>{" "}
            to generate an Access Token.
          </p>
          <div className="border-solid border-gray-600 border rounded-md p-6 mt-2">
            <CredentialForm<JiraCredentialJson>
              formBody={
                <>
                  <TextFormField name="jira_user_email" label="Username:" />
                  <TextFormField
                    name="jira_api_token"
                    label="Access Token:"
                    type="password"
                  />
                </>
              }
              validationSchema={Yup.object().shape({
                jira_user_email: Yup.string().required(
                  "Please enter your username on Jira"
                ),
                jira_api_token: Yup.string().required(
                  "Please enter your Jira access token"
                ),
              })}
              initialValues={{
                jira_user_email: "",
                jira_api_token: "",
              }}
              onSubmit={(isSuccess) => {
                if (isSuccess) {
                  mutate("/api/manage/credential");
                }
              }}
            />
          </div>
        </>
      )}

      <h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
        Step 2: Which projects do you want to make searchable?
      </h2>
      {jiraCredential ? (
        <>
          {" "}
          <p className="text-sm mb-4">
            Specify any link to a Jira page below and click &quot;Index&quot; to
            Index. Based on the provided link, we will index the ENTIRE
            PROJECT, not just the specified page. For example, entering{" "}
            <i>
              https://danswer.atlassian.net/jira/software/projects/DAN/boards/1
            </i>{" "}
            and clicking the Index button will index the whole <i>DAN</i> Jira
            project.
          </p>
          {jiraConnectorIndexingStatuses.length > 0 && (
            <>
              <p className="text-sm mb-2">
                We pull the latest issues and comments from each project listed
                below every <b>10</b> minutes.
              </p>
              <div className="mb-2">
                <ConnectorsTable<JiraConfig, JiraCredentialJson>
                  connectorIndexingStatuses={jiraConnectorIndexingStatuses}
                  liveCredential={jiraCredential}
                  getCredential={(credential) => {
                    return (
                      <div>
                        <p>{credential.credential_json.jira_api_token}</p>
                      </div>
                    );
                  }}
                  onCredentialLink={async (connectorId) => {
                    if (jiraCredential) {
                      await linkCredential(connectorId, jiraCredential.id);
                      mutate("/api/manage/admin/connector/indexing-status");
                    }
                  }}
                  specialColumns={[
                    {
                      header: "Url",
                      key: "url",
                      getValue: (connector) => (
                        <a
                          className="text-blue-500"
                          href={
                            connector.connector_specific_config.jira_project_url
                          }
                        >
                          {connector.connector_specific_config.jira_project_url}
                        </a>
                      ),
                    },
                  ]}
                  onUpdate={() =>
                    mutate("/api/manage/admin/connector/indexing-status")
                  }
                />
              </div>
            </>
          )}
          <div className="border-solid border-gray-600 border rounded-md p-6 mt-4">
            <h2 className="font-bold mb-3">Add a New Project</h2>
            <ConnectorForm<JiraConfig>
              nameBuilder={(values) =>
                `JiraConnector-${values.jira_project_url}`
              }
              source="jira"
              inputType="poll"
              formBody={
                <>
                  <TextFormField
                    name="jira_project_url"
                    label="Jira Project URL:"
                  />
                </>
              }
              validationSchema={Yup.object().shape({
                jira_project_url: Yup.string().required(
                  "Please enter any link to your jira project e.g. https://danswer.atlassian.net/jira/software/projects/DAN/boards/1"
                ),
              })}
              initialValues={{
                jira_project_url: "",
              }}
              refreshFreq={10 * 60} // 10 minutes
              onSubmit={async (isSuccess, responseJson) => {
                // Link the new connector to the stored credential right
                // away so it is never left without one.
                if (isSuccess && responseJson) {
                  await linkCredential(responseJson.id, jiraCredential.id);
                  mutate("/api/manage/admin/connector/indexing-status");
                }
              }}
            />
          </div>
        </>
      ) : (
        <>
          <p className="text-sm">
            Please provide your access token in Step 1 first! Once done with
            that, you can then specify which Jira projects you want to make
            searchable.
          </p>
        </>
      )}
    </>
  );
};
/**
 * Jira connector admin page: health banner, titled header with the Jira
 * icon, then the credential/connector management UI rendered by `Main`.
 */
export default function Page() {
  const header = (
    <div className="border-solid border-gray-600 border-b mb-4 pb-2 flex">
      <JiraIcon size="32" />
      <h1 className="text-3xl font-bold pl-2">Jira</h1>
    </div>
  );

  return (
    <div className="mx-auto container">
      <div className="mb-4">
        <HealthCheckBanner />
      </div>
      {header}
      <Main />
    </div>
  );
}

View File

@ -39,6 +39,13 @@ const getSourceDisplay = (
);
}
if (connector.source === "jira") {
return (
sourceMetadata.displayName +
` [${connector.connector_specific_config?.jira_project_url}]`
);
}
if (
connector.source === "google_drive" &&
!connectorIndexingStatus.public_doc

View File

@ -9,6 +9,7 @@ import {
KeyIcon,
ConfluenceIcon,
FileIcon,
JiraIcon,
} from "@/components/icons/icons";
import { DISABLE_AUTH } from "@/lib/constants";
import { getCurrentUserSS } from "@/lib/userSS";
@ -90,6 +91,15 @@ export default async function AdminLayout({
),
link: "/admin/connectors/confluence",
},
{
name: (
<div className="flex">
<JiraIcon size="16" />
<div className="ml-1">Jira</div>
</div>
),
link: "/admin/connectors/jira",
},
{
name: (
<div className="flex">

View File

@ -68,26 +68,27 @@ export function ConnectorForm<T extends Yup.AnyObject>({
<Formik
initialValues={initialValues}
validationSchema={validationSchema}
onSubmit={(values, formikHelpers) => {
onSubmit={async (values, formikHelpers) => {
formikHelpers.setSubmitting(true);
submitConnector<T>({
const { message, isSuccess, response } = await submitConnector<T>({
name: nameBuilder(values),
source,
input_type: inputType,
connector_specific_config: values,
refresh_freq: refreshFreq || 0,
disabled: false,
}).then(({ message, isSuccess, response }) => {
setPopup({ message, type: isSuccess ? "success" : "error" });
formikHelpers.setSubmitting(false);
if (isSuccess) {
formikHelpers.resetForm();
}
setTimeout(() => {
setPopup(null);
}, 4000);
onSubmit(isSuccess, response);
});
setPopup({ message, type: isSuccess ? "success" : "error" });
formikHelpers.setSubmitting(false);
if (isSuccess) {
formikHelpers.resetForm();
}
setTimeout(() => {
setPopup(null);
}, 4000);
onSubmit(isSuccess, response);
}}
>
{({ isSubmitting }) => (

View File

@ -1,3 +1,5 @@
import { useRef, useState } from "react";
export interface PopupSpec {
message: string;
type: "success" | "error";
@ -12,3 +14,18 @@ export const Popup: React.FC<PopupSpec> = ({ message, type }) => (
{message}
</div>
);
/**
 * Hook managing a single transient popup.
 *
 * Returns the rendered popup element (or null when none is showing) and a
 * setter. Setting a popup shows it and dismisses it automatically after
 * 4 seconds; setting null hides it immediately.
 */
export const usePopup = () => {
  const [popup, setPopup] = useState<PopupSpec | null>(null);
  // Handle of the pending auto-dismiss timer, if any.
  const dismissTimer = useRef<ReturnType<typeof setTimeout> | null>(null);

  const setPopupWithExpiration = (popupSpec: PopupSpec | null) => {
    // Cancel the timer of any earlier popup so that it cannot dismiss this
    // one before its own 4 seconds are up.
    if (dismissTimer.current !== null) {
      clearTimeout(dismissTimer.current);
      dismissTimer.current = null;
    }

    setPopup(popupSpec);

    // Hiding the popup needs no follow-up timer.
    if (popupSpec !== null) {
      dismissTimer.current = setTimeout(() => {
        dismissTimer.current = null;
        setPopup(null);
      }, 4000);
    }
  };

  return {
    popup: popup && <Popup {...popup} />,
    setPopup: setPopupWithExpiration,
  };
};

View File

@ -146,9 +146,9 @@ export function ConnectorsTable<ConnectorConfigType, ConnectorCredentialType>({
const credential = connectorIncludesCredential
? {
credential: hasValidCredentials ? (
<p className="max-w-sm truncate">
<div className="max-w-sm truncate">
{getCredential(liveCredential)}
</p>
</div>
) : liveCredential ? (
<AttachCredentialButtonForTable
onClick={() => onCredentialLink(connector.id)}

View File

@ -12,7 +12,13 @@ import {
Bird,
Brain,
} from "@phosphor-icons/react";
import { SiConfluence, SiGithub, SiGoogledrive, SiSlack } from "react-icons/si";
import {
SiConfluence,
SiGithub,
SiGoogledrive,
SiJira,
SiSlack,
} from "react-icons/si";
import { FaFile, FaGlobe } from "react-icons/fa";
interface IconProps {
@ -113,6 +119,13 @@ export const ConfluenceIcon = ({
return <SiConfluence size={size} className={className} />;
};
/**
 * Jira logo icon. `size` and `className` are passed straight through to the
 * underlying `SiJira` icon.
 */
export const JiraIcon = ({
  size = "16",
  className = defaultTailwindCSS,
}: IconProps) => <SiJira size={size} className={className} />;
export const InfoIcon = ({
size = "16",
className = defaultTailwindCSS,

View File

@ -8,6 +8,7 @@ const sources: Source[] = [
{ displayName: "Google Drive", internalName: "google_drive" },
{ displayName: "Slack", internalName: "slack" },
{ displayName: "Confluence", internalName: "confluence" },
{ displayName: "Jira", internalName: "jira" },
{ displayName: "Github PRs", internalName: "github" },
{ displayName: "Web", internalName: "web" },
{ displayName: "File", internalName: "file" },

View File

@ -5,6 +5,7 @@ import {
GithubIcon,
GlobeIcon,
GoogleDriveIcon,
JiraIcon,
SlackIcon,
} from "./icons/icons";
@ -52,6 +53,12 @@ export const getSourceMetadata = (sourceType: ValidSources): SourceMetadata => {
displayName: "Confluence",
adminPageLink: "/admin/connectors/confluence",
};
case "jira":
return {
icon: JiraIcon,
displayName: "Jira",
adminPageLink: "/admin/connectors/jira",
};
default:
throw new Error("Invalid source type");
}

View File

@ -1,4 +1,4 @@
import { Connector, ConnectorBase } from "./types";
import { Connector, ConnectorBase, ValidSources } from "./types";
async function handleResponse(
response: Response
@ -65,3 +65,27 @@ export async function runConnector(
}
return null;
}
/**
 * Best-effort deletion of an existing connector.
 *
 * Lists all connectors and deletes the first one whose `source` matches
 * and, when `name` is given, whose name matches as well.
 *
 * Resolves to the error message from `deleteConnector` if the deletion
 * fails, and to null otherwise, including when the listing request fails
 * or no connector matches.
 */
export async function deleteConnectorIfExists({
  source,
  name,
}: {
  source: ValidSources;
  name?: string;
}): Promise<string | null> {
  const listResponse = await fetch("/api/manage/connector");
  if (!listResponse.ok) {
    // Listing failed: nothing is deleted and no error is reported.
    return null;
  }

  const allConnectors = (await listResponse.json()) as Connector<any>[];
  const matchingConnector = allConnectors.find(
    (connector) =>
      connector.source === source && (!name || connector.name === name)
  );
  if (!matchingConnector) {
    return null;
  }

  const errorMsg = await deleteConnector(matchingConnector.id);
  return errorMsg ? errorMsg : null;
}

View File

@ -13,6 +13,7 @@ export type ValidSources =
| "slack"
| "google_drive"
| "confluence"
| "jira"
| "file";
export type ValidInputTypes = "load_state" | "poll" | "event";
@ -46,6 +47,10 @@ export interface ConfluenceConfig {
wiki_page_url: string;
}
// Connector-specific configuration of a Jira connector.
export interface JiraConfig {
  // URL inside the Jira project to index, as entered in the admin form.
  jira_project_url: string;
}
export interface SlackConfig {
workspace: string;
}
@ -85,6 +90,11 @@ export interface ConfluenceCredentialJson {
confluence_access_token: string;
}
// Credential payload stored for the Jira connector.
export interface JiraCredentialJson {
  // Jira account identifier entered in the "Username" field of the admin form.
  jira_user_email: string;
  // Jira API (access) token paired with the account above.
  jira_api_token: string;
}
export interface SlackCredentialJson {
slack_bot_token: string;
}