mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-10-09 20:55:06 +02:00
@@ -22,6 +22,7 @@ class DocumentSource(str, Enum):
|
|||||||
WEB = "web"
|
WEB = "web"
|
||||||
GOOGLE_DRIVE = "google_drive"
|
GOOGLE_DRIVE = "google_drive"
|
||||||
GITHUB = "github"
|
GITHUB = "github"
|
||||||
|
GURU = "guru"
|
||||||
BOOKSTACK = "bookstack"
|
BOOKSTACK = "bookstack"
|
||||||
CONFLUENCE = "confluence"
|
CONFLUENCE = "confluence"
|
||||||
SLAB = "slab"
|
SLAB = "slab"
|
||||||
|
@@ -4,22 +4,17 @@ from collections.abc import Callable
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
from danswer.configs.app_configs import INDEX_BATCH_SIZE
|
from danswer.configs.app_configs import INDEX_BATCH_SIZE
|
||||||
from danswer.configs.constants import DocumentSource
|
from danswer.configs.constants import DocumentSource
|
||||||
from danswer.configs.constants import HTML_SEPARATOR
|
|
||||||
from danswer.connectors.bookstack.client import BookStackApiClient
|
from danswer.connectors.bookstack.client import BookStackApiClient
|
||||||
from danswer.connectors.interfaces import GenerateDocumentsOutput
|
from danswer.connectors.interfaces import GenerateDocumentsOutput
|
||||||
from danswer.connectors.interfaces import LoadConnector
|
from danswer.connectors.interfaces import LoadConnector
|
||||||
from danswer.connectors.interfaces import PollConnector
|
from danswer.connectors.interfaces import PollConnector
|
||||||
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
|
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
|
||||||
|
from danswer.connectors.models import ConnectorMissingCredentialError
|
||||||
from danswer.connectors.models import Document
|
from danswer.connectors.models import Document
|
||||||
from danswer.connectors.models import Section
|
from danswer.connectors.models import Section
|
||||||
|
from danswer.utils.text_processing import parse_html_page_basic
|
||||||
|
|
||||||
class BookstackClientNotSetUpError(PermissionError):
|
|
||||||
def __init__(self) -> None:
|
|
||||||
super().__init__("BookStack Client is not set up, was load_credentials called?")
|
|
||||||
|
|
||||||
|
|
||||||
class BookstackConnector(LoadConnector, PollConnector):
|
class BookstackConnector(LoadConnector, PollConnector):
|
||||||
@@ -135,8 +130,7 @@ class BookstackConnector(LoadConnector, PollConnector):
|
|||||||
page_html = (
|
page_html = (
|
||||||
"<h1>" + html.escape(page_name) + "</h1>" + str(page_data.get("html"))
|
"<h1>" + html.escape(page_name) + "</h1>" + str(page_data.get("html"))
|
||||||
)
|
)
|
||||||
soup = BeautifulSoup(page_html, "html.parser")
|
text = parse_html_page_basic(page_html)
|
||||||
text = soup.get_text(HTML_SEPARATOR)
|
|
||||||
time.sleep(0.1)
|
time.sleep(0.1)
|
||||||
return Document(
|
return Document(
|
||||||
id="page:" + page_id,
|
id="page:" + page_id,
|
||||||
@@ -148,7 +142,7 @@ class BookstackConnector(LoadConnector, PollConnector):
|
|||||||
|
|
||||||
def load_from_state(self) -> GenerateDocumentsOutput:
|
def load_from_state(self) -> GenerateDocumentsOutput:
|
||||||
if self.bookstack_client is None:
|
if self.bookstack_client is None:
|
||||||
raise BookstackClientNotSetUpError()
|
raise ConnectorMissingCredentialError("Bookstack")
|
||||||
|
|
||||||
return self.poll_source(None, None)
|
return self.poll_source(None, None)
|
||||||
|
|
||||||
@@ -156,7 +150,7 @@ class BookstackConnector(LoadConnector, PollConnector):
|
|||||||
self, start: SecondsSinceUnixEpoch | None, end: SecondsSinceUnixEpoch | None
|
self, start: SecondsSinceUnixEpoch | None, end: SecondsSinceUnixEpoch | None
|
||||||
) -> GenerateDocumentsOutput:
|
) -> GenerateDocumentsOutput:
|
||||||
if self.bookstack_client is None:
|
if self.bookstack_client is None:
|
||||||
raise BookstackClientNotSetUpError()
|
raise ConnectorMissingCredentialError("Bookstack")
|
||||||
|
|
||||||
transform_by_endpoint: dict[
|
transform_by_endpoint: dict[
|
||||||
str, Callable[[BookStackApiClient, dict], Document]
|
str, Callable[[BookStackApiClient, dict], Document]
|
||||||
|
@@ -6,16 +6,16 @@ from typing import Any
|
|||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
from atlassian import Confluence # type:ignore
|
from atlassian import Confluence # type:ignore
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
from danswer.configs.app_configs import INDEX_BATCH_SIZE
|
from danswer.configs.app_configs import INDEX_BATCH_SIZE
|
||||||
from danswer.configs.constants import DocumentSource
|
from danswer.configs.constants import DocumentSource
|
||||||
from danswer.configs.constants import HTML_SEPARATOR
|
|
||||||
from danswer.connectors.interfaces import GenerateDocumentsOutput
|
from danswer.connectors.interfaces import GenerateDocumentsOutput
|
||||||
from danswer.connectors.interfaces import LoadConnector
|
from danswer.connectors.interfaces import LoadConnector
|
||||||
from danswer.connectors.interfaces import PollConnector
|
from danswer.connectors.interfaces import PollConnector
|
||||||
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
|
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
|
||||||
|
from danswer.connectors.models import ConnectorMissingCredentialError
|
||||||
from danswer.connectors.models import Document
|
from danswer.connectors.models import Document
|
||||||
from danswer.connectors.models import Section
|
from danswer.connectors.models import Section
|
||||||
|
from danswer.utils.text_processing import parse_html_page_basic
|
||||||
|
|
||||||
# Potential Improvements
|
# Potential Improvements
|
||||||
# 1. If wiki page instead of space, do a search of all the children of the page instead of index all in the space
|
# 1. If wiki page instead of space, do a search of all the children of the page instead of index all in the space
|
||||||
@@ -23,13 +23,6 @@ from danswer.connectors.models import Section
|
|||||||
# 3. Segment into Sections for more accurate linking, can split by headers but make sure no text/ordering is lost
|
# 3. Segment into Sections for more accurate linking, can split by headers but make sure no text/ordering is lost
|
||||||
|
|
||||||
|
|
||||||
class ConfluenceClientNotSetUpError(PermissionError):
|
|
||||||
def __init__(self) -> None:
|
|
||||||
super().__init__(
|
|
||||||
"Confluence Client is not set up, was load_credentials called?"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def extract_confluence_keys_from_url(wiki_url: str) -> tuple[str, str]:
|
def extract_confluence_keys_from_url(wiki_url: str) -> tuple[str, str]:
|
||||||
"""Sample
|
"""Sample
|
||||||
https://danswer.atlassian.net/wiki/spaces/1234abcd/overview
|
https://danswer.atlassian.net/wiki/spaces/1234abcd/overview
|
||||||
@@ -59,8 +52,7 @@ def _comment_dfs(
|
|||||||
) -> str:
|
) -> str:
|
||||||
for comment_page in comment_pages:
|
for comment_page in comment_pages:
|
||||||
comment_html = comment_page["body"]["storage"]["value"]
|
comment_html = comment_page["body"]["storage"]["value"]
|
||||||
soup = BeautifulSoup(comment_html, "html.parser")
|
comments_str += "\nComment:\n" + parse_html_page_basic(comment_html)
|
||||||
comments_str += "\nComment:\n" + soup.get_text(HTML_SEPARATOR)
|
|
||||||
child_comment_pages = confluence_client.get_page_child_by_type(
|
child_comment_pages = confluence_client.get_page_child_by_type(
|
||||||
comment_page["id"],
|
comment_page["id"],
|
||||||
type="comment",
|
type="comment",
|
||||||
@@ -101,7 +93,7 @@ class ConfluenceConnector(LoadConnector, PollConnector):
|
|||||||
doc_batch: list[Document] = []
|
doc_batch: list[Document] = []
|
||||||
|
|
||||||
if self.confluence_client is None:
|
if self.confluence_client is None:
|
||||||
raise ConfluenceClientNotSetUpError()
|
raise ConnectorMissingCredentialError("Confluence")
|
||||||
|
|
||||||
batch = self.confluence_client.get_all_pages_from_space(
|
batch = self.confluence_client.get_all_pages_from_space(
|
||||||
self.space,
|
self.space,
|
||||||
@@ -116,8 +108,9 @@ class ConfluenceConnector(LoadConnector, PollConnector):
|
|||||||
|
|
||||||
if time_filter is None or time_filter(last_modified):
|
if time_filter is None or time_filter(last_modified):
|
||||||
page_html = page["body"]["storage"]["value"]
|
page_html = page["body"]["storage"]["value"]
|
||||||
soup = BeautifulSoup(page_html, "html.parser")
|
page_text = (
|
||||||
page_text = page.get("title", "") + "\n" + soup.get_text(HTML_SEPARATOR)
|
page.get("title", "") + "\n" + parse_html_page_basic(page_html)
|
||||||
|
)
|
||||||
comment_pages = self.confluence_client.get_page_child_by_type(
|
comment_pages = self.confluence_client.get_page_child_by_type(
|
||||||
page["id"],
|
page["id"],
|
||||||
type="comment",
|
type="comment",
|
||||||
@@ -146,7 +139,7 @@ class ConfluenceConnector(LoadConnector, PollConnector):
|
|||||||
|
|
||||||
def load_from_state(self) -> GenerateDocumentsOutput:
|
def load_from_state(self) -> GenerateDocumentsOutput:
|
||||||
if self.confluence_client is None:
|
if self.confluence_client is None:
|
||||||
raise ConfluenceClientNotSetUpError()
|
raise ConnectorMissingCredentialError("Confluence")
|
||||||
|
|
||||||
start_ind = 0
|
start_ind = 0
|
||||||
while True:
|
while True:
|
||||||
@@ -162,7 +155,7 @@ class ConfluenceConnector(LoadConnector, PollConnector):
|
|||||||
self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
|
self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
|
||||||
) -> GenerateDocumentsOutput:
|
) -> GenerateDocumentsOutput:
|
||||||
if self.confluence_client is None:
|
if self.confluence_client is None:
|
||||||
raise ConfluenceClientNotSetUpError()
|
raise ConnectorMissingCredentialError("Confluence")
|
||||||
|
|
||||||
start_time = datetime.fromtimestamp(start, tz=timezone.utc)
|
start_time = datetime.fromtimestamp(start, tz=timezone.utc)
|
||||||
end_time = datetime.fromtimestamp(end, tz=timezone.utc)
|
end_time = datetime.fromtimestamp(end, tz=timezone.utc)
|
||||||
|
@@ -9,6 +9,7 @@ from danswer.connectors.interfaces import GenerateDocumentsOutput
|
|||||||
from danswer.connectors.interfaces import LoadConnector
|
from danswer.connectors.interfaces import LoadConnector
|
||||||
from danswer.connectors.interfaces import PollConnector
|
from danswer.connectors.interfaces import PollConnector
|
||||||
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
|
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
|
||||||
|
from danswer.connectors.models import ConnectorMissingCredentialError
|
||||||
from danswer.connectors.models import Document
|
from danswer.connectors.models import Document
|
||||||
from danswer.connectors.models import Section
|
from danswer.connectors.models import Section
|
||||||
from danswer.utils.logger import setup_logger
|
from danswer.utils.logger import setup_logger
|
||||||
@@ -97,9 +98,7 @@ class JiraConnector(LoadConnector, PollConnector):
|
|||||||
|
|
||||||
def load_from_state(self) -> GenerateDocumentsOutput:
|
def load_from_state(self) -> GenerateDocumentsOutput:
|
||||||
if self.jira_client is None:
|
if self.jira_client is None:
|
||||||
raise PermissionError(
|
raise ConnectorMissingCredentialError("Jira")
|
||||||
"Jira Client is not set up, was load_credentials called?"
|
|
||||||
)
|
|
||||||
|
|
||||||
start_ind = 0
|
start_ind = 0
|
||||||
while True:
|
while True:
|
||||||
@@ -121,9 +120,7 @@ class JiraConnector(LoadConnector, PollConnector):
|
|||||||
self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
|
self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
|
||||||
) -> GenerateDocumentsOutput:
|
) -> GenerateDocumentsOutput:
|
||||||
if self.jira_client is None:
|
if self.jira_client is None:
|
||||||
raise PermissionError(
|
raise ConnectorMissingCredentialError("Jira")
|
||||||
"Jira Client is not set up, was load_credentials called?"
|
|
||||||
)
|
|
||||||
|
|
||||||
start_date_str = datetime.fromtimestamp(start, tz=timezone.utc).strftime(
|
start_date_str = datetime.fromtimestamp(start, tz=timezone.utc).strftime(
|
||||||
"%Y-%m-%d %H:%M"
|
"%Y-%m-%d %H:%M"
|
||||||
|
@@ -9,6 +9,7 @@ from danswer.connectors.file.connector import LocalFileConnector
|
|||||||
from danswer.connectors.github.connector import GithubConnector
|
from danswer.connectors.github.connector import GithubConnector
|
||||||
from danswer.connectors.google_drive.connector import GoogleDriveConnector
|
from danswer.connectors.google_drive.connector import GoogleDriveConnector
|
||||||
from danswer.connectors.notion.connector import NotionConnector
|
from danswer.connectors.notion.connector import NotionConnector
|
||||||
|
from danswer.connectors.guru.connector import GuruConnector
|
||||||
from danswer.connectors.interfaces import BaseConnector
|
from danswer.connectors.interfaces import BaseConnector
|
||||||
from danswer.connectors.interfaces import EventConnector
|
from danswer.connectors.interfaces import EventConnector
|
||||||
from danswer.connectors.interfaces import LoadConnector
|
from danswer.connectors.interfaces import LoadConnector
|
||||||
@@ -46,6 +47,7 @@ def identify_connector_class(
|
|||||||
DocumentSource.PRODUCTBOARD: ProductboardConnector,
|
DocumentSource.PRODUCTBOARD: ProductboardConnector,
|
||||||
DocumentSource.SLAB: SlabConnector,
|
DocumentSource.SLAB: SlabConnector,
|
||||||
DocumentSource.NOTION: NotionConnector,
|
DocumentSource.NOTION: NotionConnector,
|
||||||
|
DocumentSource.GURU: GuruConnector,
|
||||||
}
|
}
|
||||||
connector_by_source = connector_map.get(source, {})
|
connector_by_source = connector_map.get(source, {})
|
||||||
|
|
||||||
|
@@ -6,6 +6,7 @@ from danswer.configs.app_configs import INDEX_BATCH_SIZE
|
|||||||
from danswer.configs.constants import DocumentSource
|
from danswer.configs.constants import DocumentSource
|
||||||
from danswer.connectors.interfaces import GenerateDocumentsOutput
|
from danswer.connectors.interfaces import GenerateDocumentsOutput
|
||||||
from danswer.connectors.interfaces import LoadConnector
|
from danswer.connectors.interfaces import LoadConnector
|
||||||
|
from danswer.connectors.models import ConnectorMissingCredentialError
|
||||||
from danswer.connectors.models import Document
|
from danswer.connectors.models import Document
|
||||||
from danswer.connectors.models import Section
|
from danswer.connectors.models import Section
|
||||||
from danswer.utils.logger import setup_logger
|
from danswer.utils.logger import setup_logger
|
||||||
@@ -48,9 +49,7 @@ class GithubConnector(LoadConnector):
|
|||||||
|
|
||||||
def load_from_state(self) -> GenerateDocumentsOutput:
|
def load_from_state(self) -> GenerateDocumentsOutput:
|
||||||
if self.github_client is None:
|
if self.github_client is None:
|
||||||
raise PermissionError(
|
raise ConnectorMissingCredentialError("GitHub")
|
||||||
"Github Client is not set up, was load_credentials called?"
|
|
||||||
)
|
|
||||||
repo = self.github_client.get_repo(f"{self.repo_owner}/{self.repo_name}")
|
repo = self.github_client.get_repo(f"{self.repo_owner}/{self.repo_name}")
|
||||||
pull_requests = repo.get_pulls(state=self.state_filter)
|
pull_requests = repo.get_pulls(state=self.state_filter)
|
||||||
for pr_batch in get_pr_batches(pull_requests, self.batch_size):
|
for pr_batch in get_pr_batches(pull_requests, self.batch_size):
|
||||||
|
0
backend/danswer/connectors/guru/__init__.py
Normal file
0
backend/danswer/connectors/guru/__init__.py
Normal file
110
backend/danswer/connectors/guru/connector.py
Normal file
110
backend/danswer/connectors/guru/connector.py
Normal file
@@ -0,0 +1,110 @@
|
|||||||
|
import json
|
||||||
|
from datetime import datetime
|
||||||
|
from datetime import timezone
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from danswer.configs.app_configs import INDEX_BATCH_SIZE
|
||||||
|
from danswer.configs.constants import DocumentSource
|
||||||
|
from danswer.connectors.interfaces import GenerateDocumentsOutput
|
||||||
|
from danswer.connectors.interfaces import LoadConnector
|
||||||
|
from danswer.connectors.interfaces import PollConnector
|
||||||
|
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
|
||||||
|
from danswer.connectors.models import ConnectorMissingCredentialError
|
||||||
|
from danswer.connectors.models import Document
|
||||||
|
from danswer.connectors.models import Section
|
||||||
|
from danswer.utils.logger import setup_logger
|
||||||
|
from danswer.utils.text_processing import parse_html_page_basic
|
||||||
|
|
||||||
|
# Potential Improvements
|
||||||
|
# 1. Support fetching per collection via collection token (configured at connector creation)
|
||||||
|
|
||||||
|
# Root of the Guru REST API; endpoint paths are appended to it.
GURU_API_BASE = "https://api.getguru.com/api/v1/"
# Card search endpoint queried by the connector (also the first page of a
# paginated listing).
GURU_QUERY_ENDPOINT = GURU_API_BASE + "search/query"
# Prefix combined with a card's slug to build the link stored on each Section.
GURU_CARDS_URL = "https://app.getguru.com/card/"
logger = setup_logger()
|
||||||
|
|
||||||
|
|
||||||
|
def unixtime_to_guru_time_str(unix_time: SecondsSinceUnixEpoch) -> str:
    """Format a Unix timestamp as a UTC time string for Guru queries.

    The result has millisecond precision followed by the numeric UTC offset,
    e.g. ``1970-01-01T00:00:00.000+0000``.

    Args:
        unix_time: Seconds since the Unix epoch.

    Returns:
        The formatted timestamp string.
    """
    date_obj = datetime.fromtimestamp(unix_time, tz=timezone.utc)
    # %f yields microseconds (6 digits); drop the last 3 to keep milliseconds.
    date_str = date_obj.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3]
    tz_str = date_obj.strftime("%z")
    return date_str + tz_str
|
||||||
|
|
||||||
|
|
||||||
|
class GuruConnector(LoadConnector, PollConnector):
    """Connector that turns Guru cards into ``Document`` batches.

    Cards are fetched from ``GURU_QUERY_ENDPOINT`` using HTTP basic auth
    (Guru user + user token). ``load_from_state`` fetches every card, while
    ``poll_source`` restricts the query to a ``lastModified`` window.
    """

    def __init__(
        self,
        batch_size: int = INDEX_BATCH_SIZE,
        guru_user: str | None = None,
        guru_user_token: str | None = None,
    ) -> None:
        """Store the batch size and, optionally, pre-supplied credentials."""
        self.batch_size = batch_size
        self.guru_user = guru_user
        self.guru_user_token = guru_user_token

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        """Read ``guru_user`` and ``guru_user_token`` from ``credentials``.

        Raises:
            KeyError: If either key is missing from ``credentials``.
        """
        self.guru_user = credentials["guru_user"]
        self.guru_user_token = credentials["guru_user_token"]
        return None

    def _process_cards(
        self, start_str: str | None = None, end_str: str | None = None
    ) -> GenerateDocumentsOutput:
        """Yield batches of Documents built from Guru cards.

        Args:
            start_str: Inclusive lower bound on ``lastModified``, already
                formatted for Guru. Only applied when ``end_str`` is also set.
            end_str: Exclusive upper bound on ``lastModified``.

        Yields:
            Lists of up to ``self.batch_size`` Documents.

        Raises:
            ConnectorMissingCredentialError: If credentials were never loaded.
            requests.HTTPError: If Guru answers with a 4xx/5xx status.
        """
        if self.guru_user is None or self.guru_user_token is None:
            raise ConnectorMissingCredentialError("Guru")

        doc_batch: list[Document] = []

        params: dict[str, str | int] = {"maxResults": self.batch_size}
        if start_str is not None and end_str is not None:
            params["q"] = f"lastModified >= {start_str} AND lastModified < {end_str}"

        # Pagination is link-driven: each response may advertise a different
        # URL for the next page.
        current_url = GURU_QUERY_ENDPOINT

        # The context manager releases the session's pooled connections even
        # if the consumer stops iterating early or a request fails.
        with requests.Session() as session:
            session.auth = (self.guru_user, self.guru_user_token)

            while True:
                response = session.get(current_url, params=params)
                # Fail loudly on auth/server errors instead of trying to parse
                # an error payload as a list of cards.
                response.raise_for_status()

                # 204 (no content) is treated as "no (more) cards".
                if response.status_code == 204:
                    break

                cards = json.loads(response.text)
                for card in cards:
                    title = card["preferredPhrase"]
                    link = GURU_CARDS_URL + card["slug"]
                    content_text = (
                        title + "\n" + parse_html_page_basic(card["content"])
                    )

                    doc_batch.append(
                        Document(
                            id=card["id"],
                            sections=[Section(link=link, text=content_text)],
                            source=DocumentSource.GURU,
                            semantic_identifier=title,
                            metadata={},
                        )
                    )

                    if len(doc_batch) >= self.batch_size:
                        yield doc_batch
                        doc_batch = []

                # Stop when no "next-page" relation is advertised; a Link
                # header carrying only other relations must not raise KeyError.
                next_page = response.links.get("next-page")
                if not next_page:
                    break
                current_url = next_page["url"]

        if doc_batch:
            yield doc_batch

    def load_from_state(self) -> GenerateDocumentsOutput:
        """Yield batches covering all cards (no time filter)."""
        return self._process_cards()

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        """Yield batches of cards last modified in ``[start, end)``."""
        start_time = unixtime_to_guru_time_str(start)
        end_time = unixtime_to_guru_time_str(end)

        return self._process_cards(start_time, end_time)
|
@@ -6,6 +6,14 @@ from danswer.configs.constants import DocumentSource
|
|||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
|
||||||
|
class ConnectorMissingCredentialError(PermissionError):
    """Raised when a connector is used before its credentials are available.

    Args:
        connector_name: Human-readable connector name used in the message;
            an empty value falls back to ``"Unknown"``.
    """

    def __init__(self, connector_name: str) -> None:
        connector_name = connector_name or "Unknown"
        super().__init__(
            f"{connector_name} connector missing credentials, was load_credentials called?"
        )
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Section:
|
class Section:
|
||||||
link: str
|
link: str
|
||||||
|
@@ -13,6 +13,7 @@ from danswer.connectors.interfaces import GenerateDocumentsOutput
|
|||||||
from danswer.connectors.interfaces import LoadConnector
|
from danswer.connectors.interfaces import LoadConnector
|
||||||
from danswer.connectors.interfaces import PollConnector
|
from danswer.connectors.interfaces import PollConnector
|
||||||
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
|
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
|
||||||
|
from danswer.connectors.models import ConnectorMissingCredentialError
|
||||||
from danswer.connectors.models import Document
|
from danswer.connectors.models import Document
|
||||||
from danswer.connectors.models import Section
|
from danswer.connectors.models import Section
|
||||||
from danswer.utils.logger import setup_logger
|
from danswer.utils.logger import setup_logger
|
||||||
@@ -24,11 +25,6 @@ SLAB_API_URL = "https://api.slab.com/v1/graphql"
|
|||||||
logger = setup_logger()
|
logger = setup_logger()
|
||||||
|
|
||||||
|
|
||||||
class SlabBotTokenNotFoundError(PermissionError):
|
|
||||||
def __init__(self) -> None:
|
|
||||||
super().__init__("Slab Bot Token not found, was load_credentials called?")
|
|
||||||
|
|
||||||
|
|
||||||
def run_graphql_request(
|
def run_graphql_request(
|
||||||
graphql_query: dict, bot_token: str, max_tries: int = SLAB_GRAPHQL_MAX_TRIES
|
graphql_query: dict, bot_token: str, max_tries: int = SLAB_GRAPHQL_MAX_TRIES
|
||||||
) -> str:
|
) -> str:
|
||||||
@@ -179,7 +175,7 @@ class SlabConnector(LoadConnector, PollConnector):
|
|||||||
doc_batch: list[Document] = []
|
doc_batch: list[Document] = []
|
||||||
|
|
||||||
if self.slab_bot_token is None:
|
if self.slab_bot_token is None:
|
||||||
raise SlabBotTokenNotFoundError()
|
raise ConnectorMissingCredentialError("Slab")
|
||||||
|
|
||||||
all_post_ids: list[str] = get_all_post_ids(self.slab_bot_token)
|
all_post_ids: list[str] = get_all_post_ids(self.slab_bot_token)
|
||||||
|
|
||||||
|
@@ -12,6 +12,7 @@ from danswer.connectors.interfaces import GenerateDocumentsOutput
|
|||||||
from danswer.connectors.interfaces import LoadConnector
|
from danswer.connectors.interfaces import LoadConnector
|
||||||
from danswer.connectors.interfaces import PollConnector
|
from danswer.connectors.interfaces import PollConnector
|
||||||
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
|
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
|
||||||
|
from danswer.connectors.models import ConnectorMissingCredentialError
|
||||||
from danswer.connectors.models import Document
|
from danswer.connectors.models import Document
|
||||||
from danswer.connectors.models import Section
|
from danswer.connectors.models import Section
|
||||||
from danswer.connectors.slack.utils import get_message_link
|
from danswer.connectors.slack.utils import get_message_link
|
||||||
@@ -285,9 +286,7 @@ class SlackPollConnector(PollConnector):
|
|||||||
self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
|
self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
|
||||||
) -> GenerateDocumentsOutput:
|
) -> GenerateDocumentsOutput:
|
||||||
if self.client is None:
|
if self.client is None:
|
||||||
raise PermissionError(
|
raise ConnectorMissingCredentialError("Slack")
|
||||||
"Slack Client is not set up, was load_credentials called?"
|
|
||||||
)
|
|
||||||
|
|
||||||
documents: list[Document] = []
|
documents: list[Document] = []
|
||||||
for document in get_all_docs(
|
for document in get_all_docs(
|
||||||
|
@@ -1,3 +1,7 @@
|
|||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from danswer.configs.constants import HTML_SEPARATOR
|
||||||
|
|
||||||
|
|
||||||
def clean_model_quote(quote: str, trim_length: int) -> str:
|
def clean_model_quote(quote: str, trim_length: int) -> str:
|
||||||
quote_clean = quote.strip()
|
quote_clean = quote.strip()
|
||||||
if quote_clean[0] == '"':
|
if quote_clean[0] == '"':
|
||||||
@@ -29,3 +33,8 @@ def shared_precompare_cleanup(text: str) -> str:
|
|||||||
text = text.replace("-", "")
|
text = text.replace("-", "")
|
||||||
|
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
|
def parse_html_page_basic(text: str) -> str:
    """Return the text content of an HTML string with the markup removed.

    The input is parsed with BeautifulSoup's built-in ``html.parser`` and the
    extracted text fragments are joined with ``HTML_SEPARATOR``.

    Args:
        text: Raw HTML.

    Returns:
        The text content of the document.
    """
    soup = BeautifulSoup(text, "html.parser")
    return soup.get_text(HTML_SEPARATOR)
|
||||||
|
1
web/public/Guru.svg
Normal file
1
web/public/Guru.svg
Normal file
@@ -0,0 +1 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 162.31 162.01"><defs><style>.d{fill:#fff;}.e{fill:#080b0e;}</style></defs><g id="a"/><g id="b"><g id="c"><g><path class="e" d="M162.28,96.29c.57,20.21-8.19,35.32-25.42,45.02-23.91,13.45-49.54,21.53-77.3,20.64-15.64-.5-28.15-8.01-36.35-21.09C7.51,115.82-.94,88.36,.08,58.64c.54-15.6,8.6-27.88,21.78-35.94C46.67,7.52,73.69-.9,102.98,.08c15.87,.53,28.12,8.76,36.46,22.05,14.21,22.64,20.84,47.73,22.84,74.16Z"/><g><path class="d" d="M92.34,111.57c-6.26,2.28-12.96,2.67-18.88,1.08-14.71-3.95-25.39-17.48-25.39-32.16,0-11.7,5.26-19.36,9.68-23.73,6.08-6.01,14.53-9.58,22.67-9.58,.1,0,.21,0,.31,0,15.52,.17,25.35,11.89,25.44,12,1.35,1.66,3.78,1.9,5.43,.56,1.65-1.34,1.91-3.78,.56-5.43-.49-.6-12.13-14.64-31.35-14.85-10.38-.06-20.79,4.19-28.49,11.8-7.72,7.63-11.97,18.01-11.97,29.22,0,18.11,13.08,34.78,31.11,39.62,3.05,.82,6.25,1.22,9.5,1.22,4.68,0,9.47-.84,14.04-2.51,11.87-4.34,23.47-15.76,23.05-28.96-2.62,1.92-5.47,3.67-8.47,5.33-2.31,7.25-9.61,13.6-17.23,16.38Z"/><path class="d" d="M121.5,71.35c-.07,.07-3.39,3.63-8.36,7.2-5.16-5.03-13.38-7.42-19.32-7.42-4.87,0-8.59,1.28-11.07,3.79-1.95,1.98-2.95,4.57-2.91,7.49,.08,5.25,3.85,10.93,14.16,11.16,5.75,.14,11.38-1.72,16.29-4.24,3.43-1.74,5.9-3.48,7.01-4.27,5.76-4.07,9.56-8.13,9.88-8.48,1.45-1.57,1.35-4.01-.22-5.46-1.56-1.44-4-1.35-5.46,.21Zm-27.34,14.5c-2.99-.07-6.56-.74-6.6-3.56-.02-1.23,.52-1.78,.69-1.95,.55-.56,2-1.49,5.57-1.49,3.78,0,8.83,1.45,12.33,3.95-3.76,1.85-7.81,3.14-11.99,3.05Z"/></g></g></g></g></svg>
|
After Width: | Height: | Size: 1.5 KiB |
233
web/src/app/admin/connectors/guru/page.tsx
Normal file
233
web/src/app/admin/connectors/guru/page.tsx
Normal file
@@ -0,0 +1,233 @@
|
|||||||
|
"use client";
|
||||||
|
|
||||||
|
import * as Yup from "yup";
|
||||||
|
import { GuruIcon, TrashIcon } from "@/components/icons/icons";
|
||||||
|
import { TextFormField } from "@/components/admin/connectors/Field";
|
||||||
|
import { HealthCheckBanner } from "@/components/health/healthcheck";
|
||||||
|
import { CredentialForm } from "@/components/admin/connectors/CredentialForm";
|
||||||
|
import {
|
||||||
|
Credential,
|
||||||
|
ProductboardConfig,
|
||||||
|
ConnectorIndexingStatus,
|
||||||
|
GuruConfig,
|
||||||
|
GuruCredentialJson,
|
||||||
|
} from "@/lib/types";
|
||||||
|
import useSWR, { useSWRConfig } from "swr";
|
||||||
|
import { fetcher } from "@/lib/fetcher";
|
||||||
|
import { LoadingAnimation } from "@/components/Loading";
|
||||||
|
import { deleteCredential, linkCredential } from "@/lib/credential";
|
||||||
|
import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm";
|
||||||
|
import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable";
|
||||||
|
import { usePopup } from "@/components/admin/connectors/Popup";
|
||||||
|
|
||||||
|
/**
 * Body of the Guru connector admin page.
 *
 * Fetches the connector indexing statuses and the stored credentials, then
 * renders a two-step flow:
 *   1. Provide (or delete) the Guru credential.
 *   2. Create the Guru poll connector, or show the table of existing ones.
 *
 * Renders a loading animation while either request is in flight (or the
 * credentials are revalidating) and a short error message if either fails.
 */
const Main = () => {
  const { popup, setPopup } = usePopup();

  const { mutate } = useSWRConfig();
  const {
    data: connectorIndexingStatuses,
    isLoading: isConnectorIndexingStatusesLoading,
    error: isConnectorIndexingStatusesError,
  } = useSWR<ConnectorIndexingStatus<any>[]>(
    "/api/manage/admin/connector/indexing-status",
    fetcher
  );
  const {
    data: credentialsData,
    isLoading: isCredentialsLoading,
    isValidating: isCredentialsValidating,
    error: isCredentialsError,
  } = useSWR<Credential<any>[]>("/api/manage/credential", fetcher);

  if (
    isConnectorIndexingStatusesLoading ||
    isCredentialsLoading ||
    isCredentialsValidating
  ) {
    return <LoadingAnimation text="Loading" />;
  }

  if (isConnectorIndexingStatusesError || !connectorIndexingStatuses) {
    return <div>Failed to load connectors</div>;
  }

  if (isCredentialsError || !credentialsData) {
    return <div>Failed to load credentials</div>;
  }

  // Only the statuses that belong to Guru connectors are relevant here.
  const guruConnectorIndexingStatuses: ConnectorIndexingStatus<GuruConfig>[] =
    connectorIndexingStatuses.filter(
      (connectorIndexingStatus) =>
        connectorIndexingStatus.connector.source === "guru"
    );

  // A credential counts as a Guru credential when its JSON carries a
  // `guru_user`. There may be none, so the type says so explicitly; every
  // use below is guarded by a truthiness check.
  const guruCredential: Credential<GuruCredentialJson> | undefined =
    credentialsData.find(
      (credential) => credential.credential_json?.guru_user
    );

  return (
    <>
      {popup}
      <p className="text-sm">
        This connector allows you to sync all your Guru Cards into Danswer.
      </p>

      <h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
        Step 1: Provide your Credentials
      </h2>

      {guruCredential ? (
        <>
          <div className="flex mb-1 text-sm">
            <p className="my-auto">Existing Access Token: </p>
            <p className="ml-1 italic my-auto max-w-md truncate">
              {guruCredential.credential_json?.guru_user_token}
            </p>
            <button
              className="ml-1 hover:bg-gray-700 rounded-full p-1"
              onClick={async () => {
                // Refuse to delete a credential while Guru connectors still
                // exist; the user must remove those first.
                if (guruConnectorIndexingStatuses.length > 0) {
                  setPopup({
                    type: "error",
                    message:
                      "Must delete all connectors before deleting credentials",
                  });
                  return;
                }
                await deleteCredential(guruCredential.id);
                mutate("/api/manage/credential");
              }}
            >
              <TrashIcon />
            </button>
          </div>
        </>
      ) : (
        <>
          <p className="text-sm">
            To use the Guru connector, first follow the guide{" "}
            <a
              className="text-blue-500"
              href="https://help.getguru.com/s/article/how-to-obtain-your-api-credentials"
              target="_blank"
              rel="noopener noreferrer"
            >
              here
            </a>{" "}
            to generate a User Token.
          </p>
          <div className="border-solid border-gray-600 border rounded-md p-6 mt-2">
            <CredentialForm<GuruCredentialJson>
              formBody={
                <>
                  <TextFormField name="guru_user" label="Username:" />
                  <TextFormField
                    name="guru_user_token"
                    label="User Token:"
                    type="password"
                  />
                </>
              }
              validationSchema={Yup.object().shape({
                guru_user: Yup.string().required(
                  "Please enter your Guru username"
                ),
                guru_user_token: Yup.string().required(
                  "Please enter your Guru access token"
                ),
              })}
              initialValues={{
                guru_user: "",
                guru_user_token: "",
              }}
              onSubmit={(isSuccess) => {
                if (isSuccess) {
                  mutate("/api/manage/credential");
                }
              }}
            />
          </div>
        </>
      )}

      <h2 className="font-bold mb-2 mt-6 ml-auto mr-auto">
        Step 2: Start indexing!
      </h2>
      {guruCredential ? (
        !guruConnectorIndexingStatuses.length ? (
          <>
            <p className="text-sm mb-2">
              Click the button below to start indexing! We will pull the latest
              cards from Guru every <b>10</b> minutes.
            </p>
            <div className="flex">
              <ConnectorForm<GuruConfig>
                nameBuilder={() => "GuruConnector"}
                source="guru"
                inputType="poll"
                formBody={null}
                validationSchema={Yup.object().shape({})}
                initialValues={{}}
                refreshFreq={10 * 60} // 10 minutes
                onSubmit={async (isSuccess, responseJson) => {
                  // Attach the credential to the newly created connector,
                  // then refresh the status list.
                  if (isSuccess && responseJson) {
                    await linkCredential(responseJson.id, guruCredential.id);
                    mutate("/api/manage/admin/connector/indexing-status");
                  }
                }}
              />
            </div>
          </>
        ) : (
          <>
            <p className="text-sm mb-2">
              Guru connector is setup! We are pulling the latest cards from Guru
              every <b>10</b> minutes.
            </p>
            <ConnectorsTable<GuruConfig, GuruCredentialJson>
              connectorIndexingStatuses={guruConnectorIndexingStatuses}
              liveCredential={guruCredential}
              getCredential={(credential) => {
                return (
                  <div>
                    <p>{credential.credential_json.guru_user}</p>
                  </div>
                );
              }}
              onCredentialLink={async (connectorId) => {
                if (guruCredential) {
                  await linkCredential(connectorId, guruCredential.id);
                  mutate("/api/manage/admin/connector/indexing-status");
                }
              }}
              onUpdate={() =>
                mutate("/api/manage/admin/connector/indexing-status")
              }
            />
          </>
        )
      ) : (
        <>
          <p className="text-sm">
            Please provide your access token in Step 1 first! Once done with
            that, you can then start indexing all your Guru cards.
          </p>
        </>
      )}
    </>
  );
};
|
||||||
|
|
||||||
|
/**
 * Route component for the Guru connector admin page: health-check banner,
 * a header with the Guru icon and title, then the connector setup flow.
 */
export default function Page() {
  return (
    <div className="mx-auto container">
      <div className="mb-4">
        <HealthCheckBanner />
      </div>
      <div className="border-solid border-gray-600 border-b mb-4 pb-2 flex">
        {/* `size` is a numeric prop, so pass a number rather than "32". */}
        <GuruIcon size={32} />
        <h1 className="text-3xl font-bold pl-2">Guru</h1>
      </div>
      <Main />
    </div>
  );
}
|
@@ -9,6 +9,7 @@ import {
|
|||||||
KeyIcon,
|
KeyIcon,
|
||||||
BookstackIcon,
|
BookstackIcon,
|
||||||
ConfluenceIcon,
|
ConfluenceIcon,
|
||||||
|
GuruIcon,
|
||||||
FileIcon,
|
FileIcon,
|
||||||
JiraIcon,
|
JiraIcon,
|
||||||
SlabIcon,
|
SlabIcon,
|
||||||
@@ -131,6 +132,15 @@ export default async function AdminLayout({
|
|||||||
),
|
),
|
||||||
link: "/admin/connectors/notion",
|
link: "/admin/connectors/notion",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: (
|
||||||
|
<div className="flex">
|
||||||
|
<GuruIcon size={16} />
|
||||||
|
<div className="ml-1">Guru</div>
|
||||||
|
</div>
|
||||||
|
),
|
||||||
|
link: "/admin/connectors/guru",
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: (
|
name: (
|
||||||
<div className="flex">
|
<div className="flex">
|
||||||
|
@@ -18,6 +18,7 @@ import { FaFile, FaGlobe } from "react-icons/fa";
|
|||||||
import Image from "next/image";
|
import Image from "next/image";
|
||||||
import jiraSVG from "../../../public/Jira.svg";
|
import jiraSVG from "../../../public/Jira.svg";
|
||||||
import confluenceSVG from "../../../public/Confluence.svg";
|
import confluenceSVG from "../../../public/Confluence.svg";
|
||||||
|
import guruIcon from "../../../public/Guru.svg";
|
||||||
|
|
||||||
interface IconProps {
|
interface IconProps {
|
||||||
size?: number;
|
size?: number;
|
||||||
@@ -237,3 +238,15 @@ export const NotionIcon = ({
|
|||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Guru logo rendered inside a square wrapper of `size` pixels.
 *
 * `size` defaults to 16 and `className` defaults to the file's shared
 * default Tailwind classes.
 */
export const GuruIcon = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  // The inline style fixes the wrapper's dimensions.
  // NOTE(review): the `w-[..px] h-[..px]` classes are built at runtime;
  // confirm Tailwind actually generates them, otherwise only the inline
  // style takes effect.
  const edge = `${size}px`;
  const wrapperClasses = `w-[${size}px] h-[${size}px] ${className}`;

  return (
    <div style={{ width: edge, height: edge }} className={wrapperClasses}>
      <Image src={guruIcon} alt="Logo" width="96" height="96" />
    </div>
  );
};
|
||||||
|
@@ -14,6 +14,7 @@ const sources: Source[] = [
|
|||||||
{ displayName: "Slab", internalName: "slab" },
|
{ displayName: "Slab", internalName: "slab" },
|
||||||
{ displayName: "Github PRs", internalName: "github" },
|
{ displayName: "Github PRs", internalName: "github" },
|
||||||
{ displayName: "Web", internalName: "web" },
|
{ displayName: "Web", internalName: "web" },
|
||||||
|
{ displayName: "Guru", internalName: "guru" },
|
||||||
{ displayName: "File", internalName: "file" },
|
{ displayName: "File", internalName: "file" },
|
||||||
{ displayName: "Notion", internalName: "notion" },
|
{ displayName: "Notion", internalName: "notion" },
|
||||||
];
|
];
|
||||||
|
@@ -6,6 +6,7 @@ import {
|
|||||||
GithubIcon,
|
GithubIcon,
|
||||||
GlobeIcon,
|
GlobeIcon,
|
||||||
GoogleDriveIcon,
|
GoogleDriveIcon,
|
||||||
|
GuruIcon,
|
||||||
JiraIcon,
|
JiraIcon,
|
||||||
NotionIcon,
|
NotionIcon,
|
||||||
ProductboardIcon,
|
ProductboardIcon,
|
||||||
@@ -87,6 +88,12 @@ export const getSourceMetadata = (sourceType: ValidSources): SourceMetadata => {
|
|||||||
displayName: "Notion",
|
displayName: "Notion",
|
||||||
adminPageLink: "/admin/connectors/notion",
|
adminPageLink: "/admin/connectors/notion",
|
||||||
};
|
};
|
||||||
|
case "guru":
|
||||||
|
return {
|
||||||
|
icon: GuruIcon,
|
||||||
|
displayName: "Guru",
|
||||||
|
adminPageLink: "/admin/connectors/guru",
|
||||||
|
};
|
||||||
default:
|
default:
|
||||||
throw new Error("Invalid source type");
|
throw new Error("Invalid source type");
|
||||||
}
|
}
|
||||||
|
@@ -17,8 +17,9 @@ export type ValidSources =
|
|||||||
| "jira"
|
| "jira"
|
||||||
| "productboard"
|
| "productboard"
|
||||||
| "slab"
|
| "slab"
|
||||||
| "file"
|
| "notion"
|
||||||
| "notion";
|
| "guru"
|
||||||
|
| "file";
|
||||||
export type ValidInputTypes = "load_state" | "poll" | "event";
|
export type ValidInputTypes = "load_state" | "poll" | "event";
|
||||||
|
|
||||||
// CONNECTORS
|
// CONNECTORS
|
||||||
@@ -72,6 +73,8 @@ export interface SlabConfig {
|
|||||||
base_url: string;
|
base_url: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Connector-specific configuration for the Guru connector. Declared with no
// fields: the Guru connector form submits an empty `initialValues` object.
export interface GuruConfig {}
|
||||||
|
|
||||||
export interface FileConfig {
|
export interface FileConfig {
|
||||||
file_locations: string[];
|
file_locations: string[];
|
||||||
}
|
}
|
||||||
@@ -139,3 +142,7 @@ export interface SlabCredentialJson {
|
|||||||
export interface NotionCredentialJson {
|
export interface NotionCredentialJson {
|
||||||
notion_integration_token: string;
|
notion_integration_token: string;
|
||||||
}
|
}
|
||||||
|
// Credential payload for the Guru connector.
export interface GuruCredentialJson {
// Collected by the credential form's "Username:" field.
guru_user: string;
// Collected by the credential form's "User Token:" (password) field.
guru_user_token: string;
}
|
||||||
|
Reference in New Issue
Block a user