mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-06-26 16:01:09 +02:00
Discourse Connector (#1420)
This commit is contained in:
parent
03911de8b2
commit
060a8d0aad
@ -94,6 +94,7 @@ class DocumentSource(str, Enum):
|
||||
ZENDESK = "zendesk"
|
||||
LOOPIO = "loopio"
|
||||
SHAREPOINT = "sharepoint"
|
||||
DISCOURSE = "discourse"
|
||||
AXERO = "axero"
|
||||
|
||||
|
||||
|
0
backend/danswer/connectors/discourse/__init__.py
Normal file
0
backend/danswer/connectors/discourse/__init__.py
Normal file
215
backend/danswer/connectors/discourse/connector.py
Normal file
215
backend/danswer/connectors/discourse/connector.py
Normal file
@ -0,0 +1,215 @@
|
||||
import time
|
||||
import urllib.parse
|
||||
from datetime import datetime
|
||||
from datetime import timezone
|
||||
from typing import Any
|
||||
|
||||
import requests
|
||||
from pydantic import BaseModel
|
||||
from requests import Response
|
||||
|
||||
from danswer.configs.app_configs import INDEX_BATCH_SIZE
|
||||
from danswer.configs.constants import DocumentSource
|
||||
from danswer.connectors.cross_connector_utils.html_utils import parse_html_page_basic
|
||||
from danswer.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
|
||||
from danswer.connectors.cross_connector_utils.retry_wrapper import retry_builder
|
||||
from danswer.connectors.interfaces import GenerateDocumentsOutput
|
||||
from danswer.connectors.interfaces import PollConnector
|
||||
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
|
||||
from danswer.connectors.models import BasicExpertInfo
|
||||
from danswer.connectors.models import ConnectorMissingCredentialError
|
||||
from danswer.connectors.models import Document
|
||||
from danswer.connectors.models import Section
|
||||
from danswer.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
class DiscoursePerms(BaseModel):
    """Credential pair sent with every Discourse API request.

    The two fields are forwarded as the ``Api-Key`` and ``Api-Username``
    request headers by ``discourse_request``.
    """

    # Value placed in the ``Api-Key`` header.
    api_key: str
    # Value placed in the ``Api-Username`` header.
    api_username: str
|
||||
|
||||
|
||||
@retry_builder()
def discourse_request(
    endpoint: str,
    perms: DiscoursePerms,
    params: dict | None = None,
    timeout: float = 60.0,
) -> Response:
    """Issue an authenticated GET request against a Discourse endpoint.

    Args:
        endpoint: Fully qualified URL to fetch.
        perms: API key / username pair sent as ``Api-Key`` and
            ``Api-Username`` headers.
        params: Optional query-string parameters.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would let a single
            stalled connection hang an entire indexing run.

    Returns:
        The successful ``requests`` response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    headers = {"Api-Key": perms.api_key, "Api-Username": perms.api_username}

    response = requests.get(
        endpoint, headers=headers, params=params, timeout=timeout
    )
    # Surface HTTP errors as exceptions so the retry wrapper can see them.
    response.raise_for_status()

    return response
|
||||
|
||||
|
||||
class DiscourseConnector(PollConnector):
    """Poll connector that turns Discourse topics into Danswer documents.

    Each poll refreshes the category map, reads ``latest.json`` to find topics
    whose last post falls inside the polling window, and then fetches every
    matching topic individually to build one ``Document`` per topic.
    """

    def __init__(
        self,
        base_url: str,
        categories: list[str] | None = None,
        batch_size: int = INDEX_BATCH_SIZE,
    ) -> None:
        """Configure the connector.

        Args:
            base_url: Address of the Discourse site. ``https://`` is prepended
                when no URL scheme is present.
            categories: Optional category names to restrict indexing to.
                Matching is case-insensitive. ``None`` or an empty list means
                no category restriction.
            batch_size: Maximum number of documents per yielded batch.
        """
        parsed_url = urllib.parse.urlparse(base_url)
        if not parsed_url.scheme:
            base_url = "https://" + base_url
        self.base_url = base_url

        # Stored lower-cased so comparison against API names ignores case.
        self.categories = [c.lower() for c in categories] if categories else []
        # category id -> category name; filled in by _get_categories_map().
        self.category_id_map: dict[int, str] = {}

        self.batch_size = batch_size

        # Set by load_credentials(); None until credentials are provided.
        self.permissions: DiscoursePerms | None = None

    def _require_permissions(self) -> DiscoursePerms:
        """Return the loaded credentials or raise if none were provided.

        A real exception is used instead of ``assert`` so the check survives
        ``python -O`` and matches the error raised by ``poll_source``.
        """
        if self.permissions is None:
            raise ConnectorMissingCredentialError("Discourse")
        return self.permissions

    def _get_categories_map(
        self,
    ) -> None:
        """Rebuild ``category_id_map`` from the site's ``categories.json``.

        When a category filter was configured, only categories whose
        lower-cased name is in that filter are kept; otherwise every category
        in the response's ``category_list.categories`` is kept.
        """
        permissions = self._require_permissions()
        categories_endpoint = urllib.parse.urljoin(self.base_url, "categories.json")
        response = discourse_request(
            endpoint=categories_endpoint,
            perms=permissions,
            params={"include_subcategories": True},
        )
        categories = response.json()["category_list"]["categories"]

        self.category_id_map = {
            category["id"]: category["name"]
            for category in categories
            if not self.categories or category["name"].lower() in self.categories
        }

    def _get_latest_topics(
        self, start: datetime | None, end: datetime | None
    ) -> list[int]:
        """Return ids of topics from ``latest.json`` inside the time window.

        Only the single ``latest.json`` response is inspected; no further
        pages are requested.

        Args:
            start: Skip topics whose last post is older than this, if given.
            end: Skip topics whose last post is newer than this, if given.

        Returns:
            Ids of the topics that pass the time and category filters.
        """
        permissions = self._require_permissions()

        valid_categories = set(self.category_id_map)

        latest_endpoint = urllib.parse.urljoin(self.base_url, "latest.json")
        response = discourse_request(endpoint=latest_endpoint, perms=permissions)
        topics = response.json()["topic_list"]["topics"]

        topic_ids: list[int] = []
        for topic in topics:
            last_time = topic.get("last_posted_at")
            if not last_time:
                # No last-post timestamp means the topic cannot be placed in
                # the window.
                continue
            last_time_dt = time_str_to_utc(last_time)

            if start and start > last_time_dt:
                continue
            if end and end < last_time_dt:
                continue

            # Apply the category filter only when the user configured one.
            # Keying this on the (possibly empty) id map instead would index
            # *every* topic whenever none of the configured names matched,
            # and would drop topics in categories absent from the map when
            # no filter was requested at all.
            if self.categories and topic.get("category_id") not in valid_categories:
                continue

            topic_ids.append(topic["id"])

        return topic_ids

    def _get_doc_from_topic(self, topic_id: int) -> Document:
        """Fetch one topic and convert it into a ``Document``.

        Every post becomes one ``Section`` (its ``cooked`` HTML converted to
        text) linking to the topic URL. The first post's author name becomes
        the primary owner; distinct author names of later posts become
        secondary owners.

        Args:
            topic_id: Discourse id of the topic to fetch.

        Returns:
            The document built from the topic and its posts.
        """
        permissions = self._require_permissions()
        topic_endpoint = urllib.parse.urljoin(self.base_url, f"t/{topic_id}.json")
        response = discourse_request(
            endpoint=topic_endpoint,
            perms=permissions,
        )
        topic = response.json()

        topic_url = urllib.parse.urljoin(self.base_url, f"t/{topic['slug']}")

        sections = []
        poster = None
        responders = []
        seen_names = set()
        for ind, post in enumerate(topic["post_stream"]["posts"]):
            if ind == 0:
                poster_name = post.get("name")
                if poster_name:
                    seen_names.add(poster_name)
                    poster = BasicExpertInfo(display_name=poster_name)
            else:
                responder_name = post.get("name")
                # seen_names de-duplicates responders and keeps the original
                # poster out of the responder list.
                if responder_name and responder_name not in seen_names:
                    seen_names.add(responder_name)
                    responders.append(BasicExpertInfo(display_name=responder_name))

            sections.append(
                Section(link=topic_url, text=parse_html_page_basic(post["cooked"]))
            )

        # The topic's category may be missing from the map (for example when
        # it was not part of the categories.json listing), so look it up
        # defensively rather than letting a KeyError abort the whole batch.
        metadata: dict[str, str | list[str]] = {}
        category_name = self.category_id_map.get(topic.get("category_id"))
        if category_name:
            metadata["category"] = category_name
        if topic.get("tags"):
            metadata["tags"] = topic["tags"]

        doc = Document(
            id="_".join([DocumentSource.DISCOURSE.value, str(topic["id"])]),
            sections=sections,
            source=DocumentSource.DISCOURSE,
            semantic_identifier=topic["title"],
            doc_updated_at=time_str_to_utc(topic["last_posted_at"]),
            primary_owners=[poster] if poster else None,
            secondary_owners=responders or None,
            metadata=metadata,
        )
        return doc

    def _yield_discourse_documents(
        self, topic_ids: list[int]
    ) -> GenerateDocumentsOutput:
        """Yield documents for ``topic_ids`` in batches of ``batch_size``.

        Topics are fetched lazily, one request per topic, as the generator is
        consumed. A final partial batch is yielded if any documents remain.
        """
        doc_batch: list[Document] = []
        for topic_id in topic_ids:
            doc_batch.append(self._get_doc_from_topic(topic_id))

            if len(doc_batch) >= self.batch_size:
                yield doc_batch
                doc_batch = []

        if doc_batch:
            yield doc_batch

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        """Store the Discourse API credentials on the connector.

        Args:
            credentials: Mapping containing ``discourse_api_key`` and
                ``discourse_api_username``.

        Returns:
            Always ``None``.

        Raises:
            KeyError: If either expected key is missing.
        """
        self.permissions = DiscoursePerms(
            api_key=credentials["discourse_api_key"],
            api_username=credentials["discourse_api_username"],
        )

        return None

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        """Return batches of documents for topics last posted in the window.

        Args:
            start: Window start, in seconds since the Unix epoch.
            end: Window end, in seconds since the Unix epoch.

        Returns:
            A generator of document batches.

        Raises:
            ConnectorMissingCredentialError: If credentials were not loaded.
        """
        if self.permissions is None:
            raise ConnectorMissingCredentialError("Discourse")

        # fromtimestamp(..., tz=UTC) yields the same aware datetime as the
        # deprecated utcfromtimestamp(...).replace(tzinfo=UTC).
        start_datetime = datetime.fromtimestamp(start, tz=timezone.utc)
        end_datetime = datetime.fromtimestamp(end, tz=timezone.utc)

        self._get_categories_map()

        latest_topic_ids = self._get_latest_topics(
            start=start_datetime, end=end_datetime
        )

        return self._yield_discourse_documents(latest_topic_ids)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: poll the configured Discourse site for the last year
    # and print the first batch of documents. Requires DISCOURSE_BASE_URL,
    # DISCOURSE_API_KEY and DISCOURSE_API_USERNAME in the environment.
    import os

    connector = DiscourseConnector(base_url=os.environ["DISCOURSE_BASE_URL"])
    connector.load_credentials(
        {
            "discourse_api_key": os.environ["DISCOURSE_API_KEY"],
            "discourse_api_username": os.environ["DISCOURSE_API_USERNAME"],
        }
    )

    current = time.time()
    # 365 days, so the window matches the variable name.
    one_year_ago = current - 24 * 60 * 60 * 365

    latest_docs = connector.poll_source(one_year_ago, current)

    # A default keeps an empty result from surfacing as StopIteration.
    first_batch = next(latest_docs, None)
    if first_batch is None:
        print("No Discourse topics found in the last year")
    else:
        print(first_batch)
|
@ -6,6 +6,7 @@ from danswer.connectors.axero.connector import AxeroConnector
|
||||
from danswer.connectors.bookstack.connector import BookstackConnector
|
||||
from danswer.connectors.confluence.connector import ConfluenceConnector
|
||||
from danswer.connectors.danswer_jira.connector import JiraConnector
|
||||
from danswer.connectors.discourse.connector import DiscourseConnector
|
||||
from danswer.connectors.document360.connector import Document360Connector
|
||||
from danswer.connectors.file.connector import LocalFileConnector
|
||||
from danswer.connectors.github.connector import GithubConnector
|
||||
@ -71,6 +72,7 @@ def identify_connector_class(
|
||||
DocumentSource.ZENDESK: ZendeskConnector,
|
||||
DocumentSource.LOOPIO: LoopioConnector,
|
||||
DocumentSource.SHAREPOINT: SharepointConnector,
|
||||
DocumentSource.DISCOURSE: DiscourseConnector,
|
||||
DocumentSource.AXERO: AxeroConnector,
|
||||
}
|
||||
connector_by_source = connector_map.get(source, {})
|
||||
|
BIN
web/public/Discourse.png
Normal file
BIN
web/public/Discourse.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 42 KiB |
274
web/src/app/admin/connectors/discourse/page.tsx
Normal file
274
web/src/app/admin/connectors/discourse/page.tsx
Normal file
@ -0,0 +1,274 @@
|
||||
"use client";
|
||||
|
||||
import * as Yup from "yup";
|
||||
import { DiscourseIcon, TrashIcon } from "@/components/icons/icons";
|
||||
import {
|
||||
TextFormField,
|
||||
TextArrayFieldBuilder,
|
||||
} from "@/components/admin/connectors/Field";
|
||||
import { HealthCheckBanner } from "@/components/health/healthcheck";
|
||||
import { CredentialForm } from "@/components/admin/connectors/CredentialForm";
|
||||
import {
|
||||
Credential,
|
||||
ConnectorIndexingStatus,
|
||||
DiscourseConfig,
|
||||
DiscourseCredentialJson,
|
||||
} from "@/lib/types";
|
||||
import useSWR, { useSWRConfig } from "swr";
|
||||
import { fetcher } from "@/lib/fetcher";
|
||||
import { LoadingAnimation } from "@/components/Loading";
|
||||
import { adminDeleteCredential, linkCredential } from "@/lib/credential";
|
||||
import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm";
|
||||
import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable";
|
||||
import { usePopup } from "@/components/admin/connectors/Popup";
|
||||
import { usePublicCredentials } from "@/lib/hooks";
|
||||
import { Card, Divider, Text, Title } from "@tremor/react";
|
||||
import { AdminPageTitle } from "@/components/admin/Title";
|
||||
|
||||
/**
 * Body of the Discourse connector admin page.
 *
 * Step 1 shows the stored Discourse credential (with a delete button) or a
 * form to create one. Step 2 lists existing Discourse connectors and, once a
 * credential exists, offers a form to create a new connector.
 */
const Main = () => {
  const { popup, setPopup } = usePopup();

  const { mutate } = useSWRConfig();
  const {
    data: connectorIndexingStatuses,
    isLoading: isConnectorIndexingStatusesLoading,
    error: isConnectorIndexingStatusesError,
  } = useSWR<ConnectorIndexingStatus<any, any>[]>(
    "/api/manage/admin/connector/indexing-status",
    fetcher
  );

  const {
    data: credentialsData,
    isLoading: isCredentialsLoading,
    error: isCredentialsError,
    refreshCredentials,
  } = usePublicCredentials();

  // Show the spinner only while there is no data at all for a resource.
  if (
    (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) ||
    (!credentialsData && isCredentialsLoading)
  ) {
    return <LoadingAnimation text="Loading" />;
  }

  if (isConnectorIndexingStatusesError || !connectorIndexingStatuses) {
    return <div>Failed to load connectors</div>;
  }

  if (isCredentialsError || !credentialsData) {
    return <div>Failed to load credentials</div>;
  }

  const discourseConnectorIndexingStatuses: ConnectorIndexingStatus<
    DiscourseConfig,
    DiscourseCredentialJson
  >[] = connectorIndexingStatuses.filter(
    (connectorIndexingStatus) =>
      connectorIndexingStatus.connector.source === "discourse"
  );
  // A credential counts as a Discourse credential when it carries a
  // discourse_api_username field.
  const discourseCredential: Credential<DiscourseCredentialJson> | undefined =
    credentialsData.find(
      (credential) => credential.credential_json?.discourse_api_username
    );

  return (
    <>
      {popup}
      <Text>
        This connector allows you to sync all your Discourse Topics into
        Danswer. More details on how to setup the Discourse connector can be
        found in{" "}
        <a
          className="text-link"
          href="https://docs.danswer.dev/connectors/discourse"
          target="_blank"
          rel="noopener noreferrer"
        >
          this guide.
        </a>
      </Text>

      <Title className="mb-2 mt-6 ml-auto mr-auto">
        Step 1: Provide your API Access info
      </Title>

      {discourseCredential ? (
        <>
          <div className="flex mb-1 text-sm">
            <p className="my-auto">Existing API Key: </p>
            <p className="ml-1 italic my-auto max-w-md truncate">
              {discourseCredential.credential_json?.discourse_api_key}
            </p>
            <button
              className="ml-1 hover:bg-hover rounded p-1"
              onClick={async () => {
                // Refuse to delete a credential that connectors still use.
                if (discourseConnectorIndexingStatuses.length > 0) {
                  setPopup({
                    type: "error",
                    message:
                      "Must delete all connectors before deleting credentials",
                  });
                  return;
                }
                await adminDeleteCredential(discourseCredential.id);
                refreshCredentials();
              }}
            >
              <TrashIcon />
            </button>
          </div>
        </>
      ) : (
        <>
          <Card className="mt-4">
            <CredentialForm<DiscourseCredentialJson>
              formBody={
                <>
                  <TextFormField
                    name="discourse_api_username"
                    label="API Key Username:"
                  />
                  <TextFormField
                    name="discourse_api_key"
                    label="API Key:"
                    type="password"
                  />
                </>
              }
              validationSchema={Yup.object().shape({
                discourse_api_username: Yup.string().required(
                  "Please enter the Username associated with the API key"
                ),
                discourse_api_key: Yup.string().required(
                  "Please enter the API key"
                ),
              })}
              initialValues={{
                discourse_api_username: "",
                discourse_api_key: "",
              }}
              onSubmit={(isSuccess) => {
                if (isSuccess) {
                  refreshCredentials();
                }
              }}
            />
          </Card>
        </>
      )}

      <Title className="mb-2 mt-6 ml-auto mr-auto">
        Step 2: Which Categories do you want to make searchable?
      </Title>

      {discourseConnectorIndexingStatuses.length > 0 && (
        <>
          <Text className="mb-2">
            We pull Topics with new Posts every <b>10</b> minutes.
          </Text>
          <div className="mb-2">
            <ConnectorsTable<DiscourseConfig, DiscourseCredentialJson>
              connectorIndexingStatuses={discourseConnectorIndexingStatuses}
              liveCredential={discourseCredential}
              getCredential={(credential) =>
                credential.credential_json.discourse_api_username
              }
              specialColumns={[
                {
                  header: "Categories",
                  key: "categories",
                  getValue: (ccPairStatus) => {
                    const categories =
                      ccPairStatus.connector.connector_specific_config
                        .categories;
                    return categories && categories.length > 0
                      ? categories.join(", ")
                      : "";
                  },
                },
              ]}
              includeName={true}
              onUpdate={() =>
                mutate("/api/manage/admin/connector/indexing-status")
              }
              onCredentialLink={async (connectorId) => {
                if (discourseCredential) {
                  await linkCredential(connectorId, discourseCredential.id);
                  mutate("/api/manage/admin/connector/indexing-status");
                }
              }}
            />
          </div>
          <Divider />
        </>
      )}

      {discourseCredential ? (
        <>
          <Card className="mt-4">
            <h2 className="font-bold mb-3">Create a new Discourse Connector</h2>
            <ConnectorForm<DiscourseConfig>
              nameBuilder={(values) =>
                // An empty array is truthy, so the length must be checked
                // too; otherwise the "-All" name is never produced and the
                // name ends in a dangling "-".
                values.categories && values.categories.length > 0
                  ? `${values.base_url}-${values.categories.join("_")}`
                  : `${values.base_url}-All`
              }
              source="discourse"
              inputType="poll"
              formBody={
                <>
                  <TextFormField
                    name="base_url"
                    label="Base URL:"
                    subtext="This might be something like https://danswer.discourse.group/ or https://community.yourcompany.com/"
                  />
                </>
              }
              formBodyBuilder={TextArrayFieldBuilder({
                name: "categories",
                label: "Categories:",
                subtext:
                  "Specify 0 or more Categories to index. If no Categories are specified, Topics from " +
                  "all categories will be indexed.",
              })}
              validationSchema={Yup.object().shape({
                base_url: Yup.string().required(
                  "Please enter the base URL of your Discourse site."
                ),
                categories: Yup.array().of(
                  Yup.string().required("Category names must be strings")
                ),
              })}
              initialValues={{
                categories: [],
                base_url: "",
              }}
              refreshFreq={10 * 60} // 10 minutes
              credentialId={discourseCredential.id}
            />
          </Card>
        </>
      ) : (
        <Text>
          Please provide your API Key Info in Step 1 first! Once done with that,
          you can then start indexing all your Discourse Topics.
        </Text>
      )}
    </>
  );
};
|
||||
|
||||
/**
 * Admin page for the Discourse connector: health banner, page title and the
 * credential / connector management body.
 */
export default function Page() {
  const titleIcon = <DiscourseIcon size={32} />;

  return (
    <div className="mx-auto container">
      <div className="mb-4">
        <HealthCheckBanner />
      </div>

      <AdminPageTitle icon={titleIcon} title="Discourse" />

      <Main />
    </div>
  );
}
|
@ -52,6 +52,7 @@ import document360Icon from "../../../public/Document360.png";
|
||||
import googleSitesIcon from "../../../public/GoogleSites.png";
|
||||
import zendeskIcon from "../../../public/Zendesk.svg";
|
||||
import sharepointIcon from "../../../public/Sharepoint.png";
|
||||
import discourseIcon from "../../../public/Discourse.png";
|
||||
import { FaRobot } from "react-icons/fa";
|
||||
|
||||
interface IconProps {
|
||||
@ -601,6 +602,18 @@ export const ZendeskIcon = ({
|
||||
</div>
|
||||
);
|
||||
|
||||
/**
 * Discourse logo rendered inside a square wrapper of `size` pixels.
 *
 * The wrapper is sized both through an inline style and through width/height
 * class names, followed by the caller-supplied `className`.
 */
export const DiscourseIcon = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  const sidePx = `${size}px`;

  return (
    <div
      style={{ width: sidePx, height: sidePx }}
      className={`w-[${sidePx}] h-[${sidePx}] ${className}`}
    >
      <Image src={discourseIcon} alt="Logo" width="96" height="96" />
    </div>
  );
};
|
||||
|
||||
export const AxeroIcon = ({
|
||||
size = 16,
|
||||
className = defaultTailwindCSS,
|
||||
|
@ -2,6 +2,7 @@ import {
|
||||
AxeroIcon,
|
||||
BookstackIcon,
|
||||
ConfluenceIcon,
|
||||
DiscourseIcon,
|
||||
Document360Icon,
|
||||
FileIcon,
|
||||
GithubIcon,
|
||||
@ -155,6 +156,11 @@ const SOURCE_METADATA_MAP: SourceMap = {
|
||||
displayName: "Sharepoint",
|
||||
category: SourceCategory.AppConnection,
|
||||
},
|
||||
discourse: {
|
||||
icon: DiscourseIcon,
|
||||
displayName: "Discourse",
|
||||
category: SourceCategory.AppConnection,
|
||||
},
|
||||
axero: {
|
||||
icon: AxeroIcon,
|
||||
displayName: "Axero",
|
||||
|
@ -39,6 +39,7 @@ export type ValidSources =
|
||||
| "loopio"
|
||||
| "sharepoint"
|
||||
| "zendesk"
|
||||
| "discourse"
|
||||
| "axero";
|
||||
|
||||
export type ValidInputTypes = "load_state" | "poll" | "event";
|
||||
@ -118,6 +119,11 @@ export interface SharepointConfig {
|
||||
sites?: string[];
|
||||
}
|
||||
|
||||
/** Connector-specific configuration for a Discourse connector. */
export interface DiscourseConfig {
  /** Base URL of the Discourse site. */
  base_url: string;
  /** Optional list of category names. */
  categories?: Array<string>;
}
|
||||
|
||||
export interface AxeroConfig {
|
||||
spaces?: string[];
|
||||
}
|
||||
@ -337,6 +343,11 @@ export interface SharepointCredentialJson {
|
||||
aad_directory_id: string;
|
||||
}
|
||||
|
||||
/** Credential payload stored for a Discourse connector. */
export interface DiscourseCredentialJson {
  /** Discourse API key. */
  discourse_api_key: string;
  /** Username associated with the API key. */
  discourse_api_username: string;
}
|
||||
|
||||
export interface AxeroCredentialJson {
|
||||
base_url: string;
|
||||
axero_api_token: string;
|
||||
|
Loading…
x
Reference in New Issue
Block a user