mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-03-17 21:32:36 +01:00
Create connector
This commit is contained in:
parent
5f5cc9a724
commit
7ff18e0a93
@ -126,6 +126,7 @@ class DocumentSource(str, Enum):
|
||||
XENFORO = "xenforo"
|
||||
NOT_APPLICABLE = "not_applicable"
|
||||
FRESHDESK = "freshdesk"
|
||||
FIREFLIES = "fireflies"
|
||||
|
||||
|
||||
DocumentSourceRequiringTenantContext: list[DocumentSource] = [DocumentSource.FILE]
|
||||
|
@ -16,6 +16,7 @@ from danswer.connectors.discourse.connector import DiscourseConnector
|
||||
from danswer.connectors.document360.connector import Document360Connector
|
||||
from danswer.connectors.dropbox.connector import DropboxConnector
|
||||
from danswer.connectors.file.connector import LocalFileConnector
|
||||
from danswer.connectors.fireflies.connector import FirefliesConnector
|
||||
from danswer.connectors.freshdesk.connector import FreshdeskConnector
|
||||
from danswer.connectors.github.connector import GithubConnector
|
||||
from danswer.connectors.gitlab.connector import GitlabConnector
|
||||
@ -101,6 +102,7 @@ def identify_connector_class(
|
||||
DocumentSource.OCI_STORAGE: BlobStorageConnector,
|
||||
DocumentSource.XENFORO: XenforoConnector,
|
||||
DocumentSource.FRESHDESK: FreshdeskConnector,
|
||||
DocumentSource.FIREFLIES: FirefliesConnector,
|
||||
}
|
||||
connector_by_source = connector_map.get(source, {})
|
||||
|
||||
|
0
backend/danswer/connectors/fireflies/__init__.py
Normal file
0
backend/danswer/connectors/fireflies/__init__.py
Normal file
179
backend/danswer/connectors/fireflies/connector.py
Normal file
179
backend/danswer/connectors/fireflies/connector.py
Normal file
@ -0,0 +1,179 @@
|
||||
# TODO: Fix the transcript text parsing for the document
|
||||
# TODO: Remove the host email from the secondary owners
|
||||
# TODO: Figure out if to use semantic identifier or title
|
||||
# TODO: Fix date parsing in graphql query
|
||||
# TODO: Fix credentials loading
|
||||
from collections.abc import Iterator
|
||||
from datetime import datetime
|
||||
from datetime import timezone
|
||||
from typing import List
|
||||
|
||||
import requests
|
||||
|
||||
from danswer.configs.app_configs import INDEX_BATCH_SIZE
|
||||
from danswer.configs.constants import DocumentSource
|
||||
from danswer.connectors.interfaces import GenerateDocumentsOutput
|
||||
from danswer.connectors.interfaces import LoadConnector
|
||||
from danswer.connectors.interfaces import PollConnector
|
||||
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
|
||||
from danswer.connectors.models import BasicExpertInfo
|
||||
from danswer.connectors.models import ConnectorMissingCredentialError
|
||||
from danswer.connectors.models import Document
|
||||
from danswer.connectors.models import Section
|
||||
from danswer.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()

# Prefix for Fireflies transcript IDs so document IDs are namespaced by source.
_FIREFLIES_ID_PREFIX = "FIREFLIES_"

# Single GraphQL endpoint used for all Fireflies API queries.
_FIREFLIES_API_URL = "https://api.fireflies.ai/graphql"

# Header template for API requests; the Authorization value is set on a
# per-request copy (see _fetch_transcripts) — this dict itself is never mutated.
_FIREFLIES_API_HEADERS = {"Content-Type": "application/json", "Authorization": ""}
|
||||
def _create_doc_from_transcript(transcript: dict) -> Document:
    """Convert one Fireflies transcript payload into a Danswer Document.

    Builds the section text as "Speaker: text" paragraphs from the
    transcript's sentences (resolves the transcript-parsing TODO — the
    previous code dumped the raw ``str(sentences)`` list repr).

    Args:
        transcript: One entry from the Fireflies GraphQL ``transcripts``
            response (keys: id, title, date, host_email, participants,
            transcript_url, sentences).

    Returns:
        A Document with the meeting text, host as primary owner, and
        participants as secondary owners.
    """
    sentences = transcript.get("sentences") or []
    meeting_text = "\n\n".join(
        f"{sentence.get('speaker_name') or 'Unknown Speaker'}: "
        f"{sentence.get('text', '')}"
        for sentence in sentences
    )

    link = transcript.get("transcript_url", "")

    # Renamed from `id` to avoid shadowing the builtin.
    doc_id = _FIREFLIES_ID_PREFIX + transcript.get("id", "")

    title = transcript.get("title", "")

    # Fireflies reports the meeting date in epoch milliseconds.
    meeting_date_unix = transcript.get("date", "")
    meeting_date = datetime.fromtimestamp(meeting_date_unix / 1000, tz=timezone.utc)

    meeting_host_email = [BasicExpertInfo(email=transcript.get("host_email", ""))]

    meeting_participants_emails = [
        BasicExpertInfo(email=participant)
        for participant in transcript.get("participants", [])
    ]

    return Document(
        id=doc_id,
        sections=[
            Section(
                link=link,
                text=meeting_text,
            )
        ],
        source=DocumentSource.FIREFLIES,
        semantic_identifier=title,
        metadata={},
        doc_updated_at=meeting_date,
        primary_owners=meeting_host_email,
        secondary_owners=meeting_participants_emails,
    )
|
||||
|
||||
|
||||
class FirefliesConnector(PollConnector, LoadConnector):
    """Indexes Fireflies.ai meeting transcripts via the Fireflies GraphQL API.

    Supports full loads (``load_from_state``) and time-bounded polling
    (``poll_source``). Credentials must be supplied via ``load_credentials``
    before fetching.
    """

    def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:
        self.batch_size = batch_size
        # Set by load_credentials(); None means no credential loaded yet.
        self.api_key: str | None = None

    def load_credentials(self, credentials: dict[str, str | int]) -> None:
        """Store the Fireflies API key from the credential dict.

        Raises:
            ConnectorMissingCredentialError: if ``fireflies_api_key`` is
                absent or not a string.
        """
        api_key = credentials.get("fireflies_api_key")

        if not isinstance(api_key, str):
            raise ConnectorMissingCredentialError(
                "The Fireflies API key must be a string"
            )

        self.api_key = api_key

    def _fetch_transcripts(
        self, start: datetime | None = None, end: datetime | None = None
    ) -> Iterator[List[dict]]:
        """Yield pages of transcript dicts, paginating until exhausted.

        Fixes vs. the original implementation:
        - uses the credential loaded via load_credentials instead of a
          hardcoded (leaked) Bearer token;
        - rebuilds the query each iteration so the incremented ``skip``
          actually takes effect (previously the same page was requested
          forever once a full page existed);
        - applies the fromDate/toDate filters to the query (previously
          computed but never sent).
        """
        if self.api_key is None:
            raise ConnectorMissingCredentialError("Missing API key")

        headers = _FIREFLIES_API_HEADERS.copy()
        headers["Authorization"] = f"Bearer {self.api_key}"

        # Conservative page size kept from the original; final document
        # batching is handled separately by _process_transcripts.
        limit = 4

        # GraphQL DateTime arguments are ISO-8601 strings and must be quoted.
        date_filters = ""
        if start:
            date_filters += f', fromDate: "{start.isoformat()}"'
        if end:
            date_filters += f', toDate: "{end.isoformat()}"'

        skip = 0
        while True:
            # Rebuilt every iteration so the current `skip` is interpolated.
            api_query = {
                "query": f"""
                    query {{
                        transcripts(
                            limit: {limit},
                            skip: {skip}{date_filters}
                        ) {{
                            title
                            id
                            date
                            host_email
                            participants
                            transcript_url
                            sentences {{
                                text
                                speaker_name
                            }}
                        }}
                    }}
                """
            }

            response = requests.post(
                _FIREFLIES_API_URL, headers=headers, json=api_query
            )
            response.raise_for_status()

            if response.status_code == 204:
                break

            transcripts = response.json().get("data", {}).get("transcripts", [])

            if not transcripts:
                break

            yield transcripts

            # A short page means this was the last one.
            if len(transcripts) < limit:
                break

            skip += limit

    def _process_transcripts(
        self, start: datetime | None = None, end: datetime | None = None
    ) -> GenerateDocumentsOutput:
        """Convert fetched transcripts to Documents, yielding batch_size batches."""
        doc_batch: List[Document] = []

        for transcript_batch in self._fetch_transcripts(start, end):
            for transcript in transcript_batch:
                # (debug print removed)
                doc_batch.append(_create_doc_from_transcript(transcript))

                if len(doc_batch) >= self.batch_size:
                    yield doc_batch
                    doc_batch = []

        if doc_batch:
            yield doc_batch

    def load_from_state(self) -> GenerateDocumentsOutput:
        """Full, unbounded load of all transcripts."""
        return self._process_transcripts()

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        """Load transcripts whose meeting date falls in [start, end] (UTC)."""
        start_datetime = datetime.fromtimestamp(start, tz=timezone.utc)
        end_datetime = datetime.fromtimestamp(end, tz=timezone.utc)

        yield from self._process_transcripts(start_datetime, end_datetime)
|
@ -28,7 +28,8 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||||
os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
|
||||
|
||||
HF_CACHE_PATH = Path("/root/.cache/huggingface/")
|
||||
TEMP_HF_CACHE_PATH = Path("/root/.cache/temp_huggingface/")
|
||||
# Changed this to be compatible with Linux
|
||||
TEMP_HF_CACHE_PATH = Path.home() / ".cache" / "temp_huggingface"
|
||||
|
||||
transformer_logging.set_verbosity_error()
|
||||
|
||||
|
BIN
web/public/Fireflies.png
Normal file
BIN
web/public/Fireflies.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 10 KiB |
@ -75,9 +75,13 @@ async function handleRequest(request: NextRequest, path: string[]) {
|
||||
backendUrl.searchParams.append(key, value);
|
||||
});
|
||||
|
||||
// Added this to allow it to run properly on Linux
|
||||
const headers = new Headers(request.headers);
|
||||
headers.delete("connection");
|
||||
|
||||
const response = await fetch(backendUrl, {
|
||||
method: request.method,
|
||||
headers: request.headers,
|
||||
headers: headers,
|
||||
body: request.body,
|
||||
signal: request.signal,
|
||||
// @ts-ignore
|
||||
|
@ -75,6 +75,7 @@ import s3Icon from "../../../public/S3.png";
|
||||
import r2Icon from "../../../public/r2.png";
|
||||
import salesforceIcon from "../../../public/Salesforce.png";
|
||||
import freshdeskIcon from "../../../public/Freshdesk.png";
|
||||
import firefliesIcon from "../../../public/Fireflies.png";
|
||||
|
||||
import sharepointIcon from "../../../public/Sharepoint.png";
|
||||
import teamsIcon from "../../../public/Teams.png";
|
||||
@ -88,6 +89,7 @@ import voyageIcon from "../../../public/Voyage.png";
|
||||
import googleIcon from "../../../public/Google.webp";
|
||||
import xenforoIcon from "../../../public/Xenforo.svg";
|
||||
import { FaRobot } from "react-icons/fa";
|
||||
import { size } from "lodash";
|
||||
|
||||
export interface IconProps {
|
||||
size?: number;
|
||||
@ -1301,6 +1303,13 @@ export const FreshdeskIcon = ({
|
||||
<LogoIcon size={size} className={className} src={freshdeskIcon} />
|
||||
);
|
||||
|
||||
export const FirefliesIcon = ({
|
||||
size = 16,
|
||||
className = defaultTailwindCSS,
|
||||
}: IconProps) => (
|
||||
<LogoIcon size={size} className={className} src={firefliesIcon} />
|
||||
);
|
||||
|
||||
/*
|
||||
EE Icons
|
||||
*/
|
||||
|
@ -950,7 +950,11 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
|
||||
values: [],
|
||||
advanced_values: [],
|
||||
},
|
||||
|
||||
fireflies: {
|
||||
description: "Configure Fireflies connector",
|
||||
values: [],
|
||||
advanced_values: [],
|
||||
},
|
||||
};
|
||||
export function createConnectorInitialValues(
|
||||
connector: ConfigurableSources
|
||||
@ -1210,6 +1214,7 @@ export interface AsanaConfig {
|
||||
|
||||
export interface FreshdeskConfig {}
|
||||
|
||||
export interface FirefliesConfig {}
|
||||
|
||||
export interface MediaWikiConfig extends MediaWikiBaseConfig {
|
||||
hostname: string;
|
||||
|
@ -187,6 +187,10 @@ export interface FreshdeskCredentialJson {
|
||||
freshdesk_api_key: string;
|
||||
}
|
||||
|
||||
export interface FirefliesCredentialJson {
|
||||
fireflies_api_key: string;
|
||||
}
|
||||
|
||||
export interface MediaWikiCredentialJson {}
|
||||
export interface WikipediaCredentialJson extends MediaWikiCredentialJson {}
|
||||
|
||||
@ -290,6 +294,9 @@ export const credentialTemplates: Record<ValidSources, any> = {
|
||||
freshdesk_password: "",
|
||||
freshdesk_api_key: "",
|
||||
} as FreshdeskCredentialJson,
|
||||
fireflies: {
|
||||
fireflies_api_key: "",
|
||||
} as FirefliesCredentialJson,
|
||||
xenforo: null,
|
||||
google_sites: null,
|
||||
file: null,
|
||||
@ -435,7 +442,11 @@ export const credentialDisplayNames: Record<string, string> = {
|
||||
freshdesk_domain: "Freshdesk Domain",
|
||||
freshdesk_password: "Freshdesk Password",
|
||||
freshdesk_api_key: "Freshdesk API Key",
|
||||
|
||||
// Fireflies
|
||||
fireflies_api_key: "Fireflies API Key",
|
||||
};
|
||||
|
||||
export function getDisplayNameForCredentialKey(key: string): string {
|
||||
return credentialDisplayNames[key] || key;
|
||||
}
|
||||
|
@ -37,6 +37,7 @@ import {
|
||||
ColorSlackIcon,
|
||||
XenforoIcon,
|
||||
FreshdeskIcon,
|
||||
FirefliesIcon,
|
||||
} from "@/components/icons/icons";
|
||||
import { ValidSources } from "./types";
|
||||
import {
|
||||
@ -289,6 +290,12 @@ const SOURCE_METADATA_MAP: SourceMap = {
|
||||
category: SourceCategory.CustomerSupport,
|
||||
docs: "https://docs.danswer.dev/connectors/freshdesk",
|
||||
},
|
||||
fireflies: {
|
||||
icon: FirefliesIcon,
|
||||
displayName: "Fireflies",
|
||||
category: SourceCategory.CustomerSupport,
|
||||
docs: "https://docs.danswer.dev/connectors/fireflies",
|
||||
},
|
||||
// currently used for the Internet Search tool docs, which is why
|
||||
// a globe is used
|
||||
not_applicable: {
|
||||
|
@ -265,6 +265,7 @@ const validSources = [
|
||||
"not_applicable",
|
||||
"ingestion_api",
|
||||
"freshdesk",
|
||||
"fireflies",
|
||||
] as const;
|
||||
|
||||
export type ValidSources = (typeof validSources)[number];
|
||||
|
Loading…
x
Reference in New Issue
Block a user