Merge pull request #3073 from skylares/fireflies-dev

Fireflies connector
This commit is contained in:
hagen-danswer 2024-11-08 06:50:22 -08:00 committed by GitHub
commit 1fb4cdfcc3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 219 additions and 0 deletions

View File

@ -126,6 +126,7 @@ class DocumentSource(str, Enum):
XENFORO = "xenforo"
NOT_APPLICABLE = "not_applicable"
FRESHDESK = "freshdesk"
FIREFLIES = "fireflies"
DocumentSourceRequiringTenantContext: list[DocumentSource] = [DocumentSource.FILE]

View File

@ -16,6 +16,7 @@ from danswer.connectors.discourse.connector import DiscourseConnector
from danswer.connectors.document360.connector import Document360Connector
from danswer.connectors.dropbox.connector import DropboxConnector
from danswer.connectors.file.connector import LocalFileConnector
from danswer.connectors.fireflies.connector import FirefliesConnector
from danswer.connectors.freshdesk.connector import FreshdeskConnector
from danswer.connectors.github.connector import GithubConnector
from danswer.connectors.gitlab.connector import GitlabConnector
@ -101,6 +102,7 @@ def identify_connector_class(
DocumentSource.OCI_STORAGE: BlobStorageConnector,
DocumentSource.XENFORO: XenforoConnector,
DocumentSource.FRESHDESK: FreshdeskConnector,
DocumentSource.FIREFLIES: FirefliesConnector,
}
connector_by_source = connector_map.get(source, {})

View File

@ -0,0 +1,182 @@
from collections.abc import Iterator
from datetime import datetime
from datetime import timezone
from typing import List
import requests
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.interfaces import PollConnector
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
from danswer.connectors.models import BasicExpertInfo
from danswer.connectors.models import ConnectorMissingCredentialError
from danswer.connectors.models import Document
from danswer.connectors.models import Section
from danswer.utils.logger import setup_logger
# Logger used by the Fireflies connector module.
logger = setup_logger()
# Prepended to the Fireflies transcript id to build the document id.
_FIREFLIES_ID_PREFIX = "FIREFLIES_"
# GraphQL endpoint that every transcript request is POSTed to.
_FIREFLIES_API_URL = "https://api.fireflies.ai/graphql"
# Used both as the `limit` query variable and as the `skip` increment when paging.
_FIREFLIES_TRANSCRIPT_QUERY_SIZE = 50 # Max page size is 50
# GraphQL query returning one page of transcripts, optionally bounded by
# fromDate/toDate. It requests the fields that _create_doc_from_transcript
# reads: id, title, host_email, participants, date, transcript_url and the
# per-sentence text/speaker_name.
_FIREFLIES_API_QUERY = """
query Transcripts($fromDate: DateTime, $toDate: DateTime, $limit: Int!, $skip: Int!) {
transcripts(fromDate: $fromDate, toDate: $toDate, limit: $limit, skip: $skip) {
id
title
host_email
participants
date
transcript_url
sentences {
text
speaker_name
}
}
}
"""
def _create_doc_from_transcript(transcript: dict) -> Document | None:
    """Convert one Fireflies transcript payload into a Document.

    Args:
        transcript: A single entry of the ``transcripts`` list returned by the
            Fireflies GraphQL query. ``id``, ``title``, ``host_email``,
            ``date`` and ``transcript_url`` must be present; ``sentences`` and
            ``participants`` may be missing or null.

    Returns:
        The Document for the meeting, or None when the transcript has no
        sentences (there is nothing to index).

    Raises:
        KeyError: If one of the required keys is absent from ``transcript``.
    """
    # `.get(key, default)` only covers a missing key; the API can also send an
    # explicit null, so normalise both cases to an empty list.
    sentences = transcript.get("sentences") or []
    if not sentences:
        return None

    # Build the text in one pass; a null speaker or null text must not crash.
    meeting_text = "".join(
        (sentence.get("speaker_name") or "Unknown Speaker")
        + ": "
        + (sentence.get("text") or "")
        + "\n\n"
        for sentence in sentences
    )

    meeting_link = transcript["transcript_url"]
    fireflies_id = _FIREFLIES_ID_PREFIX + transcript["id"]
    meeting_title = transcript["title"] or "No Title"

    # The `date` field is divided by 1000 before conversion, i.e. it is
    # treated as milliseconds since the Unix epoch.
    meeting_date_unix = transcript["date"]
    meeting_date = datetime.fromtimestamp(meeting_date_unix / 1000, tz=timezone.utc)

    meeting_host_email = transcript["host_email"]
    host_email_user_info = [BasicExpertInfo(email=meeting_host_email)]

    # Every non-empty participant other than the host becomes a secondary
    # owner. Duplicates are dropped (first occurrence wins) so the same person
    # is not listed twice.
    meeting_participants_email_list = []
    seen_participants: set = set()
    for participant in transcript.get("participants") or []:
        if not participant or participant == meeting_host_email:
            continue
        if participant in seen_participants:
            continue
        seen_participants.add(participant)
        meeting_participants_email_list.append(BasicExpertInfo(email=participant))

    return Document(
        id=fireflies_id,
        sections=[
            Section(
                link=meeting_link,
                text=meeting_text,
            )
        ],
        source=DocumentSource.FIREFLIES,
        semantic_identifier=meeting_title,
        metadata={},
        doc_updated_at=meeting_date,
        primary_owners=host_email_user_info,
        secondary_owners=meeting_participants_email_list,
    )
class FirefliesConnector(PollConnector, LoadConnector):
    """Connector that indexes Fireflies meeting transcripts.

    Transcripts are fetched page by page from the Fireflies GraphQL API,
    converted with ``_create_doc_from_transcript`` and yielded in batches of
    ``batch_size`` documents. Supports both a full load and time-bounded
    polling.
    """

    # Upper bound (seconds) on a single API request so that a stalled
    # connection cannot hang the indexing run indefinitely.
    _REQUEST_TIMEOUT_SECONDS = 60

    # Timestamp format sent as the fromDate/toDate query variables.
    _DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S.000Z"

    def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:
        """Create the connector.

        Args:
            batch_size: Maximum number of documents per yielded batch.
        """
        self.batch_size = batch_size
        # Set by load_credentials(). Initialised here so that using the
        # connector without credentials raises ConnectorMissingCredentialError
        # instead of AttributeError.
        self.api_key: str | None = None

    def load_credentials(self, credentials: dict[str, str]) -> None:
        """Store the API key found under ``fireflies_api_key``.

        Raises:
            ConnectorMissingCredentialError: If the key is missing or not a
                string.
        """
        api_key = credentials.get("fireflies_api_key")
        if not isinstance(api_key, str):
            raise ConnectorMissingCredentialError(
                "The Fireflies API key must be a string"
            )
        self.api_key = api_key
        return None

    def _fetch_transcripts(
        self, start_datetime: str | None = None, end_datetime: str | None = None
    ) -> Iterator[List[dict]]:
        """Yield pages of raw transcript dicts from the Fireflies API.

        Args:
            start_datetime: Optional lower bound, sent as ``fromDate``.
            end_datetime: Optional upper bound, sent as ``toDate``.

        Yields:
            One list of transcript dicts per API page.

        Raises:
            ConnectorMissingCredentialError: If no API key has been loaded.
            requests.HTTPError: If the API answers with an error status.
            RuntimeError: If the response carries GraphQL errors and no
                transcripts.
        """
        if self.api_key is None:
            raise ConnectorMissingCredentialError("Missing API key")

        headers = {
            "Content-Type": "application/json",
            "Authorization": "Bearer " + self.api_key,
        }

        variables: dict[str, int | str] = {
            "limit": _FIREFLIES_TRANSCRIPT_QUERY_SIZE,
        }
        if start_datetime:
            variables["fromDate"] = start_datetime
        if end_datetime:
            variables["toDate"] = end_datetime

        skip = 0
        while True:
            variables["skip"] = skip
            response = requests.post(
                _FIREFLIES_API_URL,
                headers=headers,
                json={"query": _FIREFLIES_API_QUERY, "variables": variables},
                timeout=self._REQUEST_TIMEOUT_SECONDS,
            )
            response.raise_for_status()

            # 204 No Content carries no body to parse; nothing more to fetch.
            if response.status_code == 204:
                break

            received_transcripts = response.json()
            errors = received_transcripts.get("errors")
            # "data" (or "transcripts") may be an explicit null when the query
            # failed, so `.get(key, default)` alone is not enough.
            data = received_transcripts.get("data") or {}
            parsed_transcripts = data.get("transcripts")

            if parsed_transcripts is None:
                if errors:
                    raise RuntimeError(f"Fireflies API returned errors: {errors}")
                parsed_transcripts = []
            elif errors:
                logger.warning(f"Fireflies API returned partial errors: {errors}")

            yield parsed_transcripts

            # A short page means the last page has been reached.
            if len(parsed_transcripts) < _FIREFLIES_TRANSCRIPT_QUERY_SIZE:
                break
            skip += _FIREFLIES_TRANSCRIPT_QUERY_SIZE

    def _process_transcripts(
        self, start: str | None = None, end: str | None = None
    ) -> GenerateDocumentsOutput:
        """Yield batches of Documents built from the fetched transcripts.

        Transcripts for which ``_create_doc_from_transcript`` returns None are
        skipped. A final, possibly smaller, batch is yielded at the end.
        """
        doc_batch: List[Document] = []

        for transcript_batch in self._fetch_transcripts(start, end):
            for transcript in transcript_batch:
                if doc := _create_doc_from_transcript(transcript):
                    doc_batch.append(doc)

                if len(doc_batch) >= self.batch_size:
                    yield doc_batch
                    doc_batch = []

        if doc_batch:
            yield doc_batch

    def load_from_state(self) -> GenerateDocumentsOutput:
        """Index every transcript, without any date bounds."""
        return self._process_transcripts()

    def poll_source(
        self, start_unixtime: SecondsSinceUnixEpoch, end_unixtime: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        """Index transcripts between two Unix timestamps (in seconds).

        Both bounds are converted to UTC timestamp strings before being passed
        to the API as fromDate/toDate.
        """
        start_datetime = datetime.fromtimestamp(
            start_unixtime, tz=timezone.utc
        ).strftime(self._DATETIME_FORMAT)
        end_datetime = datetime.fromtimestamp(end_unixtime, tz=timezone.utc).strftime(
            self._DATETIME_FORMAT
        )

        yield from self._process_transcripts(start_datetime, end_datetime)

BIN
web/public/Fireflies.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 10 KiB

View File

@ -67,6 +67,7 @@ import s3Icon from "../../../public/S3.png";
import r2Icon from "../../../public/r2.png";
import salesforceIcon from "../../../public/Salesforce.png";
import freshdeskIcon from "../../../public/Freshdesk.png";
import firefliesIcon from "../../../public/Fireflies.png";
import sharepointIcon from "../../../public/Sharepoint.png";
import teamsIcon from "../../../public/Teams.png";
@ -1293,6 +1294,13 @@ export const FreshdeskIcon = ({
<LogoIcon size={size} className={className} src={freshdeskIcon} />
);
/** Fireflies logo, rendered through the shared LogoIcon wrapper. */
export const FirefliesIcon = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  return <LogoIcon src={firefliesIcon} className={className} size={size} />;
};
/*
EE Icons
*/

View File

@ -950,6 +950,11 @@ For example, specifying .*-support.* as a "channel" will cause the connector to
values: [],
advanced_values: [],
},
fireflies: {
description: "Configure Fireflies connector",
values: [],
advanced_values: [],
},
};
export function createConnectorInitialValues(
connector: ConfigurableSources
@ -1209,6 +1214,8 @@ export interface AsanaConfig {
export interface FreshdeskConfig {}
// The Fireflies connector declares no connector-specific settings (its entry
// in the connector configuration has empty `values` and `advanced_values`),
// so this config has no fields.
export interface FirefliesConfig {}
export interface MediaWikiConfig extends MediaWikiBaseConfig {
hostname: string;
}

View File

@ -188,6 +188,10 @@ export interface FreshdeskCredentialJson {
freshdesk_api_key: string;
}
/** Credential payload for the Fireflies connector. */
export interface FirefliesCredentialJson {
// API key; the backend connector reads it from the `fireflies_api_key` entry.
fireflies_api_key: string;
}
export interface MediaWikiCredentialJson {}
export interface WikipediaCredentialJson extends MediaWikiCredentialJson {}
@ -291,6 +295,9 @@ export const credentialTemplates: Record<ValidSources, any> = {
freshdesk_password: "",
freshdesk_api_key: "",
} as FreshdeskCredentialJson,
fireflies: {
fireflies_api_key: "",
} as FirefliesCredentialJson,
xenforo: null,
google_sites: null,
file: null,
@ -427,7 +434,11 @@ export const credentialDisplayNames: Record<string, string> = {
freshdesk_domain: "Freshdesk Domain",
freshdesk_password: "Freshdesk Password",
freshdesk_api_key: "Freshdesk API Key",
// Fireflies
fireflies_api_key: "Fireflies API Key",
};
/**
 * Returns the human-readable label registered for a credential field key,
 * or the key itself when no non-empty label is registered.
 */
export function getDisplayNameForCredentialKey(key: string): string {
  const label = credentialDisplayNames[key];
  return label ? label : key;
}

View File

@ -37,6 +37,7 @@ import {
ColorSlackIcon,
XenforoIcon,
FreshdeskIcon,
FirefliesIcon,
} from "@/components/icons/icons";
import { ValidSources } from "./types";
import {
@ -289,6 +290,12 @@ const SOURCE_METADATA_MAP: SourceMap = {
category: SourceCategory.CustomerSupport,
docs: "https://docs.danswer.dev/connectors/freshdesk",
},
fireflies: {
icon: FirefliesIcon,
displayName: "Fireflies",
category: SourceCategory.Other,
docs: "https://docs.danswer.dev/connectors/fireflies",
},
// currently used for the Internet Search tool docs, which is why
// a globe is used
not_applicable: {

View File

@ -266,6 +266,7 @@ const validSources = [
"not_applicable",
"ingestion_api",
"freshdesk",
"fireflies",
] as const;
export type ValidSources = (typeof validSources)[number];