From 692fdb4597e6cc2eb110e545d83e73dcb74cdcfa Mon Sep 17 00:00:00 2001 From: Itay Date: Tue, 23 Jan 2024 02:25:10 +0200 Subject: [PATCH] Gmail Connector (#946) --------- Co-authored-by: Yuhong Sun --- README.md | 1 + backend/danswer/configs/constants.py | 1 + backend/danswer/connectors/factory.py | 2 + backend/danswer/connectors/gmail/__init__.py | 0 backend/danswer/connectors/gmail/connector.py | 220 +++++++++ .../connectors/gmail/connector_auth.py | 191 ++++++++ backend/danswer/connectors/gmail/constants.py | 7 + .../connectors/google_drive/connector.py | 6 +- backend/danswer/db/credentials.py | 16 + backend/danswer/server/documents/connector.py | 159 +++++++ backend/danswer/server/documents/models.py | 6 + backend/requirements/default.txt | 1 + .../connectors/gmail/test_connector.py | 226 +++++++++ web/public/Gmail.png | Bin 0 -> 7186 bytes .../app/admin/connectors/gmail/Credential.tsx | 437 ++++++++++++++++++ .../connectors/gmail/GmailConnectorsTable.tsx | 127 +++++ .../connectors/gmail/auth/callback/route.ts | 34 ++ web/src/app/admin/connectors/gmail/page.tsx | 265 +++++++++++ web/src/app/admin/connectors/gmail/utils.ts | 4 + web/src/components/icons/icons.tsx | 14 + web/src/lib/constants.ts | 2 + web/src/lib/gmail.ts | 41 ++ web/src/lib/sources.ts | 6 + web/src/lib/types.ts | 12 + 24 files changed, 1773 insertions(+), 5 deletions(-) create mode 100644 backend/danswer/connectors/gmail/__init__.py create mode 100644 backend/danswer/connectors/gmail/connector.py create mode 100644 backend/danswer/connectors/gmail/connector_auth.py create mode 100644 backend/danswer/connectors/gmail/constants.py create mode 100644 backend/tests/unit/danswer/connectors/gmail/test_connector.py create mode 100644 web/public/Gmail.png create mode 100644 web/src/app/admin/connectors/gmail/Credential.tsx create mode 100644 web/src/app/admin/connectors/gmail/GmailConnectorsTable.tsx create mode 100644 web/src/app/admin/connectors/gmail/auth/callback/route.ts create mode 100644 
web/src/app/admin/connectors/gmail/page.tsx create mode 100644 web/src/app/admin/connectors/gmail/utils.ts create mode 100644 web/src/lib/gmail.ts diff --git a/README.md b/README.md index b1295d5c5..9b465d94c 100644 --- a/README.md +++ b/README.md @@ -64,6 +64,7 @@ We also have built-in support for deployment on Kubernetes. Files for that can b Efficiently pulls the latest changes from: * Slack * GitHub + * Gmail * Google Drive * Confluence * Jira diff --git a/backend/danswer/configs/constants.py b/backend/danswer/configs/constants.py index 791bf030f..7afd3902d 100644 --- a/backend/danswer/configs/constants.py +++ b/backend/danswer/configs/constants.py @@ -61,6 +61,7 @@ class DocumentSource(str, Enum): SLACK = "slack" WEB = "web" GOOGLE_DRIVE = "google_drive" + GMAIL = "gmail" REQUESTTRACKER = "requesttracker" GITHUB = "github" GITLAB = "gitlab" diff --git a/backend/danswer/connectors/factory.py b/backend/danswer/connectors/factory.py index b8f490b2c..f25aab615 100644 --- a/backend/danswer/connectors/factory.py +++ b/backend/danswer/connectors/factory.py @@ -9,6 +9,7 @@ from danswer.connectors.document360.connector import Document360Connector from danswer.connectors.file.connector import LocalFileConnector from danswer.connectors.github.connector import GithubConnector from danswer.connectors.gitlab.connector import GitlabConnector +from danswer.connectors.gmail.connector import GmailConnector from danswer.connectors.gong.connector import GongConnector from danswer.connectors.google_drive.connector import GoogleDriveConnector from danswer.connectors.google_site.connector import GoogleSitesConnector @@ -48,6 +49,7 @@ def identify_connector_class( InputType.POLL: SlackPollConnector, }, DocumentSource.GITHUB: GithubConnector, + DocumentSource.GMAIL: GmailConnector, DocumentSource.GITLAB: GitlabConnector, DocumentSource.GOOGLE_DRIVE: GoogleDriveConnector, DocumentSource.BOOKSTACK: BookstackConnector, diff --git a/backend/danswer/connectors/gmail/__init__.py 
class GmailConnector(LoadConnector, PollConnector):
    """Load/poll connector that turns Gmail messages into ``Document`` batches.

    Credentials must be supplied through ``load_credentials`` before any
    messages can be fetched; otherwise fetching raises ``PermissionError``.
    """

    def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:
        # batch_size doubles as the page size requested from the Gmail API.
        self.batch_size = batch_size
        self.creds: Credentials | None = None

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, str] | None:
        """Checks for two different types of credentials.
        (1) A credential which holds a token acquired via a user going through
        the Google OAuth flow.
        (2) A credential which holds a service account key JSON file, which
        can then be used to impersonate any user in the workspace.

        Returns a replacement credentials dict when the stored OAuth token
        changed (e.g. it was refreshed) so the caller can persist it, else
        ``None``.

        Raises:
            PermissionError: if no usable credential could be built.
        """
        creds = None
        new_creds_dict = None
        if DB_CREDENTIALS_DICT_TOKEN_KEY in credentials:
            access_token_json_str = cast(
                str, credentials[DB_CREDENTIALS_DICT_TOKEN_KEY]
            )
            creds = get_gmail_creds_for_authorized_user(
                token_json_str=access_token_json_str
            )

            # tell caller to update token stored in DB if it has changed
            # (e.g. the token has been refreshed)
            new_creds_json_str = creds.to_json() if creds else ""
            if new_creds_json_str != access_token_json_str:
                new_creds_dict = {DB_CREDENTIALS_DICT_TOKEN_KEY: new_creds_json_str}

        if GMAIL_DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY in credentials:
            service_account_key_json_str = credentials[
                GMAIL_DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY
            ]
            creds = get_gmail_creds_for_service_account(
                service_account_key_json_str=service_account_key_json_str
            )

            # "Impersonate" a user if one is specified
            delegated_user_email = cast(
                str | None, credentials.get(DB_CREDENTIALS_DICT_DELEGATED_USER_KEY)
            )
            if delegated_user_email:
                creds = creds.with_subject(delegated_user_email) if creds else None

        if creds is None:
            raise PermissionError(
                "Unable to access Gmail - unknown credential structure."
            )

        self.creds = creds
        return new_creds_dict

    @staticmethod
    def _decode_body_data(data: str) -> str:
        """Decode a base64url-encoded ``body.data`` string into text."""
        # Re-add any stripped "=" padding; urlsafe_b64decode rejects input
        # whose length is not a multiple of four.
        padded = data + "=" * (-len(data) % 4)
        # One badly-encoded message should not abort the whole indexing run,
        # so undecodable bytes are replaced instead of raising.
        return urlsafe_b64decode(padded).decode("utf-8", errors="replace")

    def _collect_plain_text(self, part: dict[str, Any], chunks: list[str]) -> None:
        """Append the text of every ``text/plain`` part under *part* to *chunks*."""
        if part.get("mimeType") == "text/plain":
            # "body" (or its "data") may be absent, e.g. for parts without
            # inline content.
            data = (part.get("body") or {}).get("data")
            if data:
                chunks.append(self._decode_body_data(data))
        # Multipart containers nest their children under "parts"; walk them in
        # order so the text keeps the message's original ordering.
        for sub_part in part.get("parts") or []:
            self._collect_plain_text(sub_part, chunks)

    def _get_email_body(self, payload: dict[str, Any]) -> str:
        """Return the concatenated ``text/plain`` content of a message payload.

        Handles single-part messages (text stored on the payload itself) as
        well as arbitrarily nested multipart messages. Returns an empty string
        when the message has no plain-text part.
        """
        chunks: list[str] = []
        self._collect_plain_text(payload, chunks)
        return "".join(chunks)

    def _email_to_document(self, full_email: Dict[str, Any]) -> Document:
        """Convert a full-format Gmail message resource into a ``Document``.

        The from/to/subject/date/cc/bcc headers become metadata and are also
        prepended to the plain-text body as ``name: value`` lines.
        """
        email_id = full_email["id"]
        payload = full_email["payload"]
        labels = full_email.get("labelIds", [])
        metadata: dict[str, Any] = {}
        for header in payload.get("headers") or []:
            # Tolerate malformed header entries that lack a name.
            name = (header.get("name") or "").lower()
            if name in ("from", "to", "subject", "date", "cc", "bcc"):
                metadata[name] = header.get("value")
        email_data = "".join(f"{name}: {value}\n" for name, value in metadata.items())
        # Labels are added after building email_data so that they are kept as
        # metadata only and not rendered into the document text.
        metadata["labels"] = labels
        logger.debug(f"{email_data}")
        email_body_text: str = self._get_email_body(payload)
        date_str = metadata.get("date")
        email_updated_at = time_str_to_utc(date_str) if date_str else None
        link = f"https://mail.google.com/mail/u/0/#inbox/{email_id}"
        return Document(
            id=email_id,
            sections=[Section(link=link, text=email_data + email_body_text)],
            source=DocumentSource.GMAIL,
            title=metadata.get("subject"),
            # A subject header can be present but blank; fall back in that
            # case too so the document always has a readable identifier.
            semantic_identifier=metadata.get("subject") or "Untitled Email",
            doc_updated_at=email_updated_at,
            metadata=metadata,
        )

    @staticmethod
    def _build_time_range_query(
        time_range_start: SecondsSinceUnixEpoch | None = None,
        time_range_end: SecondsSinceUnixEpoch | None = None,
    ) -> str | None:
        """Build a Gmail search query restricting results to a time window.

        Bounds that are ``None`` or ``0`` are omitted. Returns ``None`` when
        neither bound applies.
        """
        query = ""
        if time_range_start is not None and time_range_start != 0:
            query += f"after:{int(time_range_start)}"
        if time_range_end is not None and time_range_end != 0:
            query += f" before:{int(time_range_end)}"
        query = query.strip()

        if len(query) == 0:
            return None

        return query

    def _fetch_mails_from_gmail(
        self,
        time_range_start: SecondsSinceUnixEpoch | None = None,
        time_range_end: SecondsSinceUnixEpoch | None = None,
    ) -> GenerateDocumentsOutput:
        """Yield batches of documents for the messages in the given window.

        Message ids are listed page by page (page size ``self.batch_size``),
        each message is fetched in ``full`` format, and one batch is yielded
        per non-empty page.

        Raises:
            PermissionError: if ``load_credentials`` has not been called.
        """
        if self.creds is None:
            raise PermissionError("Not logged into Gmail")
        page_token = ""
        query = GmailConnector._build_time_range_query(time_range_start, time_range_end)
        service = discovery.build("gmail", "v1", credentials=self.creds)
        # The API omits nextPageToken on the last page, which ends the loop.
        while page_token is not None:
            result = (
                service.users()
                .messages()
                .list(
                    userId="me",
                    pageToken=page_token,
                    q=query,
                    maxResults=self.batch_size,
                )
                .execute()
            )
            page_token = result.get("nextPageToken")
            messages = result.get("messages", [])
            doc_batch = []
            for message in messages:
                message_id = message["id"]
                msg = (
                    service.users()
                    .messages()
                    .get(userId="me", id=message_id, format="full")
                    .execute()
                )
                doc = self._email_to_document(msg)
                doc_batch.append(doc)
            if len(doc_batch) > 0:
                yield doc_batch

    def load_from_state(self) -> GenerateDocumentsOutput:
        """Yield every message in the mailbox (no time restriction)."""
        yield from self._fetch_mails_from_gmail()

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        """Yield the messages that fall between *start* and *end*."""
        yield from self._fetch_mails_from_gmail(start, end)
def _build_frontend_gmail_redirect() -> str:
    """Return the frontend callback URL used as the Gmail OAuth redirect URI."""
    callback_url = f"{WEB_DOMAIN}/admin/connectors/gmail/auth/callback"
    return callback_url
def get_gmail_creds_for_service_account(
    service_account_key_json_str: str,
) -> ServiceAccountCredentials | None:
    """Build Gmail-scoped credentials from a service account key.

    Args:
        service_account_key_json_str: the service account key file contents
            as a JSON string.

    Returns:
        The credentials once they hold a valid access token, or ``None`` if
        they are still not valid after a refresh.
    """
    service_account_key = json.loads(service_account_key_json_str)
    creds = ServiceAccountCredentials.from_service_account_info(
        service_account_key, scopes=SCOPES
    )
    # The previous condition (`not creds.valid or not creds.expired`) was
    # always true, because a valid credential is by definition not expired.
    # Refresh only when the credentials cannot be used as-is.
    if not creds.valid:
        creds.refresh(Request())
    return creds if creds.valid else None
def update_gmail_credential_access_tokens(
    auth_code: str,
    credential_id: int,
    user: User,
    db_session: Session,
) -> OAuthCredentials | None:
    """Exchange an OAuth authorization code for tokens and store them.

    The tokens are written, as JSON, into the credential identified by
    *credential_id*. Returns the new credentials, or ``None`` when the
    stored credential could not be updated.
    """
    oauth_flow = InstalledAppFlow.from_client_config(
        get_google_app_gmail_cred().dict(),
        scopes=SCOPES,
        redirect_uri=_build_frontend_gmail_redirect(),
    )
    oauth_flow.fetch_token(code=auth_code)
    oauth_creds = oauth_flow.credentials

    was_stored = update_credential_json(
        credential_id,
        {DB_CREDENTIALS_DICT_TOKEN_KEY: oauth_creds.to_json()},
        user,
        db_session,
    )
    return oauth_creds if was_stored else None
def upsert_service_account_key(service_account_key: GoogleServiceAccountKey) -> None:
    """Store the Gmail service account key in the dynamic config store.

    Unprefixed alias of ``upsert_gmail_service_account_key``; it delegates to
    that function so the storage logic lives in exactly one place.
    """
    upsert_gmail_service_account_key(service_account_key)
class GoogleDriveConnector(LoadConnector, PollConnector): # propogation if a document is modified, it takes some time for the API to # reflect these changes if we do not have an offset, then we may "miss" the # update when polling - yield from self._fetch_docs_from_drive( - max(start - DRIVE_START_TIME_OFFSET, 0, 0), end - ) + yield from self._fetch_docs_from_drive(start, end) if __name__ == "__main__": diff --git a/backend/danswer/db/credentials.py b/backend/danswer/db/credentials.py index 30d2415bb..c37bc5934 100644 --- a/backend/danswer/db/credentials.py +++ b/backend/danswer/db/credentials.py @@ -6,6 +6,9 @@ from sqlalchemy.orm import Session from sqlalchemy.sql.expression import or_ from danswer.auth.schemas import UserRole +from danswer.connectors.gmail.constants import ( + GMAIL_DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY, +) from danswer.connectors.google_drive.constants import ( DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY, ) @@ -165,6 +168,19 @@ def create_initial_public_credential(db_session: Session) -> None: db_session.commit() +def delete_gmail_service_account_credentials( + user: User | None, db_session: Session +) -> None: + credentials = fetch_credentials(db_session=db_session, user=user) + for credential in credentials: + if credential.credential_json.get( + GMAIL_DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY + ): + db_session.delete(credential) + + db_session.commit() + + def delete_google_drive_service_account_credentials( user: User | None, db_session: Session ) -> None: diff --git a/backend/danswer/server/documents/connector.py b/backend/danswer/server/documents/connector.py index 721ef1d93..ca3dd325a 100644 --- a/backend/danswer/server/documents/connector.py +++ b/backend/danswer/server/documents/connector.py @@ -13,6 +13,18 @@ from danswer.auth.users import current_user from danswer.background.celery.celery_utils import get_deletion_status from danswer.configs.constants import DocumentSource from danswer.connectors.file.utils import write_temp_files +from 
danswer.connectors.gmail.connector_auth import delete_gmail_service_account_key +from danswer.connectors.gmail.connector_auth import delete_google_app_gmail_cred +from danswer.connectors.gmail.connector_auth import get_gmail_auth_url +from danswer.connectors.gmail.connector_auth import get_gmail_service_account_key +from danswer.connectors.gmail.connector_auth import get_google_app_gmail_cred +from danswer.connectors.gmail.connector_auth import ( + update_gmail_credential_access_tokens, +) +from danswer.connectors.gmail.connector_auth import ( + upsert_gmail_service_account_key, +) +from danswer.connectors.gmail.connector_auth import upsert_google_app_gmail_cred from danswer.connectors.google_drive.connector_auth import build_service_account_creds from danswer.connectors.google_drive.connector_auth import delete_google_app_cred from danswer.connectors.google_drive.connector_auth import delete_service_account_key @@ -37,6 +49,7 @@ from danswer.db.connector import get_connector_credential_ids from danswer.db.connector import update_connector from danswer.db.connector_credential_pair import get_connector_credential_pairs from danswer.db.credentials import create_credential +from danswer.db.credentials import delete_gmail_service_account_credentials from danswer.db.credentials import delete_google_drive_service_account_credentials from danswer.db.credentials import fetch_credential_by_id from danswer.db.deletion_attempt import check_deletion_attempt_is_allowed @@ -55,6 +68,7 @@ from danswer.server.documents.models import ConnectorSnapshot from danswer.server.documents.models import CredentialSnapshot from danswer.server.documents.models import FileUploadResponse from danswer.server.documents.models import GDriveCallback +from danswer.server.documents.models import GmailCallback from danswer.server.documents.models import GoogleAppCredentials from danswer.server.documents.models import GoogleServiceAccountCredentialRequest from danswer.server.documents.models import 
GoogleServiceAccountKey @@ -63,6 +77,7 @@ from danswer.server.documents.models import ObjectCreationIdResponse from danswer.server.documents.models import RunConnectorRequest from danswer.server.models import StatusResponse +_GMAIL_CREDENTIAL_ID_COOKIE_NAME = "gmail_credential_id" _GOOGLE_DRIVE_CREDENTIAL_ID_COOKIE_NAME = "google_drive_credential_id" @@ -72,6 +87,44 @@ router = APIRouter(prefix="/manage") """Admin only API endpoints""" +@router.get("/admin/connector/gmail/app-credential") +def check_google_app_gmail_credentials_exist( + _: User = Depends(current_admin_user), +) -> dict[str, str]: + try: + return {"client_id": get_google_app_gmail_cred().web.client_id} + except ConfigNotFoundError: + raise HTTPException(status_code=404, detail="Google App Credentials not found") + + +@router.put("/admin/connector/gmail/app-credential") +def upsert_google_app_gmail_credentials( + app_credentials: GoogleAppCredentials, _: User = Depends(current_admin_user) +) -> StatusResponse: + try: + upsert_google_app_gmail_cred(app_credentials) + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + + return StatusResponse( + success=True, message="Successfully saved Google App Credentials" + ) + + +@router.delete("/admin/connector/gmail/app-credential") +def delete_google_app_gmail_credentials( + _: User = Depends(current_admin_user), +) -> StatusResponse: + try: + delete_google_app_gmail_cred() + except ConfigNotFoundError as e: + raise HTTPException(status_code=400, detail=str(e)) + + return StatusResponse( + success=True, message="Successfully deleted Google App Credentials" + ) + + @router.get("/admin/connector/google-drive/app-credential") def check_google_app_credentials_exist( _: User = Depends(current_admin_user), @@ -110,6 +163,46 @@ def delete_google_app_credentials( ) +@router.get("/admin/connector/gmail/service-account-key") +def check_google_service_gmail_account_key_exist( + _: User = Depends(current_admin_user), +) -> dict[str, str]: + 
try: + return {"service_account_email": get_gmail_service_account_key().client_email} + except ConfigNotFoundError: + raise HTTPException( + status_code=404, detail="Google Service Account Key not found" + ) + + +@router.put("/admin/connector/gmail/service-account-key") +def upsert_google_service_gmail_account_key( + service_account_key: GoogleServiceAccountKey, _: User = Depends(current_admin_user) +) -> StatusResponse: + try: + upsert_gmail_service_account_key(service_account_key) + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + + return StatusResponse( + success=True, message="Successfully saved Google Service Account Key" + ) + + +@router.delete("/admin/connector/gmail/service-account-key") +def delete_google_service_gmail_account_key( + _: User = Depends(current_admin_user), +) -> StatusResponse: + try: + delete_gmail_service_account_key() + except ConfigNotFoundError as e: + raise HTTPException(status_code=400, detail=str(e)) + + return StatusResponse( + success=True, message="Successfully deleted Google Service Account Key" + ) + + @router.get("/admin/connector/google-drive/service-account-key") def check_google_service_account_key_exist( _: User = Depends(current_admin_user), @@ -175,6 +268,31 @@ def upsert_service_account_credential( return ObjectCreationIdResponse(id=credential.id) +@router.put("/admin/connector/gmail/service-account-credential") +def upsert_gmail_service_account_credential( + service_account_credential_request: GoogleServiceAccountCredentialRequest, + user: User | None = Depends(current_admin_user), + db_session: Session = Depends(get_session), +) -> ObjectCreationIdResponse: + """Special API which allows the creation of a credential for a service account. 
+ Combines the input with the saved service account key to create an entry in the + `Credential` table.""" + try: + credential_base = build_service_account_creds( + delegated_user_email=service_account_credential_request.gmail_delegated_user + ) + except ConfigNotFoundError as e: + raise HTTPException(status_code=400, detail=str(e)) + + # first delete all existing service account credentials + delete_gmail_service_account_credentials(user, db_session) + # `user=None` since this credential is not a personal credential + credential = create_credential( + credential_data=credential_base, user=user, db_session=db_session + ) + return ObjectCreationIdResponse(id=credential.id) + + @router.get("/admin/connector/google-drive/check-auth/{credential_id}") def check_drive_tokens( credential_id: int, @@ -408,6 +526,20 @@ def connector_run_once( """Endpoints for basic users""" +@router.get("/connector/gmail/authorize/{credential_id}") +def gmail_auth( + response: Response, credential_id: str, _: User = Depends(current_user) +) -> AuthUrl: + # set a cookie that we can read in the callback (used for `verify_csrf`) + response.set_cookie( + key=_GMAIL_CREDENTIAL_ID_COOKIE_NAME, + value=credential_id, + httponly=True, + max_age=600, + ) + return AuthUrl(auth_url=get_gmail_auth_url(int(credential_id))) + + @router.get("/connector/google-drive/authorize/{credential_id}") def google_drive_auth( response: Response, credential_id: str, _: User = Depends(current_user) @@ -422,6 +554,33 @@ def google_drive_auth( return AuthUrl(auth_url=get_auth_url(int(credential_id))) +@router.get("/connector/gmail/callback") +def gmail_callback( + request: Request, + callback: GmailCallback = Depends(), + user: User = Depends(current_user), + db_session: Session = Depends(get_session), +) -> StatusResponse: + credential_id_cookie = request.cookies.get(_GMAIL_CREDENTIAL_ID_COOKIE_NAME) + if credential_id_cookie is None or not credential_id_cookie.isdigit(): + raise HTTPException( + status_code=401, 
detail="Request did not pass CSRF verification." + ) + credential_id = int(credential_id_cookie) + verify_csrf(credential_id, callback.state) + if ( + update_gmail_credential_access_tokens( + callback.code, credential_id, user, db_session + ) + is None + ): + raise HTTPException( + status_code=500, detail="Unable to fetch Gmail access tokens" + ) + + return StatusResponse(success=True, message="Updated Gmail access tokens") + + @router.get("/connector/google-drive/callback") def google_drive_callback( request: Request, diff --git a/backend/danswer/server/documents/models.py b/backend/danswer/server/documents/models.py index 4b5342209..734e7c9fe 100644 --- a/backend/danswer/server/documents/models.py +++ b/backend/danswer/server/documents/models.py @@ -225,6 +225,7 @@ class GoogleServiceAccountKey(BaseModel): class GoogleServiceAccountCredentialRequest(BaseModel): google_drive_delegated_user: str | None # email of user to impersonate + gmail_delegated_user: str | None # email of user to impersonate class FileUploadResponse(BaseModel): @@ -243,6 +244,11 @@ class AuthUrl(BaseModel): auth_url: str +class GmailCallback(BaseModel): + state: str + code: str + + class GDriveCallback(BaseModel): state: str code: str diff --git a/backend/requirements/default.txt b/backend/requirements/default.txt index f8dcbe5db..8f0572f15 100644 --- a/backend/requirements/default.txt +++ b/backend/requirements/default.txt @@ -38,6 +38,7 @@ pydantic==1.10.7 PyGithub==1.58.2 python-gitlab==3.9.0 pypdf==3.17.0 +pytest-mock==3.12.0 pytest-playwright==0.3.2 python-dotenv==1.0.0 python-multipart==0.0.6 diff --git a/backend/tests/unit/danswer/connectors/gmail/test_connector.py b/backend/tests/unit/danswer/connectors/gmail/test_connector.py new file mode 100644 index 000000000..91f39e10a --- /dev/null +++ b/backend/tests/unit/danswer/connectors/gmail/test_connector.py @@ -0,0 +1,226 @@ +import datetime +from unittest.mock import MagicMock + +import pytest +from pytest_mock import MockFixture + 
+from danswer.configs.constants import DocumentSource +from danswer.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc +from danswer.connectors.gmail.connector import GmailConnector +from danswer.connectors.models import Document + + +def test_email_to_document() -> None: + connector = GmailConnector() + email_id = "18cabedb1ea46b03" + email_subject = "Danswer Test Subject" + email_sender = "Google " + email_recipient = "test.mail@gmail.com" + email_date = "Wed, 27 Dec 2023 15:38:49 GMT" + email_labels = ["UNREAD", "IMPORTANT", "CATEGORY_UPDATES", "STARRED", "INBOX"] + full_email = { + "id": email_id, + "threadId": email_id, + "labelIds": email_labels, + "snippet": "A new sign-in. We noticed a new sign-in to your Google Account. If this was you, you don't need to do", + "payload": { + "partId": "", + "mimeType": "multipart/alternative", + "filename": "", + "headers": [ + {"name": "Delivered-To", "value": email_recipient}, + {"name": "Date", "value": email_date}, + { + "name": "Message-ID", + "value": "", + }, + {"name": "Subject", "value": email_subject}, + {"name": "From", "value": email_sender}, + {"name": "To", "value": email_recipient}, + ], + "body": {"size": 0}, + "parts": [ + { + "partId": "0", + "mimeType": "text/plain", + "filename": "", + "headers": [ + { + "name": "Content-Type", + "value": 'text/plain; charset="UTF-8"; format=flowed; delsp=yes', + }, + {"name": "Content-Transfer-Encoding", "value": "base64"}, + ], + "body": { + "size": 9, + "data": "dGVzdCBkYXRh", + }, + }, + { + "partId": "1", + "mimeType": "text/html", + "filename": "", + "headers": [ + {"name": "Content-Type", "value": 'text/html; charset="UTF-8"'}, + { + "name": "Content-Transfer-Encoding", + "value": "quoted-printable", + }, + ], + "body": { + "size": 9, + "data": "dGVzdCBkYXRh", + }, + }, + ], + }, + "sizeEstimate": 12048, + "historyId": "697762", + "internalDate": "1703691529000", + } + doc = connector._email_to_document(full_email) + assert type(doc) == 
Document + assert doc.source == DocumentSource.GMAIL + assert doc.title == "Danswer Test Subject" + assert doc.doc_updated_at == datetime.datetime( + 2023, 12, 27, 15, 38, 49, tzinfo=datetime.timezone.utc + ) + assert doc.metadata == { + "labels": email_labels, + "from": email_sender, + "to": email_recipient, + "date": email_date, + "subject": email_subject, + } + + +def test_fetch_mails_from_gmail_empty(mocker: MockFixture) -> None: + mock_discovery = mocker.patch("danswer.connectors.gmail.connector.discovery") + mock_discovery.build.return_value.users.return_value.messages.return_value.list.return_value.execute.return_value = { + "messages": [] + } + connector = GmailConnector() + connector.creds = MagicMock() + with pytest.raises(StopIteration): + next(connector.load_from_state()) + + +def test_fetch_mails_from_gmail(mocker: MockFixture) -> None: + mock_discovery = mocker.patch("danswer.connectors.gmail.connector.discovery") + email_id = "18cabedb1ea46b03" + email_subject = "Danswer Test Subject" + email_sender = "Google " + email_recipient = "test.mail@gmail.com" + mock_discovery.build.return_value.users.return_value.messages.return_value.list.return_value.execute.return_value = { + "messages": [{"id": email_id, "threadId": email_id}], + "nextP`ageToken": "14473313008248105741", + "resultSizeEstimate": 201, + } + mock_discovery.build.return_value.users.return_value.messages.return_value.get.return_value.execute.return_value = { + "id": email_id, + "threadId": email_id, + "labelIds": ["UNREAD", "IMPORTANT", "CATEGORY_UPDATES", "STARRED", "INBOX"], + "snippet": "A new sign-in. We noticed a new sign-in to your Google Account. 
If this was you, you don't need to do", + "payload": { + "partId": "", + "mimeType": "multipart/alternative", + "filename": "", + "headers": [ + {"name": "Delivered-To", "value": email_recipient}, + {"name": "Date", "value": "Wed, 27 Dec 2023 15:38:49 GMT"}, + { + "name": "Message-ID", + "value": "", + }, + {"name": "Subject", "value": email_subject}, + {"name": "From", "value": email_sender}, + {"name": "To", "value": email_recipient}, + ], + "body": {"size": 0}, + "parts": [ + { + "partId": "0", + "mimeType": "text/plain", + "filename": "", + "headers": [ + { + "name": "Content-Type", + "value": 'text/plain; charset="UTF-8"; format=flowed; delsp=yes', + }, + {"name": "Content-Transfer-Encoding", "value": "base64"}, + ], + "body": { + "size": 9, + "data": "dGVzdCBkYXRh", + }, + }, + { + "partId": "1", + "mimeType": "text/html", + "filename": "", + "headers": [ + {"name": "Content-Type", "value": 'text/html; charset="UTF-8"'}, + { + "name": "Content-Transfer-Encoding", + "value": "quoted-printable", + }, + ], + "body": { + "size": 9, + "data": "dGVzdCBkYXRh", + }, + }, + ], + }, + "sizeEstimate": 12048, + "historyId": "697762", + "internalDate": "1703691529000", + } + + connector = GmailConnector() + connector.creds = MagicMock() + docs = next(connector.load_from_state()) + assert len(docs) == 1 + doc: Document = docs[0] + assert type(doc) == Document + assert doc.id == email_id + assert doc.title == email_subject + assert email_recipient in doc.sections[0].text + assert email_sender in doc.sections[0].text + + +def test_build_time_range_query() -> None: + time_range_start = 1703066296.159339 + time_range_end = 1704984791.657404 + query = GmailConnector._build_time_range_query(time_range_start, time_range_end) + assert query == "after:1703066296 before:1704984791" + query = GmailConnector._build_time_range_query(time_range_start, None) + assert query == "after:1703066296" + query = GmailConnector._build_time_range_query(None, time_range_end) + assert query == 
"before:1704984791" + query = GmailConnector._build_time_range_query(0.0, time_range_end) + assert query == "before:1704984791" + query = GmailConnector._build_time_range_query(None, None) + assert query is None + + +def test_time_str_to_utc() -> None: + str_to_dt = { + "Tue, 5 Oct 2021 09:38:25 GMT": datetime.datetime( + 2021, 10, 5, 9, 38, 25, tzinfo=datetime.timezone.utc + ), + "Sat, 24 Jul 2021 09:21:20 +0000 (UTC)": datetime.datetime( + 2021, 7, 24, 9, 21, 20, tzinfo=datetime.timezone.utc + ), + "Thu, 29 Jul 2021 04:20:37 -0400 (EDT)": datetime.datetime( + 2021, 7, 29, 8, 20, 37, tzinfo=datetime.timezone.utc + ), + "30 Jun 2023 18:45:01 +0300": datetime.datetime( + 2023, 6, 30, 15, 45, 1, tzinfo=datetime.timezone.utc + ), + "22 Mar 2020 20:12:18 +0000 (GMT)": datetime.datetime( + 2020, 3, 22, 20, 12, 18, tzinfo=datetime.timezone.utc + ), + } + for strptime, expected_datetime in str_to_dt.items(): + assert time_str_to_utc(strptime) == expected_datetime diff --git a/web/public/Gmail.png b/web/public/Gmail.png new file mode 100644 index 0000000000000000000000000000000000000000..ece92556c2fb7e372d9e0c447580ea084cd7f755 GIT binary patch literal 7186 zcmeHMX;@R&x{gzFune_mg)k%mhdN;h8=@eA5JYBO!3tj>r2v_c`bOxAqa5(?7INa=v^W|$-;Bb@FQQR81zbH&s#Fs=aK zQ%97O&C02uyvD;_MJB&jP6fXoc)D3vP65mAkzE1XSj7~y3|H&nK3YDd#)CXsF>SOQ zXRe%rE%qRP;H)9LO{j1W$ZjLbY4^zF`&Jvzsb7o~estXpC zgQ-2>o8{Zb@_pb*dJj(-t(f|NOa_*K=fU!6_bKk6g}Cc1-^ z31cYK6XMsyf{ek-piAoj-W>{s931p1SsQY2fOcseGw&UADJD2?(Dz~#OY|-{-vnKb zwl0XGUW&rTqJiLpCZEgEtV4sr`>_G@;9xKYz5|zhu3%HZ;BzTD7!0mNQ)N*;CDCA+ z@1=EsCnRr>m51T95-=SCqCS_`1&iK+F*9!vY!~Rlz@WX`?+O~+zDxDHmw#Q^Y*4sv z@5`S9V$S@w7J)++Jh4$QJVd7s^(LPDIp<4 zrN~g2+M{F|pQB&h1db+c`)XAv?(j6X)nHPa5yLVm!Zsm$dm0l2WSGMf%9<5rC%zaDbdjhMs+JXCL5?gbpel??19?< z*4M5I(_?yE&V*+{TVVeS&w}{zI~UWB=fp*2bc))!Lq}2~8AEMH<|_h)L((KgsOndn zHuOiM``$N*e?btxJ9}_Zs60uw_jMimY;R9Xe(cWinu|vFN@a8LnWZaLQK5glYj_e_ zS=P*a_h@O>VHZ;NSZUvNQtCN0H)XK?pkk3G{?&!i6SKF(EFHgCV4G&SJW{{-J#=GV 
z)}G!~58{Q(M^`F#cWn2JjnB@D-8X&X1=6ouH~dmq5T<(j!@QcDWgRwEf~Bfu3x@B- zj+L!cjeK$={z8j?_58Q*jz-6Bw9Tn0D*DsR?|i?x^xCwE{+FdByry`B(YNM;chZ5` z@3o_kJwGpBN&pR`w3nHl_rcl6Y00ZDClmI}JhkvKD73qGk7y z)>8DUhTXJH3)SpUyY}y8bZiO`m3XgVC3ep$Fq+ysHiPK`+_vv$8nyDU%>3pJ1W{7n_7{V4UwyoD!uu5ae;{9S!C91a)U++hHrN^$@?>S@3-_C z#&3oZEO6J=G5}pkTh9KOH?g<4_~9BfANJw&_!DW5;aD*mfA~grFk)L+n8w&AJIJGt zO3(I0qds8C)xFEuzw_wX@4V0ybu(78%ohN;clCdi%VIPb>tBYK=%!`-bDFCmxfjdl z0yFSUR#*W4oNCj0i}1x&oilX1$#wrJZb!5*X31! z$D5a_K5R^T=QFw0D7nT<(!SW_&A!j0zc~ICY&w)89B3AWAH>V;#~R z1OnliyaB=Ak>9|=uvaVC>bL+vu`KqGEqv72BzNBaN0bba zh1-Eb+4lh=!+}gd_v&F0llM#w=)_nzb1+SStLI5e@Bsa(ht2-}fV)`Tjs2-m03JqY|MS|N* zA0B`@U?UOYVTBh2KQVoB0c3`goFP2$6`&5Rm6tjJ`z*u;FKz_37+jN3>tyH5HO)b* z2;yb{ZQ+{sXkTE^d~^#z{10JU*V z9PJAXT8eJLM;rwGyBXSQL&k?4%%e4u#pk zp}DcLnHi03KrLLhoU;>?T#n)i5v_nsXl!edUje6DIiPz*gl>T*(F+2WsAL?Fjq}oX zXqSNliIfOjL^az11vpnHhyhr6`Wp<+d7wSNE>57`16tFZRKAt>yU_F(=JT7G9n0A2 zz_m!Bs0Q1?65TX{*d(cwm{k+2vYP$jYj#8PqRa=ptMDynH z!RL!No6XDpRh3?-G{|ip#wBR)+d4ne)k{j7;xe^)o$i601BZ?!BKfSE75r+Z;~Bvl z`4J-RBpmdjU2#&`Cbww_%+TJqbFS7EVlwdZm=RGGv)iA-f@CC*5w7HuSy@-F2>3Bhj%5B_fU&AtIS~GVx5$cD{_Jv)P zXtjPEB(b^tBBrNA*&#ndq8(g~81us$b?fosP-KfJEI~b!2**3o*7IgNLqynYOFT%4 zUTrP&1s!^xRNj+6a-tm!My?$S@6*k~i^Gvy1hHDO;59FPIs2;7i^|F3Dgrn$hr(xE zFpDe}iLQenjzVS;#7&X~2YB&x_6lx+JG2f4)2zPt(R0h(p>3e7Uf3bYtOwiG)tMH+c?#4{1GUtB z+yWQq0PJELW_nRQH0fA;hV9l_2S)lI>R(~tqYxJR-;$0PZ%$9O0S zx(jQZpmJ78Mw75hazx!K(5^Nzc5DCUAiYaRt6B5r@QqAxpZ(4(UcsLJT$k@4J*yKE z#Cs4&ym(l$z?DbxVgJNMU7>eyfgALiRr0o8_*7z3-^ITE@+k=M^p$I)AOAAri(`C9<4pObI2p{1BJGekgY^8VSqW!FSeta48 z$!cT@z=2D(@S)^LQ|opubjp02`2-wXkCBnA&NCuZPD`C|K%TrqJ+ql7^JBl}DS|k} zhr)aE!Ef0l5^5kUo%=J7OgAHNl zPplU{kSC|8XU^iueAqEu#d1zzy=6?Eyh}ZEh%57Dv$=|8oR)ga8+mdnP~^(!Y;fby zIm8CbQ+aYaP~^%2*xz#%zMR4a%N{MXjQyQItZ}EUgEtbPR6~_3w8UQe)8CPS{9_xC z-S~)Nm}Fb%mmj{C!aqtWJ}nS4pX@@=oF${v!V_i6I?0Lp%DeJpI{K~DnC8PC?b9T;kIx)cxHv2TTrm4YA62MWn%C;h>Sz*5XdL|1Qf5WA_ z(SF13FfN*OTG=Og1XgM#%AjK1_55R8WP|9C&hk>9#sJT7IuVnY!#9IZ8f7CUfrp^%n`qpUqy`6`3#e35@aCW`(KGniWG}qYg$7QUhfOxa&s}J# 
zdPX`nafY9}(yr?nN!X+t-snba)HAkYlREgh8?8^z5Mq-f@N;+Cl1^|_f=M#GkwlB= zWMpEKFX87TT52aF6`R<=0$b@bu8amcZXeOb9SY~ckxrn2^elwIDyh`fI$)-0dGIw;9`Qi1jgBvEZ2|-vT-8urcv-B_c6Ok{pOqFqu?MAX^Q| z%=k)zRKE7Bb;w1_2RZ$yQ$H29117F>F$^e3pv3mIisW7 z0huxes---MKwL1H*mHt6lJ5EF5GHfIE-+g?_`SItlNpBc!#BbTXRJJuryl&l-18Am zhZWc%V{q0P^LHQNTsYqKJ#nQ8jo@wYPg55JAN^&?%+((kPv}hT6GL4oGR>tNPye0@czkknt*i_M<4f3q6BUM7O>(K>nZZ>Z2LWuV z3&6g@!eYt*FxLl+bm&?x096dz+6%Dv0Au}|0C`6aR3oqenfPx4q!SiguwHv@|205P zFUE2m=+#j1Zvy0wK@e~ZAV9jh{7Zl|wB5&4u_pY0K%?%aLm~JRgwCZga7Zr2G+*il z>Xzj0-$lOX@@ojQ{0Ng*%mGY zCX@}87G^Y-dS!4KgR>A5g3uB|#4XT&tDy~%U%~ol*TuR)v%neh&kIh8N_sF!UV6Ir zsT&~UBiHQ;AF@iw=y6{9PHhZ{HVx(%3BYX!o|EcIlP=a7vd1nr@zS?x|FD%F&?z-> zx!MtIWY)w9wae_l$WiYV$k81N2fM+2?gr&x6|V3#dtxmmS|T{9n@bS)0=~cqBwJL& zSI@*!<}GFic&KwDzgIq$XJWdS9>yhsun6dQoi}Kl$rxQ7_ly%n!RWH!T8F~37+saF zm=K}G=(Zw1ispf5qW%I6{e$U92Xq>#;Hlik70}IZ3}kEb>;c^q0}{m9mJjIm@=$lE z8l$@pEA0#4Vst$^3qE2DqvQWG#OW`r;Gbc7dSmWn0t>Sk8_h>sL}6^8wFU`TPY^eN zf+F3~ouV-CdP*#=Pru8un>9*!I*}^7`d7T!5)l<^_jDVQJQq zOHw;5Xn_*hR){kd^QX@_UM)X?xj$vzdy^nGSXGvuUCs%$Uaq$ZjobkA1ZEl=8E`bh#R^i1cAmtA1Zuaq&F<+;X_Rborm8(2WsDNEVO2 z*Z#^FCvk?b&+&1i#Ua&xWJ6Z7-Yb-j2K}~Nj_ipKjW_49c@yWGZFZurNJrN%b#eJSk@#N&UduxH5eO{?{CT9h;c@JvxKm2oW}Wy#{ml58 zmXc&l>9a;ln6b2{JE zhO~XW_LuJmx_+;V6KzA{sa3Fv!}w4)lSkiY$a!=7&r8iK2X95Lrt$|CM31e^(&=vm zFGZ(EmyE2-YSnvzvFaC|cIo)~tTw%Sq!+5XvHS6>osphay<578lD4%I8{TyEc#O8p zpL5!LX5z-{%@4Az)_&(I2EB*QZZI7M(Lxe~{E#xcSBN#R?xCgrR?73yit@u%!!|aZ zIhPD+LGzxPF^->4nP07$fBJouwefXzmLe5D;GNQgK4P^whoAp)T$?JM=t#hrPO2?M zJL50jpk#`yk~6ugj=d>C?;QH8m;R&>hV;Yj-tQxm!<3c{S#L2LQCSGHl$4Dw(q9PY zAf|o3)Zy2|XC|ljTQ4Rc{3=6#d9q$v4+1~xhC{R INWy{t0EleDIRF3v literal 0 HcmV?d00001 diff --git a/web/src/app/admin/connectors/gmail/Credential.tsx b/web/src/app/admin/connectors/gmail/Credential.tsx new file mode 100644 index 000000000..68f5bba2d --- /dev/null +++ b/web/src/app/admin/connectors/gmail/Credential.tsx @@ -0,0 +1,437 @@ +import { Button } from "@/components/Button"; +import { PopupSpec } from 
"@/components/admin/connectors/Popup"; +import { useState } from "react"; +import { useSWRConfig } from "swr"; +import * as Yup from "yup"; +import { useRouter } from "next/navigation"; +import { + Credential, + GmailCredentialJson, + GmailServiceAccountCredentialJson, +} from "@/lib/types"; +import { adminDeleteCredential } from "@/lib/credential"; +import { setupGmailOAuth } from "@/lib/gmail"; +import { GMAIL_AUTH_IS_ADMIN_COOKIE_NAME } from "@/lib/constants"; +import Cookies from "js-cookie"; +import { TextFormField } from "@/components/admin/connectors/Field"; +import { Form, Formik } from "formik"; +import { Card } from "@tremor/react"; + +type GmailCredentialJsonTypes = "authorized_user" | "service_account"; + +const DriveJsonUpload = ({ + setPopup, +}: { + setPopup: (popupSpec: PopupSpec | null) => void; +}) => { + const { mutate } = useSWRConfig(); + const [credentialJsonStr, setCredentialJsonStr] = useState< + string | undefined + >(); + + return ( + <> + { + if (!event.target.files) { + return; + } + const file = event.target.files[0]; + const reader = new FileReader(); + + reader.onload = function (loadEvent) { + if (!loadEvent?.target?.result) { + return; + } + const fileContents = loadEvent.target.result; + setCredentialJsonStr(fileContents as string); + }; + + reader.readAsText(file); + }} + /> + + + + ); +}; + +interface DriveJsonUploadSectionProps { + setPopup: (popupSpec: PopupSpec | null) => void; + appCredentialData?: { client_id: string }; + serviceAccountCredentialData?: { service_account_email: string }; +} + +export const GmailJsonUploadSection = ({ + setPopup, + appCredentialData, + serviceAccountCredentialData, +}: DriveJsonUploadSectionProps) => { + const { mutate } = useSWRConfig(); + + if (serviceAccountCredentialData?.service_account_email) { + return ( +
+
+ Found existing service account key with the following Email: +

+ {serviceAccountCredentialData.service_account_email} +

+
+
+ If you want to update these credentials, delete the existing + credentials through the button below, and then upload a new + credentials JSON. +
+ +
+ ); + } + + if (appCredentialData?.client_id) { + return ( +
+
+ Found existing app credentials with the following Client ID: +

{appCredentialData.client_id}

+
+
+ If you want to update these credentials, delete the existing + credentials through the button below, and then upload a new + credentials JSON. +
+ +
+ ); + } + + return ( +
+

+ Follow the guide{" "} + + here + {" "} + to setup a google OAuth App in your company workspace. +
+
+ Download the credentials JSON and upload it here. +

+ +
+ ); +}; + +interface DriveCredentialSectionProps { + gmailPublicCredential?: Credential; + gmailServiceAccountCredential?: Credential; + serviceAccountKeyData?: { service_account_email: string }; + appCredentialData?: { client_id: string }; + setPopup: (popupSpec: PopupSpec | null) => void; + refreshCredentials: () => void; + connectorExists: boolean; +} + +export const GmailOAuthSection = ({ + gmailPublicCredential, + gmailServiceAccountCredential, + serviceAccountKeyData, + appCredentialData, + setPopup, + refreshCredentials, + connectorExists, +}: DriveCredentialSectionProps) => { + const router = useRouter(); + + const existingCredential = + gmailPublicCredential || gmailServiceAccountCredential; + if (existingCredential) { + return ( + <> +

+ Existing credential already setup! +

+ + + ); + } + + if (serviceAccountKeyData?.service_account_email) { + return ( +
+

+ When using a Gmail Service Account, you can either have Danswer act as + the service account itself OR you can specify an account for the + service account to impersonate. +
+
+ If you want to use the service account itself, leave the{" "} + 'User email to impersonate' field blank when + submitting. If you do choose this option, make sure you have shared + the documents you want to index with the service account. +

+ + + { + formikHelpers.setSubmitting(true); + + const response = await fetch( + "/api/manage/admin/connector/gmail/service-account-credential", + { + method: "PUT", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + gmail_delegated_user: values.gmail_delegated_user, + }), + } + ); + + if (response.ok) { + setPopup({ + message: "Successfully created service account credential", + type: "success", + }); + } else { + const errorMsg = await response.text(); + setPopup({ + message: `Failed to create service account credential - ${errorMsg}`, + type: "error", + }); + } + refreshCredentials(); + }} + > + {({ isSubmitting }) => ( +
+ +
+ +
+ + )} +
+
+
+ ); + } + + if (appCredentialData?.client_id) { + return ( +
+

+ Next, you must provide credentials via OAuth. This gives us read + access to the docs you have access to in your gmail account. +

+ +
+ ); + } + + // case where no keys have been uploaded in step 1 + return ( +

+ Please upload a OAuth Client Credential JSON in Step 1 before moving onto + Step 2. +

+ ); +}; diff --git a/web/src/app/admin/connectors/gmail/GmailConnectorsTable.tsx b/web/src/app/admin/connectors/gmail/GmailConnectorsTable.tsx new file mode 100644 index 000000000..f9571204d --- /dev/null +++ b/web/src/app/admin/connectors/gmail/GmailConnectorsTable.tsx @@ -0,0 +1,127 @@ +import { BasicTable } from "@/components/admin/connectors/BasicTable"; +import { PopupSpec } from "@/components/admin/connectors/Popup"; +import { StatusRow } from "@/components/admin/connectors/table/ConnectorsTable"; +import { deleteConnector } from "@/lib/connector"; +import { + GmailConfig, + ConnectorIndexingStatus, + GmailCredentialJson, +} from "@/lib/types"; +import { useSWRConfig } from "swr"; +import { DeleteColumn } from "@/components/admin/connectors/table/DeleteColumn"; +import { + Table, + TableHead, + TableRow, + TableHeaderCell, + TableBody, + TableCell, +} from "@tremor/react"; + +interface TableProps { + gmailConnectorIndexingStatuses: ConnectorIndexingStatus< + GmailConfig, + GmailCredentialJson + >[]; + setPopup: (popupSpec: PopupSpec | null) => void; +} + +export const GmailConnectorsTable = ({ + gmailConnectorIndexingStatuses: gmailConnectorIndexingStatuses, + setPopup, +}: TableProps) => { + const { mutate } = useSWRConfig(); + + // Sorting to maintain a consistent ordering + const sortedGmailConnectorIndexingStatuses = [ + ...gmailConnectorIndexingStatuses, + ]; + sortedGmailConnectorIndexingStatuses.sort( + (a, b) => a.connector.id - b.connector.id + ); + + return ( +
+ + + + Status + Delete + + + + {sortedGmailConnectorIndexingStatuses.map( + (connectorIndexingStatus) => { + return ( + + + { + mutate("/api/manage/admin/connector/indexing-status"); + }} + /> + + + + mutate("/api/manage/admin/connector/indexing-status") + } + /> + + + ); + } + )} + +
+
+ ); + + return ( + ({ + status: ( + { + mutate("/api/manage/admin/connector/indexing-status"); + }} + /> + ), + delete: ( + + mutate("/api/manage/admin/connector/indexing-status") + } + /> + ), + }) + )} + /> + ); +}; diff --git a/web/src/app/admin/connectors/gmail/auth/callback/route.ts b/web/src/app/admin/connectors/gmail/auth/callback/route.ts new file mode 100644 index 000000000..71e28f597 --- /dev/null +++ b/web/src/app/admin/connectors/gmail/auth/callback/route.ts @@ -0,0 +1,34 @@ +import { getDomain } from "@/lib/redirectSS"; +import { buildUrl } from "@/lib/utilsSS"; +import { NextRequest, NextResponse } from "next/server"; +import { cookies } from "next/headers"; +import { GMAIL_AUTH_IS_ADMIN_COOKIE_NAME } from "@/lib/constants"; +import { processCookies } from "@/lib/userSS"; + +export const GET = async (request: NextRequest) => { + // Wrapper around the FastAPI endpoint /connectors/gmail/callback, + // which adds back a redirect to the Gmail admin page. + const url = new URL(buildUrl("/manage/connector/gmail/callback")); + url.search = request.nextUrl.search; + + const response = await fetch(url.toString(), { + headers: { + cookie: processCookies(cookies()), + }, + }); + + if (!response.ok) { + console.log("Error in Gmail callback:", (await response.json()).detail); + return NextResponse.redirect(new URL("/auth/error", getDomain(request))); + } + + if ( + cookies().get(GMAIL_AUTH_IS_ADMIN_COOKIE_NAME)?.value?.toLowerCase() === + "true" + ) { + return NextResponse.redirect( + new URL("/admin/connectors/gmail", getDomain(request)) + ); + } + return NextResponse.redirect(new URL("/user/connectors", getDomain(request))); +}; diff --git a/web/src/app/admin/connectors/gmail/page.tsx b/web/src/app/admin/connectors/gmail/page.tsx new file mode 100644 index 000000000..041421a36 --- /dev/null +++ b/web/src/app/admin/connectors/gmail/page.tsx @@ -0,0 +1,265 @@ +"use client"; + +import * as Yup from "yup"; +import { GmailIcon } from "@/components/icons/icons"; 
+import useSWR, { useSWRConfig } from "swr"; +import { fetcher } from "@/lib/fetcher"; +import { LoadingAnimation } from "@/components/Loading"; +import { PopupSpec, usePopup } from "@/components/admin/connectors/Popup"; +import { HealthCheckBanner } from "@/components/health/healthcheck"; +import { + ConnectorIndexingStatus, + Credential, + GmailCredentialJson, + GmailServiceAccountCredentialJson, + GmailConfig, +} from "@/lib/types"; +import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; +import { GmailConnectorsTable } from "./GmailConnectorsTable"; +import { gmailConnectorNameBuilder } from "./utils"; +import { GmailOAuthSection, GmailJsonUploadSection } from "./Credential"; +import { usePublicCredentials } from "@/lib/hooks"; +import { AdminPageTitle } from "@/components/admin/Title"; +import { Card, Divider, Text, Title } from "@tremor/react"; + +interface GmailConnectorManagementProps { + gmailPublicCredential?: Credential; + gmailServiceAccountCredential?: Credential; + gmailConnectorIndexingStatus: ConnectorIndexingStatus< + GmailConfig, + GmailCredentialJson + > | null; + gmailConnectorIndexingStatuses: ConnectorIndexingStatus< + GmailConfig, + GmailCredentialJson + >[]; + credentialIsLinked: boolean; + setPopup: (popupSpec: PopupSpec | null) => void; +} + +const GmailConnectorManagement = ({ + gmailPublicCredential: gmailPublicCredential, + gmailServiceAccountCredential: gmailServiceAccountCredential, + gmailConnectorIndexingStatuses: gmailConnectorIndexingStatuses, + setPopup, +}: GmailConnectorManagementProps) => { + const { mutate } = useSWRConfig(); + + const liveCredential = gmailPublicCredential || gmailServiceAccountCredential; + if (!liveCredential) { + return ( + + Please authenticate with Gmail as described in Step 2! Once done with + that, you can then move on to enable this connector. + + ); + } + + return ( +
+ +
+ {gmailConnectorIndexingStatuses.length > 0 ? ( + <> + Checkout the{" "} + + status page + {" "} + for the latest indexing status. We fetch the latest mails from + Gmail every 10 minutes. + + ) : ( +

+ Fill out the form below to create a connector. We will refresh the + latest documents from Gmail every 10 minutes. +

+ )} +
+
+ {gmailConnectorIndexingStatuses.length > 0 && ( + <> +
Existing Connectors:
+ + + + )} + + {gmailConnectorIndexingStatuses.length > 0 && ( +

Add New Connector:

+ )} + + + nameBuilder={gmailConnectorNameBuilder} + source="gmail" + inputType="poll" + formBody={null} + validationSchema={Yup.object().shape({})} + initialValues={{}} + refreshFreq={10 * 60} // 10 minutes + credentialId={liveCredential.id} + /> + +
+ ); +}; + +const Main = () => { + const { + data: appCredentialData, + isLoading: isAppCredentialLoading, + error: isAppCredentialError, + } = useSWR<{ client_id: string }>( + "/api/manage/admin/connector/gmail/app-credential", + fetcher + ); + const { + data: serviceAccountKeyData, + isLoading: isServiceAccountKeyLoading, + error: isServiceAccountKeyError, + } = useSWR<{ service_account_email: string }>( + "/api/manage/admin/connector/gmail/service-account-key", + fetcher + ); + const { + data: connectorIndexingStatuses, + isLoading: isConnectorIndexingStatusesLoading, + error: isConnectorIndexingStatusesError, + } = useSWR[]>( + "/api/manage/admin/connector/indexing-status", + fetcher + ); + const { + data: credentialsData, + isLoading: isCredentialsLoading, + error: isCredentialsError, + refreshCredentials, + } = usePublicCredentials(); + + const { popup, setPopup } = usePopup(); + + if ( + (!appCredentialData && isAppCredentialLoading) || + (!serviceAccountKeyData && isServiceAccountKeyLoading) || + (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || + (!credentialsData && isCredentialsLoading) + ) { + return ( +
+ +
+ ); + } + + if (isCredentialsError || !credentialsData) { + return ( +
+
Failed to load credentials.
+
+ ); + } + + if (isConnectorIndexingStatusesError || !connectorIndexingStatuses) { + return ( +
+
Failed to load connectors.
+
+ ); + } + + if (isAppCredentialError || isServiceAccountKeyError) { + return ( +
+
+ Error loading Gmail app credentials. Contact an administrator. +
+
+ ); + } + + const gmailPublicCredential: Credential | undefined = + credentialsData.find( + (credential) => + credential.credential_json?.gmail_tokens && credential.admin_public + ); + const gmailServiceAccountCredential: + | Credential + | undefined = credentialsData.find( + (credential) => credential.credential_json?.gmail_service_account_key + ); + const gmailConnectorIndexingStatuses: ConnectorIndexingStatus< + GmailConfig, + GmailCredentialJson + >[] = connectorIndexingStatuses.filter( + (connectorIndexingStatus) => + connectorIndexingStatus.connector.source === "gmail" + ); + const gmailConnectorIndexingStatus = gmailConnectorIndexingStatuses[0]; + + const credentialIsLinked = + (gmailConnectorIndexingStatus !== undefined && + gmailPublicCredential !== undefined && + gmailConnectorIndexingStatus.connector.credential_ids.includes( + gmailPublicCredential.id + )) || + (gmailConnectorIndexingStatus !== undefined && + gmailServiceAccountCredential !== undefined && + gmailConnectorIndexingStatus.connector.credential_ids.includes( + gmailServiceAccountCredential.id + )); + + return ( + <> + {popup} + + Step 1: Provide your Credentials + + + + + Step 2: Authenticate with Danswer + + 0} + /> + + + Step 3: Start Indexing! + + + + ); +}; + +export default function Page() { + return ( +
+
+ +
+ + } title="Gmail" /> + +
+
+ ); +}
diff --git a/web/src/app/admin/connectors/gmail/utils.ts b/web/src/app/admin/connectors/gmail/utils.ts
new file mode 100644
index 000000000..e7f8a24b3
--- /dev/null
+++ b/web/src/app/admin/connectors/gmail/utils.ts
@@ -0,0 +1,6 @@
+import { GmailConfig } from "@/lib/types";
+
+/** Name given to every Gmail connector; the form values are not consulted. */
+export const gmailConnectorNameBuilder = (values: GmailConfig) => {
+  return "GmailConnector";
+};
diff --git a/web/src/components/icons/icons.tsx b/web/src/components/icons/icons.tsx index 3d2fe20eb..cdd6c9388 100644 --- a/web/src/components/icons/icons.tsx +++ b/web/src/components/icons/icons.tsx @@ -356,6 +356,20 @@ export const GithubIcon = ({ ); }; +export const GmailIcon = ({ + size = 16, + className = defaultTailwindCSS, +}: IconProps) => { + return ( +
+ Logo +
+ ); +}; + export const GoogleDriveIcon = ({ size = 16, className = defaultTailwindCSS, diff --git a/web/src/lib/constants.ts b/web/src/lib/constants.ts index ab610fa95..ceb3164fc 100644 --- a/web/src/lib/constants.ts +++ b/web/src/lib/constants.ts @@ -4,6 +4,8 @@ export const INTERNAL_URL = process.env.INTERNAL_URL || "http://127.0.0.1:8080"; export const NEXT_PUBLIC_DISABLE_STREAMING = process.env.NEXT_PUBLIC_DISABLE_STREAMING?.toLowerCase() === "true"; +export const GMAIL_AUTH_IS_ADMIN_COOKIE_NAME = "gmail_auth_is_admin"; + export const GOOGLE_DRIVE_AUTH_IS_ADMIN_COOKIE_NAME = "google_drive_auth_is_admin";
diff --git a/web/src/lib/gmail.ts b/web/src/lib/gmail.ts
new file mode 100644
index 000000000..2706361ec
--- /dev/null
+++ b/web/src/lib/gmail.ts
@@ -0,0 +1,53 @@
+import { Credential } from "@/lib/types";
+
+/**
+ * Start the Gmail OAuth flow: create an empty credential, then ask the
+ * backend for the authorization URL tied to that credential.
+ *
+ * @param isAdmin - sent to the backend as the credential's `admin_public` flag.
+ * @returns `[authUrl, ""]` on success, or `[null, errorMessage]` when either
+ *   request returns a non-OK status.
+ */
+export const setupGmailOAuth = async ({
+  isAdmin,
+}: {
+  isAdmin: boolean;
+}): Promise<[string | null, string]> => {
+  // Step 1: create a credential with an empty credential_json.
+  const credentialCreationResponse = await fetch("/api/manage/credential", {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+    },
+    body: JSON.stringify({
+      admin_public: isAdmin,
+      credential_json: {},
+    }),
+  });
+  if (!credentialCreationResponse.ok) {
+    return [
+      null,
+      `Failed to create credential - ${credentialCreationResponse.status}`,
+    ];
+  }
+  const credential =
+    (await credentialCreationResponse.json()) as Credential<{}>;
+
+  // Step 2: request the authorization URL for the credential just created.
+  const authorizationUrlResponse = await fetch(
+    `/api/manage/connector/gmail/authorize/${credential.id}`
+  );
+  if (!authorizationUrlResponse.ok) {
+    // The credential already exists at this point, so report the step that
+    // actually failed instead of repeating the credential-creation message.
+    return [
+      null,
+      `Failed to get authorization URL - ${authorizationUrlResponse.status}`,
+    ];
+  }
+  const authorizationUrlJson = (await authorizationUrlResponse.json()) as {
+    auth_url: string;
+  };
+
+  return [authorizationUrlJson.auth_url, ""];
+};
diff --git a/web/src/lib/sources.ts b/web/src/lib/sources.ts index 2e654a2f5..67b524669 100644 --- a/web/src/lib/sources.ts +++ b/web/src/lib/sources.ts @@ -6,6 +6,7 @@ import { GithubIcon,
 GitlabIcon, GlobeIcon, + GmailIcon, GongIcon, GoogleDriveIcon, GoogleSitesIcon, @@ -51,6 +52,11 @@ const SOURCE_METADATA_MAP: SourceMap = { displayName: "Slack", category: SourceCategory.AppConnection, }, + gmail: { + icon: GmailIcon, + displayName: "Gmail", + category: SourceCategory.AppConnection, + }, google_drive: { icon: GoogleDriveIcon, displayName: "Google Drive",
diff --git a/web/src/lib/types.ts b/web/src/lib/types.ts
index 69d5dfe4c..27de27bd8 100644
--- a/web/src/lib/types.ts
+++ b/web/src/lib/types.ts
@@ -15,6 +15,7 @@ export type ValidSources =
   | "gitlab"
   | "slack"
   | "google_drive"
+  | "gmail"
   | "bookstack"
   | "confluence"
   | "jira"
@@ -91,6 +92,8 @@ export interface GoogleDriveConfig {
   follow_shortcuts?: boolean;
 }
 
+export interface GmailConfig {} // Empty: no Gmail-specific connector settings are defined.
+
 export interface BookstackConfig {}
 
 export interface ConfluenceConfig {
@@ -226,10 +229,19 @@ export interface SlackCredentialJson {
   slack_bot_token: string;
 }
 
+export interface GmailCredentialJson { // credential_json shape for a Gmail credential holding tokens
+  gmail_tokens: string; // presumably the serialized OAuth tokens - confirm against the backend
+}
+
 export interface GoogleDriveCredentialJson {
   google_drive_tokens: string;
 }
 
+export interface GmailServiceAccountCredentialJson { // credential_json shape for a Gmail service-account credential
+  gmail_service_account_key: string; // presumably the service-account key serialized as a string - confirm against the backend
+  gmail_delegated_user: string; // presumably the user the service account acts for - confirm against the backend
+}
+
 export interface GoogleDriveServiceAccountCredentialJson {
   google_drive_service_account_key: string;
   google_drive_delegated_user: string;