reworked drive+confluence frontend and implied backend changes (#3143)

* reworked drive+confluence frontend and implied backend changes

* fixed oauth admin tests

* fixed service account tests

* frontend cleanup

* copy change

* details!

* added key

* so good

* whoops!

* fixed more tests

* has issue with boolean form

* should be done
This commit is contained in:
hagen-danswer 2024-11-15 19:38:30 -08:00 committed by GitHub
parent 259fc049b7
commit 6e83fe3a39
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 1102 additions and 405 deletions

View File

@ -20,6 +20,7 @@ env:
JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
# Google
GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR: ${{ secrets.GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR }}
GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR_TEST_USER_1: ${{ secrets.GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR_TEST_USER_1 }}
GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR: ${{ secrets.GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR }}
GOOGLE_GMAIL_SERVICE_ACCOUNT_JSON_STR: ${{ secrets.GOOGLE_GMAIL_SERVICE_ACCOUNT_JSON_STR }}
GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR: ${{ secrets.GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR }}

View File

@ -81,15 +81,15 @@ class ConfluenceConnector(LoadConnector, PollConnector, SlimConnector):
if cql_query:
# if a cql_query is provided, we will use it to fetch the pages
cql_page_query = cql_query
elif space:
# if no cql_query is provided, we will use the space to fetch the pages
cql_page_query += f" and space='{quote(space)}'"
elif page_id:
# if a cql_query is not provided, we will use the page_id to fetch the page
if index_recursively:
cql_page_query += f" and ancestor='{page_id}'"
else:
# if neither a space nor a cql_query is provided, we will use the page_id to fetch the page
cql_page_query += f" and id='{page_id}'"
elif space:
# if no cql_query or page_id is provided, we will use the space to fetch the pages
cql_page_query += f" and space='{quote(space)}'"
self.cql_page_query = cql_page_query
self.cql_time_filter = ""

View File

@ -15,6 +15,7 @@ from danswer.connectors.google_drive.doc_conversion import (
convert_drive_item_to_document,
)
from danswer.connectors.google_drive.file_retrieval import crawl_folders_for_files
from danswer.connectors.google_drive.file_retrieval import get_all_files_for_oauth
from danswer.connectors.google_drive.file_retrieval import get_all_files_in_my_drive
from danswer.connectors.google_drive.file_retrieval import get_files_in_shared_drive
from danswer.connectors.google_drive.models import GoogleDriveFileType
@ -82,12 +83,31 @@ def _process_files_batch(
yield doc_batch
def _clean_requested_drive_ids(
requested_drive_ids: set[str],
requested_folder_ids: set[str],
all_drive_ids_available: set[str],
) -> tuple[set[str], set[str]]:
invalid_requested_drive_ids = requested_drive_ids - all_drive_ids_available
filtered_folder_ids = requested_folder_ids - all_drive_ids_available
if invalid_requested_drive_ids:
logger.warning(
f"Some shared drive IDs were not found. IDs: {invalid_requested_drive_ids}"
)
logger.warning("Checking for folder access instead...")
filtered_folder_ids.update(invalid_requested_drive_ids)
valid_requested_drive_ids = requested_drive_ids - invalid_requested_drive_ids
return valid_requested_drive_ids, filtered_folder_ids
class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):
def __init__(
self,
include_shared_drives: bool = True,
include_shared_drives: bool = False,
include_my_drives: bool = False,
include_files_shared_with_me: bool = False,
shared_drive_urls: str | None = None,
include_my_drives: bool = True,
my_drive_emails: str | None = None,
shared_folder_urls: str | None = None,
batch_size: int = INDEX_BATCH_SIZE,
@ -120,22 +140,36 @@ class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):
if (
not include_shared_drives
and not include_my_drives
and not include_files_shared_with_me
and not shared_folder_urls
and not my_drive_emails
and not shared_drive_urls
):
raise ValueError(
"At least one of include_shared_drives, include_my_drives,"
" or shared_folder_urls must be true"
"Nothing to index. Please specify at least one of the following: "
"include_shared_drives, include_my_drives, include_files_shared_with_me, "
"shared_folder_urls, or my_drive_emails"
)
self.batch_size = batch_size
self.include_shared_drives = include_shared_drives
specific_requests_made = False
if bool(shared_drive_urls) or bool(my_drive_emails) or bool(shared_folder_urls):
specific_requests_made = True
self.include_files_shared_with_me = (
False if specific_requests_made else include_files_shared_with_me
)
self.include_my_drives = False if specific_requests_made else include_my_drives
self.include_shared_drives = (
False if specific_requests_made else include_shared_drives
)
shared_drive_url_list = _extract_str_list_from_comma_str(shared_drive_urls)
self._requested_shared_drive_ids = set(
_extract_ids_from_urls(shared_drive_url_list)
)
self.include_my_drives = include_my_drives
self._requested_my_drive_emails = set(
_extract_str_list_from_comma_str(my_drive_emails)
)
@ -225,26 +259,20 @@ class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):
creds=self.creds,
user_email=self.primary_admin_email,
)
is_service_account = isinstance(self.creds, ServiceAccountCredentials)
all_drive_ids = set()
# We don't want to fail if we're using OAuth because you can
# access your my drive as a non admin user in an org still
ignore_fetch_failure = isinstance(self.creds, OAuthCredentials)
for drive in execute_paginated_retrieval(
retrieval_function=primary_drive_service.drives().list,
list_key="drives",
continue_on_404_or_403=ignore_fetch_failure,
useDomainAdminAccess=True,
useDomainAdminAccess=is_service_account,
fields="drives(id)",
):
all_drive_ids.add(drive["id"])
if not all_drive_ids:
logger.warning(
"No drives found. This is likely because oauth user "
"is not an admin and cannot view all drive IDs. "
"Continuing with only the shared drive IDs specified in the config."
"No drives found even though we are indexing shared drives was requested."
)
all_drive_ids = set(self._requested_shared_drive_ids)
return all_drive_ids
@ -261,14 +289,9 @@ class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):
# if we are including my drives, try to get the current user's my
# drive if any of the following are true:
# - no specific emails were requested
# - include_my_drives is true
# - the current user's email is in the requested emails
# - we are using OAuth (in which case we assume that is the only email we will try)
if self.include_my_drives and (
not self._requested_my_drive_emails
or user_email in self._requested_my_drive_emails
or isinstance(self.creds, OAuthCredentials)
):
if self.include_my_drives or user_email in self._requested_my_drive_emails:
yield from get_all_files_in_my_drive(
service=drive_service,
update_traversed_ids_func=self._update_traversed_parent_ids,
@ -299,7 +322,7 @@ class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):
end=end,
)
def _fetch_drive_items(
def _manage_service_account_retrieval(
self,
is_slim: bool,
start: SecondsSinceUnixEpoch | None = None,
@ -309,29 +332,16 @@ class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):
all_drive_ids: set[str] = self._get_all_drive_ids()
# remove drive ids from the folder ids because they are queried differently
filtered_folder_ids = self._requested_folder_ids - all_drive_ids
# Remove drive_ids that are not in the all_drive_ids and check them as folders instead
invalid_drive_ids = self._requested_shared_drive_ids - all_drive_ids
if invalid_drive_ids:
logger.warning(
f"Some shared drive IDs were not found. IDs: {invalid_drive_ids}"
drive_ids_to_retrieve: set[str] = set()
folder_ids_to_retrieve: set[str] = set()
if self._requested_shared_drive_ids or self._requested_folder_ids:
drive_ids_to_retrieve, folder_ids_to_retrieve = _clean_requested_drive_ids(
requested_drive_ids=self._requested_shared_drive_ids,
requested_folder_ids=self._requested_folder_ids,
all_drive_ids_available=all_drive_ids,
)
logger.warning("Checking for folder access instead...")
filtered_folder_ids.update(invalid_drive_ids)
# If including shared drives, use the requested IDs if provided,
# otherwise use all drive IDs
filtered_drive_ids = set()
if self.include_shared_drives:
if self._requested_shared_drive_ids:
# Remove invalid drive IDs from requested IDs
filtered_drive_ids = (
self._requested_shared_drive_ids - invalid_drive_ids
)
else:
filtered_drive_ids = all_drive_ids
elif self.include_shared_drives:
drive_ids_to_retrieve = all_drive_ids
# Process users in parallel using ThreadPoolExecutor
with ThreadPoolExecutor(max_workers=10) as executor:
@ -340,8 +350,8 @@ class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):
self._impersonate_user_for_retrieval,
email,
is_slim,
filtered_drive_ids,
filtered_folder_ids,
drive_ids_to_retrieve,
folder_ids_to_retrieve,
start,
end,
): email
@ -353,13 +363,101 @@ class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):
yield from future.result()
remaining_folders = (
filtered_drive_ids | filtered_folder_ids
drive_ids_to_retrieve | folder_ids_to_retrieve
) - self._retrieved_ids
if remaining_folders:
logger.warning(
f"Some folders/drives were not retrieved. IDs: {remaining_folders}"
)
def _manage_oauth_retrieval(
    self,
    is_slim: bool,
    start: SecondsSinceUnixEpoch | None = None,
    end: SecondsSinceUnixEpoch | None = None,
) -> Iterator[GoogleDriveFileType]:
    """Retrieve Drive files when the connector is authenticated via OAuth.

    Order of operations:
    1. Run one combined query covering "shared with me" and/or "My Drive"
       files (and shared drives too, when all three include flags are set).
    2. If that single query already covered everything, stop.
    3. Otherwise fetch the requested (or all visible) shared drives, then
       crawl any remaining requested folders.
    """
    drive_service = get_drive_service(self.creds, self.primary_admin_email)
    if self.include_files_shared_with_me or self.include_my_drives:
        # Single query that can cover my-drive and shared-with-me files in
        # one pass; it also includes shared drives when all flags are set.
        yield from get_all_files_for_oauth(
            service=drive_service,
            include_files_shared_with_me=self.include_files_shared_with_me,
            include_my_drives=self.include_my_drives,
            include_shared_drives=self.include_shared_drives,
            is_slim=is_slim,
            start=start,
            end=end,
        )
    all_requested = (
        self.include_files_shared_with_me
        and self.include_my_drives
        and self.include_shared_drives
    )
    if all_requested:
        # If all 3 are true, we already yielded from get_all_files_for_oauth
        return
    all_drive_ids = self._get_all_drive_ids()
    drive_ids_to_retrieve: set[str] = set()
    folder_ids_to_retrieve: set[str] = set()
    if self._requested_shared_drive_ids or self._requested_folder_ids:
        # Specific drives/folders were requested: drop unknown drive IDs and
        # reclassify them as folders to try instead.
        drive_ids_to_retrieve, folder_ids_to_retrieve = _clean_requested_drive_ids(
            requested_drive_ids=self._requested_shared_drive_ids,
            requested_folder_ids=self._requested_folder_ids,
            all_drive_ids_available=all_drive_ids,
        )
    elif self.include_shared_drives:
        # No specific requests were made: index every shared drive we found.
        drive_ids_to_retrieve = all_drive_ids
    for drive_id in drive_ids_to_retrieve:
        yield from get_files_in_shared_drive(
            service=drive_service,
            drive_id=drive_id,
            is_slim=is_slim,
            update_traversed_ids_func=self._update_traversed_parent_ids,
            start=start,
            end=end,
        )
    # Even if no folders were requested, we still check if any drives were requested
    # that could be folders.
    remaining_folders = folder_ids_to_retrieve - self._retrieved_ids
    for folder_id in remaining_folders:
        yield from crawl_folders_for_files(
            service=drive_service,
            parent_id=folder_id,
            traversed_parent_ids=self._retrieved_ids,
            update_traversed_ids_func=self._update_traversed_parent_ids,
            start=start,
            end=end,
        )
    # Anything requested but never marked as retrieved is reported, not fatal.
    remaining_folders = (
        drive_ids_to_retrieve | folder_ids_to_retrieve
    ) - self._retrieved_ids
    if remaining_folders:
        logger.warning(
            f"Some folders/drives were not retrieved. IDs: {remaining_folders}"
        )
def _fetch_drive_items(
    self,
    is_slim: bool,
    start: SecondsSinceUnixEpoch | None = None,
    end: SecondsSinceUnixEpoch | None = None,
) -> Iterator[GoogleDriveFileType]:
    """Dispatch file retrieval based on the credential type.

    Service-account credentials can impersonate users and enumerate drives
    domain-wide; OAuth credentials act as a single user, so each path needs
    its own retrieval strategy.
    """
    if isinstance(self.creds, ServiceAccountCredentials):
        retrieval_method = self._manage_service_account_retrieval
    else:
        retrieval_method = self._manage_oauth_retrieval

    return retrieval_method(is_slim=is_slim, start=start, end=end)
def _extract_docs_from_google_drive(
self,
start: SecondsSinceUnixEpoch | None = None,

View File

@ -140,8 +140,8 @@ def get_files_in_shared_drive(
) -> Iterator[GoogleDriveFileType]:
# If we know we are going to folder crawl later, we can cache the folders here
# Get all folders being queried and add them to the traversed set
query = f"mimeType = '{DRIVE_FOLDER_TYPE}'"
query += " and trashed = false"
folder_query = f"mimeType = '{DRIVE_FOLDER_TYPE}'"
folder_query += " and trashed = false"
found_folders = False
for file in execute_paginated_retrieval(
retrieval_function=service.files().list,
@ -152,7 +152,7 @@ def get_files_in_shared_drive(
supportsAllDrives=True,
includeItemsFromAllDrives=True,
fields="nextPageToken, files(id)",
q=query,
q=folder_query,
):
update_traversed_ids_func(file["id"])
found_folders = True
@ -160,9 +160,9 @@ def get_files_in_shared_drive(
update_traversed_ids_func(drive_id)
# Get all files in the shared drive
query = f"mimeType != '{DRIVE_FOLDER_TYPE}'"
query += " and trashed = false"
query += _generate_time_range_filter(start, end)
file_query = f"mimeType != '{DRIVE_FOLDER_TYPE}'"
file_query += " and trashed = false"
file_query += _generate_time_range_filter(start, end)
yield from execute_paginated_retrieval(
retrieval_function=service.files().list,
list_key="files",
@ -172,7 +172,7 @@ def get_files_in_shared_drive(
supportsAllDrives=True,
includeItemsFromAllDrives=True,
fields=SLIM_FILE_FIELDS if is_slim else FILE_FIELDS,
q=query,
q=file_query,
)
@ -185,14 +185,16 @@ def get_all_files_in_my_drive(
) -> Iterator[GoogleDriveFileType]:
# If we know we are going to folder crawl later, we can cache the folders here
# Get all folders being queried and add them to the traversed set
query = "trashed = false and 'me' in owners"
folder_query = f"mimeType = '{DRIVE_FOLDER_TYPE}'"
folder_query += " and trashed = false"
folder_query += " and 'me' in owners"
found_folders = False
for file in execute_paginated_retrieval(
retrieval_function=service.files().list,
list_key="files",
corpora="user",
fields=SLIM_FILE_FIELDS if is_slim else FILE_FIELDS,
q=query,
q=folder_query,
):
update_traversed_ids_func(file["id"])
found_folders = True
@ -200,18 +202,52 @@ def get_all_files_in_my_drive(
update_traversed_ids_func(get_root_folder_id(service))
# Then get the files
query = "trashed = false and 'me' in owners"
query += _generate_time_range_filter(start, end)
fields = "files(id, name, mimeType, webViewLink, modifiedTime, createdTime)"
if not is_slim:
fields += ", files(permissions, permissionIds, owners)"
file_query = f"mimeType != '{DRIVE_FOLDER_TYPE}'"
file_query += " and trashed = false"
file_query += " and 'me' in owners"
file_query += _generate_time_range_filter(start, end)
yield from execute_paginated_retrieval(
retrieval_function=service.files().list,
list_key="files",
corpora="user",
fields=SLIM_FILE_FIELDS if is_slim else FILE_FIELDS,
q=query,
q=file_query,
)
def get_all_files_for_oauth(
    service: Any,
    include_files_shared_with_me: bool,
    include_my_drives: bool,
    # One of the above 2 should be true
    include_shared_drives: bool,
    is_slim: bool = False,
    start: SecondsSinceUnixEpoch | None = None,
    end: SecondsSinceUnixEpoch | None = None,
) -> Iterator[GoogleDriveFileType]:
    """Yield non-folder files visible to an OAuth user in a single query.

    When all three include flags are set, a single all-drives query fetches
    everything at once; otherwise the query is restricted to the user corpus
    and optionally narrowed by ownership.
    """
    fetch_everything = (
        include_shared_drives and include_my_drives and include_files_shared_with_me
    )

    query_parts = [
        f"mimeType != '{DRIVE_FOLDER_TYPE}'",
        " and trashed = false",
        _generate_time_range_filter(start, end),
    ]
    if not fetch_everything:
        # Narrow by ownership only when a subset of sources was requested.
        if include_files_shared_with_me and not include_my_drives:
            query_parts.append(" and not 'me' in owners")
        elif include_my_drives and not include_files_shared_with_me:
            query_parts.append(" and 'me' in owners")

    yield from execute_paginated_retrieval(
        retrieval_function=service.files().list,
        list_key="files",
        corpora="allDrives" if fetch_everything else "user",
        includeItemsFromAllDrives=fetch_everything,
        supportsAllDrives=fetch_everything,
        fields=SLIM_FILE_FIELDS if is_slim else FILE_FIELDS,
        q="".join(query_parts),
    )

View File

@ -21,6 +21,16 @@ from tests.load_env_vars import load_env_vars
load_env_vars()
_USER_TO_OAUTH_CREDENTIALS_MAP = {
"admin@onyx-test.com": "GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR",
"test_user_1@onyx-test.com": "GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR_TEST_USER_1",
}
_USER_TO_SERVICE_ACCOUNT_CREDENTIALS_MAP = {
"admin@onyx-test.com": "GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR",
}
def parse_credentials(env_str: str) -> dict:
"""
Parse a double-escaped JSON string from environment variables into a Python dictionary.
@ -48,23 +58,25 @@ def parse_credentials(env_str: str) -> dict:
@pytest.fixture
def google_drive_oauth_connector_factory() -> Callable[..., GoogleDriveConnector]:
def _connector_factory(
primary_admin_email: str = "admin@onyx-test.com",
include_shared_drives: bool = True,
shared_drive_urls: str | None = None,
include_my_drives: bool = True,
my_drive_emails: str | None = None,
shared_folder_urls: str | None = None,
primary_admin_email: str,
include_shared_drives: bool,
shared_drive_urls: str | None,
include_my_drives: bool,
my_drive_emails: str | None,
shared_folder_urls: str | None,
include_files_shared_with_me: bool,
) -> GoogleDriveConnector:
print("Creating GoogleDriveConnector with OAuth credentials")
connector = GoogleDriveConnector(
include_shared_drives=include_shared_drives,
shared_drive_urls=shared_drive_urls,
include_my_drives=include_my_drives,
include_files_shared_with_me=include_files_shared_with_me,
my_drive_emails=my_drive_emails,
shared_folder_urls=shared_folder_urls,
)
json_string = os.environ["GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR"]
json_string = os.environ[_USER_TO_OAUTH_CREDENTIALS_MAP[primary_admin_email]]
refried_json_string = json.dumps(parse_credentials(json_string))
credentials_json = {
@ -82,12 +94,13 @@ def google_drive_service_acct_connector_factory() -> (
Callable[..., GoogleDriveConnector]
):
def _connector_factory(
primary_admin_email: str = "admin@onyx-test.com",
include_shared_drives: bool = True,
shared_drive_urls: str | None = None,
include_my_drives: bool = True,
my_drive_emails: str | None = None,
shared_folder_urls: str | None = None,
primary_admin_email: str,
include_shared_drives: bool,
shared_drive_urls: str | None,
include_my_drives: bool,
my_drive_emails: str | None,
shared_folder_urls: str | None,
include_files_shared_with_me: bool,
) -> GoogleDriveConnector:
print("Creating GoogleDriveConnector with service account credentials")
connector = GoogleDriveConnector(
@ -96,9 +109,12 @@ def google_drive_service_acct_connector_factory() -> (
include_my_drives=include_my_drives,
my_drive_emails=my_drive_emails,
shared_folder_urls=shared_folder_urls,
include_files_shared_with_me=include_files_shared_with_me,
)
json_string = os.environ["GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR"]
json_string = os.environ[
_USER_TO_SERVICE_ACCOUNT_CREDENTIALS_MAP[primary_admin_email]
]
refried_json_string = json.dumps(parse_credentials(json_string))
# Load Service Account Credentials

View File

@ -7,14 +7,14 @@ SHARED_DRIVE_FILES = list(range(20, 25))
ADMIN_FILE_IDS = list(range(0, 5))
ADMIN_FOLDER_3_FILE_IDS = list(range(65, 70))
ADMIN_FOLDER_3_FILE_IDS = list(range(65, 70)) # This folder is shared with test_user_1
TEST_USER_1_FILE_IDS = list(range(5, 10))
TEST_USER_2_FILE_IDS = list(range(10, 15))
TEST_USER_3_FILE_IDS = list(range(15, 20))
SHARED_DRIVE_1_FILE_IDS = list(range(20, 25))
FOLDER_1_FILE_IDS = list(range(25, 30))
FOLDER_1_1_FILE_IDS = list(range(30, 35))
FOLDER_1_2_FILE_IDS = list(range(35, 40))
FOLDER_1_2_FILE_IDS = list(range(35, 40)) # This folder is public
SHARED_DRIVE_2_FILE_IDS = list(range(40, 45))
FOLDER_2_FILE_IDS = list(range(45, 50))
FOLDER_2_1_FILE_IDS = list(range(50, 55))
@ -75,26 +75,29 @@ ACCESS_MAPPING: dict[str, list[int]] = {
+ FOLDER_2_2_FILE_IDS
+ SECTIONS_FILE_IDS
),
# This user has access to drive 1
# This user has redundant access to folder 1 because of group access
# This user has been given individual access to files in Admin's My Drive
TEST_USER_1_EMAIL: (
TEST_USER_1_FILE_IDS
# This user has access to drive 1
+ SHARED_DRIVE_1_FILE_IDS
# This user has redundant access to folder 1 because of group access
+ FOLDER_1_FILE_IDS
+ FOLDER_1_1_FILE_IDS
+ FOLDER_1_2_FILE_IDS
# This user has been given shared access to folder 3 in Admin's My Drive
+ ADMIN_FOLDER_3_FILE_IDS
# This user has been given shared access to files 0 and 1 in Admin's My Drive
+ list(range(0, 2))
),
# Group 1 includes this user, giving access to folder 1
# This user has also been given access to folder 2-1
# This user has also been given individual access to files in folder 2
TEST_USER_2_EMAIL: (
TEST_USER_2_FILE_IDS
# Group 1 includes this user, giving access to folder 1
+ FOLDER_1_FILE_IDS
+ FOLDER_1_1_FILE_IDS
# This folder is public
+ FOLDER_1_2_FILE_IDS
# Folder 2-1 is shared with this user
+ FOLDER_2_1_FILE_IDS
# This user has been given shared access to files 45 and 46 in folder 2
+ list(range(45, 47))
),
# This user can only see his own files and public files

View File

@ -5,6 +5,7 @@ from unittest.mock import patch
from danswer.connectors.google_drive.connector import GoogleDriveConnector
from danswer.connectors.models import Document
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_EMAIL
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FOLDER_3_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import (
@ -26,8 +27,6 @@ from tests.daily.connectors.google_drive.consts_and_utils import SECTIONS_FILE_I
from tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_1_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_1_URL
from tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_2_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_1_EMAIL
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_3_EMAIL
@patch(
@ -40,8 +39,13 @@ def test_include_all(
) -> None:
print("\n\nRunning test_include_all")
connector = google_drive_oauth_connector_factory(
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=True,
include_my_drives=True,
include_files_shared_with_me=False,
shared_folder_urls=None,
my_drive_emails=None,
shared_drive_urls=None,
)
retrieved_docs: list[Document] = []
for doc_batch in connector.poll_source(0, time.time()):
@ -77,8 +81,13 @@ def test_include_shared_drives_only(
) -> None:
print("\n\nRunning test_include_shared_drives_only")
connector = google_drive_oauth_connector_factory(
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=True,
include_my_drives=False,
include_files_shared_with_me=False,
shared_folder_urls=None,
my_drive_emails=None,
shared_drive_urls=None,
)
retrieved_docs: list[Document] = []
for doc_batch in connector.poll_source(0, time.time()):
@ -112,8 +121,13 @@ def test_include_my_drives_only(
) -> None:
print("\n\nRunning test_include_my_drives_only")
connector = google_drive_oauth_connector_factory(
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=False,
include_my_drives=True,
include_files_shared_with_me=False,
shared_folder_urls=None,
my_drive_emails=None,
shared_drive_urls=None,
)
retrieved_docs: list[Document] = []
for doc_batch in connector.poll_source(0, time.time()):
@ -138,8 +152,12 @@ def test_drive_one_only(
print("\n\nRunning test_drive_one_only")
drive_urls = [SHARED_DRIVE_1_URL]
connector = google_drive_oauth_connector_factory(
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=True,
include_my_drives=False,
include_files_shared_with_me=False,
shared_folder_urls=None,
my_drive_emails=None,
shared_drive_urls=",".join([str(url) for url in drive_urls]),
)
retrieved_docs: list[Document] = []
@ -170,19 +188,20 @@ def test_folder_and_shared_drive(
drive_urls = [SHARED_DRIVE_1_URL]
folder_urls = [FOLDER_2_URL]
connector = google_drive_oauth_connector_factory(
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=True,
include_my_drives=True,
shared_drive_urls=",".join([str(url) for url in drive_urls]),
include_my_drives=False,
include_files_shared_with_me=False,
shared_folder_urls=",".join([str(url) for url in folder_urls]),
my_drive_emails=None,
shared_drive_urls=",".join([str(url) for url in drive_urls]),
)
retrieved_docs: list[Document] = []
for doc_batch in connector.poll_source(0, time.time()):
retrieved_docs.extend(doc_batch)
expected_file_ids = (
ADMIN_FILE_IDS
+ ADMIN_FOLDER_3_FILE_IDS
+ SHARED_DRIVE_1_FILE_IDS
SHARED_DRIVE_1_FILE_IDS
+ FOLDER_1_FILE_IDS
+ FOLDER_1_1_FILE_IDS
+ FOLDER_1_2_FILE_IDS
@ -216,10 +235,13 @@ def test_folders_only(
FOLDER_1_1_URL,
]
connector = google_drive_oauth_connector_factory(
include_shared_drives=False,
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=True,
include_my_drives=False,
shared_drive_urls=",".join([str(url) for url in shared_drive_urls]),
include_files_shared_with_me=False,
shared_folder_urls=",".join([str(url) for url in folder_urls]),
my_drive_emails=None,
shared_drive_urls=",".join([str(url) for url in shared_drive_urls]),
)
retrieved_docs: list[Document] = []
for doc_batch in connector.poll_source(0, time.time()):
@ -238,37 +260,6 @@ def test_folders_only(
)
@patch(
"danswer.file_processing.extract_file_text.get_unstructured_api_key",
return_value=None,
)
def test_specific_emails(
mock_get_api_key: MagicMock,
google_drive_oauth_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
print("\n\nRunning test_specific_emails")
my_drive_emails = [
TEST_USER_1_EMAIL,
TEST_USER_3_EMAIL,
]
connector = google_drive_oauth_connector_factory(
include_shared_drives=False,
include_my_drives=True,
my_drive_emails=",".join([str(email) for email in my_drive_emails]),
)
retrieved_docs: list[Document] = []
for doc_batch in connector.poll_source(0, time.time()):
retrieved_docs.extend(doc_batch)
# No matter who is specified, when using oauth, if include_my_drives is True,
# we will get all the files from the admin's My Drive
expected_file_ids = ADMIN_FILE_IDS + ADMIN_FOLDER_3_FILE_IDS
assert_retrieved_docs_match_expected(
retrieved_docs=retrieved_docs,
expected_file_ids=expected_file_ids,
)
@patch(
"danswer.file_processing.extract_file_text.get_unstructured_api_key",
return_value=None,
@ -282,9 +273,13 @@ def test_personal_folders_only(
FOLDER_3_URL,
]
connector = google_drive_oauth_connector_factory(
include_shared_drives=False,
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=True,
include_my_drives=False,
include_files_shared_with_me=False,
shared_folder_urls=",".join([str(url) for url in folder_urls]),
my_drive_emails=None,
shared_drive_urls=None,
)
retrieved_docs: list[Document] = []
for doc_batch in connector.poll_source(0, time.time()):

View File

@ -5,6 +5,7 @@ from unittest.mock import patch
from danswer.connectors.google_drive.connector import GoogleDriveConnector
from danswer.connectors.models import Document
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_EMAIL
from tests.daily.connectors.google_drive.consts_and_utils import SECTIONS_FOLDER_URL
@ -18,14 +19,22 @@ def test_google_drive_sections(
google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
oauth_connector = google_drive_oauth_connector_factory(
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=False,
include_my_drives=False,
include_files_shared_with_me=False,
shared_folder_urls=SECTIONS_FOLDER_URL,
shared_drive_urls=None,
my_drive_emails=None,
)
service_acct_connector = google_drive_service_acct_connector_factory(
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=False,
include_my_drives=False,
include_files_shared_with_me=False,
shared_folder_urls=SECTIONS_FOLDER_URL,
shared_drive_urls=None,
my_drive_emails=None,
)
for connector in [oauth_connector, service_acct_connector]:
retrieved_docs: list[Document] = []

View File

@ -5,6 +5,7 @@ from unittest.mock import patch
from danswer.connectors.google_drive.connector import GoogleDriveConnector
from danswer.connectors.models import Document
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_EMAIL
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FOLDER_3_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import (
@ -43,8 +44,13 @@ def test_include_all(
) -> None:
print("\n\nRunning test_include_all")
connector = google_drive_service_acct_connector_factory(
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=True,
include_my_drives=True,
include_files_shared_with_me=False,
shared_folder_urls=None,
shared_drive_urls=None,
my_drive_emails=None,
)
retrieved_docs: list[Document] = []
for doc_batch in connector.poll_source(0, time.time()):
@ -83,8 +89,13 @@ def test_include_shared_drives_only(
) -> None:
print("\n\nRunning test_include_shared_drives_only")
connector = google_drive_service_acct_connector_factory(
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=True,
include_my_drives=False,
include_files_shared_with_me=False,
shared_folder_urls=None,
shared_drive_urls=None,
my_drive_emails=None,
)
retrieved_docs: list[Document] = []
for doc_batch in connector.poll_source(0, time.time()):
@ -118,8 +129,13 @@ def test_include_my_drives_only(
) -> None:
print("\n\nRunning test_include_my_drives_only")
connector = google_drive_service_acct_connector_factory(
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=False,
include_my_drives=True,
include_files_shared_with_me=False,
shared_folder_urls=None,
shared_drive_urls=None,
my_drive_emails=None,
)
retrieved_docs: list[Document] = []
for doc_batch in connector.poll_source(0, time.time()):
@ -150,9 +166,13 @@ def test_drive_one_only(
print("\n\nRunning test_drive_one_only")
urls = [SHARED_DRIVE_1_URL]
connector = google_drive_service_acct_connector_factory(
include_shared_drives=True,
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=False,
include_my_drives=False,
include_files_shared_with_me=False,
shared_folder_urls=None,
shared_drive_urls=",".join([str(url) for url in urls]),
my_drive_emails=None,
)
retrieved_docs: list[Document] = []
for doc_batch in connector.poll_source(0, time.time()):
@ -183,10 +203,13 @@ def test_folder_and_shared_drive(
drive_urls = [SHARED_DRIVE_1_URL]
folder_urls = [FOLDER_2_URL]
connector = google_drive_service_acct_connector_factory(
include_shared_drives=True,
include_my_drives=True,
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=False,
include_my_drives=False,
include_files_shared_with_me=False,
shared_drive_urls=",".join([str(url) for url in drive_urls]),
shared_folder_urls=",".join([str(url) for url in folder_urls]),
my_drive_emails=None,
)
retrieved_docs: list[Document] = []
for doc_batch in connector.poll_source(0, time.time()):
@ -194,12 +217,7 @@ def test_folder_and_shared_drive(
# Should get everything except for the top level files in drive 2
expected_file_ids = (
ADMIN_FILE_IDS
+ ADMIN_FOLDER_3_FILE_IDS
+ TEST_USER_1_FILE_IDS
+ TEST_USER_2_FILE_IDS
+ TEST_USER_3_FILE_IDS
+ SHARED_DRIVE_1_FILE_IDS
SHARED_DRIVE_1_FILE_IDS
+ FOLDER_1_FILE_IDS
+ FOLDER_1_1_FILE_IDS
+ FOLDER_1_2_FILE_IDS
@ -233,10 +251,13 @@ def test_folders_only(
FOLDER_1_1_URL,
]
connector = google_drive_service_acct_connector_factory(
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=False,
include_my_drives=False,
include_files_shared_with_me=False,
shared_drive_urls=",".join([str(url) for url in shared_drive_urls]),
shared_folder_urls=",".join([str(url) for url in folder_urls]),
my_drive_emails=None,
)
retrieved_docs: list[Document] = []
for doc_batch in connector.poll_source(0, time.time()):
@ -269,8 +290,12 @@ def test_specific_emails(
TEST_USER_3_EMAIL,
]
connector = google_drive_service_acct_connector_factory(
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=False,
include_my_drives=True,
include_my_drives=False,
include_files_shared_with_me=False,
shared_folder_urls=None,
shared_drive_urls=None,
my_drive_emails=",".join([str(email) for email in my_drive_emails]),
)
retrieved_docs: list[Document] = []
@ -293,42 +318,17 @@ def get_specific_folders_in_my_drive(
google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
print("\n\nRunning get_specific_folders_in_my_drive")
my_drive_emails = [
TEST_USER_1_EMAIL,
TEST_USER_3_EMAIL,
]
connector = google_drive_service_acct_connector_factory(
include_shared_drives=False,
include_my_drives=True,
my_drive_emails=",".join([str(email) for email in my_drive_emails]),
)
retrieved_docs: list[Document] = []
for doc_batch in connector.poll_source(0, time.time()):
retrieved_docs.extend(doc_batch)
expected_file_ids = TEST_USER_1_FILE_IDS + TEST_USER_3_FILE_IDS
assert_retrieved_docs_match_expected(
retrieved_docs=retrieved_docs,
expected_file_ids=expected_file_ids,
)
@patch(
"danswer.file_processing.extract_file_text.get_unstructured_api_key",
return_value=None,
)
def test_personal_folders_only(
mock_get_api_key: MagicMock,
google_drive_oauth_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
print("\n\nRunning test_personal_folders_only")
folder_urls = [
FOLDER_3_URL,
]
connector = google_drive_oauth_connector_factory(
connector = google_drive_service_acct_connector_factory(
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=False,
include_my_drives=False,
include_files_shared_with_me=False,
shared_folder_urls=",".join([str(url) for url in folder_urls]),
shared_drive_urls=None,
my_drive_emails=None,
)
retrieved_docs: list[Document] = []
for doc_batch in connector.poll_source(0, time.time()):

View File

@ -126,8 +126,13 @@ def test_all_permissions(
google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
google_drive_connector = google_drive_service_acct_connector_factory(
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=True,
include_my_drives=True,
include_files_shared_with_me=False,
shared_folder_urls=None,
shared_drive_urls=None,
my_drive_emails=None,
)
access_map: dict[str, ExternalAccess] = {}

View File

@ -0,0 +1,218 @@
import time
from collections.abc import Callable
from unittest.mock import MagicMock
from unittest.mock import patch
from danswer.connectors.google_drive.connector import GoogleDriveConnector
from danswer.connectors.models import Document
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FOLDER_3_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import (
assert_retrieved_docs_match_expected,
)
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_1_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_2_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_URL
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_3_URL
from tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_1_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_1_EMAIL
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_1_FILE_IDS
@patch(
    "danswer.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_all(
    mock_get_api_key: MagicMock,
    google_drive_oauth_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    """With every inclusion flag enabled, the OAuth connector should surface
    the user's My Drive files, all shared-drive content, and the files the
    admin shared with the user.
    """
    print("\n\nRunning test_all")
    connector = google_drive_oauth_connector_factory(
        primary_admin_email=TEST_USER_1_EMAIL,
        include_files_shared_with_me=True,
        include_shared_drives=True,
        include_my_drives=True,
        shared_folder_urls=None,
        shared_drive_urls=None,
        my_drive_emails=None,
    )
    # Drain every batch the connector yields into a single flat list.
    docs: list[Document] = [
        doc
        for batch in connector.poll_source(0, time.time())
        for doc in batch
    ]
    # Expected: My Drive files, then shared-drive content, then the
    # admin-shared folder plus its two loose shared files (ids 0 and 1).
    expected_ids = list(TEST_USER_1_FILE_IDS)
    expected_ids += SHARED_DRIVE_1_FILE_IDS
    expected_ids += FOLDER_1_FILE_IDS
    expected_ids += FOLDER_1_1_FILE_IDS
    expected_ids += FOLDER_1_2_FILE_IDS
    expected_ids += ADMIN_FOLDER_3_FILE_IDS
    expected_ids += list(range(0, 2))
    assert_retrieved_docs_match_expected(
        retrieved_docs=docs,
        expected_file_ids=expected_ids,
    )
@patch(
    "danswer.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_shared_drives_only(
    mock_get_api_key: MagicMock,
    google_drive_oauth_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    """With only shared drives enabled, only shared-drive content (drive 1
    plus all folders nested beneath it) should be retrieved.
    """
    print("\n\nRunning test_shared_drives_only")
    connector = google_drive_oauth_connector_factory(
        primary_admin_email=TEST_USER_1_EMAIL,
        include_files_shared_with_me=False,
        include_shared_drives=True,
        include_my_drives=False,
        shared_folder_urls=None,
        shared_drive_urls=None,
        my_drive_emails=None,
    )
    docs: list[Document] = []
    for batch in connector.poll_source(0, time.time()):
        docs += batch
    # Shared drive 1 and every folder nested under it.
    expected_ids = (
        SHARED_DRIVE_1_FILE_IDS
        + FOLDER_1_FILE_IDS
        + FOLDER_1_1_FILE_IDS
        + FOLDER_1_2_FILE_IDS
    )
    assert_retrieved_docs_match_expected(
        retrieved_docs=docs,
        expected_file_ids=expected_ids,
    )
@patch(
    "danswer.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_shared_with_me_only(
    mock_get_api_key: MagicMock,
    google_drive_oauth_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    """With only "files shared with me" enabled, the connector should return
    just the content the admin shared with the test user.
    """
    print("\n\nRunning test_shared_with_me_only")
    connector = google_drive_oauth_connector_factory(
        primary_admin_email=TEST_USER_1_EMAIL,
        include_files_shared_with_me=True,
        include_shared_drives=False,
        include_my_drives=False,
        shared_folder_urls=None,
        shared_drive_urls=None,
        my_drive_emails=None,
    )
    collected: list[Document] = []
    for chunk in connector.poll_source(0, time.time()):
        collected.extend(chunk)
    # Admin-shared folder 3 plus the two loose shared files (ids 0 and 1).
    expected_ids = [*ADMIN_FOLDER_3_FILE_IDS, *range(0, 2)]
    assert_retrieved_docs_match_expected(
        retrieved_docs=collected,
        expected_file_ids=expected_ids,
    )
@patch(
    "danswer.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_my_drive_only(
    mock_get_api_key: MagicMock,
    google_drive_oauth_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    """With only My Drive enabled, the connector should return exactly the
    test user's own files.
    """
    print("\n\nRunning test_my_drive_only")
    connector = google_drive_oauth_connector_factory(
        primary_admin_email=TEST_USER_1_EMAIL,
        include_files_shared_with_me=False,
        include_shared_drives=False,
        include_my_drives=True,
        shared_folder_urls=None,
        shared_drive_urls=None,
        my_drive_emails=None,
    )
    docs: list[Document] = [
        doc
        for batch in connector.poll_source(0, time.time())
        for doc in batch
    ]
    assert_retrieved_docs_match_expected(
        retrieved_docs=docs,
        expected_file_ids=TEST_USER_1_FILE_IDS,
    )
@patch(
    "danswer.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_shared_my_drive_folder(
    mock_get_api_key: MagicMock,
    google_drive_oauth_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    """Targeting a folder URL that lives in the admin's My Drive (but is
    shared with the test user) should retrieve that folder's files.
    """
    print("\n\nRunning test_shared_my_drive_folder")
    connector = google_drive_oauth_connector_factory(
        primary_admin_email=TEST_USER_1_EMAIL,
        include_files_shared_with_me=False,
        include_shared_drives=False,
        include_my_drives=True,
        shared_folder_urls=FOLDER_3_URL,
        shared_drive_urls=None,
        my_drive_emails=None,
    )
    docs: list[Document] = []
    for batch in connector.poll_source(0, time.time()):
        docs += batch
    # Folder 3 is a folder in the admin's drive shared with the test user.
    assert_retrieved_docs_match_expected(
        retrieved_docs=docs,
        expected_file_ids=ADMIN_FOLDER_3_FILE_IDS,
    )
@patch(
    "danswer.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_shared_drive_folder(
    mock_get_api_key: MagicMock,
    google_drive_oauth_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    """Targeting a folder URL inside a shared drive should retrieve that
    folder's files along with everything in its subfolders.
    """
    print("\n\nRunning test_shared_drive_folder")
    connector = google_drive_oauth_connector_factory(
        primary_admin_email=TEST_USER_1_EMAIL,
        include_files_shared_with_me=False,
        include_shared_drives=False,
        include_my_drives=True,
        shared_folder_urls=FOLDER_1_URL,
        shared_drive_urls=None,
        my_drive_emails=None,
    )
    collected: list[Document] = []
    for chunk in connector.poll_source(0, time.time()):
        collected.extend(chunk)
    # Folder 1 plus both of its nested subfolders.
    expected_ids = [*FOLDER_1_FILE_IDS, *FOLDER_1_1_FILE_IDS, *FOLDER_1_2_FILE_IDS]
    assert_retrieved_docs_match_expected(
        retrieved_docs=collected,
        expected_file_ids=expected_ids,
    )

View File

@ -1,20 +1,13 @@
import React, { Dispatch, FC, SetStateAction, useState } from "react";
import CredentialSubText, {
AdminBooleanFormField,
} from "@/components/credentials/CredentialFields";
import { FileUpload } from "@/components/admin/connectors/FileUpload";
import CredentialSubText from "@/components/credentials/CredentialFields";
import { ConnectionConfiguration } from "@/lib/connectors/connectors";
import SelectInput from "./ConnectorInput/SelectInput";
import NumberInput from "./ConnectorInput/NumberInput";
import { TextFormField } from "@/components/admin/connectors/Field";
import ListInput from "./ConnectorInput/ListInput";
import FileInput from "./ConnectorInput/FileInput";
import { AdvancedOptionsToggle } from "@/components/AdvancedOptionsToggle";
import { AccessTypeForm } from "@/components/admin/connectors/AccessTypeForm";
import { AccessTypeGroupSelector } from "@/components/admin/connectors/AccessTypeGroupSelector";
import { ConfigurableSources } from "@/lib/types";
import { Credential } from "@/lib/connectors/credentials";
import CollapsibleSection from "@/app/admin/assistants/CollapsibleSection";
import { RenderField } from "./FieldRendering";
export interface DynamicConnectionFormProps {
config: ConnectionConfiguration;
@ -25,105 +18,6 @@ export interface DynamicConnectionFormProps {
currentCredential: Credential<any> | null;
}
interface RenderFieldProps {
field: any;
values: any;
selectedFiles: File[];
setSelectedFiles: Dispatch<SetStateAction<File[]>>;
connector: ConfigurableSources;
currentCredential: Credential<any> | null;
}
const RenderField: FC<RenderFieldProps> = ({
field,
values,
selectedFiles,
setSelectedFiles,
connector,
currentCredential,
}) => {
if (
field.visibleCondition &&
!field.visibleCondition(values, currentCredential)
) {
return null;
}
const label =
typeof field.label === "function"
? field.label(currentCredential)
: field.label;
const description =
typeof field.description === "function"
? field.description(currentCredential)
: field.description;
const fieldContent = (
<>
{field.type === "file" ? (
<FileUpload
name={field.name}
selectedFiles={selectedFiles}
setSelectedFiles={setSelectedFiles}
/>
) : field.type === "zip" ? (
<FileInput
name={field.name}
label={label}
optional={field.optional}
description={description}
/>
) : field.type === "list" ? (
<ListInput name={field.name} label={label} description={description} />
) : field.type === "select" ? (
<SelectInput
name={field.name}
optional={field.optional}
description={description}
options={field.options || []}
label={label}
/>
) : field.type === "number" ? (
<NumberInput
label={label}
optional={field.optional}
description={description}
name={field.name}
/>
) : field.type === "checkbox" ? (
<AdminBooleanFormField
checked={values[field.name]}
subtext={description}
name={field.name}
label={label}
/>
) : (
<TextFormField
subtext={description}
optional={field.optional}
type={field.type}
label={label}
name={field.name}
isTextArea={true}
/>
)}
</>
);
if (
field.visibleCondition &&
field.visibleCondition(values, currentCredential)
) {
return (
<CollapsibleSection prompt={label} key={field.name}>
{fieldContent}
</CollapsibleSection>
);
} else {
return <div key={field.name}>{fieldContent}</div>;
}
};
const DynamicConnectionForm: FC<DynamicConnectionFormProps> = ({
config,
selectedFiles,
@ -136,14 +30,12 @@ const DynamicConnectionForm: FC<DynamicConnectionFormProps> = ({
return (
<>
<h2 className="text-2xl font-bold text-text-800">{config.description}</h2>
{config.subtext && (
<CredentialSubText>{config.subtext}</CredentialSubText>
)}
<TextFormField
subtext="A descriptive name for the connector. This will be used to identify the connector in the Admin UI."
subtext="A descriptive name for the connector."
type={"text"}
label={"Connector Name"}
name={"name"}
@ -174,17 +66,20 @@ const DynamicConnectionForm: FC<DynamicConnectionFormProps> = ({
setShowAdvancedOptions={setShowAdvancedOptions}
/>
{showAdvancedOptions &&
config.advanced_values.map((field) => (
<RenderField
key={field.name}
field={field}
values={values}
selectedFiles={selectedFiles}
setSelectedFiles={setSelectedFiles}
connector={connector}
currentCredential={currentCredential}
/>
))}
config.advanced_values.map(
(field) =>
!field.hidden && (
<RenderField
key={field.name}
field={field}
values={values}
selectedFiles={selectedFiles}
setSelectedFiles={setSelectedFiles}
connector={connector}
currentCredential={currentCredential}
/>
)
)}
</>
)}
</>

View File

@ -0,0 +1,224 @@
import React, { Dispatch, FC, SetStateAction } from "react";
import { AdminBooleanFormField } from "@/components/credentials/CredentialFields";
import { FileUpload } from "@/components/admin/connectors/FileUpload";
import { TabOption } from "@/lib/connectors/connectors";
import SelectInput from "./ConnectorInput/SelectInput";
import NumberInput from "./ConnectorInput/NumberInput";
import { TextFormField } from "@/components/admin/connectors/Field";
import ListInput from "./ConnectorInput/ListInput";
import FileInput from "./ConnectorInput/FileInput";
import { ConfigurableSources } from "@/lib/types";
import { Credential } from "@/lib/connectors/credentials";
import CollapsibleSection from "@/app/admin/assistants/CollapsibleSection";
import {
Tabs,
TabsContent,
TabsList,
TabsTrigger,
} from "@/components/ui/fully_wrapped_tabs";
// Props for TabsField: renders one TabOption (a group of mutually exclusive
// configuration "scopes") inside a connector configuration form.
interface TabsFieldProps {
  tabField: TabOption;
  // Formik's current form values object.
  values: any;
  selectedFiles: File[];
  setSelectedFiles: Dispatch<SetStateAction<File[]>>;
  connector: ConfigurableSources;
  currentCredential: Credential<any> | null;
}

// Renders a tabbed field group. Each tab holds its own sub-fields; switching
// tabs resets the fields of every non-active tab back to their declared
// defaults so stale values from a previously selected tab are not submitted.
const TabsField: FC<TabsFieldProps> = ({
  tabField,
  values,
  selectedFiles,
  setSelectedFiles,
  connector,
  currentCredential,
}) => {
  return (
    <div className="w-full">
      {/* Optional group heading; label/description may be static strings or
          functions of the currently selected credential. */}
      {tabField.label && (
        <div className="mb-4">
          <h3 className="text-lg font-semibold">
            {typeof tabField.label === "function"
              ? tabField.label(currentCredential)
              : tabField.label}
          </h3>
          {tabField.description && (
            <p className="text-sm text-muted-foreground mt-1">
              {typeof tabField.description === "function"
                ? tabField.description(currentCredential)
                : tabField.description}
            </p>
          )}
        </div>
      )}
      {/* NOTE(review): the first tab is always the default here;
          tabField.defaultTab (declared on TabOption) is not consulted —
          confirm whether defaultTab should be honored. */}
      <Tabs
        defaultValue={tabField.tabs[0].value}
        className="w-full"
        onValueChange={(newTab) => {
          // Clear values from other tabs but preserve defaults
          // NOTE(review): this mutates Formik's `values` object directly
          // rather than going through setFieldValue, bypassing Formik's
          // change tracking — confirm this is intended.
          tabField.tabs.forEach((tab) => {
            if (tab.value !== newTab) {
              tab.fields.forEach((field) => {
                // Only clear if not default value
                if (values[field.name] !== field.default) {
                  values[field.name] = field.default;
                }
              });
            }
          });
        }}
      >
        <TabsList>
          {tabField.tabs.map((tab) => (
            <TabsTrigger key={tab.value} value={tab.value}>
              {tab.label}
            </TabsTrigger>
          ))}
        </TabsList>
        {tabField.tabs.map((tab) => (
          <TabsContent key={tab.value} value={tab.value} className="">
            {tab.fields.map((subField, index, array) => {
              // Check visibility condition first
              if (
                subField.visibleCondition &&
                !subField.visibleCondition(values, currentCredential)
              ) {
                return null;
              }
              return (
                <div
                  key={subField.name}
                  className={
                    // Bottom margin between fields, except after the last
                    // field and after plain "string_tab" text entries.
                    index < array.length - 1 && subField.type !== "string_tab"
                      ? "mb-4"
                      : ""
                  }
                >
                  <RenderField
                    key={subField.name}
                    field={subField}
                    values={values}
                    selectedFiles={selectedFiles}
                    setSelectedFiles={setSelectedFiles}
                    connector={connector}
                    currentCredential={currentCredential}
                  />
                </div>
              );
            })}
          </TabsContent>
        ))}
      </Tabs>
    </div>
  );
};
// Props for RenderField: a single connector-configuration field descriptor
// plus the surrounding form state it needs to render.
interface RenderFieldProps {
  field: any;
  // Formik's current form values object.
  values: any;
  selectedFiles: File[];
  setSelectedFiles: Dispatch<SetStateAction<File[]>>;
  connector: ConfigurableSources;
  currentCredential: Credential<any> | null;
}

// Dispatches one field descriptor to the matching input component based on
// `field.type` (tab, file, zip, list, select, number, checkbox, text,
// string_tab). Visibility conditions are expected to be evaluated by callers,
// not here. Optionally wraps the rendered field in a collapsible section.
export const RenderField: FC<RenderFieldProps> = ({
  field,
  values,
  selectedFiles,
  setSelectedFiles,
  connector,
  currentCredential,
}) => {
  // Label and description may be static strings or functions of the
  // currently selected credential.
  const label =
    typeof field.label === "function"
      ? field.label(currentCredential)
      : field.label;
  const description =
    typeof field.description === "function"
      ? field.description(currentCredential)
      : field.description;

  // Tab groups recurse via TabsField, which renders each tab's sub-fields
  // back through RenderField.
  if (field.type === "tab") {
    return (
      <TabsField
        tabField={field}
        values={values}
        selectedFiles={selectedFiles}
        setSelectedFiles={setSelectedFiles}
        connector={connector}
        currentCredential={currentCredential}
      />
    );
  }

  // Map the remaining field types to their input widgets.
  const fieldContent = (
    <>
      {field.type === "file" ? (
        <FileUpload
          name={field.name}
          selectedFiles={selectedFiles}
          setSelectedFiles={setSelectedFiles}
        />
      ) : field.type === "zip" ? (
        <FileInput
          name={field.name}
          label={label}
          optional={field.optional}
          description={description}
        />
      ) : field.type === "list" ? (
        <ListInput name={field.name} label={label} description={description} />
      ) : field.type === "select" ? (
        <SelectInput
          name={field.name}
          optional={field.optional}
          description={description}
          options={field.options || []}
          label={label}
        />
      ) : field.type === "number" ? (
        <NumberInput
          label={label}
          optional={field.optional}
          description={description}
          name={field.name}
        />
      ) : field.type === "checkbox" ? (
        <AdminBooleanFormField
          checked={values[field.name]}
          subtext={description}
          name={field.name}
          label={label}
        />
      ) : field.type === "text" ? (
        <TextFormField
          subtext={description}
          optional={field.optional}
          type={field.type}
          label={label}
          name={field.name}
          isTextArea={field.isTextArea || false}
          defaultHeight={"h-15"}
        />
      ) : field.type === "string_tab" ? (
        // "string_tab" renders informational text only — no input widget.
        <div className="text-center">{description}</div>
      ) : (
        <>INVALID FIELD TYPE</>
      )}
    </>
  );

  // Optionally hide the field behind a collapsible prompt.
  if (field.wrapInCollapsible) {
    return (
      <CollapsibleSection prompt={label} key={field.name}>
        {fieldContent}
      </CollapsibleSection>
    );
  }
  return <div key={field.name}>{fieldContent}</div>;
};

View File

@ -1,4 +1,4 @@
import { ErrorMessage, Field } from "formik";
import { ErrorMessage, Field, useField } from "formik";
import {
ExplanationText,
@ -96,18 +96,18 @@ export function AdminTextField({
name={name}
id={name}
className={`
${small && "text-sm"}
border
border-border
rounded
w-full
bg-input
py-2
px-3
mt-1
${heightString}
${fontSize}
${isCode ? " font-mono" : ""}
${small && "text-sm"}
border
border-border
rounded
w-full
bg-input
py-2
px-3
mt-1
${heightString}
${fontSize}
${isCode ? " font-mono" : ""}
`}
disabled={disabled}
placeholder={placeholder}
@ -143,13 +143,18 @@ export const AdminBooleanFormField = ({
alignTop,
onChange,
}: BooleanFormFieldProps) => {
const [field, meta, helpers] = useField(name);
return (
<div>
<label className={`flex text-sm`}>
<Field
name={name}
checked={checked}
<input
type="checkbox"
{...field}
checked={Boolean(field.value)}
onChange={(e) => {
helpers.setValue(e.target.checked);
}}
className={`mr-3 bg-white px-5 w-3.5 h-3.5 ${
alignTop ? "mt-1" : "my-auto"
}`}

View File

@ -0,0 +1,94 @@
"use client";
import * as React from "react";
import * as TabsPrimitive from "@radix-ui/react-tabs";
import { cn } from "@/lib/utils";
// Styled wrappers around the Radix UI Tabs primitives. These give the tabs a
// "fully wrapped" card look: the active trigger merges visually with the
// bordered, shadowed content panel below it.
const Tabs = TabsPrimitive.Root;

// Container for the tab triggers; full-width rounded strip above the panel.
const TabsList = React.forwardRef<
  React.ElementRef<typeof TabsPrimitive.List>,
  React.ComponentPropsWithoutRef<typeof TabsPrimitive.List>
>(({ className, ...props }, ref) => (
  <TabsPrimitive.List
    ref={ref}
    className={cn(
      [
        "inline-flex",
        "flex w-full",
        "items-center",
        "justify-center",
        "bg-background-150",
        "text-text-500",
        "dark:bg-background-800",
        "dark:text-text-400",
        "rounded-t-lg",
      ].join(" "),
      className
    )}
    {...props}
  />
));
TabsList.displayName = TabsPrimitive.List.displayName;

// Individual tab button. The active trigger (Radix sets data-state="active")
// gains top/side borders, drops its bottom border, and turns white so it
// appears connected to the content panel beneath it.
const TabsTrigger = React.forwardRef<
  React.ElementRef<typeof TabsPrimitive.Trigger>,
  React.ComponentPropsWithoutRef<typeof TabsPrimitive.Trigger>
>(({ className, ...props }, ref) => (
  <TabsPrimitive.Trigger
    ref={ref}
    className={cn(
      [
        "relative",
        "justify-center",
        "flex w-full",
        "border-b",
        "data-[state=active]:border-t",
        "data-[state=active]:border-l",
        "data-[state=active]:border-r",
        "data-[state=active]:border-b-0",
        "p-2",
        "data-[state=active]:bg-white",
        "data-[state=active]:rounded-t-lg",
        "data-[state=active]:shadow-[3px_-3px_6px_-3px_rgba(0,0,0,0.15)]",
      ].join(" "),
      className
    )}
    {...props}
  />
));
TabsTrigger.displayName = TabsPrimitive.Trigger.displayName;

// Panel shown for the active tab: bordered on three sides with a negative
// top margin (-mt-px) so it overlaps the trigger strip's bottom border.
const TabsContent = React.forwardRef<
  React.ElementRef<typeof TabsPrimitive.Content>,
  React.ComponentPropsWithoutRef<typeof TabsPrimitive.Content>
>(({ className, ...props }, ref) => (
  <TabsPrimitive.Content
    ref={ref}
    className={cn(
      [
        "mt-2",
        "ring-offset-background",
        "focus-visible:outline-none",
        "focus-visible:ring-2",
        "focus-visible:ring-text-950",
        "focus-visible:ring-offset-2",
        "dark:ring-offset-background-950",
        "dark:focus-visible:ring-text-300",
        "border-l",
        "border-r",
        "border-b",
        "px-6 pt-6 pb-3",
        "-mt-px",
        "rounded-b-lg",
        "shadow-[3px_-4px_6px_-3px_rgba(0,0,0,0.15)]",
      ].join(" "),
      className
    )}
    {...props}
  />
));
TabsContent.displayName = TabsPrimitive.Content.displayName;

export { Tabs, TabsList, TabsTrigger, TabsContent };

View File

@ -42,6 +42,7 @@ export interface Option {
values: any,
currentCredential: Credential<any> | null
) => boolean;
wrapInCollapsible?: boolean;
}
export interface SelectOption extends Option {
@ -59,6 +60,7 @@ export interface ListOption extends Option {
export interface TextOption extends Option {
type: "text";
default?: string;
isTextArea?: boolean;
}
export interface NumberOption extends Option {
@ -81,6 +83,31 @@ export interface ZipOption extends Option {
default?: string;
}
export interface StringTabOption extends Option {
type: "string_tab";
default?: string;
}
export interface TabOption extends Option {
type: "tab";
defaultTab?: string;
tabs: {
label: string;
value: string;
fields: (
| BooleanOption
| ListOption
| TextOption
| NumberOption
| SelectOption
| FileOption
| ZipOption
| StringTabOption
)[];
}[];
default?: [];
}
export interface ConnectionConfiguration {
description: string;
subtext?: string;
@ -92,6 +119,7 @@ export interface ConnectionConfiguration {
| SelectOption
| FileOption
| ZipOption
| TabOption
)[];
advanced_values: (
| BooleanOption
@ -101,6 +129,7 @@ export interface ConnectionConfiguration {
| SelectOption
| FileOption
| ZipOption
| TabOption
)[];
overrideDefaultFreq?: number;
}
@ -211,64 +240,96 @@ export const connectorConfigs: Record<
description: "Configure Google Drive connector",
values: [
{
type: "checkbox",
label: "Include shared drives?",
description:
"This will allow Danswer to index everything in your shared drives.",
name: "include_shared_drives",
optional: true,
default: true,
},
{
type: "text",
description: (currentCredential) => {
return currentCredential?.credential_json?.google_tokens
? "If you are a non-admin user authenticated using Google Oauth, you will need to specify the URLs for the shared drives you would like to index. Leaving this blank will NOT index any shared drives."
: "Enter a comma separated list of the URLs for the shared drive you would like to index. Leave this blank to index all shared drives.";
},
label: "Shared Drive URLs",
name: "shared_drive_urls",
visibleCondition: (values) => values.include_shared_drives,
optional: true,
},
{
type: "checkbox",
label: (currentCredential) => {
return currentCredential?.credential_json?.google_tokens
? "Include My Drive?"
: "Include Everyone's My Drive?";
},
description: (currentCredential) => {
return currentCredential?.credential_json?.google_tokens
? "This will allow Danswer to index everything in your My Drive."
: "This will allow Danswer to index everything in everyone's My Drives.";
},
name: "include_my_drives",
optional: true,
default: true,
},
{
type: "text",
description:
"Enter a comma separated list of the emails of the users whose MyDrive you want to index. Leave blank to index all MyDrives.",
label: "My Drive Emails",
name: "my_drive_emails",
visibleCondition: (values, currentCredential) =>
values.include_my_drives &&
!currentCredential?.credential_json?.google_tokens,
optional: true,
},
],
advanced_values: [
{
type: "text",
description:
"Enter a comma separated list of the URLs of any folders you would like to index. The files located in these folders (and all subfolders) will be indexed. Note: This will be in addition to whatever settings you have selected above, so leave those blank if you only want to index the folders specified here.",
label: "Folder URLs",
name: "shared_folder_urls",
type: "tab",
name: "indexing_scope",
label: "How should we index your Google Drive?",
optional: true,
tabs: [
{
value: "general",
label: "General",
fields: [
{
type: "checkbox",
label: "Include shared drives?",
description: (currentCredential) => {
return currentCredential?.credential_json?.google_tokens
? "This will allow Danswer to index everything in the shared drives you have access to."
: "This will allow Danswer to index everything in your Organization's shared drives.";
},
name: "include_shared_drives",
default: false,
},
{
type: "checkbox",
label: (currentCredential) => {
return currentCredential?.credential_json?.google_tokens
? "Include My Drive?"
: "Include Everyone's My Drive?";
},
description: (currentCredential) => {
return currentCredential?.credential_json?.google_tokens
? "This will allow Danswer to index everything in your My Drive."
: "This will allow Danswer to index everything in everyone's My Drives.";
},
name: "include_my_drives",
default: false,
},
{
type: "checkbox",
description:
"This will allow Danswer to index all files shared with you.",
label: "Include All Files Shared With You?",
name: "include_files_shared_with_me",
visibleCondition: (values, currentCredential) =>
currentCredential?.credential_json?.google_tokens,
default: false,
},
],
},
{
value: "specific",
label: "Specific",
fields: [
{
type: "text",
description: (currentCredential) => {
return currentCredential?.credential_json?.google_tokens
? "Enter a comma separated list of the URLs for the shared drive you would like to index. You must have access to these shared drives."
: "Enter a comma separated list of the URLs for the shared drive you would like to index.";
},
label: "Shared Drive URLs",
name: "shared_drive_urls",
default: "",
isTextArea: true,
},
{
type: "text",
description:
"Enter a comma separated list of the URLs of any folders you would like to index. The files located in these folders (and all subfolders) will be indexed.",
label: "Folder URLs",
name: "shared_folder_urls",
default: "",
isTextArea: true,
},
{
type: "text",
description:
"Enter a comma separated list of the emails of the users whose MyDrive you want to index.",
label: "My Drive Emails",
name: "my_drive_emails",
visibleCondition: (values, currentCredential) =>
!currentCredential?.credential_json?.google_tokens,
default: "",
isTextArea: true,
},
],
},
],
defaultTab: "space",
},
],
advanced_values: [],
},
gmail: {
description: "Configure Gmail connector",
@ -282,26 +343,7 @@ export const connectorConfigs: Record<
},
confluence: {
description: "Configure Confluence connector",
subtext: `Specify the base URL of your Confluence instance, the space name, and optionally a specific page ID to index. If no page ID is provided, the entire space will be indexed. If no space is specified, all available Confluence spaces will be indexed.`,
values: [
{
type: "text",
query: "Enter the wiki base URL:",
label: "Wiki Base URL",
name: "wiki_base",
optional: false,
description:
"The base URL of your Confluence instance (e.g., https://your-domain.atlassian.net/wiki)",
},
{
type: "text",
query: "Enter the space:",
label: "Space",
name: "space",
optional: true,
description:
"The Confluence space name to index (e.g. `KB`). If no space is specified, all available Confluence spaces will be indexed.",
},
{
type: "checkbox",
query: "Is this a Confluence Cloud instance?",
@ -312,36 +354,92 @@ export const connectorConfigs: Record<
description:
"Check if this is a Confluence Cloud instance, uncheck for Confluence Server/Data Center",
},
],
advanced_values: [
{
type: "text",
query: "Enter the page ID (optional):",
label: "Page ID",
name: "page_id",
optional: true,
description:
"Specific page ID to index - leave empty to index the entire space (e.g. `131368`)",
},
{
type: "checkbox",
query: "Should index pages recursively?",
label: "Index Recursively",
name: "index_recursively",
description:
"If this is set and the Wiki Page URL leads to a page, we will index the page and all of its children instead of just the page. This is set by default for Confluence connectors without a page ID specified.",
query: "Enter the wiki base URL:",
label: "Wiki Base URL",
name: "wiki_base",
optional: false,
description:
"The base URL of your Confluence instance (e.g., https://your-domain.atlassian.net/wiki)",
},
{
type: "text",
query: "Enter the CQL query (optional):",
label: "CQL Query",
name: "cql_query",
type: "tab",
name: "indexing_scope",
label: "How Should We Index Your Confluence?",
optional: true,
description:
"IMPORTANT: This will overwrite all other selected connector settings (besides Wiki Base URL). We currently only support CQL queries that return objects of type 'page'. This means all CQL queries must contain 'type=page' as the only type filter. It is also important that no filters for 'lastModified' are used as it will cause issues with our connector polling logic. We will still get all attachments and comments for the pages returned by the CQL query. Any 'lastmodified' filters will be overwritten. See https://developer.atlassian.com/server/confluence/advanced-searching-using-cql/ for more details.",
tabs: [
{
value: "everything",
label: "Everything",
fields: [
{
type: "string_tab",
label: "Everything",
name: "everything",
description:
"This connector will index all pages the provided credentials have access to!",
},
],
},
{
value: "space",
label: "Space",
fields: [
{
type: "text",
query: "Enter the space:",
label: "Space Key",
name: "space",
default: "",
description: "The Confluence space key to index (e.g. `KB`).",
},
],
},
{
value: "page",
label: "Page",
fields: [
{
type: "text",
query: "Enter the page ID:",
label: "Page ID",
name: "page_id",
default: "",
description: "Specific page ID to index (e.g. `131368`)",
},
{
type: "checkbox",
query: "Should index pages recursively?",
label: "Index Recursively",
name: "index_recursively",
description:
"If this is set, we will index the page indicated by the Page ID as well as all of its children.",
optional: false,
default: true,
},
],
},
{
value: "cql",
label: "CQL Query",
fields: [
{
type: "text",
query: "Enter the CQL query (optional):",
label: "CQL Query",
name: "cql_query",
default: "",
description:
"IMPORTANT: We currently only support CQL queries that return objects of type 'page'. This means all CQL queries must contain 'type=page' as the only type filter. It is also important that no filters for 'lastModified' are used as it will cause issues with our connector polling logic. We will still get all attachments and comments for the pages returned by the CQL query. Any 'lastmodified' filters will be overwritten. See https://developer.atlassian.com/server/confluence/advanced-searching-using-cql/ for more details.",
},
],
},
],
defaultTab: "space",
},
],
advanced_values: [],
},
jira: {
description: "Configure Jira connector",