prevent other tests from interfering with existing google drive tests (#3624)

* prevent other tests from interfering with existing google drive tests

* cleanup gdrive tests

* finished

* done
This commit is contained in:
hagen-danswer 2025-01-07 15:32:36 -08:00 committed by GitHub
parent a95f4298ad
commit 4ce24d68f7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 46 additions and 15 deletions

View File

@ -123,8 +123,19 @@ SPECIAL_FILE_ID_TO_CONTENT_MAP: dict[int, str] = {
file_name_template = "file_{}.txt"
file_text_template = "This is file {}"
# This is done to prevent different tests from interfering with each other
# So each test type should have its own valid prefix
_VALID_PREFIX = "file_"
def print_discrepencies(expected: set[str], retrieved: set[str]) -> None:
def filter_invalid_prefixes(names: set[str]) -> set[str]:
return {name for name in names if name.startswith(_VALID_PREFIX)}
def print_discrepencies(
expected: set[str],
retrieved: set[str],
) -> None:
if expected != retrieved:
print(expected)
print(retrieved)
@ -134,7 +145,7 @@ def print_discrepencies(expected: set[str], retrieved: set[str]) -> None:
print(expected - retrieved)
def get_file_content(file_id: int) -> str:
def _get_expected_file_content(file_id: int) -> str:
if file_id in SPECIAL_FILE_ID_TO_CONTENT_MAP:
return SPECIAL_FILE_ID_TO_CONTENT_MAP[file_id]
@ -142,25 +153,42 @@ def get_file_content(file_id: int) -> str:
def assert_retrieved_docs_match_expected(
retrieved_docs: list[Document], expected_file_ids: Sequence[int]
retrieved_docs: list[Document],
expected_file_ids: Sequence[int],
) -> None:
expected_file_names = {
file_name_template.format(file_id) for file_id in expected_file_ids
}
expected_file_texts = {get_file_content(file_id) for file_id in expected_file_ids}
expected_file_texts = {
_get_expected_file_content(file_id) for file_id in expected_file_ids
}
retrieved_file_names = set([doc.semantic_identifier for doc in retrieved_docs])
retrieved_texts = set(
# Filter out invalid prefixes to prevent different tests from interfering with each other
valid_retrieved_docs = [
doc
for doc in retrieved_docs
if doc.semantic_identifier.startswith(_VALID_PREFIX)
]
valid_retrieved_file_names = set(
[doc.semantic_identifier for doc in valid_retrieved_docs]
)
valid_retrieved_texts = set(
[
" - ".join([section.text for section in doc.sections])
for doc in retrieved_docs
for doc in valid_retrieved_docs
]
)
# Check file names
print_discrepencies(expected_file_names, retrieved_file_names)
assert expected_file_names == retrieved_file_names
print_discrepencies(
expected=expected_file_names,
retrieved=valid_retrieved_file_names,
)
assert expected_file_names == valid_retrieved_file_names
# Check file texts
print_discrepencies(expected_file_texts, retrieved_texts)
assert expected_file_texts == retrieved_texts
print_discrepencies(
expected=expected_file_texts,
retrieved=valid_retrieved_texts,
)
assert expected_file_texts == valid_retrieved_texts

View File

@ -15,6 +15,7 @@ from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_EMAIL
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FOLDER_3_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import file_name_template
from tests.daily.connectors.google_drive.consts_and_utils import filter_invalid_prefixes
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_1_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_2_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_FILE_IDS
@ -81,9 +82,10 @@ def assert_correct_access_for_user(
all_accessible_ids = expected_access_ids + PUBLIC_RANGE
expected_file_names = {file_name_template.format(i) for i in all_accessible_ids}
print_discrepencies(expected_file_names, retrieved_file_names)
filtered_retrieved_file_names = filter_invalid_prefixes(retrieved_file_names)
print_discrepencies(expected_file_names, filtered_retrieved_file_names)
assert expected_file_names == retrieved_file_names
assert expected_file_names == filtered_retrieved_file_names
# This function is supposed to map to the group_sync.py file for the google drive connector
@ -172,8 +174,9 @@ def test_all_permissions(
}
# Should get everything
print_discrepencies(expected_file_names, found_file_names)
assert expected_file_names == found_file_names
filtered_retrieved_file_names = filter_invalid_prefixes(found_file_names)
print_discrepencies(expected_file_names, filtered_retrieved_file_names)
assert expected_file_names == filtered_retrieved_file_names
group_map = get_group_map(google_drive_connector)