mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-04-08 11:58:34 +02:00
WIP, testing theory
This commit is contained in:
parent
2b38b0c53b
commit
ebb92dc6d7
@ -122,7 +122,7 @@ def crawl_folders_for_files(
|
||||
This function starts crawling from any folder. It is slower though.
|
||||
"""
|
||||
logger.info("Entered crawl_folders_for_files with parent_id: " + parent_id)
|
||||
if parent_id not in traversed_parent_ids:
|
||||
if True: # TODO: temporary for debugging
|
||||
logger.info("Parent id not in traversed parent ids, getting files")
|
||||
found_files = False
|
||||
for file in _get_files_in_parent(
|
||||
|
@ -135,13 +135,15 @@ def filter_invalid_prefixes(names: set[str]) -> set[str]:
|
||||
return {name for name in names if name.startswith(_VALID_PREFIX)}
|
||||
|
||||
|
||||
def print_discrepencies(
|
||||
def print_discrepancies(
|
||||
expected: set[str],
|
||||
retrieved: set[str],
|
||||
) -> None:
|
||||
if expected != retrieved:
|
||||
print(expected)
|
||||
print(retrieved)
|
||||
expected_list = sorted(list(expected), key=lambda x: int(x.split("_")[-1]))
|
||||
retrieved_list = sorted(list(retrieved), key=lambda x: int(x.split("_")[-1]))
|
||||
print(expected_list)
|
||||
print(retrieved_list)
|
||||
print("Extra:")
|
||||
print(retrieved - expected)
|
||||
print("Missing:")
|
||||
@ -166,6 +168,8 @@ def assert_retrieved_docs_match_expected(
|
||||
_get_expected_file_content(file_id) for file_id in expected_file_ids
|
||||
}
|
||||
|
||||
retrieved_docs.sort(key=lambda x: x.semantic_identifier)
|
||||
|
||||
for doc in retrieved_docs:
|
||||
print(f"doc.semantic_identifier: {doc.semantic_identifier}")
|
||||
|
||||
@ -192,14 +196,14 @@ def assert_retrieved_docs_match_expected(
|
||||
)
|
||||
|
||||
# Check file names
|
||||
print_discrepencies(
|
||||
print_discrepancies(
|
||||
expected=expected_file_names,
|
||||
retrieved=valid_retrieved_file_names,
|
||||
)
|
||||
assert expected_file_names == valid_retrieved_file_names
|
||||
|
||||
# Check file texts
|
||||
print_discrepencies(
|
||||
print_discrepancies(
|
||||
expected=expected_file_texts,
|
||||
retrieved=valid_retrieved_texts,
|
||||
)
|
||||
|
@ -22,7 +22,7 @@ from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_FILE_I
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_1_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_2_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import print_discrepencies
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import print_discrepancies
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import PUBLIC_RANGE
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import SECTIONS_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_1_FILE_IDS
|
||||
@ -83,7 +83,7 @@ def assert_correct_access_for_user(
|
||||
expected_file_names = {file_name_template.format(i) for i in all_accessible_ids}
|
||||
|
||||
filtered_retrieved_file_names = filter_invalid_prefixes(retrieved_file_names)
|
||||
print_discrepencies(expected_file_names, filtered_retrieved_file_names)
|
||||
print_discrepancies(expected_file_names, filtered_retrieved_file_names)
|
||||
|
||||
assert expected_file_names == filtered_retrieved_file_names
|
||||
|
||||
@ -175,7 +175,7 @@ def test_all_permissions(
|
||||
|
||||
# Should get everything
|
||||
filtered_retrieved_file_names = filter_invalid_prefixes(found_file_names)
|
||||
print_discrepencies(expected_file_names, filtered_retrieved_file_names)
|
||||
print_discrepancies(expected_file_names, filtered_retrieved_file_names)
|
||||
assert expected_file_names == filtered_retrieved_file_names
|
||||
|
||||
group_map = get_group_map(google_drive_connector)
|
||||
|
Loading…
x
Reference in New Issue
Block a user