better drive url cleaning (#4247)

* better drive url cleaning

* nit

* address JR comments
This commit is contained in:
evan-danswer 2025-03-13 14:16:24 -07:00 committed by GitHub
parent b1a7cff9e0
commit 934700b928
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -4,6 +4,7 @@ from concurrent.futures import as_completed
from concurrent.futures import ThreadPoolExecutor
from functools import partial
from typing import Any
from urllib.parse import urlparse
from google.oauth2.credentials import Credentials as OAuthCredentials # type: ignore
from google.oauth2.service_account import Credentials as ServiceAccountCredentials # type: ignore
@@ -59,7 +60,7 @@ def _extract_str_list_from_comma_str(string: str | None) -> list[str]:
def _extract_ids_from_urls(urls: list[str]) -> list[str]:
return [url.split("/")[-1] for url in urls]
return [urlparse(url).path.strip("/").split("/")[-1] for url in urls]
def _convert_single_file(