mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-03-17 13:22:42 +01:00
better drive url cleaning (#4247)
* better drive url cleaning * nit * address JR comments
This commit is contained in:
parent
b1a7cff9e0
commit
934700b928
@ -4,6 +4,7 @@ from concurrent.futures import as_completed
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from functools import partial
|
||||
from typing import Any
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from google.oauth2.credentials import Credentials as OAuthCredentials # type: ignore
|
||||
from google.oauth2.service_account import Credentials as ServiceAccountCredentials # type: ignore
|
||||
@ -59,7 +60,7 @@ def _extract_str_list_from_comma_str(string: str | None) -> list[str]:
|
||||
|
||||
|
||||
def _extract_ids_from_urls(urls: list[str]) -> list[str]:
|
||||
return [url.split("/")[-1] for url in urls]
|
||||
return [urlparse(url).path.strip("/").split("/")[-1] for url in urls]
|
||||
|
||||
|
||||
def _convert_single_file(
|
||||
|
Loading…
x
Reference in New Issue
Block a user