Sheet update (#3189)
* quick pass
* k
* update sheet
* add multiple sheet stuff
* k
* finalized
* update configuration
parent 100b4a0d16
commit deee237c7e
@@ -2,6 +2,7 @@ import io
 from datetime import datetime
 from datetime import timezone
 
+from googleapiclient.discovery import build  # type: ignore
 from googleapiclient.errors import HttpError  # type: ignore
 
 from danswer.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE
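The only change in this first hunk is the new build import from googleapiclient.discovery; the spreadsheet branch added below uses it to construct a Sheets v4 client from the credentials already backing the Drive service. A minimal sketch of that pattern, assuming a Drive client built elsewhere with valid OAuth credentials (the helper name is illustrative, not part of the commit):

    from googleapiclient.discovery import build  # type: ignore

    def get_sheets_service(drive_service):
        # Illustrative helper (not in the diff): reuse the credentials that back
        # the existing Drive client to build a Sheets v4 client, the same way the
        # connector calls build("sheets", "v4", credentials=service._http.credentials).
        return build("sheets", "v4", credentials=drive_service._http.credentials)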
@@ -48,6 +49,67 @@ def _extract_sections_basic(
         return [Section(link=link, text=UNSUPPORTED_FILE_TYPE_CONTENT)]
 
     try:
+        if mime_type == GDriveMimeType.SPREADSHEET.value:
+            try:
+                sheets_service = build(
+                    "sheets", "v4", credentials=service._http.credentials
+                )
+                spreadsheet = (
+                    sheets_service.spreadsheets()
+                    .get(spreadsheetId=file["id"])
+                    .execute()
+                )
+
+                sections = []
+                for sheet in spreadsheet["sheets"]:
+                    sheet_name = sheet["properties"]["title"]
+                    sheet_id = sheet["properties"]["sheetId"]
+
+                    # Get sheet dimensions
+                    grid_properties = sheet["properties"].get("gridProperties", {})
+                    row_count = grid_properties.get("rowCount", 1000)
+                    column_count = grid_properties.get("columnCount", 26)
+
+                    # Convert column count to letter (e.g., 26 -> Z, 27 -> AA)
+                    end_column = ""
+                    while column_count:
+                        column_count, remainder = divmod(column_count - 1, 26)
+                        end_column = chr(65 + remainder) + end_column
+
+                    range_name = f"'{sheet_name}'!A1:{end_column}{row_count}"
+
+                    try:
+                        result = (
+                            sheets_service.spreadsheets()
+                            .values()
+                            .get(spreadsheetId=file["id"], range=range_name)
+                            .execute()
+                        )
+                        values = result.get("values", [])
+
+                        if values:
+                            text = f"Sheet: {sheet_name}\n"
+                            for row in values:
+                                text += "\t".join(str(cell) for cell in row) + "\n"
+                            sections.append(
+                                Section(
+                                    link=f"{link}#gid={sheet_id}",
+                                    text=text,
+                                )
+                            )
+                    except HttpError as e:
+                        logger.warning(
+                            f"Error fetching data for sheet '{sheet_name}': {e}"
+                        )
+                        continue
+                return sections
+
+            except Exception as e:
+                logger.warning(
+                    f"Ran into exception '{e}' when pulling data from Google Sheet '{file['name']}'."
+                    " Falling back to basic extraction."
+                )
+
         if mime_type in [
             GDriveMimeType.DOC.value,
             GDriveMimeType.PPT.value,
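The one non-obvious step in the hunk above is converting a sheet's numeric column count into a column letter so each tab can be fetched as a single A1 range. Spreadsheet columns are bijective base-26 (A..Z, then AA..AZ, and so on), which is why the loop subtracts 1 before each divmod. A standalone sketch of that conversion, with the loop from the diff wrapped in an illustrative helper (the function name and example values are not part of the commit):

    def column_count_to_letter(column_count: int) -> str:
        # Bijective base-26: 1 -> A, 26 -> Z, 27 -> AA, 702 -> ZZ, 703 -> AAA.
        end_column = ""
        while column_count:
            column_count, remainder = divmod(column_count - 1, 26)
            end_column = chr(65 + remainder) + end_column
        return end_column

    # The range the connector then requests for a tab named "Sheet1" with the
    # default 1000 rows and 26 columns:
    range_name = f"'Sheet1'!A1:{column_count_to_letter(26)}1000"  # -> 'Sheet1'!A1:Z1000

Each populated tab then becomes its own Section whose link carries a #gid=<sheetId> fragment, so results can deep-link to the correct tab of the spreadsheet.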
@@ -65,6 +127,7 @@ def _extract_sections_basic(
             .decode("utf-8")
         )
         return [Section(link=link, text=text)]
 
     elif mime_type in [
         GDriveMimeType.PLAIN_TEXT.value,
         GDriveMimeType.MARKDOWN.value,
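The last hunk is context from the generic export path: when a file is not a spreadsheet, or when the Sheets branch above raises and logs its "Falling back to basic extraction" warning, the connector drops through to exporting the document body as text and wrapping it in a single Section. A rough sketch of that fallback shape, assuming a Drive v3 service client; the stand-in Section class and the export mime type are illustrative, not copied from the file:

    from dataclasses import dataclass

    @dataclass
    class Section:  # stand-in for danswer's Section model
        link: str
        text: str

    def export_as_single_section(service, file: dict, link: str) -> list[Section]:
        # Export the whole document body via the Drive API and return it as one
        # Section, instead of one Section per sheet tab as in the branch above.
        text = (
            service.files()
            .export(fileId=file["id"], mimeType="text/plain")
            .execute()
            .decode("utf-8")
        )
        return [Section(link=link, text=text)]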