danswer/backend/onyx/file_store/file_store.py
from abc import ABC
from abc import abstractmethod
from typing import IO

from sqlalchemy.orm import Session

from onyx.configs.constants import FileOrigin
from onyx.db.models import PGFileStore
from onyx.db.pg_file_store import create_populate_lobj
from onyx.db.pg_file_store import delete_lobj_by_id
from onyx.db.pg_file_store import delete_pgfilestore_by_file_name
from onyx.db.pg_file_store import get_pgfilestore_by_file_name
from onyx.db.pg_file_store import read_lobj
from onyx.db.pg_file_store import upsert_pgfilestore


class FileStore(ABC):
"""
An abstraction for storing files and large binary objects.
"""

    @abstractmethod
def save_file(
self,
file_name: str,
content: IO,
display_name: str | None,
file_origin: FileOrigin,
file_type: str,
file_metadata: dict | None = None,
) -> None:
"""
Save a file to the blob store
Parameters:
- connector_name: Name of the CC-Pair (as specified by the user in the UI)
- file_name: Name of the file to save
- content: Contents of the file
- display_name: Display name of the file
- file_origin: Origin of the file
- file_type: Type of the file
"""
raise NotImplementedError
@abstractmethod
def read_file(
        self, file_name: str, mode: str | None = None, use_tempfile: bool = False
    ) -> IO:
        """
        Read the content of the given file by name

        Parameters:
        - file_name: Name of file to read
        - mode: Mode to open the file (e.g. 'b' for binary)
        - use_tempfile: Whether to use a temporary file to store the contents
          in order to avoid loading the entire file into memory

        Returns:
            Contents of the file as a file-like object
        """

    @abstractmethod
def read_file_record(self, file_name: str) -> PGFileStore:
"""
        Read the file record by name
        """

    @abstractmethod
def delete_file(self, file_name: str) -> None:
"""
Delete a file by its name.
Parameters:
- file_name: Name of file to delete
"""


class PostgresBackedFileStore(FileStore):
def __init__(self, db_session: Session):
self.db_session = db_session

    def save_file(
self,
file_name: str,
content: IO,
display_name: str | None,
file_origin: FileOrigin,
file_type: str,
file_metadata: dict | None = None,
) -> None:
try:
            # Postgres large objects are stored as special objects that can be
            # listed with: SELECT * FROM pg_largeobject_metadata;
obj_id = create_populate_lobj(content=content, db_session=self.db_session)
upsert_pgfilestore(
file_name=file_name,
display_name=display_name or file_name,
file_origin=file_origin,
file_type=file_type,
lobj_oid=obj_id,
db_session=self.db_session,
file_metadata=file_metadata,
)
self.db_session.commit()
except Exception:
self.db_session.rollback()
raise

    def read_file(
self, file_name: str, mode: str | None = None, use_tempfile: bool = False
) -> IO:
file_record = get_pgfilestore_by_file_name(
file_name=file_name, db_session=self.db_session
)
return read_lobj(
lobj_oid=file_record.lobj_oid,
db_session=self.db_session,
mode=mode,
use_tempfile=use_tempfile,
)
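
    # Illustrative note (not part of the original module), with a hypothetical
    # store instance and file name: large files can be read without holding the
    # whole payload in memory, e.g.
    #     store.read_file("big_export.csv", mode="b", use_tempfile=True)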

    def read_file_record(self, file_name: str) -> PGFileStore:
file_record = get_pgfilestore_by_file_name(
file_name=file_name, db_session=self.db_session
)
return file_record

    def delete_file(self, file_name: str) -> None:
try:
file_record = get_pgfilestore_by_file_name(
file_name=file_name, db_session=self.db_session
)
delete_lobj_by_id(file_record.lobj_oid, db_session=self.db_session)
delete_pgfilestore_by_file_name(
file_name=file_name, db_session=self.db_session
)
self.db_session.commit()
except Exception:
self.db_session.rollback()
raise


def get_default_file_store(db_session: Session) -> FileStore:
    # The only supported file store currently is the Postgres-backed file store
return PostgresBackedFileStore(db_session=db_session)
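

# Minimal usage sketch (illustrative only, not part of the original module).
# The helper below and its file name/origin values are assumptions; obtain the
# SQLAlchemy Session however the surrounding application normally does.
#
#     from io import BytesIO
#
#     def _example_roundtrip(db_session: Session) -> bytes:
#         file_store = get_default_file_store(db_session)
#         file_store.save_file(
#             file_name="example.txt",
#             content=BytesIO(b"hello world"),
#             display_name="Example",
#             file_origin=FileOrigin.OTHER,  # assumed enum member
#             file_type="text/plain",
#         )
#         return file_store.read_file("example.txt", mode="b").read()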