Mirror of https://github.com/danswer-ai/danswer.git
Code for ease of eval (#1656)
parent 93cc5a9e77
commit c798ade127
@@ -52,6 +52,8 @@ SECTION_SEPARATOR = "\n\n"
 # For combining attributes, doesn't have to be unique/perfect to work
 INDEX_SEPARATOR = "==="
 
+# For File Connector Metadata override file
+DANSWER_METADATA_FILENAME = ".danswer_metadata.json"
 
 # Messages
 DISABLED_GEN_AI_MSG = (
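For context on the new constant: later hunks in this commit read DANSWER_METADATA_FILENAME out of an uploaded zip as a JSON list of per-file dicts and key them by "filename". A minimal sketch of what such an override file could contain, assuming only the keys this diff references (filename, file_display_name, title, link, primary_owners, secondary_owners, time_updated); the values are hypothetical, not shipped examples:

import json

# Illustrative contents for a .danswer_metadata.json placed at the root of an
# uploaded zip; every key below is one the surrounding diff reads or filters out.
example_metadata = [
    {
        "filename": "reports/q1.txt",              # path of the file inside the zip (hypothetical)
        "file_display_name": "Q1 Report",          # shown in the UI as the file's "name"
        "title": "Quarterly Report, Q1",           # becomes the document title
        "link": "https://example.com/q1",          # attached to the document's Section
        "primary_owners": ["alice@example.com"],   # illustrative owner values
        "secondary_owners": ["bob@example.com"],
        "time_updated": "2024-01-01T00:00:00+00:00",
    }
]

with open(".danswer_metadata.json", "w") as f:
    json.dump(example_metadata, f, indent=2)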
@@ -86,7 +86,12 @@ def _process_file(
     all_metadata = {**metadata, **file_metadata} if metadata else file_metadata
 
     # If this is set, we will show this in the UI as the "name" of the file
-    file_display_name_override = all_metadata.get("file_display_name")
+    file_display_name = all_metadata.get("file_display_name") or os.path.basename(
+        file_name
+    )
+    title = (
+        all_metadata["title"] or "" if "title" in all_metadata else file_display_name
+    )
 
     time_updated = all_metadata.get("time_updated", datetime.now(timezone.utc))
     if isinstance(time_updated, str):
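The display-name and title fallback introduced above can be exercised on its own. A standalone sketch of the same precedence rules; the function name is illustrative, not part of the connector's API:

import os

def resolve_display_name_and_title(all_metadata: dict, file_name: str) -> tuple[str, str]:
    # Display name: an explicit "file_display_name" override wins,
    # otherwise fall back to the file's basename.
    file_display_name = all_metadata.get("file_display_name") or os.path.basename(
        file_name
    )
    # Title: an explicit "title" key wins (falsy values collapse to ""),
    # otherwise the display name doubles as the title.
    title = (
        all_metadata["title"] or "" if "title" in all_metadata else file_display_name
    )
    return file_display_name, title

# e.g. resolve_display_name_and_title({"title": None}, "/tmp/report.pdf") -> ("report.pdf", "")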
@@ -108,6 +113,7 @@ def _process_file(
             "secondary_owners",
             "filename",
             "file_display_name",
+            "title",
         ]
     }
 
@@ -131,8 +137,8 @@ def _process_file(
                 Section(link=all_metadata.get("link"), text=file_content_raw.strip())
             ],
             source=DocumentSource.FILE,
-            semantic_identifier=file_display_name_override
-            or os.path.basename(file_name),
+            semantic_identifier=file_display_name,
+            title=title,
             doc_updated_at=final_time_updated,
             primary_owners=p_owners,
             secondary_owners=s_owners,
@@ -16,6 +16,7 @@ import pptx  # type: ignore
 from pypdf import PdfReader
 from pypdf.errors import PdfStreamError
 
+from danswer.configs.constants import DANSWER_METADATA_FILENAME
 from danswer.file_processing.html_utils import parse_html_page_basic
 from danswer.utils.logger import setup_logger
 
@@ -88,7 +89,7 @@ def load_files_from_zip(
     with zipfile.ZipFile(zip_file_io, "r") as zip_file:
         zip_metadata = {}
         try:
-            metadata_file_info = zip_file.getinfo(".danswer_metadata.json")
+            metadata_file_info = zip_file.getinfo(DANSWER_METADATA_FILENAME)
             with zip_file.open(metadata_file_info, "r") as metadata_file:
                 try:
                     zip_metadata = json.load(metadata_file)
@@ -96,18 +97,19 @@ def load_files_from_zip(
                     # convert list of dicts to dict of dicts
                     zip_metadata = {d["filename"]: d for d in zip_metadata}
                 except json.JSONDecodeError:
-                    logger.warn("Unable to load .danswer_metadata.json")
+                    logger.warn(f"Unable to load {DANSWER_METADATA_FILENAME}")
         except KeyError:
-            logger.info("No .danswer_metadata.json file")
+            logger.info(f"No {DANSWER_METADATA_FILENAME} file")
 
         for file_info in zip_file.infolist():
             with zip_file.open(file_info.filename, "r") as file:
                 if ignore_dirs and file_info.is_dir():
                     continue
 
-                if ignore_macos_resource_fork_files and is_macos_resource_fork_file(
-                    file_info.filename
-                ):
+                if (
+                    ignore_macos_resource_fork_files
+                    and is_macos_resource_fork_file(file_info.filename)
+                ) or file_info.filename == DANSWER_METADATA_FILENAME:
                     continue
                 yield file_info, file, zip_metadata.get(file_info.filename, {})
 
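Taken together, the generator above now pairs every yielded file with its per-file metadata (or an empty dict) and skips the metadata override file itself. A minimal usage sketch, assuming load_files_from_zip keeps the interface implied here (a binary zip stream in, (file_info, file_handle, metadata) triples out) and that its remaining parameters have sensible defaults:

import io

# load_files_from_zip is the generator patched above; its module path is not
# shown in this diff, so the import is left implicit here.

with open("docs.zip", "rb") as f:          # hypothetical zip uploaded by a user
    zip_file_io = io.BytesIO(f.read())

for file_info, file, metadata in load_files_from_zip(zip_file_io):
    # metadata is {} for files not listed in .danswer_metadata.json; after this
    # commit the metadata file itself (and macOS resource forks) is never
    # yielded back to the caller.
    print(file_info.filename, metadata.get("file_display_name"))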
@@ -1,23 +1,40 @@
 #!/bin/bash
 
+# Usage of the script with optional volume arguments
+# ./restart_containers.sh [vespa_volume] [postgres_volume]
+
+VESPA_VOLUME=${1:-""} # Default is empty if not provided
+POSTGRES_VOLUME=${2:-""} # Default is empty if not provided
+
 # Stop and remove the existing containers
 echo "Stopping and removing existing containers..."
 docker stop danswer_postgres danswer_vespa
 docker rm danswer_postgres danswer_vespa
 
-# Start the PostgreSQL container
+# Start the PostgreSQL container with optional volume
 echo "Starting PostgreSQL container..."
-docker run -p 5432:5432 --name danswer_postgres -e POSTGRES_PASSWORD=password -d postgres
+if [[ -n "$POSTGRES_VOLUME" ]]; then
+    docker run -p 5432:5432 --name danswer_postgres -e POSTGRES_PASSWORD=password -d -v $POSTGRES_VOLUME:/var/lib/postgresql/data postgres
+else
+    docker run -p 5432:5432 --name danswer_postgres -e POSTGRES_PASSWORD=password -d postgres
+fi
 
-# Start the Vespa container
+# Start the Vespa container with optional volume
 echo "Starting Vespa container..."
-docker run --detach --name danswer_vespa --hostname vespa-container --publish 8081:8081 --publish 19071:19071 vespaengine/vespa:8
+if [[ -n "$VESPA_VOLUME" ]]; then
+    docker run --detach --name danswer_vespa --hostname vespa-container --publish 8081:8081 --publish 19071:19071 -v $VESPA_VOLUME:/opt/vespa/var vespaengine/vespa:8
+else
+    docker run --detach --name danswer_vespa --hostname vespa-container --publish 8081:8081 --publish 19071:19071 vespaengine/vespa:8
+fi
 
 # Ensure alembic runs in the correct directory
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
 PARENT_DIR="$(dirname "$SCRIPT_DIR")"
 cd "$PARENT_DIR"
 
 # Give Postgres a second to start
 sleep 1
 
 # Run Alembic upgrade
 echo "Running Alembic migration..."
 alembic upgrade head
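For reference, the updated script can be invoked with or without the new positional volume arguments; the volume names below are examples, not defaults shipped with the repo:

./restart_containers.sh                           # ephemeral containers, as before
./restart_containers.sh vespa_data postgres_data  # persist Vespa and Postgres data in named Docker volumes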