mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-05-30 17:50:27 +02:00
* tool to generate vespa schema variations for our cloud * extraneous assign * float, not double * back to double --------- Co-authored-by: Richard Kuo (Onyx) <rkuo@onyx.app>
49 lines
1.6 KiB
Python
49 lines
1.6 KiB
Python
"""Tool to generate all supported schema variations for Onyx Cloud's Vespa database."""
|
|
|
|
import argparse
|
|
|
|
from onyx.db.enums import EmbeddingPrecision
|
|
from onyx.document_index.vespa.index import _replace_template_values_in_schema
|
|
from onyx.document_index.vespa.index import _replace_tenant_template_value_in_schema
|
|
from onyx.document_index.vespa_constants import TENANT_ID_REPLACEMENT
|
|
from onyx.utils.logger import setup_logger
|
|
from shared_configs.configs import SUPPORTED_EMBEDDING_MODELS
|
|
|
|
# Module-level logger; write_schema and main use it to report the files they produce.
logger = setup_logger()
|
|
|
|
|
|
def write_schema(index_name: str, dim: int, template: str) -> None:
    """Render ``template`` for a single index and save it as ``<index_name>.sd``.

    The template is first passed through
    ``_replace_tenant_template_value_in_schema`` together with
    ``TENANT_ID_REPLACEMENT``; the result is then passed through
    ``_replace_template_values_in_schema`` with the index name, the
    dimension, and ``EmbeddingPrecision.FLOAT``. The rendered text is written
    as UTF-8 and the output filename is logged at info level.

    Args:
        index_name: Index name handed to the renderer; also the base name of
            the output file (no directory is prepended).
        dim: Dimension value handed to the renderer.
        template: Schema template text to render.
    """
    output_path = index_name + ".sd"

    # Two-stage render: tenant value first, then the per-index values.
    tenant_rendered = _replace_tenant_template_value_in_schema(
        template, TENANT_ID_REPLACEMENT
    )
    schema_text = _replace_template_values_in_schema(
        tenant_rendered, index_name, dim, EmbeddingPrecision.FLOAT
    )

    with open(output_path, "w", encoding="utf-8") as schema_file:
        schema_file.write(schema_text)

    logger.info(f"Wrote {output_path}")
|
|
|
|
|
|
def main() -> None:
    """Command-line entry point: write two schema files per supported model.

    Reads the template file named by the required ``--template`` argument,
    then, for every entry in ``SUPPORTED_EMBEDDING_MODELS``, calls
    ``write_schema`` once with the model's index name and once with that name
    plus the ``__danswer_alt_index`` suffix. Finally logs how many schemas
    were written.
    """
    arg_parser = argparse.ArgumentParser(
        description="Generate multi tenant Vespa schemas"
    )
    arg_parser.add_argument(
        "--template", help="The schema template to use", required=True
    )
    cli_args = arg_parser.parse_args()

    with open(cli_args.template, "r", encoding="utf-8") as template_file:
        template_text = template_file.read()

    # Each model gets a schema under its own name and one under the alt-index name.
    name_suffixes = ("", "__danswer_alt_index")

    written = 0
    for embedding_model in SUPPORTED_EMBEDDING_MODELS:
        for suffix in name_suffixes:
            write_schema(
                embedding_model.index_name + suffix,
                embedding_model.dim,
                template_text,
            )
            written += 1

    logger.info(f"Wrote {written} indexes.")
|
|
|
|
|
|
# Run the generator only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|