diff --git a/backend/ee/onyx/document_index/vespa/app_config/cloud-services.xml.jinja b/backend/ee/onyx/document_index/vespa/app_config/cloud-services.xml.jinja
new file mode 100644
index 0000000000..92c019bed1
--- /dev/null
+++ b/backend/ee/onyx/document_index/vespa/app_config/cloud-services.xml.jinja
@@ -0,0 +1,48 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+{{ document_elements }}
+
+
+
+
+
+
+
+
+
+ 2
+
+
+
+
+
+
+
+ 3
+ 750
+ 350
+ 300
+
+
+
+ 2
+
+
+
diff --git a/backend/scripts/debugging/onyx_vespa_schemas.py b/backend/scripts/debugging/onyx_vespa_schemas.py
index b556c48f1b..bc765144f4 100644
--- a/backend/scripts/debugging/onyx_vespa_schemas.py
+++ b/backend/scripts/debugging/onyx_vespa_schemas.py
@@ -1,6 +1,18 @@
-"""Tool to generate all supported schema variations for Onyx Cloud's Vespa database."""
+"""Tool to generate all supported schema variations for Onyx Cloud's Vespa database.
+
+Usage:
+
+```
+PYTHONPATH=. python scripts/debugging/onyx_vespa_schemas.py
+```
+
+Then, paste them into the existing vespa schema downloaded from the Vespa console,
+and then re-zip.
+"""
import argparse
+import os
+from pathlib import Path
import jinja2
@@ -16,8 +28,13 @@ def write_schema(
dim: int,
embedding_precision: EmbeddingPrecision,
template: jinja2.Template,
+ output_path: Path,
) -> None:
- index_filename = index_name + ".sd"
+ # Create schemas directory if it doesn't exist
+ schemas_dir = output_path / "schemas"
+ schemas_dir.mkdir(parents=True, exist_ok=True)
+
+ index_filename = schemas_dir / (index_name + ".sd")
schema = template.render(
multi_tenant=True,
@@ -32,13 +49,72 @@ def write_schema(
logger.info(f"Wrote {index_filename}")
-def main() -> None:
- parser = argparse.ArgumentParser(description="Generate multi tenant Vespa schemas")
- parser.add_argument("--template", help="The Jinja template to use", required=True)
- args = parser.parse_args()
+def generate_document_entries() -> str:
+ """Generate document entries for all supported embedding models."""
+ document_entries = []
+
+ for model in SUPPORTED_EMBEDDING_MODELS:
+ # Add regular index
+ document_entries.append(
+ f' '
+ )
+ # Add alt index
+ document_entries.append(
+ f' '
+ )
+
+ return "\n".join(document_entries)
+
+
+def write_cloud_services(cloud_services_template_path: str, output_path: Path) -> None:
+ """Generate and write the cloud-services.xml file."""
+ # Create output directory if it doesn't exist
+ output_path.mkdir(parents=True, exist_ok=True)
jinja_env = jinja2.Environment()
+ with open(cloud_services_template_path, "r", encoding="utf-8") as f:
+ template_str = f.read()
+
+ template = jinja_env.from_string(template_str)
+ document_entries = generate_document_entries()
+
+ services_xml = template.render(document_elements=document_entries)
+
+ services_file = output_path / "services.xml"
+ with open(services_file, "w", encoding="utf-8") as f:
+ f.write(services_xml)
+
+ logger.info(f"Wrote {services_file}")
+
+
+def main() -> None:
+ parser = argparse.ArgumentParser(
+ description="Generate multi tenant Vespa schemas and services configuration"
+ )
+ parser.add_argument(
+ "--template",
+ help="The Jinja template to use for schemas",
+ default="onyx/document_index/vespa/app_config/schemas/danswer_chunk.sd.jinja",
+ )
+ parser.add_argument(
+ "--cloud-services-template",
+ help="The cloud-services.xml.jinja template path",
+ default="ee/onyx/document_index/vespa/app_config/cloud-services.xml.jinja",
+ )
+ parser.add_argument(
+ "--output-path",
+ help="Output directory path (defaults to current directory)",
+ default=".",
+ )
+ args = parser.parse_args()
+
+ # Convert output path to Path object
+ output_path = Path(args.output_path)
+
+ jinja_env = jinja2.Environment()
+
+ # Generate schema files
with open(args.template, "r", encoding="utf-8") as f:
template_str = f.read()
@@ -46,17 +122,33 @@ def main() -> None:
num_indexes = 0
for model in SUPPORTED_EMBEDDING_MODELS:
- write_schema(model.index_name, model.dim, model.embedding_precision, template)
+ write_schema(
+ model.index_name,
+ model.dim,
+ model.embedding_precision,
+ template,
+ output_path,
+ )
write_schema(
model.index_name + "__danswer_alt_index",
model.dim,
model.embedding_precision,
template,
+ output_path,
)
num_indexes += 2
logger.info(f"Wrote {num_indexes} indexes.")
+ # Generate cloud services configuration if template is provided
+ if args.cloud_services_template:
+ if os.path.exists(args.cloud_services_template):
+ write_cloud_services(args.cloud_services_template, output_path)
+ else:
+ logger.error(
+ f"Cloud services template not found: {args.cloud_services_template}"
+ )
+
if __name__ == "__main__":
main()