mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-09-19 20:24:32 +02:00
Changed default local model to nomic (#1943)
This commit is contained in:
@@ -44,7 +44,7 @@ export function CustomModelForm({
|
||||
name="model_name"
|
||||
label="Name:"
|
||||
subtext="The name of the model on Hugging Face"
|
||||
placeholder="E.g. 'intfloat/e5-base-v2'"
|
||||
placeholder="E.g. 'nomic-ai/nomic-embed-text-v1'"
|
||||
autoCompleteDisabled={true}
|
||||
/>
|
||||
|
||||
|
@@ -67,12 +67,22 @@ export interface CloudEmbeddingProviderFull extends CloudEmbeddingProvider {
|
||||
|
||||
export const AVAILABLE_MODELS: HostedEmbeddingModel[] = [
|
||||
{
|
||||
model_name: "intfloat/e5-base-v2",
|
||||
model_name: "nomic-ai/nomic-embed-text-v1",
|
||||
model_dim: 768,
|
||||
normalize: true,
|
||||
description:
|
||||
"The recommended default for most situations. If you aren't sure which model to use, this is probably the one.",
|
||||
isDefault: true,
|
||||
link: "https://huggingface.co/nomic-ai/nomic-embed-text-v1",
|
||||
query_prefix: "search_query: ",
|
||||
passage_prefix: "search_document: ",
|
||||
},
|
||||
{
|
||||
model_name: "intfloat/e5-base-v2",
|
||||
model_dim: 768,
|
||||
normalize: true,
|
||||
description:
|
||||
"A smaller and faster model than the default. It is around 2x faster than the default model at the cost of lower search quality.",
|
||||
link: "https://huggingface.co/intfloat/e5-base-v2",
|
||||
query_prefix: "query: ",
|
||||
passage_prefix: "passage: ",
|
||||
@@ -82,7 +92,7 @@ export const AVAILABLE_MODELS: HostedEmbeddingModel[] = [
|
||||
model_dim: 384,
|
||||
normalize: true,
|
||||
description:
|
||||
"A smaller / faster version of the default model. If you're running Danswer on a resource constrained system, then this is a good choice.",
|
||||
"The smallest and fastest version of the E5 line of models. If you're running Danswer on a resource constrained system, then this may be a good choice.",
|
||||
link: "https://huggingface.co/intfloat/e5-small-v2",
|
||||
query_prefix: "query: ",
|
||||
passage_prefix: "passage: ",
|
||||
@@ -92,7 +102,7 @@ export const AVAILABLE_MODELS: HostedEmbeddingModel[] = [
|
||||
model_dim: 768,
|
||||
normalize: true,
|
||||
description:
|
||||
"If you have many documents in other languages besides English, this is the one to go for.",
|
||||
"For corpora in other languages besides English, this is the one to choose.",
|
||||
link: "https://huggingface.co/intfloat/multilingual-e5-base",
|
||||
query_prefix: "query: ",
|
||||
passage_prefix: "passage: ",
|
||||
@@ -102,7 +112,7 @@ export const AVAILABLE_MODELS: HostedEmbeddingModel[] = [
|
||||
model_dim: 384,
|
||||
normalize: true,
|
||||
description:
|
||||
"If you have many documents in other languages besides English, and you're running on a resource constrained system, then this is the one to go for.",
|
||||
"For corpora in other languages besides English, as well as being on a resource constrained system, this is the one to choose.",
|
||||
link: "https://huggingface.co/intfloat/multilingual-e5-base",
|
||||
query_prefix: "query: ",
|
||||
passage_prefix: "passage: ",
|
||||
|
@@ -265,8 +265,8 @@ function Main() {
|
||||
return (
|
||||
<div className="h-screen">
|
||||
<Text>
|
||||
Embedding models are used to generate embeddings for your documents,
|
||||
which then power Danswer's search.
|
||||
These deep learning models are used to generate vector representations
|
||||
of your documents, which then power Danswer's search.
|
||||
</Text>
|
||||
|
||||
{alreadySelectedModel && (
|
||||
@@ -359,12 +359,12 @@ function Main() {
|
||||
<>
|
||||
<Title className="mt-8">Switch your Embedding Model</Title>
|
||||
<Text className="mb-4">
|
||||
If the current model is not working for you, you can update your
|
||||
model choice below. Note that this will require a complete
|
||||
re-indexing of all your documents across every connected source. We
|
||||
will take care of this in the background, but depending on the size
|
||||
of your corpus, this could take hours, day, or even weeks. You can
|
||||
monitor the progress of the re-indexing on this page.
|
||||
Note that updating the backing model will require a complete
|
||||
re-indexing of all documents across every connected source. This is
|
||||
taken care of in the background so that the system can continue to
|
||||
be used, but depending on the size of the corpus, this could take
|
||||
hours or days. You can monitor the progress of the re-indexing on
|
||||
this page while the models are being switched.
|
||||
</Text>
|
||||
|
||||
<div className="mt-8 text-sm mr-auto mb-12 divide-x-2 flex">
|
||||
|
Reference in New Issue
Block a user