mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-03-26 17:51:54 +01:00
Allow all LLMs for image generation assistants (#3730)
* Allow all LLMs for image generation assistants * ensure pushed * update color + assistant -> model * update prompt * fix silly conditional
This commit is contained in:
parent
6551d6bc87
commit
3c37764974
@ -15,6 +15,7 @@ from onyx.llm.models import PreviousMessage
|
||||
from onyx.llm.utils import build_content_with_imgs
|
||||
from onyx.llm.utils import check_message_tokens
|
||||
from onyx.llm.utils import message_to_prompt_and_imgs
|
||||
from onyx.llm.utils import model_supports_image_input
|
||||
from onyx.natural_language_processing.utils import get_tokenizer
|
||||
from onyx.prompts.chat_prompts import CHAT_USER_CONTEXT_FREE_PROMPT
|
||||
from onyx.prompts.direct_qa_prompts import HISTORY_BLOCK
|
||||
@ -90,6 +91,7 @@ class AnswerPromptBuilder:
|
||||
provider_type=llm_config.model_provider,
|
||||
model_name=llm_config.model_name,
|
||||
)
|
||||
self.llm_config = llm_config
|
||||
self.llm_tokenizer_encode_func = cast(
|
||||
Callable[[str], list[int]], llm_tokenizer.encode
|
||||
)
|
||||
@ -98,12 +100,21 @@ class AnswerPromptBuilder:
|
||||
(
|
||||
self.message_history,
|
||||
self.history_token_cnts,
|
||||
) = translate_history_to_basemessages(message_history)
|
||||
) = translate_history_to_basemessages(
|
||||
message_history,
|
||||
exclude_images=not model_supports_image_input(
|
||||
self.llm_config.model_name,
|
||||
self.llm_config.model_provider,
|
||||
),
|
||||
)
|
||||
|
||||
self.system_message_and_token_cnt: tuple[SystemMessage, int] | None = None
|
||||
self.user_message_and_token_cnt = (
|
||||
user_message,
|
||||
check_message_tokens(user_message, self.llm_tokenizer_encode_func),
|
||||
check_message_tokens(
|
||||
user_message,
|
||||
self.llm_tokenizer_encode_func,
|
||||
),
|
||||
)
|
||||
|
||||
self.new_messages_and_token_cnts: list[tuple[BaseMessage, int]] = []
|
||||
|
@ -11,6 +11,7 @@ from onyx.llm.utils import build_content_with_imgs
|
||||
|
||||
def translate_onyx_msg_to_langchain(
|
||||
msg: ChatMessage | PreviousMessage,
|
||||
exclude_images: bool = False,
|
||||
) -> BaseMessage:
|
||||
files: list[InMemoryChatFile] = []
|
||||
|
||||
@ -18,7 +19,9 @@ def translate_onyx_msg_to_langchain(
|
||||
# attached. Just ignore them for now.
|
||||
if not isinstance(msg, ChatMessage):
|
||||
files = msg.files
|
||||
content = build_content_with_imgs(msg.message, files, message_type=msg.message_type)
|
||||
content = build_content_with_imgs(
|
||||
msg.message, files, message_type=msg.message_type, exclude_images=exclude_images
|
||||
)
|
||||
|
||||
if msg.message_type == MessageType.SYSTEM:
|
||||
raise ValueError("System messages are not currently part of history")
|
||||
@ -32,9 +35,12 @@ def translate_onyx_msg_to_langchain(
|
||||
|
||||
def translate_history_to_basemessages(
|
||||
history: list[ChatMessage] | list["PreviousMessage"],
|
||||
exclude_images: bool = False,
|
||||
) -> tuple[list[BaseMessage], list[int]]:
|
||||
history_basemessages = [
|
||||
translate_onyx_msg_to_langchain(msg) for msg in history if msg.token_count != 0
|
||||
translate_onyx_msg_to_langchain(msg, exclude_images)
|
||||
for msg in history
|
||||
if msg.token_count != 0
|
||||
]
|
||||
history_token_counts = [msg.token_count for msg in history if msg.token_count != 0]
|
||||
return history_basemessages, history_token_counts
|
||||
|
@ -142,6 +142,7 @@ def build_content_with_imgs(
|
||||
img_urls: list[str] | None = None,
|
||||
b64_imgs: list[str] | None = None,
|
||||
message_type: MessageType = MessageType.USER,
|
||||
exclude_images: bool = False,
|
||||
) -> str | list[str | dict[str, Any]]: # matching Langchain's BaseMessage content type
|
||||
files = files or []
|
||||
|
||||
@ -157,7 +158,7 @@ def build_content_with_imgs(
|
||||
|
||||
message_main_content = _build_content(message, files)
|
||||
|
||||
if not img_files and not img_urls:
|
||||
if exclude_images or (not img_files and not img_urls):
|
||||
return message_main_content
|
||||
|
||||
return cast(
|
||||
@ -382,9 +383,19 @@ def _strip_colon_from_model_name(model_name: str) -> str:
|
||||
return ":".join(model_name.split(":")[:-1]) if ":" in model_name else model_name
|
||||
|
||||
|
||||
def _find_model_obj(
|
||||
model_map: dict, provider: str, model_names: list[str | None]
|
||||
) -> dict | None:
|
||||
def _find_model_obj(model_map: dict, provider: str, model_name: str) -> dict | None:
|
||||
stripped_model_name = _strip_extra_provider_from_model_name(model_name)
|
||||
|
||||
model_names = [
|
||||
model_name,
|
||||
_strip_extra_provider_from_model_name(model_name),
|
||||
# Remove leading extra provider. Usually for cases where user has a
|
||||
# customer model proxy which appends another prefix
|
||||
# remove :XXXX from the end, if present. Needed for ollama.
|
||||
_strip_colon_from_model_name(model_name),
|
||||
_strip_colon_from_model_name(stripped_model_name),
|
||||
]
|
||||
|
||||
# Filter out None values and deduplicate model names
|
||||
filtered_model_names = [name for name in model_names if name]
|
||||
|
||||
@ -417,21 +428,10 @@ def get_llm_max_tokens(
|
||||
return GEN_AI_MAX_TOKENS
|
||||
|
||||
try:
|
||||
extra_provider_stripped_model_name = _strip_extra_provider_from_model_name(
|
||||
model_name
|
||||
)
|
||||
model_obj = _find_model_obj(
|
||||
model_map,
|
||||
model_provider,
|
||||
[
|
||||
model_name,
|
||||
# Remove leading extra provider. Usually for cases where user has a
|
||||
# customer model proxy which appends another prefix
|
||||
extra_provider_stripped_model_name,
|
||||
# remove :XXXX from the end, if present. Needed for ollama.
|
||||
_strip_colon_from_model_name(model_name),
|
||||
_strip_colon_from_model_name(extra_provider_stripped_model_name),
|
||||
],
|
||||
model_name,
|
||||
)
|
||||
if not model_obj:
|
||||
raise RuntimeError(
|
||||
@ -523,3 +523,23 @@ def get_max_input_tokens(
|
||||
raise RuntimeError("No tokens for input for the LLM given settings")
|
||||
|
||||
return input_toks
|
||||
|
||||
|
||||
def model_supports_image_input(model_name: str, model_provider: str) -> bool:
|
||||
model_map = get_model_map()
|
||||
try:
|
||||
model_obj = _find_model_obj(
|
||||
model_map,
|
||||
model_provider,
|
||||
model_name,
|
||||
)
|
||||
if not model_obj:
|
||||
raise RuntimeError(
|
||||
f"No litellm entry found for {model_provider}/{model_name}"
|
||||
)
|
||||
return model_obj.get("supports_vision", False)
|
||||
except Exception:
|
||||
logger.exception(
|
||||
f"Failed to get model object for {model_provider}/{model_name}"
|
||||
)
|
||||
return False
|
||||
|
@ -16,6 +16,7 @@ from onyx.llm.interfaces import LLM
|
||||
from onyx.llm.models import PreviousMessage
|
||||
from onyx.llm.utils import build_content_with_imgs
|
||||
from onyx.llm.utils import message_to_string
|
||||
from onyx.llm.utils import model_supports_image_input
|
||||
from onyx.prompts.constants import GENERAL_SEP_PAT
|
||||
from onyx.tools.message import ToolCallSummary
|
||||
from onyx.tools.models import ToolResponse
|
||||
@ -316,12 +317,22 @@ class ImageGenerationTool(Tool):
|
||||
for img in img_generation_response
|
||||
if img.image_data is not None
|
||||
]
|
||||
prompt_builder.update_user_prompt(
|
||||
build_image_generation_user_prompt(
|
||||
query=prompt_builder.get_user_message_content(),
|
||||
img_urls=img_urls,
|
||||
b64_imgs=b64_imgs,
|
||||
)
|
||||
|
||||
user_prompt = build_image_generation_user_prompt(
|
||||
query=prompt_builder.get_user_message_content(),
|
||||
supports_image_input=model_supports_image_input(
|
||||
prompt_builder.llm_config.model_name,
|
||||
prompt_builder.llm_config.model_provider,
|
||||
),
|
||||
prompts=[
|
||||
prompt
|
||||
for response in img_generation_response
|
||||
for prompt in response.revised_prompt
|
||||
],
|
||||
img_urls=img_urls,
|
||||
b64_imgs=b64_imgs,
|
||||
)
|
||||
|
||||
prompt_builder.update_user_prompt(user_prompt)
|
||||
|
||||
return prompt_builder
|
||||
|
@ -9,16 +9,34 @@ You have just created the attached images in response to the following query: "{
|
||||
Can you please summarize them in a sentence or two? Do NOT include image urls or bulleted lists.
|
||||
"""
|
||||
|
||||
IMG_GENERATION_SUMMARY_PROMPT_NO_IMAGES = """
|
||||
You have generated images based on the following query: "{query}".
|
||||
The prompts used to create these images were: {prompts}
|
||||
|
||||
Describe the two images you generated, summarizing the key elements and content in a sentence or two.
|
||||
Be specific about what was generated and respond as if you have seen them,
|
||||
without including any disclaimers or speculations.
|
||||
"""
|
||||
|
||||
|
||||
def build_image_generation_user_prompt(
|
||||
query: str,
|
||||
supports_image_input: bool,
|
||||
img_urls: list[str] | None = None,
|
||||
b64_imgs: list[str] | None = None,
|
||||
prompts: list[str] | None = None,
|
||||
) -> HumanMessage:
|
||||
return HumanMessage(
|
||||
content=build_content_with_imgs(
|
||||
message=IMG_GENERATION_SUMMARY_PROMPT.format(query=query).strip(),
|
||||
b64_imgs=b64_imgs,
|
||||
img_urls=img_urls,
|
||||
if supports_image_input:
|
||||
return HumanMessage(
|
||||
content=build_content_with_imgs(
|
||||
message=IMG_GENERATION_SUMMARY_PROMPT.format(query=query).strip(),
|
||||
b64_imgs=b64_imgs,
|
||||
img_urls=img_urls,
|
||||
)
|
||||
)
|
||||
else:
|
||||
return HumanMessage(
|
||||
content=IMG_GENERATION_SUMMARY_PROMPT_NO_IMAGES.format(
|
||||
query=query, prompts=prompts
|
||||
).strip()
|
||||
)
|
||||
)
|
||||
|
@ -444,26 +444,10 @@ export function AssistantEditor({
|
||||
let enabledTools = Object.keys(values.enabled_tools_map)
|
||||
.map((toolId) => Number(toolId))
|
||||
.filter((toolId) => values.enabled_tools_map[toolId]);
|
||||
|
||||
const searchToolEnabled = searchTool
|
||||
? enabledTools.includes(searchTool.id)
|
||||
: false;
|
||||
const imageGenerationToolEnabled = imageGenerationTool
|
||||
? enabledTools.includes(imageGenerationTool.id)
|
||||
: false;
|
||||
|
||||
if (imageGenerationToolEnabled) {
|
||||
if (
|
||||
// model must support image input for image generation
|
||||
// to work
|
||||
!checkLLMSupportsImageInput(
|
||||
values.llm_model_version_override || defaultModelName || ""
|
||||
)
|
||||
) {
|
||||
enabledTools = enabledTools.filter(
|
||||
(toolId) => toolId !== imageGenerationTool!.id
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// if disable_retrieval is set, set num_chunks to 0
|
||||
// to tell the backend to not fetch any documents
|
||||
@ -914,25 +898,20 @@ export function AssistantEditor({
|
||||
id={`enabled_tools_map.${imageGenerationTool.id}`}
|
||||
name={`enabled_tools_map.${imageGenerationTool.id}`}
|
||||
onCheckedChange={() => {
|
||||
if (
|
||||
currentLLMSupportsImageOutput &&
|
||||
isImageGenerationAvailable
|
||||
) {
|
||||
if (isImageGenerationAvailable) {
|
||||
toggleToolInValues(
|
||||
imageGenerationTool.id
|
||||
);
|
||||
}
|
||||
}}
|
||||
className={
|
||||
!currentLLMSupportsImageOutput ||
|
||||
!isImageGenerationAvailable
|
||||
? "opacity-50 cursor-not-allowed"
|
||||
: ""
|
||||
}
|
||||
/>
|
||||
</TooltipTrigger>
|
||||
{(!currentLLMSupportsImageOutput ||
|
||||
!isImageGenerationAvailable) && (
|
||||
{!isImageGenerationAvailable && (
|
||||
<TooltipContent side="top" align="center">
|
||||
<p className="bg-background-900 max-w-[200px] mb-1 text-sm rounded-lg p-1.5 text-white">
|
||||
{!currentLLMSupportsImageOutput
|
||||
|
@ -49,6 +49,7 @@ import {
|
||||
useContext,
|
||||
useEffect,
|
||||
useLayoutEffect,
|
||||
useMemo,
|
||||
useRef,
|
||||
useState,
|
||||
} from "react";
|
||||
@ -1623,7 +1624,7 @@ export function ChatPage({
|
||||
setPopup({
|
||||
type: "error",
|
||||
message:
|
||||
"The current Assistant does not support image input. Please select an assistant with Vision support.",
|
||||
"The current model does not support image input. Please select a model with Vision support.",
|
||||
});
|
||||
return;
|
||||
}
|
||||
@ -1841,6 +1842,14 @@ export function ChatPage({
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [messageHistory]);
|
||||
|
||||
const imageFileInMessageHistory = useMemo(() => {
|
||||
return messageHistory
|
||||
.filter((message) => message.type === "user")
|
||||
.some((message) =>
|
||||
message.files.some((file) => file.type === ChatFileType.IMAGE)
|
||||
);
|
||||
}, [messageHistory]);
|
||||
|
||||
const currentVisibleRange = visibleRange.get(currentSessionId()) || {
|
||||
start: 0,
|
||||
end: 0,
|
||||
@ -1921,6 +1930,10 @@ export function ChatPage({
|
||||
handleSlackChatRedirect();
|
||||
}, [searchParams, router]);
|
||||
|
||||
useEffect(() => {
|
||||
llmOverrideManager.updateImageFilesPresent(imageFileInMessageHistory);
|
||||
}, [imageFileInMessageHistory]);
|
||||
|
||||
useEffect(() => {
|
||||
const handleKeyDown = (event: KeyboardEvent) => {
|
||||
if (event.metaKey || event.ctrlKey) {
|
||||
|
@ -5,7 +5,6 @@ import {
|
||||
PopoverTrigger,
|
||||
} from "@/components/ui/popover";
|
||||
import { ChatInputOption } from "./ChatInputOption";
|
||||
import { AnthropicSVG } from "@/components/icons/icons";
|
||||
import { getDisplayNameForModel } from "@/lib/hooks";
|
||||
import {
|
||||
checkLLMSupportsImageInput,
|
||||
@ -19,6 +18,14 @@ import {
|
||||
import { Persona } from "@/app/admin/assistants/interfaces";
|
||||
import { LlmOverrideManager } from "@/lib/hooks";
|
||||
|
||||
import {
|
||||
Tooltip,
|
||||
TooltipContent,
|
||||
TooltipProvider,
|
||||
TooltipTrigger,
|
||||
} from "@/components/ui/tooltip";
|
||||
import { FiAlertTriangle } from "react-icons/fi";
|
||||
|
||||
interface LLMPopoverProps {
|
||||
llmProviders: LLMProviderDescriptor[];
|
||||
llmOverrideManager: LlmOverrideManager;
|
||||
@ -139,6 +146,22 @@ export default function LLMPopover({
|
||||
);
|
||||
}
|
||||
})()}
|
||||
{llmOverrideManager.imageFilesPresent &&
|
||||
!checkLLMSupportsImageInput(name) && (
|
||||
<TooltipProvider>
|
||||
<Tooltip delayDuration={0}>
|
||||
<TooltipTrigger className="my-auto flex items-center ml-auto">
|
||||
<FiAlertTriangle className="text-alert" size={16} />
|
||||
</TooltipTrigger>
|
||||
<TooltipContent>
|
||||
<p className="text-xs">
|
||||
This LLM is not vision-capable and cannot process
|
||||
image files present in your chat session.
|
||||
</p>
|
||||
</TooltipContent>
|
||||
</Tooltip>
|
||||
</TooltipProvider>
|
||||
)}
|
||||
</button>
|
||||
);
|
||||
}
|
||||
|
@ -1,7 +1,8 @@
|
||||
import { useRef, useState } from "react";
|
||||
import { cva, type VariantProps } from "class-variance-authority";
|
||||
import { cn } from "@/lib/utils";
|
||||
import { CheckCircle, XCircle } from "lucide-react";
|
||||
import { Check, CheckCircle, XCircle } from "lucide-react";
|
||||
import { Warning } from "@phosphor-icons/react";
|
||||
const popupVariants = cva(
|
||||
"fixed bottom-4 left-4 p-4 rounded-lg shadow-xl text-white z-[10000] flex items-center space-x-3 transition-all duration-300 ease-in-out",
|
||||
{
|
||||
@ -26,9 +27,9 @@ export interface PopupSpec extends VariantProps<typeof popupVariants> {
|
||||
export const Popup: React.FC<PopupSpec> = ({ message, type }) => (
|
||||
<div className={cn(popupVariants({ type }))}>
|
||||
{type === "success" ? (
|
||||
<CheckCircle className="w-6 h-6 animate-pulse" />
|
||||
<Check className="w-6 h-6" />
|
||||
) : type === "error" ? (
|
||||
<XCircle className="w-6 h-6 animate-pulse" />
|
||||
<Warning className="w-6 h-6 " />
|
||||
) : type === "info" ? (
|
||||
<svg
|
||||
className="w-6 h-6"
|
||||
|
@ -360,6 +360,8 @@ export interface LlmOverrideManager {
|
||||
temperature: number | null;
|
||||
updateTemperature: (temperature: number | null) => void;
|
||||
updateModelOverrideForChatSession: (chatSession?: ChatSession) => void;
|
||||
imageFilesPresent: boolean;
|
||||
updateImageFilesPresent: (present: boolean) => void;
|
||||
}
|
||||
export function useLlmOverride(
|
||||
llmProviders: LLMProviderDescriptor[],
|
||||
@ -383,6 +385,11 @@ export function useLlmOverride(
|
||||
}
|
||||
return { name: "", provider: "", modelName: "" };
|
||||
};
|
||||
const [imageFilesPresent, setImageFilesPresent] = useState(false);
|
||||
|
||||
const updateImageFilesPresent = (present: boolean) => {
|
||||
setImageFilesPresent(present);
|
||||
};
|
||||
|
||||
const [globalDefault, setGlobalDefault] = useState<LlmOverride>(
|
||||
getValidLlmOverride(globalModel)
|
||||
@ -447,6 +454,8 @@ export function useLlmOverride(
|
||||
setGlobalDefault,
|
||||
temperature,
|
||||
updateTemperature,
|
||||
imageFilesPresent,
|
||||
updateImageFilesPresent,
|
||||
};
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user