diff --git a/backend/danswer/configs/model_configs.py b/backend/danswer/configs/model_configs.py
index b2e0169dd9..a0b635566a 100644
--- a/backend/danswer/configs/model_configs.py
+++ b/backend/danswer/configs/model_configs.py
@@ -20,6 +20,6 @@ BATCH_SIZE_ENCODE_CHUNKS = 8
 
 # QA Model API Configs
 # https://platform.openai.com/docs/models/model-endpoint-compatibility
-INTERNAL_MODEL_VERSION = os.environ.get("INTERNAL_MODEL", "openai-completion")
-OPENAPI_MODEL_VERSION = os.environ.get("OPENAI_MODEL_VERSION", "text-davinci-003")
+INTERNAL_MODEL_VERSION = os.environ.get("INTERNAL_MODEL", "openai-chat-completion")
+OPENAPI_MODEL_VERSION = os.environ.get("OPENAI_MODEL_VERSION", "gpt-4")
 OPENAI_MAX_OUTPUT_TOKENS = 512
diff --git a/backend/danswer/direct_qa/qa_prompts.py b/backend/danswer/direct_qa/qa_prompts.py
index a7d1da9b04..da39ea6bf5 100644
--- a/backend/danswer/direct_qa/qa_prompts.py
+++ b/backend/danswer/direct_qa/qa_prompts.py
@@ -6,17 +6,12 @@ ANSWER_PAT = "Answer:"
 UNCERTAINTY_PAT = "?"
 QUOTE_PAT = "Quote:"
 
-SYSTEM_ROLE = "You are a Question Answering system that answers queries based on provided documents. "
-
 BASE_PROMPT = (
     f"Answer the query based on provided documents and quote relevant sections. "
     f"Respond with a json containing a concise answer and up to three most relevant quotes from the documents. "
     f"The quotes must be EXACT substrings from the documents.\n"
 )
 
-UNABLE_TO_FIND_JSON_MSG = (
-    "If the query cannot be answered based on the documents, respond with {}. "
-)
 
 SAMPLE_QUESTION = "Where is the Eiffel Tower?"
 
@@ -106,12 +101,21 @@ def freeform_processor(question: str, documents: list[str]) -> str:
 
 
 def json_chat_processor(question: str, documents: list[str]) -> list[dict[str, str]]:
-    role_msg = (
-        SYSTEM_ROLE
-        + 'Start by reading the following documents and responding with "Acknowledged"'
+    intro_msg = (
+        "You are a Question Answering assistant that answers queries based on provided documents.\n"
+        'Start by reading the following documents and responding with "Acknowledged".'
     )
 
-    messages = [{"role": "system", "content": role_msg}]
+    task_msg = (
+        "Now answer the next user query based on documents above and quote relevant sections.\n"
+        "Respond with a JSON containing the answer and up to three most relevant quotes from the documents.\n"
+        "If the query cannot be answered based on the documents, do not provide an answer.\n"
+        "All quotes MUST be EXACT substrings from provided documents.\n"
+        "Your responses should be informative and concise.\n"
+        "You MUST prioritize information from provided documents over internal knowledge.\n"
+        f"Sample response:\n{json.dumps(SAMPLE_JSON_RESPONSE)}"
+    )
+    messages = [{"role": "system", "content": intro_msg}]
 
     for document in documents:
         messages.extend(
@@ -123,18 +127,10 @@ def json_chat_processor(question: str, documents: list[str]) -> list[dict[str, s
                 {"role": "assistant", "content": "Acknowledged"},
             ]
         )
-    sample_msg = (
-        f"Now answer the user query based on documents above and quote relevant sections.\n"
-        f"Respond with a json containing a concise answer and up to three most relevant quotes from the documents.\n"
-        f"Sample response: {json.dumps(SAMPLE_JSON_RESPONSE)}"
-    )
-    messages.append({"role": "system", "content": sample_msg})
+    messages.append({"role": "system", "content": task_msg})
 
     messages.append({"role": "user", "content": f"{QUESTION_PAT}\n{question}\n"})
 
-    # Note that the below will be dropped in reflexion if used
-    messages.append({"role": "assistant", "content": "Answer Json:\n"})
-
     return messages
 
 
diff --git a/backend/danswer/direct_qa/question_answer.py b/backend/danswer/direct_qa/question_answer.py
index 957f1265e3..581f440785 100644
--- a/backend/danswer/direct_qa/question_answer.py
+++ b/backend/danswer/direct_qa/question_answer.py
@@ -324,10 +324,57 @@ class OpenAIChatCompletionQA(QAModel):
         answer, quotes_dict = process_answer(model_output, context_docs)
         return answer, quotes_dict
 
-    @log_function_time()
     def answer_question_stream(
         self, query: str, context_docs: list[InferenceChunk]
-    ) -> Any:
-        raise NotImplementedError(
-            "Danswer with chat completion does not support streaming yet"
-        )
+    ) -> Generator[dict[str, Any] | None, None, None]:
+        top_contents = [ranked_chunk.content for ranked_chunk in context_docs]
+        messages = self.prompt_processor(query, top_contents)
+        logger.debug(messages)
+
+        try:
+            response = openai.ChatCompletion.create(
+                messages=messages,
+                temperature=0,
+                top_p=1,
+                frequency_penalty=0,
+                presence_penalty=0,
+                model=self.model_version,
+                max_tokens=self.max_output_tokens,
+                stream=True,
+            )
+
+            model_output = ""
+            found_answer_start = False
+            found_answer_end = False
+            for event in response:
+                event_dict = event["choices"][0]["delta"]
+                if (
+                    "content" not in event_dict
+                ):  # could be a role message or empty termination
+                    continue
+                event_text = event_dict["content"]
+                model_previous = model_output
+                model_output += event_text
+
+                if not found_answer_start and '{"answer":"' in model_output.replace(
+                    " ", ""
+                ).replace("\n", ""):
+                    found_answer_start = True
+                    continue
+
+                if found_answer_start and not found_answer_end:
+                    if stream_answer_end(model_previous, event_text):
+                        found_answer_end = True
+                        continue
+                    yield {"answer_data": event_text}
+
+        except Exception as e:
+            logger.exception(e)
+            model_output = "Model Failure"
+
+        logger.debug(model_output)
+
+        answer, quotes_dict = process_answer(model_output, context_docs)
+        logger.info(answer)
+
+        yield quotes_dict