diff --git a/backend/danswer/configs/app_configs.py b/backend/danswer/configs/app_configs.py
index 6778147b0..e6eb2a97d 100644
--- a/backend/danswer/configs/app_configs.py
+++ b/backend/danswer/configs/app_configs.py
@@ -54,7 +54,7 @@ CHUNK_OVERLAP = 5
 #####
 # Other API Keys
 #####
-OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
+OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
 
 
 #####
diff --git a/backend/danswer/direct_qa/qa_prompts.py b/backend/danswer/direct_qa/qa_prompts.py
index f6a4c13bd..8a9dd1ceb 100644
--- a/backend/danswer/direct_qa/qa_prompts.py
+++ b/backend/danswer/direct_qa/qa_prompts.py
@@ -1,11 +1,92 @@
+import json
+
 DOC_SEP_PAT = "---NEW DOCUMENT---"
 QUESTION_PAT = "Query:"
 ANSWER_PAT = "Answer:"
 UNCERTAINTY_PAT = "?"
 QUOTE_PAT = "Quote:"
+SYSTEM_ROLE = "You are a Question Answering system that answers queries based on provided documents. "
 
 
-def generic_prompt_processor(question: str, documents: list[str]) -> str:
+BASE_PROMPT = (
+    f"Answer the query based on provided documents and quote relevant sections. "
+    f"Respond with a json containing a concise answer and up to three most relevant quotes from the documents.\n"
+)
+
+UNABLE_TO_FIND_JSON_MSG = (
+    "If the query cannot be answered based on the documents, respond with {}. "
+)
+
+SAMPLE_QUESTION = "Where is the Eiffel Tower?"
+
+SAMPLE_JSON_RESPONSE = {
+    "answer": "The Eiffel Tower is located in Paris, France.",
+    "quotes": [
+        "The Eiffel Tower is an iconic symbol of Paris",
+        "located on the Champ de Mars in France.",
+    ],
+}
+
+
+def json_processor(question: str, documents: list[str]) -> str:
+    prompt = (
+        BASE_PROMPT + f"Sample response:\n{json.dumps(SAMPLE_JSON_RESPONSE)}\n\n"
+        f'Each context document below is prefixed with "{DOC_SEP_PAT}".\n\n'
+    )
+
+    for document in documents:
+        prompt += f"\n{DOC_SEP_PAT}\n{document}"
+
+    prompt += "\n\n---\n\n"
+    prompt += f"{QUESTION_PAT}\n{question}\n"
+    return prompt
+
+
+# The Chain of Thought approach works, but it has a higher token cost (more expensive) and is slower.
+# Use this one if users ask questions that require logical reasoning.
+def json_cot_variant_processor(question: str, documents: list[str]) -> str:
+    prompt = (
+        f"Answer the query based on provided documents and quote relevant sections. "
+        f'Respond with a freeform reasoning section followed by "Final Answer:" with a '
+        f"json containing a concise answer to the query and up to three most relevant quotes from the documents.\n"
+        f"Sample answer json:\n{json.dumps(SAMPLE_JSON_RESPONSE)}\n\n"
+        f'Each context document below is prefixed with "{DOC_SEP_PAT}".\n\n'
+    )
+
+    for document in documents:
+        prompt += f"\n{DOC_SEP_PAT}\n{document}"
+
+    prompt += "\n\n---\n\n"
+    prompt += f"{QUESTION_PAT}\n{question}\n"
+    prompt += "Reasoning:\n"
+    return prompt
+
+
+# This one seems largely useless with a single example.
+# The model seems to latch onto the one example of answering Yes and just does that too.
+def json_reflexion_processor(question: str, documents: list[str]) -> str:
+    reflexion_str = "Does this fully answer the user query?"
+    prompt = (
+        BASE_PROMPT
+        + f'After each generated json, ask "{reflexion_str}" and respond Yes or No. '
+        f"If No, generate a better json response to the query.\n"
+        f"Sample question and response:\n"
+        f"{QUESTION_PAT}\n{SAMPLE_QUESTION}\n"
+        f"{json.dumps(SAMPLE_JSON_RESPONSE)}\n"
+        f"{reflexion_str} Yes\n\n"
+        f'Each context document below is prefixed with "{DOC_SEP_PAT}".\n\n'
+    )
+
+    for document in documents:
+        prompt += f"\n{DOC_SEP_PAT}\n{document}"
+
+    prompt += "\n\n---\n\n"
+    prompt += f"{QUESTION_PAT}\n{question}\n"
+    return prompt
+
+
+# Initial design; works pretty well but is not optimal.
+def freeform_processor(question: str, documents: list[str]) -> str:
     prompt = (
         f"Answer the query based on the documents below and quote the documents segments containing the answer. "
         f'Respond with one "{ANSWER_PAT}" section and as many "{QUOTE_PAT}" sections as is relevant. '
@@ -23,7 +104,40 @@ def generic_prompt_processor(question: str, documents: list[str]) -> str:
     return prompt
 
 
-def openai_chat_completion_processor(
+def json_chat_processor(question: str, documents: list[str]) -> list[dict[str, str]]:
+    role_msg = (
+        SYSTEM_ROLE
+        + 'Start by reading the following documents and responding with "Acknowledged"'
+    )
+
+    messages = [{"role": "system", "content": role_msg}]
+
+    for document in documents:
+        messages.extend(
+            [
+                {
+                    "role": "user",
+                    "content": document,
+                },
+                {"role": "assistant", "content": "Acknowledged"},
+            ]
+        )
+    sample_msg = (
+        f"Now answer the user query based on documents above and quote relevant sections. "
+        f"Respond with a json containing a concise answer and up to three most relevant quotes from the documents.\n"
+        f"Sample response: {json.dumps(SAMPLE_JSON_RESPONSE)}"
+    )
+    messages.append({"role": "system", "content": sample_msg})
+
+    messages.append({"role": "user", "content": f"{QUESTION_PAT}\n{question}\n"})
+
+    # Note that the message below will be dropped if reflexion is used
+    messages.append({"role": "assistant", "content": "Answer Json:\n"})
+
+    return messages
+
+
+def freeform_chat_processor(
     question: str, documents: list[str]
 ) -> list[dict[str, str]]:
     sample_quote = "Quote:\nThe hotdogs are freshly cooked.\n\nQuote:\nThey are very cheap at only a dollar each."
@@ -60,3 +174,16 @@ def openai_chat_completion_processor(
     )
 
     return messages
+
+
+# Not very useful; have not seen it improve an answer based on this.
+# Sometimes gpt-3.5-turbo will just answer something worse like:
+# 'The response is a valid json that fully answers the user query with quotes exactly matching sections of the source
+# document. No revision is needed.'
+def get_chat_reflexion_msg() -> dict[str, str]:
+    reflexion_content = (
+        "Is the assistant response a valid json that fully answers the user query? "
+        "If the response needs to be fixed or if an improvement is possible, provide a revised json. "
+        "Otherwise, respond with the same json."
+    )
+    return {"role": "system", "content": reflexion_content}
diff --git a/backend/danswer/direct_qa/question_answer.py b/backend/danswer/direct_qa/question_answer.py
index 704ae4c13..e29f03d27 100644
--- a/backend/danswer/direct_qa/question_answer.py
+++ b/backend/danswer/direct_qa/question_answer.py
@@ -1,3 +1,4 @@
+import json
 import math
 import re
 from collections.abc import Callable
@@ -18,8 +19,9 @@ from danswer.configs.model_configs import OPENAI_MAX_OUTPUT_TOKENS
 from danswer.configs.model_configs import OPENAPI_MODEL_VERSION
 from danswer.direct_qa.interfaces import QAModel
 from danswer.direct_qa.qa_prompts import ANSWER_PAT
-from danswer.direct_qa.qa_prompts import generic_prompt_processor
-from danswer.direct_qa.qa_prompts import openai_chat_completion_processor
+from danswer.direct_qa.qa_prompts import get_chat_reflexion_msg
+from danswer.direct_qa.qa_prompts import json_chat_processor
+from danswer.direct_qa.qa_prompts import json_processor
 from danswer.direct_qa.qa_prompts import QUOTE_PAT
 from danswer.direct_qa.qa_prompts import UNCERTAINTY_PAT
 from danswer.utils.logging import setup_logger
@@ -33,10 +35,9 @@ logger = setup_logger()
 openai.api_key = OPENAI_API_KEY
 
 
-def separate_answer_quotes(
+def extract_answer_quotes_freeform(
     answer_raw: str,
 ) -> Tuple[Optional[str], Optional[list[str]]]:
-    """Gives back the answer and quote sections"""
     null_answer_check = (
         answer_raw.replace(ANSWER_PAT, "").replace(QUOTE_PAT, "").strip()
     )
@@ -71,6 +72,27 @@
     return answer, sections_clean[1:]
 
 
+def extract_answer_quotes_json(
+    answer_dict: dict[str, str | list[str]]
+) -> Tuple[Optional[str], Optional[list[str]]]:
+    answer_dict = {k.lower(): v for k, v in answer_dict.items()}
+    answer = str(answer_dict.get("answer"))
+    quotes = answer_dict.get("quotes") or answer_dict.get("quote")
+    if isinstance(quotes, str):
+        quotes = [quotes]
+    return answer, quotes
+
+
+def separate_answer_quotes(
+    answer_raw: str,
+) -> Tuple[Optional[str], Optional[list[str]]]:
+    try:
+        model_raw_json = json.loads(answer_raw)
+        return extract_answer_quotes_json(model_raw_json)
+    except ValueError:
+        return extract_answer_quotes_freeform(answer_raw)
+
+
 def match_quotes_to_docs(
     quotes: list[str],
     chunks: list[InferenceChunk],
@@ -140,7 +162,7 @@ def process_answer(
 class OpenAICompletionQA(QAModel):
     def __init__(
         self,
-        prompt_processor: Callable[[str, list[str]], str] = generic_prompt_processor,
+        prompt_processor: Callable[[str, list[str]], str] = json_processor,
         model_version: str = OPENAPI_MODEL_VERSION,
         max_output_tokens: int = OPENAI_MAX_OUTPUT_TOKENS,
     ) -> None:
@@ -185,13 +207,15 @@ class OpenAIChatCompletionQA(QAModel):
         self,
         prompt_processor: Callable[
             [str, list[str]], list[dict[str, str]]
-        ] = openai_chat_completion_processor,
+        ] = json_chat_processor,
         model_version: str = OPENAPI_MODEL_VERSION,
         max_output_tokens: int = OPENAI_MAX_OUTPUT_TOKENS,
+        reflexion_try_count: int = 0,
     ) -> None:
         self.prompt_processor = prompt_processor
         self.model_version = model_version
         self.max_output_tokens = max_output_tokens
+        self.reflexion_try_count = reflexion_try_count
 
     @log_function_time()
     def answer_question(
@@ -200,24 +224,28 @@
         top_contents = [ranked_chunk.content for ranked_chunk in context_docs]
         messages = self.prompt_processor(query, top_contents)
         logger.debug(messages)
-
-        try:
-            response = openai.ChatCompletion.create(
-                messages=messages,
-                temperature=0,
-                top_p=1,
-                frequency_penalty=0,
-                presence_penalty=0,
-                model=self.model_version,
-                max_tokens=self.max_output_tokens,
-            )
-            model_output = response["choices"][0]["message"]["content"].strip()
-            logger.info(
-                "OpenAI Token Usage: " + str(response["usage"]).replace("\n", "")
-            )
-        except Exception as e:
-            logger.exception(e)
-            model_output = "Model Failure"
+        model_output = ""
+        for _ in range(self.reflexion_try_count + 1):
+            try:
+                response = openai.ChatCompletion.create(
+                    messages=messages,
+                    temperature=0,
+                    top_p=1,
+                    frequency_penalty=0,
+                    presence_penalty=0,
+                    model=self.model_version,
+                    max_tokens=self.max_output_tokens,
+                )
+                model_output = response["choices"][0]["message"]["content"].strip()
+                assistant_msg = {"content": model_output, "role": "assistant"}
+                messages.extend([assistant_msg, get_chat_reflexion_msg()])
+                logger.info(
+                    "OpenAI Token Usage: " + str(response["usage"]).replace("\n", "")
+                )
+            except Exception as e:
+                logger.exception(e)
+                logger.warning(f"Model failure for query: {query}")
+                return None, None
 
         logger.debug(model_output)
 
diff --git a/backend/danswer/utils/text_processing.py b/backend/danswer/utils/text_processing.py
index 685d073ae..07359e845 100644
--- a/backend/danswer/utils/text_processing.py
+++ b/backend/danswer/utils/text_processing.py
@@ -19,6 +19,12 @@ def shared_precompare_cleanup(text: str) -> str:
     text = text.replace("*", "")
 
     # GPT models sometimes like to edit the quoting, ie "Title: Contents" becomes Title: "Contents"
+    text = text.replace('\\"', "")
     text = text.replace('"', "")
 
    # GPT models often change punctuation to make the text flow better.
+    text = text.replace(".", "")
+    text = text.replace(":", "")
+    text = text.replace(",", "")
+
     return text
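
The snippet below is not part of the patch; it is a minimal, untested sketch of how the pieces added above might be exercised locally, assuming the danswer backend is importable. The sample query, document, and model outputs are invented for illustration, and with the app_configs.py change above the modules can be imported even when OPENAI_API_KEY is unset.

# Illustrative sketch only (not part of the diff); sample data is made up.
from danswer.direct_qa.qa_prompts import json_processor
from danswer.direct_qa.question_answer import separate_answer_quotes

docs = ["The Eiffel Tower is an iconic symbol of Paris, located on the Champ de Mars."]

# Build the single-string JSON prompt added in qa_prompts.py.
print(json_processor("Where is the Eiffel Tower?", docs))

# separate_answer_quotes now tries JSON first and falls back to the freeform parser.
json_output = '{"answer": "Paris, France.", "quotes": ["located on the Champ de Mars"]}'
freeform_output = "Answer:\nParis, France.\nQuote:\nlocated on the Champ de Mars"
print(separate_answer_quotes(json_output))      # parsed via extract_answer_quotes_json
print(separate_answer_quotes(freeform_output))  # falls back to extract_answer_quotes_freeform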