Merge pull request #648 from NVIDIA/feature/prompt-improvements-llama-3

Prompt improvements Llama-3
drazvan authored Jul 24, 2024
2 parents 6354436 + 486d5c4 commit 6b1bfb1
Showing 10 changed files with 238 additions and 9 deletions.
4 changes: 4 additions & 0 deletions examples/configs/llm/llama-3/config.yml
@@ -0,0 +1,4 @@
models:
  - type: main
    engine: nvidia_ai_endpoints
    model: meta/llama-3.1-70b-instruct
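A quick way to exercise this config (a minimal sketch, not part of the commit; it assumes `nemoguardrails` and the NVIDIA AI Endpoints integration are installed, with an API key available, e.g. via `NVIDIA_API_KEY`):

```python
from nemoguardrails import LLMRails, RailsConfig

# Load the example config added above (path relative to the repo root).
config = RailsConfig.from_path("examples/configs/llm/llama-3")
rails = LLMRails(config)

response = rails.generate(messages=[{"role": "user", "content": "Hello!"}])
print(response["content"])
```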
12 changes: 11 additions & 1 deletion nemoguardrails/actions/llm/generation.py
@@ -497,9 +497,17 @@ async def generate_user_intent(
        # Initialize the LLMCallInfo object
        llm_call_info_var.set(LLMCallInfo(task=Task.GENERAL.value))

+        if kb:
+            chunks = await kb.search_relevant_chunks(event["text"])
+            relevant_chunks = "\n".join([chunk["body"] for chunk in chunks])
+        else:
+            relevant_chunks = ""
+
        # Otherwise, we still create an altered prompt.
        prompt = self.llm_task_manager.render_task_prompt(
-            task=Task.GENERAL, events=events
+            task=Task.GENERAL,
+            events=events,
+            context={"relevant_chunks": relevant_chunks},
        )

        generation_options: GenerationOptions = generation_options_var.get()
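For reference, a hypothetical sketch of the chunk shape the new block assumes (only the `body` field is read above; the `title` field is illustrative):

```python
# Each chunk returned by kb.search_relevant_chunks() is a dict with a "body" field.
chunks = [
    {"title": "Overview", "body": "NeMo Guardrails adds programmable rails to LLM apps."},
    {"title": "Usage", "body": "Rails are configured via Colang flows and YAML."},
]

# Joined into a single string and passed to the prompt via context={"relevant_chunks": ...}.
relevant_chunks = "\n".join([chunk["body"] for chunk in chunks])
```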
@@ -1131,6 +1139,8 @@ async def generate_intent_steps_message(
        else:
            relevant_chunks = ""

+        relevant_chunks = relevant_chunks.strip()
+
        prompt = self.llm_task_manager.render_task_prompt(
            task=Task.GENERATE_INTENT_STEPS_MESSAGE,
            events=events,
4 changes: 2 additions & 2 deletions nemoguardrails/actions/llm/utils.py
@@ -339,15 +339,15 @@ def get_last_user_utterance(events: List[dict]) -> Optional[str]:
    return None


-def get_retrieved_relevant_chunks(events: List[dict]) -> Optional[dict]:
+def get_retrieved_relevant_chunks(events: List[dict]) -> Optional[str]:
    """Returns the retrieved chunks for current user utterance from the events."""
    for event in reversed(events):
        if event["type"] == "UserMessage":
            break
        if event["type"] == "ContextUpdate" and "relevant_chunks" in event.get(
            "data", {}
        ):
-            return event["data"]["relevant_chunks"]
+            return (event["data"]["relevant_chunks"] or "").strip()

    return None
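A small usage sketch for the updated helper (event payloads are illustrative):

```python
events = [
    {"type": "UserMessage", "text": "What is NeMo Guardrails?"},
    {"type": "ContextUpdate", "data": {"relevant_chunks": "  Guardrails adds safety rails.  \n"}},
]

# Events are scanned newest-first, stopping at the last user message, so only
# chunks retrieved for the current turn are returned, now whitespace-stripped.
get_retrieved_relevant_chunks(events)  # -> "Guardrails adds safety rails."
```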

55 changes: 55 additions & 0 deletions nemoguardrails/llm/filters.py
@@ -171,6 +171,49 @@ def to_messages(colang_history: str) -> List[dict]:
    return messages


def to_intent_messages(colang_history: str) -> List[dict]:
    """Filter that turns a colang history into intent-labelled chat messages.

    Verbatim user/bot utterances are dropped; canonical forms are kept and
    prefixed with "User intent:" / "Bot intent:".
    """
    messages = []

    lines = colang_history.split("\n")
    for i, line in enumerate(lines):
        if line.startswith('user "'):
            continue
        else:
            if i > 0 and lines[i - 1].startswith('user "'):
                line = "User intent: " + line.strip()
                messages.append({"type": "user", "content": line})
            elif line.startswith("user "):
                line = "User intent: " + line[5:].strip()
                messages.append({"type": "user", "content": line})
            elif line.startswith("bot "):
                line = "Bot intent: " + line[4:].strip()
                messages.append({"type": "assistant", "content": line})
            elif line.startswith(' "'):
                continue

    return messages
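A sample run of the new filter on a typical colang history (illustrative input):

```python
history = (
    'user "Hello there!"\n'
    "  express greeting\n"
    "bot express greeting\n"
    '  "Hello! How can I assist you?"'
)

to_intent_messages(history)
# [{'type': 'user', 'content': 'User intent: express greeting'},
#  {'type': 'assistant', 'content': 'Bot intent: express greeting'}]
```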


def to_intent_messages_2(colang_history: str) -> List[dict]:
    """Filter that turns a colang history into chat messages, keeping the raw
    user utterances and labelling bot lines as intents/messages."""
    messages = []

    lines = colang_history.split("\n")
    for i, line in enumerate(lines):
        if line.startswith('user "'):
            messages.append({"type": "user", "content": line[6:-1]})
        else:
            if i > 0 and lines[i - 1].startswith('user "'):
                continue
            if line.startswith("bot "):
                line = "Bot intent: " + line[4:].strip()
                messages.append({"type": "assistant", "content": line})
            elif line.startswith(' "'):
                line = "Bot message: " + line[2:].strip()
                messages.append({"type": "assistant", "content": line})

    return messages
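On the same history, this variant keeps the verbatim user utterance instead of the user intent:

```python
to_intent_messages_2(history)
# [{'type': 'user', 'content': 'Hello there!'},
#  {'type': 'assistant', 'content': 'Bot intent: express greeting'}]
```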


def verbose_v1(colang_history: str) -> str:
    """Filter that given a history in colang format, returns a verbose version of the history."""
    lines = colang_history.split("\n")

@@ -193,6 +236,18 @@ def verbose_v1(colang_history: str) -> str:
    return "\n".join(lines)


def to_chat_messages(events: List[dict]) -> List[dict]:
    """Filter that turns an array of events into a sequence of user/assistant messages."""
    messages = []
    for event in events:
        if event["type"] == "UserMessage":
            messages.append({"type": "user", "content": event["text"]})
        elif event["type"] == "StartUtteranceBotAction":
            messages.append({"type": "assistant", "content": event["script"]})

    return messages
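A usage sketch for the new event filter (event payloads are illustrative):

```python
events = [
    {"type": "UserMessage", "text": "Hi!"},
    {"type": "StartUtteranceBotAction", "script": "Hello! How can I help?"},
]

to_chat_messages(events)
# [{'type': 'user', 'content': 'Hi!'},
#  {'type': 'assistant', 'content': 'Hello! How can I help?'}]
```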


def user_assistant_sequence(events: List[dict]) -> str:
    """Filter that turns an array of events into a sequence of user/assistant messages.
4 changes: 4 additions & 0 deletions nemoguardrails/llm/prompts.py
@@ -91,6 +91,10 @@ def _get_prompt(
                    _score = 0.8
                    break

+                # If we match a substring, the score is 0.4
+                elif _model in model:
+                    _score = 0.4
+
        if prompt.mode != prompting_mode:
            # Penalize matching score for being in an incorrect mode.
            # This way, if a prompt with the correct mode (say "compact") is found, it will be preferred over a prompt with another mode (say "standard").
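This substring rule is what lets the new llama3.yml prompts below match the example config's model. A hypothetical walk-through of the scoring:

```python
model = "meta/llama-3.1-70b-instruct"

# "llama-3.1" is a substring of the deployed model id, so prompts listing it
# under `models:` (like llama3.yml) score 0.4 and are preferred over prompts
# with no model match at all.
"llama-3.1" in model  # -> True
"llama3" in model     # -> False
```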
144 changes: 144 additions & 0 deletions nemoguardrails/llm/prompts/llama3.yml
@@ -0,0 +1,144 @@
# Collection of all the prompts
prompts:
  - task: general
    models:
      - llama3
      - llama-3.1

    messages:
      - type: system
        content: |
          {{ general_instructions }}{% if relevant_chunks != None and relevant_chunks != '' %}
          This is some relevant context:
          ```markdown
          {{ relevant_chunks }}
          ```{% endif %}
      - "{{ history | to_chat_messages }}"

  # Prompt for detecting the user message canonical form.
  - task: generate_user_intent
    models:
      - llama3
      - llama-3.1

    messages:
      - type: system
        content: |
          {{ general_instructions }}
          Your task is to generate the user intent in a conversation given the last user message similar to the examples below.
          Do not provide any explanations, just output the user intent.
          # Examples:
          {{ examples | verbose_v1 }}
      - "{{ sample_conversation | first_turns(2) | to_messages }}"
      - "{{ history | colang | to_messages }}"
      - type: assistant
        content: |
          Bot thinking: potential user intents are: {{ potential_user_intents }}
    output_parser: "verbose_v1"

  # Prompt for generating the next steps.
  - task: generate_next_steps
    models:
      - llama3
      - llama-3.1

    messages:
      - type: system
        content: |
          {{ general_instructions }}
          Your task is to generate the next steps in a conversation given the last user message similar to the examples below.
          Do not provide any explanations, just output the user intent and the next steps.
          # Examples:
          {{ examples | remove_text_messages | verbose_v1 }}
      - "{{ sample_conversation | first_turns(2) | to_intent_messages }}"
      - "{{ history | colang | to_intent_messages }}"

    output_parser: "verbose_v1"

  # Prompt for generating the bot message from a canonical form.
  - task: generate_bot_message
    models:
      - llama3
      - llama-3.1

    messages:
      - type: system
        content: |
          {{ general_instructions }}{% if relevant_chunks != None and relevant_chunks != '' %}
          This is some relevant context:
          ```markdown
          {{ relevant_chunks }}
          ```{% endif %}
          Your task is to generate the bot message in a conversation given the last user message, user intent and bot intent, similar to the examples below.
          Do not provide any explanations, just output the bot message.
          # Examples:
          {{ examples | verbose_v1 }}
      - "{{ sample_conversation | first_turns(2) | to_intent_messages_2 }}"
      - "{{ history | colang | to_intent_messages_2 }}"

    output_parser: "verbose_v1"

  # Prompt for generating the user intent, next steps and bot message in a single call.
  - task: generate_intent_steps_message
    models:
      - llama3
      - llama-3.1

    messages:
      - type: system
        content: |
          {{ general_instructions }}{% if relevant_chunks != None and relevant_chunks != '' %}
          This is some relevant context:
          ```markdown
          {{ relevant_chunks }}
          ```{% endif %}
          Your task is to generate the user intent and the next steps in a conversation given the last user message similar to the examples below.
          Do not provide any explanations, just output the user intent and the next steps.
          # Examples:
          {{ examples | verbose_v1 }}
      - "{{ sample_conversation | first_turns(2) | to_messages }}"
      - "{{ history | colang | to_messages }}"
      - type: assistant
        content: |
          Bot thinking: potential user intents are: {{ potential_user_intents }}
    output_parser: "verbose_v1"

  # Prompt for generating the value of a context variable.
  - task: generate_value
    models:
      - llama3
      - llama-3.1

    messages:
      - type: system
        content: |
          {{ general_instructions }}
          Your task is to generate the value for the ${{ var_name }} variable.
          Do not provide any explanations, just output the value.
          # Examples:
          {{ examples | verbose_v1 }}
      - "{{ sample_conversation | first_turns(2) | to_messages }}"
      - "{{ history | colang | to_messages }}"
      - type: assistant
        content: |
          Bot thinking: follow the following instructions: {{ instructions }}
          ${{ var_name }} =
    output_parser: "verbose_v1"
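To see what the system message for the `general` task resolves to, here is a stand-alone Jinja2 sketch (illustrative values; the real rendering goes through `LLMTaskManager.render_task_prompt`):

```python
from jinja2 import Template

template = Template(
    "{{ general_instructions }}"
    "{% if relevant_chunks != None and relevant_chunks != '' %}\n"
    "This is some relevant context:\n"
    "```markdown\n"
    "{{ relevant_chunks }}\n"
    "```{% endif %}"
)

print(
    template.render(
        general_instructions="You are a helpful assistant.",
        relevant_chunks="Llama 3.1 supports long-context chat.",
    )
)
# You are a helpful assistant.
# This is some relevant context:
# ```markdown
# Llama 3.1 supports long-context chat.
# ```
```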
6 changes: 6 additions & 0 deletions nemoguardrails/llm/taskmanager.py
@@ -27,6 +27,9 @@
    indent,
    last_turns,
    remove_text_messages,
+    to_chat_messages,
+    to_intent_messages,
+    to_intent_messages_2,
    to_messages,
    to_messages_nemollm,
    user_assistant_sequence,
@@ -67,6 +70,9 @@ def __init__(self, config: RailsConfig):
"user_assistant_sequence_nemollm"
] = user_assistant_sequence_nemollm
self.env.filters["to_messages"] = to_messages
self.env.filters["to_intent_messages"] = to_intent_messages
self.env.filters["to_intent_messages_2"] = to_intent_messages_2
self.env.filters["to_chat_messages"] = to_chat_messages
self.env.filters["to_messages_nemollm"] = to_messages_nemollm
self.env.filters["verbose_v1"] = verbose_v1

3 changes: 1 addition & 2 deletions nemoguardrails/logging/callbacks.py
@@ -101,10 +101,9 @@ async def on_chat_model_start(
                        if msg.type == "ai"
                        else "System"
                    )
-                    + "[/][black on white]"
+                    + "[/]"
                    + "\n"
                    + msg.content
-                    + "[/]"
                    for msg in messages[0]
                ]
            )
9 changes: 5 additions & 4 deletions nemoguardrails/logging/verbose.py
@@ -67,14 +67,15 @@ def emit(self, record) -> None:
                skip_print = True
                console.print("")

+            if title == "Prompt Messages":
+                body = body.split("\n", 3)[3]
+
            for line in body.split("\n"):
                if line.strip() == "[/]":
                    continue

-                text = Text(line, style="black on #909090", end="\n")
+                if line.startswith("[cyan]") and line.endswith("[/]"):
+                    text = Text(line[6:-3], style="maroon", end="\n")
+                else:
+                    text = Text(line, style="black on #909090", end="\n")

                text.pad_right(console.width)
                console.print(text)
                console.print("")
6 changes: 6 additions & 0 deletions tests/test_llm_task_manager.py
@@ -152,6 +152,8 @@ def test_prompt_length_exceeded_empty_events():
                model: gpt-3.5-turbo-instruct
            prompts:
              - task: generate_user_intent
+                models:
+                  - gpt-3.5-turbo-instruct
                max_length: 2000
                content: |-
                  {{ general_instructions }}
@@ -192,6 +194,8 @@ def test_prompt_length_exceeded_compressed_history():
                model: gpt-3.5-turbo-instruct
            prompts:
              - task: generate_user_intent
+                models:
+                  - gpt-3.5-turbo-instruct
                max_length: 3000
                content: |-
                  {{ general_instructions }}
@@ -252,6 +256,8 @@ def test_stop_configuration_parameter():
                model: gpt-3.5-turbo-instruct
            prompts:
              - task: generate_user_intent
+                models:
+                  - gpt-3.5-turbo-instruct
                stop:
                  - <<end>>
                  - <<stop>>
