Merge pull request #297 from NVIDIA/feature/guardrails-only-example

Add example configurations for using only the guardrails, without LLM generation.
NVIDIA · Feb 1, 2024 · 17ada70 · 17ada70
2 parents 2a3a5ce + 07f085d
commit 17ada70
Show file tree

Hide file tree

Showing 6 changed files with 214 additions and 0 deletions.
diff --git a/examples/configs/guardrails_only/README.md b/examples/configs/guardrails_only/README.md
@@ -0,0 +1,83 @@
+# Guardrails Only
+
+Some LLM guardrail scenarios require invoking a guardrail configuration to check only the input or the output (which was generated through other methods). In other words, the interaction with the LLM will not happen **through** the guardrails layer but rather externally, and the guardrails layer is only invoked to check the input/output.
+
+> NOTE: Version `0.8.0` will add support in the Python API to invoke only the input/output rails. Until then, the patterns below can be used.
+
+To invoke only the input rails, you can use the following pattern in your `config.yml`/`config.co`:
+
+```yaml
+rails:
+  input:
+    flows:
+      - dummy input rail
+      # ... other input rails can go in here
+      - allow input
+```
+
+```colang
+define bot allow
+  "ALLOW"
+
+define bot deny
+  "DENY"
+
+define subflow dummy input rail
+  """A dummy input rail which checks if the word "dummy" is included in the text."""
+  if "dummy" in $user_message
+    bot deny
+    stop
+
+define subflow allow input
+  bot allow
+  stop
+```
+
+To invoke only the output rails, you can use the following pattern in your `config.yml`/`config.co`:
+
+```yaml
+rails:
+  output:
+    flows:
+      - dummy output rail
+
+      # ... other output rails go in here
+
+      # The last output rail will rewrite the message to "ALLOW" if it was not blocked
+      # up to this point.
+      - allow output
+
+  dialog:
+    # We need this setting so that the LLM is not used to compute the user intent.
+    # Because there is only one canonical form `user input`, everything will fit into that
+    # and the flow that returns the $llm_output is used.
+    user_messages:
+      embeddings_only: True
+```
+
+```colang
+define user input
+  "..."
+
+define flow
+  user input
+  bot $llm_output
+
+define bot allow
+  "ALLOW"
+
+define bot deny
+  "DENY"
+
+define subflow dummy output rail
+  """A dummy output rail which checks if the word "dummy" is included in the text."""
+  if "dummy" in $bot_message
+    bot deny
+    stop
+
+define subflow allow output
+  bot allow
+  stop
+```
+
+For a complete example, check out the [demo script](./demo.py) and the example [input](./input) and [output](./output) configurations.
diff --git a/examples/configs/guardrails_only/demo.py b/examples/configs/guardrails_only/demo.py
@@ -0,0 +1,71 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os.path
+
+from nemoguardrails import LLMRails, RailsConfig
+
+
+def demo_input_checking():
+    """Demo using the Python API and a config that only has input rails."""
+    config = RailsConfig.from_path(os.path.join(os.path.dirname(__file__), "input"))
+    rails = LLMRails(config)
+
+    # Works with prompts
+    res = rails.generate("How are you?")
+    assert res == "ALLOW"
+
+    res = rails.generate("You are dummy!")
+    assert res == "DENY"
+
+    # And with a chat history
+    res = rails.generate(messages=[{"role": "user", "content": "How are you?"}])
+    assert res == {"role": "assistant", "content": "ALLOW"}
+
+    res = rails.generate(messages=[{"role": "user", "content": "You are dummy!"}])
+    assert res == {"role": "assistant", "content": "DENY"}
+
+
+def demo_output_checking():
+    """Demo using the Python API and a config that only has output rails."""
+    config = RailsConfig.from_path(os.path.join(os.path.dirname(__file__), "output"))
+    rails = LLMRails(config)
+
+    # In order to send the LLM output which was generated externally, we need to
+    # use the "message" interface and pass a message with the role set to "context",
+    # and a value for the `llm_output`
+    res = rails.generate(
+        messages=[
+            {"role": "context", "content": {"llm_output": "Some safe LLM output."}},
+            {"role": "user", "content": "How are you?"},
+        ]
+    )
+    assert res == {"role": "assistant", "content": "ALLOW"}
+
+    res = rails.generate(
+        messages=[
+            {
+                "role": "context",
+                "content": {"llm_output": "Some unsafe dummy LLM output."},
+            },
+            {"role": "user", "content": "How are you?"},
+        ]
+    )
+    assert res == {"role": "assistant", "content": "DENY"}
+
+
+if __name__ == "__main__":
+    # Script entry point: run the input-rails demo first, then the output-rails demo.
+    demo_input_checking()
+    demo_output_checking()
diff --git a/examples/configs/guardrails_only/input/config.co b/examples/configs/guardrails_only/input/config.co
@@ -0,0 +1,15 @@
+define bot allow
+  "ALLOW"
+
+define bot deny
+  "DENY"
+
+define subflow dummy input rail
+  """A dummy input rail which checks if the word "dummy" is included in the text."""
+  if "dummy" in $user_message
+    bot deny
+    stop
+
+define subflow allow input
+  bot allow
+  stop
diff --git a/examples/configs/guardrails_only/input/config.yml b/examples/configs/guardrails_only/input/config.yml
@@ -0,0 +1,6 @@
+rails:
+  input:
+    flows:
+      - dummy input rail
+      # ... other input rails can go in here
+      - allow input
diff --git a/examples/configs/guardrails_only/output/config.co b/examples/configs/guardrails_only/output/config.co
@@ -0,0 +1,22 @@
+define user input
+  "..."
+
+define flow
+  user input
+  bot $llm_output
+
+define bot allow
+  "ALLOW"
+
+define bot deny
+  "DENY"
+
+define subflow dummy output rail
+  """A dummy output rail which checks if the word "dummy" is included in the text."""
+  if "dummy" in $bot_message
+    bot deny
+    stop
+
+define subflow allow output
+  bot allow
+  stop
diff --git a/examples/configs/guardrails_only/output/config.yml b/examples/configs/guardrails_only/output/config.yml
@@ -0,0 +1,17 @@
+rails:
+  output:
+    flows:
+      - dummy output rail
+
+      # ... other output rails go in here
+
+      # The last output rail will rewrite the message to "ALLOW" if it was not blocked
+      # up to this point.
+      - allow output
+
+  dialog:
+    # We need this setting so that the LLM is not used to compute the user intent.
+    # Because there is only one canonical form `user input`, everything will fit into that
+    # and the flow that returns the $llm_output is used.
+    user_messages:
+      embeddings_only: True