From 65e97883db1747f4c37f4783c13d7ca36c2e9269 Mon Sep 17 00:00:00 2001
From: James Falcon <james.falcon@canonical.com>
Date: Wed, 15 Jan 2025 09:22:23 -0600
Subject: [PATCH] fix: Ensure _should_wait_via_user_data() handles all user
 data types (#5976)

The function "_should_wait_via_user_data()" wasn't properly
handling user data whose header starts with "#cloud-config" but
isn't cloud-config, such as "#cloud-config-archive".

Fixes GH-5975
---
 cloudinit/cmd/main.py                         |  9 +++++-
 .../modules/test_cloud_config_archive.py      | 31 +++++++++++++++++++
 tests/unittests/cmd/test_main.py              | 15 +++++++++
 3 files changed, 54 insertions(+), 1 deletion(-)
 create mode 100644 tests/integration_tests/modules/test_cloud_config_archive.py

diff --git a/cloudinit/cmd/main.py b/cloudinit/cmd/main.py
index 883b1da4c1e..7ad713ebf65 100644
--- a/cloudinit/cmd/main.py
+++ b/cloudinit/cmd/main.py
@@ -330,8 +330,12 @@ def _should_wait_via_user_data(
     if not raw_config:
         return False, "no configuration found"
 
+    # Since this could be some arbitrarily large blob of binary data,
+    # such as a gzipped file, only grab enough to inspect the header.
+    # Since we can get a header like #cloud-config-archive, make sure
+    # we grab enough to not be incorrectly identified as cloud-config.
     if (
-        handlers.type_from_starts_with(raw_config.strip()[:13])
+        handlers.type_from_starts_with(raw_config.strip()[:42])
         != "text/cloud-config"
     ):
         return True, "non-cloud-config user data found"
@@ -348,6 +352,9 @@ def _should_wait_via_user_data(
         )
         return True, "failed to parse user data as yaml"
 
+    if not isinstance(parsed_yaml, dict):
+        return True, "parsed config not in cloud-config format"
+
     # These all have the potential to require network access, so we should wait
     if "write_files" in parsed_yaml:
         for item in parsed_yaml["write_files"]:
diff --git a/tests/integration_tests/modules/test_cloud_config_archive.py b/tests/integration_tests/modules/test_cloud_config_archive.py
new file mode 100644
index 00000000000..df0bc93b5fb
--- /dev/null
+++ b/tests/integration_tests/modules/test_cloud_config_archive.py
@@ -0,0 +1,31 @@
+import pytest
+
+from tests.integration_tests.instances import IntegrationInstance
+from tests.integration_tests.util import verify_clean_boot, verify_clean_log
+
+USER_DATA = """\
+#cloud-config-archive
+- type: "text/cloud-boothook"
+  content: |
+    #!/bin/sh
+    echo "this is from a boothook." > /var/tmp/boothook.txt
+- type: "text/cloud-config"
+  content: |
+    bootcmd:
+    - echo "this is from a cloud-config." > /var/tmp/bootcmd.txt
+"""
+
+
+@pytest.mark.ci
+@pytest.mark.user_data(USER_DATA)
+def test_cloud_config_archive(client: IntegrationInstance):
+    """Check both #cloud-config-archive parts wrote their marker files."""
+    log = client.read_from_file("/var/log/cloud-init.log")
+    assert "this is from a boothook." in client.read_from_file(
+        "/var/tmp/boothook.txt"
+    )
+    assert "this is from a cloud-config." in client.read_from_file(
+        "/var/tmp/bootcmd.txt"
+    )
+    verify_clean_log(log)
+    verify_clean_boot(client)
diff --git a/tests/unittests/cmd/test_main.py b/tests/unittests/cmd/test_main.py
index aef747e8e85..4df78718268 100644
--- a/tests/unittests/cmd/test_main.py
+++ b/tests/unittests/cmd/test_main.py
@@ -18,6 +18,19 @@
 )
 
 
+CLOUD_CONFIG_ARCHIVE = """\
+#cloud-config-archive
+- type: "text/cloud-boothook"
+  content: |
+    #!/bin/sh
+    echo "this is from a boothook." > /var/tmp/boothook.txt
+- type: "text/cloud-config"
+  content: |
+    bootcmd:
+    - echo "this is from a cloud-config." > /var/tmp/bootcmd.txt
+"""
+
+
 EXTRA_CLOUD_CONFIG = """\
 #cloud-config
 write_files
@@ -264,6 +277,8 @@ def test_main_sys_argv(
             ),
             # Not parseable as yaml
             (mock.Mock(), "#cloud-config\nbootcmd:\necho hello", True),
+            # Yaml that parses to list
+            (mock.Mock(), CLOUD_CONFIG_ARCHIVE, True),
             # Non-cloud-config
             (mock.Mock(), "#!/bin/bash\n  - echo hello", True),
             # Something that after processing won't decode to utf-8