diff --git a/lib/std/process.zig b/lib/std/process.zig index 8f3f990890a1..f9870736f968 100644 --- a/lib/std/process.zig +++ b/lib/std/process.zig @@ -625,11 +625,22 @@ pub const ArgIteratorWasi = struct { }; /// Iterator that implements the Windows command-line parsing algorithm. +/// The implementation is intended to be compatible with the post-2008 C runtime, +/// but is *not* intended to be compatible with `CommandLineToArgvW` since +/// `CommandLineToArgvW` uses the pre-2008 parsing rules. /// -/// This iterator faithfully implements the parsing behavior observed in `CommandLineToArgvW` with +/// This iterator faithfully implements the parsing behavior observed from the C runtime with /// one exception: if the command-line string is empty, the iterator will immediately complete -/// without returning any arguments (whereas `CommandLineArgvW` will return a single argument +/// without returning any arguments (whereas the C runtime will return a single argument /// representing the name of the current executable). +/// +/// The essential parts of the algorithm are described in Microsoft's documentation: +/// +/// - https://learn.microsoft.com/en-us/cpp/cpp/main-function-command-line-args?view=msvc-170#parsing-c-command-line-arguments +/// +/// David Deley explains some additional undocumented quirks in great detail: +/// +/// - https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULES pub const ArgIteratorWindows = struct { allocator: Allocator, /// Owned by the iterator. @@ -686,6 +697,51 @@ pub const ArgIteratorWindows = struct { fn emitCharacter(self: *ArgIteratorWindows, char: u8) void { self.buffer[self.end] = char; self.end += 1; + + // Because we are emitting WTF-8 byte-by-byte, we need to + // check to see if we've emitted two consecutive surrogate + // codepoints that form a valid surrogate pair in order + // to ensure that we're always emitting well-formed WTF-8 + // (https://simonsapin.github.io/wtf-8/#concatenating). 
+        //
+        // If we do have a valid surrogate pair, we need to emit
+        // the UTF-8 sequence for the codepoint that they encode
+        // instead of the WTF-8 encoding for the two surrogate codepoints
+        // separately.
+        //
+        // This is relevant when dealing with a WTF-16 encoded
+        // command line like this:
+        //     "<0xD801>"<0xDC37>
+        // which would get converted to WTF-8 in `cmd_line` as:
+        //     "<0xED><0xA0><0x81>"<0xED><0xB0><0xB7>
+        // and then after parsing it'd naively get emitted as:
+        //     <0xED><0xA0><0x81><0xED><0xB0><0xB7>
+        // but instead, we need to recognize the surrogate pair
+        // and emit the codepoint it encodes, which in this
+        // example is U+10437 (𐐷), which is encoded in UTF-8 as:
+        //     <0xF0><0x90><0x90><0xB7>
+        concatSurrogatePair(self);
+    }
+
+    fn concatSurrogatePair(self: *ArgIteratorWindows) void {
+        // Merges a surrogate pair at the end of the emitted bytes into one UTF-8 sequence.
+        // Surrogate codepoints are always encoded as 3 bytes, so a pair needs 6 bytes.
+        if (self.end - self.start >= 6) {
+            const window = self.buffer[self.end - 6 .. self.end];
+            const view = std.unicode.Wtf8View.init(window) catch return;
+            var it = view.iterator();
+            const high = std.math.cast(u16, it.nextCodepoint().?) orelse return;
+            if (!std.unicode.utf16IsHighSurrogate(high)) return;
+            const low = std.math.cast(u16, it.nextCodepoint().?) orelse return;
+            if (!std.unicode.utf16IsLowSurrogate(low)) return;
+            // The checks above compare native-endian values; only `utf16LeToUtf8` wants
+            // little-endian code units, so byte-swap (on big-endian hosts) just for it.
+            // The result overwrites the pair's bytes; the leftover bytes are chopped off.
+            const pair = [2]u16{ std.mem.nativeToLittle(u16, high), std.mem.nativeToLittle(u16, low) };
+            const len = std.unicode.utf16LeToUtf8(window, &pair) catch unreachable;
+            const delta = 6 - len;
+            self.end -= delta;
+        }
     }
 
     fn yieldArg(self: *ArgIteratorWindows) [:0]const u8 {
@@ -711,69 +767,37 @@ pub const ArgIteratorWindows = struct {
         }
     };
 
-    // The essential parts of the algorithm are described in Microsoft's documentation:
-    //
-    // -
-    // -
-    //
-    // David Deley explains some additional undocumented quirks in great detail:
-    //
-    // -
-    //
-    // Code points <= U+0020 terminating an unquoted first argument was discovered independently by
-    // testing and observing the behavior of 'CommandLineToArgvW' on Windows 10.
-
     fn nextWithStrategy(self: *ArgIteratorWindows, comptime strategy: type) strategy.T {
         // The first argument (the executable name) uses different parsing rules.
         if (self.index == 0) {
-            var char = if (self.cmd_line.len != 0) self.cmd_line[0] else 0;
-            switch (char) {
-                0 => {
-                    // Immediately complete the iterator.
-                    // 'CommandLineToArgvW' would return the name of the current executable here.
-                    return strategy.eof;
-                },
-                '"' => {
-                    // If the first character is a quote, read everything until the next quote (then
-                    // skip that quote), or until the end of the string.
-                    self.index += 1;
-                    while (true) : (self.index += 1) {
-                        char = if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0;
-                        switch (char) {
-                            0 => {
-                                return strategy.yieldArg(self);
-                            },
-                            '"' => {
-                                self.index += 1;
-                                return strategy.yieldArg(self);
-                            },
-                            else => {
-                                strategy.emitCharacter(self, char);
-                            },
-                        }
-                    }
-                },
-                else => {
-                    // Otherwise, read everything until the next space or ASCII control character
-                    // (not including DEL) (then skip that character), or until the end of the
-                    // string. This means that if the command-line string starts with one of these
-                    // characters, the first returned argument will be the empty string.
- while (true) : (self.index += 1) { - char = if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0; - switch (char) { - 0 => { - return strategy.yieldArg(self); - }, - '\x01'...' ' => { - self.index += 1; - return strategy.yieldArg(self); - }, - else => { - strategy.emitCharacter(self, char); - }, + if (self.cmd_line.len == 0 or self.cmd_line[0] == 0) { + // Immediately complete the iterator. + // The C runtime would return the name of the current executable here. + return strategy.eof; + } + + var inside_quotes = false; + while (true) : (self.index += 1) { + const char = if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0; + switch (char) { + 0 => { + return strategy.yieldArg(self); + }, + '"' => { + inside_quotes = !inside_quotes; + }, + ' ', '\t' => { + if (inside_quotes) + strategy.emitCharacter(self, char) + else { + self.index += 1; + return strategy.yieldArg(self); } - } - }, + }, + else => { + strategy.emitCharacter(self, char); + }, + } } } @@ -791,9 +815,10 @@ pub const ArgIteratorWindows = struct { // // - The end of the string always terminates the current argument. // - When not in 'inside_quotes' mode, a space or tab terminates the current argument. - // - 2n backslashes followed by a quote emit n backslashes. If in 'inside_quotes' and the - // quote is immediately followed by a second quote, one quote is emitted and the other is - // skipped, otherwise, the quote is skipped. Finally, 'inside_quotes' is toggled. + // - 2n backslashes followed by a quote emit n backslashes (note: n can be zero). + // If in 'inside_quotes' and the quote is immediately followed by a second quote, + // one quote is emitted and the other is skipped, otherwise, the quote is skipped + // and 'inside_quotes' is toggled. // - 2n + 1 backslashes followed by a quote emit n backslashes followed by a quote. // - n backslashes not followed by a quote emit n backslashes. 
var backslash_count: usize = 0; @@ -826,8 +851,9 @@ pub const ArgIteratorWindows = struct { { strategy.emitCharacter(self, '"'); self.index += 1; + } else { + inside_quotes = !inside_quotes; } - inside_quotes = !inside_quotes; } }, '\\' => { @@ -1215,10 +1241,10 @@ test ArgIteratorWindows { // Separators try t("aa bb cc", &.{ "aa", "bb", "cc" }); try t("aa\tbb\tcc", &.{ "aa", "bb", "cc" }); - try t("aa\nbb\ncc", &.{ "aa", "bb\ncc" }); - try t("aa\r\nbb\r\ncc", &.{ "aa", "\nbb\r\ncc" }); - try t("aa\rbb\rcc", &.{ "aa", "bb\rcc" }); - try t("aa\x07bb\x07cc", &.{ "aa", "bb\x07cc" }); + try t("aa\nbb\ncc", &.{"aa\nbb\ncc"}); + try t("aa\r\nbb\r\ncc", &.{"aa\r\nbb\r\ncc"}); + try t("aa\rbb\rcc", &.{"aa\rbb\rcc"}); + try t("aa\x07bb\x07cc", &.{"aa\x07bb\x07cc"}); try t("aa\x7Fbb\x7Fcc", &.{"aa\x7Fbb\x7Fcc"}); try t("aa🦎bb🦎cc", &.{"aa🦎bb🦎cc"}); @@ -1227,22 +1253,22 @@ test ArgIteratorWindows { try t(" aa bb ", &.{ "", "aa", "bb" }); try t("\t\t", &.{""}); try t("\t\taa\t\tbb\t\t", &.{ "", "aa", "bb" }); - try t("\n\n", &.{ "", "\n" }); - try t("\n\naa\n\nbb\n\n", &.{ "", "\naa\n\nbb\n\n" }); + try t("\n\n", &.{"\n\n"}); + try t("\n\naa\n\nbb\n\n", &.{"\n\naa\n\nbb\n\n"}); // Executable name with quotes/backslashes try t("\"aa bb\tcc\ndd\"", &.{"aa bb\tcc\ndd"}); try t("\"", &.{""}); try t("\"\"", &.{""}); - try t("\"\"\"", &.{ "", "" }); - try t("\"\"\"\"", &.{ "", "" }); - try t("\"\"\"\"\"", &.{ "", "\"" }); - try t("aa\"bb\"cc\"dd", &.{"aa\"bb\"cc\"dd"}); - try t("aa\"bb cc\"dd", &.{ "aa\"bb", "ccdd" }); - try t("\"aa\\\"bb\"", &.{ "aa\\", "bb" }); + try t("\"\"\"", &.{""}); + try t("\"\"\"\"", &.{""}); + try t("\"\"\"\"\"", &.{""}); + try t("aa\"bb\"cc\"dd", &.{"aabbccdd"}); + try t("aa\"bb cc\"dd", &.{"aabb ccdd"}); + try t("\"aa\\\"bb\"", &.{"aa\\bb"}); try t("\"aa\\\\\"", &.{"aa\\\\"}); - try t("aa\\\"bb", &.{"aa\\\"bb"}); - try t("aa\\\\\"bb", &.{"aa\\\\\"bb"}); + try t("aa\\\"bb", &.{"aa\\bb"}); + try t("aa\\\\\"bb", &.{"aa\\\\bb"}); // Arguments with 
quotes/backslashes try t(". \"aa bb\tcc\ndd\"", &.{ ".", "aa bb\tcc\ndd" }); @@ -1252,29 +1278,66 @@ test ArgIteratorWindows { try t(". \"\"", &.{ ".", "" }); try t(". \"\"\"", &.{ ".", "\"" }); try t(". \"\"\"\"", &.{ ".", "\"" }); - try t(". \"\"\"\"\"", &.{ ".", "\"" }); + try t(". \"\"\"\"\"", &.{ ".", "\"\"" }); try t(". \"\"\"\"\"\"", &.{ ".", "\"\"" }); try t(". \" \"", &.{ ".", " " }); try t(". \" \"\"", &.{ ".", " \"" }); try t(". \" \"\"\"", &.{ ".", " \"" }); - try t(". \" \"\"\"\"", &.{ ".", " \"" }); + try t(". \" \"\"\"\"", &.{ ".", " \"\"" }); try t(". \" \"\"\"\"\"", &.{ ".", " \"\"" }); - try t(". \" \"\"\"\"\"\"", &.{ ".", " \"\"" }); + try t(". \" \"\"\"\"\"\"", &.{ ".", " \"\"\"" }); try t(". \\\"", &.{ ".", "\"" }); try t(". \\\"\"", &.{ ".", "\"" }); try t(". \\\"\"\"", &.{ ".", "\"" }); try t(". \\\"\"\"\"", &.{ ".", "\"\"" }); try t(". \\\"\"\"\"\"", &.{ ".", "\"\"" }); - try t(". \\\"\"\"\"\"\"", &.{ ".", "\"\"" }); + try t(". \\\"\"\"\"\"\"", &.{ ".", "\"\"\"" }); try t(". \" \\\"", &.{ ".", " \"" }); try t(". \" \\\"\"", &.{ ".", " \"" }); try t(". \" \\\"\"\"", &.{ ".", " \"\"" }); try t(". \" \\\"\"\"\"", &.{ ".", " \"\"" }); - try t(". \" \\\"\"\"\"\"", &.{ ".", " \"\"" }); + try t(". \" \\\"\"\"\"\"", &.{ ".", " \"\"\"" }); try t(". \" \\\"\"\"\"\"\"", &.{ ".", " \"\"\"" }); try t(". aa\\bb\\\\cc\\\\\\dd", &.{ ".", "aa\\bb\\\\cc\\\\\\dd" }); try t(". \\\\\\\"aa bb\"", &.{ ".", "\\\"aa", "bb" }); try t(". 
\\\\\\\\\"aa bb\"", &.{ ".", "\\\\aa bb" }); + + // From https://learn.microsoft.com/en-us/cpp/cpp/main-function-command-line-args#results-of-parsing-command-lines + try t( + \\foo.exe "abc" d e + , &.{ "foo.exe", "abc", "d", "e" }); + try t( + \\foo.exe a\\b d"e f"g h + , &.{ "foo.exe", "a\\\\b", "de fg", "h" }); + try t( + \\foo.exe a\\\"b c d + , &.{ "foo.exe", "a\\\"b", "c", "d" }); + try t( + \\foo.exe a\\\\"b c" d e + , &.{ "foo.exe", "a\\\\b c", "d", "e" }); + try t( + \\foo.exe a"b"" c d + , &.{ "foo.exe", "ab\" c d" }); + + // From https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULESEX + try t("foo.exe CallMeIshmael", &.{ "foo.exe", "CallMeIshmael" }); + try t("foo.exe \"Call Me Ishmael\"", &.{ "foo.exe", "Call Me Ishmael" }); + try t("foo.exe Cal\"l Me I\"shmael", &.{ "foo.exe", "Call Me Ishmael" }); + try t("foo.exe CallMe\\\"Ishmael", &.{ "foo.exe", "CallMe\"Ishmael" }); + try t("foo.exe \"CallMe\\\"Ishmael\"", &.{ "foo.exe", "CallMe\"Ishmael" }); + try t("foo.exe \"Call Me Ishmael\\\\\"", &.{ "foo.exe", "Call Me Ishmael\\" }); + try t("foo.exe \"CallMe\\\\\\\"Ishmael\"", &.{ "foo.exe", "CallMe\\\"Ishmael" }); + try t("foo.exe a\\\\\\b", &.{ "foo.exe", "a\\\\\\b" }); + try t("foo.exe \"a\\\\\\b\"", &.{ "foo.exe", "a\\\\\\b" }); + + // Surrogate pair encoding of 𐐷 separated by quotes. + // Encoded as WTF-16: + // "<0xD801>"<0xDC37> + // Encoded as WTF-8: + // "<0xED><0xA0><0x81>"<0xED><0xB0><0xB7> + // During parsing, the quotes drop out and the surrogate pair + // should end up encoded as its normal UTF-8 representation. 
+ try t("foo.exe \"\xed\xa0\x81\"\xed\xb0\xb7", &.{ "foo.exe", "𐐷" }); } fn testArgIteratorWindows(cmd_line: []const u8, expected_args: []const []const u8) !void { diff --git a/test/standalone/build.zig.zon b/test/standalone/build.zig.zon index 8b59f261179e..8f5a061fe2e0 100644 --- a/test/standalone/build.zig.zon +++ b/test/standalone/build.zig.zon @@ -104,6 +104,9 @@ .windows_spawn = .{ .path = "windows_spawn", }, + .windows_argv = .{ + .path = "windows_argv", + }, .self_exe_symlink = .{ .path = "self_exe_symlink", }, diff --git a/test/standalone/windows_argv/README.md b/test/standalone/windows_argv/README.md new file mode 100644 index 000000000000..f7ce7ac7c424 --- /dev/null +++ b/test/standalone/windows_argv/README.md @@ -0,0 +1,19 @@ +Tests that Zig's `std.process.ArgIteratorWindows` is compatible with both the MSVC and MinGW C runtimes' argv splitting algorithms. + +The method of testing is: +- Compile a C file with `wmain` as its entry point +- The C `wmain` calls a Zig-implemented `verify` function that takes the `argv` from `wmain` and compares it to the argv gotten from `std.process.argsAlloc` (which takes `kernel32.GetCommandLineW()` and splits it) +- The compiled C program is spawned continuously as a child process by the implementation in `fuzz.zig` with randomly generated command lines + + On Windows, the 'application name' and the 'command line' are disjoint concepts. That is, you can spawn `foo.exe` but set the command line to `bar.exe`, and `CreateProcessW` will spawn `foo.exe` but `argv[0]` will be `bar.exe`. This quirk allows us to test arbitrary `argv[0]` values as well which otherwise wouldn't be possible. 
+Note: This is intentionally testing against the C runtime argv splitting and *not* [`CommandLineToArgvW`](https://learn.microsoft.com/en-us/windows/win32/api/shellapi/nf-shellapi-commandlinetoargvw), since the C runtime argv splitting was updated in 2008 but `CommandLineToArgvW` still uses the pre-2008 algorithm (which differs in both `argv[0]` rules and `""`; see [here](https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULESDOC) for details). + +--- + +In addition to being run during `zig build test-standalone`, this test can be run on its own via `zig build test` from within this directory. + +When run on its own: +- `-Diterations=<num>` can be used to set the max fuzzing iterations, and `-Diterations=0` can be used to fuzz indefinitely +- `-Dseed=<num>` can be used to set the PRNG seed for fuzz testing. If not provided, then the seed is chosen at random when `build.zig` is run. + +On failure, the number of iterations and the seed can be seen in the failing command, e.g. in `path\to\fuzz.exe path\to\verify-msvc.exe 100 2780392459403250529`, the iteration count is `100` and the seed is `2780392459403250529`. 
diff --git a/test/standalone/windows_argv/build.zig b/test/standalone/windows_argv/build.zig new file mode 100644 index 000000000000..dcc2d1e4b183 --- /dev/null +++ b/test/standalone/windows_argv/build.zig @@ -0,0 +1,88 @@ +const std = @import("std"); +const builtin = @import("builtin"); + +pub fn build(b: *std.Build) !void { + const test_step = b.step("test", "Test it"); + b.default_step = test_step; + + if (builtin.os.tag != .windows) return; + + const optimize: std.builtin.OptimizeMode = .Debug; + + const lib_msvc = b.addStaticLibrary(.{ + .name = "toargv-msvc", + .root_source_file = .{ .path = "lib.zig" }, + .target = b.resolveTargetQuery(.{ + .abi = .msvc, + }), + .optimize = optimize, + }); + const verify_msvc = b.addExecutable(.{ + .name = "verify-msvc", + .target = b.resolveTargetQuery(.{ + .abi = .msvc, + }), + .optimize = optimize, + }); + verify_msvc.addCSourceFile(.{ + .file = .{ .path = "verify.c" }, + .flags = &.{ "-DUNICODE", "-D_UNICODE" }, + }); + verify_msvc.linkLibrary(lib_msvc); + verify_msvc.linkLibC(); + + const lib_gnu = b.addStaticLibrary(.{ + .name = "toargv-gnu", + .root_source_file = .{ .path = "lib.zig" }, + .target = b.resolveTargetQuery(.{ + .abi = .gnu, + }), + .optimize = optimize, + }); + const verify_gnu = b.addExecutable(.{ + .name = "verify-gnu", + .target = b.resolveTargetQuery(.{ + .abi = .gnu, + }), + .optimize = optimize, + }); + verify_gnu.addCSourceFile(.{ + .file = .{ .path = "verify.c" }, + .flags = &.{ "-DUNICODE", "-D_UNICODE" }, + }); + verify_gnu.mingw_unicode_entry_point = true; + verify_gnu.linkLibrary(lib_gnu); + verify_gnu.linkLibC(); + + const fuzz = b.addExecutable(.{ + .name = "fuzz", + .root_source_file = .{ .path = "fuzz.zig" }, + .target = b.host, + .optimize = optimize, + }); + + const fuzz_max_iterations = b.option(u64, "iterations", "The max fuzz iterations (default: 100)") orelse 100; + const fuzz_iterations_arg = std.fmt.allocPrint(b.allocator, "{}", .{fuzz_max_iterations}) catch @panic("oom"); + + 
const fuzz_seed = b.option(u64, "seed", "Seed to use for the PRNG (default: random)") orelse seed: { + var buf: [8]u8 = undefined; + try std.posix.getrandom(&buf); + break :seed std.mem.readInt(u64, &buf, builtin.cpu.arch.endian()); + }; + const fuzz_seed_arg = std.fmt.allocPrint(b.allocator, "{}", .{fuzz_seed}) catch @panic("oom"); + + const run_msvc = b.addRunArtifact(fuzz); + run_msvc.setName("fuzz-msvc"); + run_msvc.addArtifactArg(verify_msvc); + run_msvc.addArgs(&.{ fuzz_iterations_arg, fuzz_seed_arg }); + run_msvc.expectExitCode(0); + + const run_gnu = b.addRunArtifact(fuzz); + run_gnu.setName("fuzz-gnu"); + run_gnu.addArtifactArg(verify_gnu); + run_gnu.addArgs(&.{ fuzz_iterations_arg, fuzz_seed_arg }); + run_gnu.expectExitCode(0); + + test_step.dependOn(&run_msvc.step); + test_step.dependOn(&run_gnu.step); +} diff --git a/test/standalone/windows_argv/fuzz.zig b/test/standalone/windows_argv/fuzz.zig new file mode 100644 index 000000000000..b88853196fb0 --- /dev/null +++ b/test/standalone/windows_argv/fuzz.zig @@ -0,0 +1,159 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const windows = std.os.windows; +const Allocator = std.mem.Allocator; + +pub fn main() !void { + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + defer std.debug.assert(gpa.deinit() == .ok); + const allocator = gpa.allocator(); + + const args = try std.process.argsAlloc(allocator); + defer std.process.argsFree(allocator, args); + + if (args.len < 2) return error.MissingArgs; + + const verify_path_wtf8 = args[1]; + const verify_path_w = try std.unicode.wtf8ToWtf16LeAllocZ(allocator, verify_path_wtf8); + defer allocator.free(verify_path_w); + + const iterations: u64 = iterations: { + if (args.len < 3) break :iterations 0; + break :iterations try std.fmt.parseUnsigned(u64, args[2], 10); + }; + + var rand_seed = false; + const seed: u64 = seed: { + if (args.len < 4) { + rand_seed = true; + var buf: [8]u8 = undefined; + try std.posix.getrandom(&buf); + break :seed 
std.mem.readInt(u64, &buf, builtin.cpu.arch.endian()); + } + break :seed try std.fmt.parseUnsigned(u64, args[3], 10); + }; + var random = std.rand.DefaultPrng.init(seed); + const rand = random.random(); + + // If the seed was not given via the CLI, then output the + // randomly chosen seed so that this run can be reproduced + if (rand_seed) { + std.debug.print("rand seed: {}\n", .{seed}); + } + + var cmd_line_w_buf = std.ArrayList(u16).init(allocator); + defer cmd_line_w_buf.deinit(); + + var i: u64 = 0; + var errors: u64 = 0; + while (iterations == 0 or i < iterations) { + const cmd_line_w = try randomCommandLineW(allocator, rand); + defer allocator.free(cmd_line_w); + + // avoid known difference for 0-length command lines + if (cmd_line_w.len == 0 or cmd_line_w[0] == '\x00') continue; + + const exit_code = try spawnVerify(verify_path_w, cmd_line_w); + if (exit_code != 0) { + std.debug.print(">>> found discrepancy <<<\n", .{}); + const cmd_line_wtf8 = try std.unicode.wtf16LeToWtf8Alloc(allocator, cmd_line_w); + defer allocator.free(cmd_line_wtf8); + std.debug.print("\"{}\"\n\n", .{std.zig.fmtEscapes(cmd_line_wtf8)}); + + errors += 1; + } + + i += 1; + } + if (errors > 0) { + // we never get here if iterations is 0 so we don't have to worry about that case + std.debug.print("found {} discrepancies in {} iterations\n", .{ errors, iterations }); + return error.FoundDiscrepancies; + } +} + +fn randomCommandLineW(allocator: Allocator, rand: std.rand.Random) ![:0]const u16 { + const Choice = enum { + backslash, + quote, + space, + tab, + control, + printable, + non_ascii, + }; + + const choices = rand.uintAtMostBiased(u16, 256); + var buf = try std.ArrayList(u16).initCapacity(allocator, choices); + errdefer buf.deinit(); + + for (0..choices) |_| { + const choice = rand.enumValue(Choice); + const code_unit = switch (choice) { + .backslash => '\\', + .quote => '"', + .space => ' ', + .tab => '\t', + .control => switch (rand.uintAtMostBiased(u8, 0x21)) { + 0x21 => '\x7F', 
+                else => |b| b,
+            },
+            .printable => '!' + rand.uintAtMostBiased(u8, '~' - '!'),
+            .non_ascii => rand.intRangeAtMostBiased(u16, 0x80, 0xFFFF),
+        };
+        try buf.append(std.mem.nativeToLittle(u16, code_unit));
+    }
+
+    return buf.toOwnedSliceSentinel(0);
+}
+
+/// Spawns `verify_path` with `cmd_line` as its command line, waits for it to exit, and returns its exit code
+fn spawnVerify(verify_path: [:0]const u16, cmd_line: [:0]const u16) !windows.DWORD {
+    const child_proc = spawn: {
+        var startup_info: windows.STARTUPINFOW = .{
+            .cb = @sizeOf(windows.STARTUPINFOW),
+            .lpReserved = null,
+            .lpDesktop = null,
+            .lpTitle = null,
+            .dwX = 0,
+            .dwY = 0,
+            .dwXSize = 0,
+            .dwYSize = 0,
+            .dwXCountChars = 0,
+            .dwYCountChars = 0,
+            .dwFillAttribute = 0,
+            .dwFlags = windows.STARTF_USESTDHANDLES,
+            .wShowWindow = 0,
+            .cbReserved2 = 0,
+            .lpReserved2 = null,
+            .hStdInput = null,
+            .hStdOutput = null,
+            .hStdError = windows.GetStdHandle(windows.STD_ERROR_HANDLE) catch null, // child reuses our stderr when available
+        };
+        var proc_info: windows.PROCESS_INFORMATION = undefined;
+
+        try windows.CreateProcessW(
+            @constCast(verify_path.ptr), // application name: the executable that is actually run
+            @constCast(cmd_line.ptr), // command line: the raw string the child will split into argv
+            null,
+            null,
+            windows.TRUE,
+            0,
+            null,
+            null,
+            &startup_info,
+            &proc_info,
+        );
+        windows.CloseHandle(proc_info.hThread); // only the process handle is needed below
+
+        break :spawn proc_info.hProcess;
+    };
+    defer windows.CloseHandle(child_proc);
+    try windows.WaitForSingleObjectEx(child_proc, windows.INFINITE, false);
+
+    var exit_code: windows.DWORD = undefined;
+    if (windows.kernel32.GetExitCodeProcess(child_proc, &exit_code) == 0) {
+        return error.UnableToGetExitCode;
+    }
+    return exit_code;
+}
diff --git a/test/standalone/windows_argv/lib.h b/test/standalone/windows_argv/lib.h
new file mode 100644
index 000000000000..e3cb684715f9
--- /dev/null
+++ b/test/standalone/windows_argv/lib.h
@@ -0,0 +1,8 @@
+#ifndef _LIB_H_
+#define _LIB_H_
+
+#include <wchar.h>
+
+int verify(int argc, wchar_t *argv[]); /* implemented in lib.zig; returns 1 on success, 0 on failure */
+
+#endif
\ No newline at end of file
diff --git a/test/standalone/windows_argv/lib.zig b/test/standalone/windows_argv/lib.zig
new file mode 100644
index 
000000000000..074273ae21aa --- /dev/null +++ b/test/standalone/windows_argv/lib.zig @@ -0,0 +1,59 @@ +const std = @import("std"); + +/// Returns 1 on success, 0 on failure +export fn verify(argc: c_int, argv: [*]const [*:0]const u16) c_int { + const argv_slice = argv[0..@intCast(argc)]; + testArgv(argv_slice) catch |err| switch (err) { + error.OutOfMemory => @panic("oom"), + error.Overflow => @panic("bytes needed to contain args would overflow usize"), + error.ArgvMismatch => return 0, + }; + return 1; +} + +fn testArgv(expected_args: []const [*:0]const u16) !void { + var arena_state = std.heap.ArenaAllocator.init(std.heap.page_allocator); + defer arena_state.deinit(); + const allocator = arena_state.allocator(); + + const args = try std.process.argsAlloc(allocator); + var wtf8_buf = std.ArrayList(u8).init(allocator); + + var eql = true; + if (args.len != expected_args.len) eql = false; + + const min_len = @min(expected_args.len, args.len); + for (expected_args[0..min_len], args[0..min_len], 0..) |expected_arg, arg_wtf8, i| { + wtf8_buf.clearRetainingCapacity(); + try std.unicode.wtf16LeToWtf8ArrayList(&wtf8_buf, std.mem.span(expected_arg)); + if (!std.mem.eql(u8, wtf8_buf.items, arg_wtf8)) { + std.debug.print("{}: expected: \"{}\"\n", .{ i, std.zig.fmtEscapes(wtf8_buf.items) }); + std.debug.print("{}: actual: \"{}\"\n", .{ i, std.zig.fmtEscapes(arg_wtf8) }); + eql = false; + } + } + if (!eql) { + for (expected_args[min_len..], min_len..) |arg, i| { + wtf8_buf.clearRetainingCapacity(); + try std.unicode.wtf16LeToWtf8ArrayList(&wtf8_buf, std.mem.span(arg)); + std.debug.print("{}: expected: \"{}\"\n", .{ i, std.zig.fmtEscapes(wtf8_buf.items) }); + } + for (args[min_len..], min_len..) 
|arg, i| { + std.debug.print("{}: actual: \"{}\"\n", .{ i, std.zig.fmtEscapes(arg) }); + } + const peb = std.os.windows.peb(); + const lpCmdLine: [*:0]u16 = @ptrCast(peb.ProcessParameters.CommandLine.Buffer); + wtf8_buf.clearRetainingCapacity(); + try std.unicode.wtf16LeToWtf8ArrayList(&wtf8_buf, std.mem.span(lpCmdLine)); + std.debug.print("command line: \"{}\"\n", .{std.zig.fmtEscapes(wtf8_buf.items)}); + std.debug.print("expected argv:\n", .{}); + std.debug.print("&.{{\n", .{}); + for (expected_args) |arg| { + wtf8_buf.clearRetainingCapacity(); + try std.unicode.wtf16LeToWtf8ArrayList(&wtf8_buf, std.mem.span(arg)); + std.debug.print(" \"{}\",\n", .{std.zig.fmtEscapes(wtf8_buf.items)}); + } + std.debug.print("}}\n", .{}); + return error.ArgvMismatch; + } +} diff --git a/test/standalone/windows_argv/verify.c b/test/standalone/windows_argv/verify.c new file mode 100644 index 000000000000..53a40d61c9fc --- /dev/null +++ b/test/standalone/windows_argv/verify.c @@ -0,0 +1,7 @@ +#include +#include "lib.h" + +int wmain(int argc, wchar_t *argv[]) { + if (!verify(argc, argv)) return 1; + return 0; +} \ No newline at end of file