From 4f0c78fb3c4d34a6e536ba8e797ab383764e0963 Mon Sep 17 00:00:00 2001
From: "Mark A. Grondona"
Date: Fri, 24 Jan 2025 19:12:21 +0000
Subject: [PATCH] job-exec: fix confusing "job shell exec error" log message

Problem: When the job-exec module detects an exec error for a job shell
it emits a confusing error message that includes either the path to the
job shell or the IMP (if a multiuser job), and only the result of
`strerror()` for the errno returned from libsubprocess. When using
sdexec, this errno is always `ENOENT`, resulting in a confusing error
message that seems to indicate that `flux-imp` was not found.

It is unhelpful to include `argv[0]` in this error message. It will
always be the job shell or the IMP and we all know it. Drop this from
the log message. Also, sdexec will provide extra information in the
subprocess error string available from `flux_subprocess_fail_error (p)`.
Log this instead of `strerror (errno)`.

Fixes #6568
---
 src/modules/job-exec/exec.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/modules/job-exec/exec.c b/src/modules/job-exec/exec.c
index 9537f61e17dc..4e4b5eabba56 100644
--- a/src/modules/job-exec/exec.c
+++ b/src/modules/job-exec/exec.c
@@ -379,12 +379,12 @@ static void error_cb (struct bulk_exec *exec, flux_subprocess_t *p, void *arg)
             }
             else {
                 jobinfo_fatal_error (job,
-                                     errnum,
+                                     0,
                                      "%s on broker %s (rank %d): %s",
                                      "job shell exec error",
                                      hostname,
                                      rank,
-                                     flux_cmd_arg (cmd, 0));
+                                     flux_subprocess_fail_error (p));
             }
         }
         else