Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add resource journal #6586

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions doc/man1/flux-resource.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ SYNOPSIS
| **flux** **resource** **reload** [-f] [--xml] *path*

| **flux** **resource** **acquire-mute**
| **flux** **resource** **eventlog** [*-w* *EVENT*] [*-f*]

DESCRIPTION
===========
Expand Down Expand Up @@ -356,6 +357,24 @@ Tell the resource module to stop sending RFC 28 ``resource.acquire`` responses
to the scheduler. This is used during Flux instance shutdown to avoid asking
the scheduler to needlessly process OFFLINE updates.

eventlog
--------

.. program:: flux resource eventlog

Watch the resource journal, which is described in RFC 44.

.. option:: -f, --follow

After printing the current eventlog, wait for new events and print
them as they arrive.

.. option:: -w, --wait=EVENT

Wait for the specified event to be posted, print it, then quit.
The current set of valid events events is *restart*, *resource-define*,
*online*, *offline*, *drain*, *undrain*, *torpid*, and *lively*.

OUTPUT FORMAT
=============

Expand Down Expand Up @@ -520,3 +539,4 @@ FLUX RFC
| :doc:`rfc:spec_22`
| :doc:`rfc:spec_27`
| :doc:`rfc:spec_29`
| :doc:`rfc:spec_44`
52 changes: 52 additions & 0 deletions src/cmd/flux-resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -720,6 +720,41 @@
print(rset.encode())


def print_events(events, follow, wait):
if not events and not follow and not wait:
return False

Check warning on line 725 in src/cmd/flux-resource.py

View check run for this annotation

Codecov / codecov/patch

src/cmd/flux-resource.py#L725

Added line #L725 was not covered by tests
for entry in events:
print(json.dumps(entry))
if wait and entry["name"] == wait:
return False
return True


def eventlog_continuation(f, follow, wait):
try:
payload = f.get()
except OSError as exc:
if exc.errno != errno.ENODATA:
raise
payload = None

Check warning on line 739 in src/cmd/flux-resource.py

View check run for this annotation

Codecov / codecov/patch

src/cmd/flux-resource.py#L736-L739

Added lines #L736 - L739 were not covered by tests
if not payload or not print_events(payload["events"], follow, wait):
f.flux_handle.reactor_stop()
else:
f.reset()


def eventlog(args):
"""Show the resource eventlog"""
h = flux.Flux()
f = h.rpc(
"resource.journal",
nodeid=0,
flags=flux.constants.FLUX_RPC_STREAMING,
)
f.then(eventlog_continuation, args.follow, args.wait)
h.reactor_run()


LOGGER = logging.getLogger("flux-resource")


Expand Down Expand Up @@ -1009,6 +1044,23 @@
R_parser.add_argument("--from-stdin", action="store_true", help=argparse.SUPPRESS)
R_parser.add_argument("--config-file", help=argparse.SUPPRESS)

eventlog_parser = subparsers.add_parser(
"eventlog", formatter_class=flux.util.help_formatter()
)
eventlog_parser.add_argument(
"-f",
"--follow",
action="store_true",
help="Display new events as they are posted",
)
eventlog_parser.add_argument(
"-w",
"--wait",
metavar="EVENT",
help="Display events until EVENT is posted",
)
eventlog_parser.set_defaults(func=eventlog)

args = parser.parse_args()
args.func(args)

Expand Down
33 changes: 33 additions & 0 deletions src/modules/resource/monitor.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,37 @@
return idset_decode (members);
}

static int post_restart_event (struct monitor *monitor)
{
struct idset *ranks = NULL;
char *ranks_str = NULL;
char *online_str = NULL;
const char *hostlist;
int rc = -1;

if (!(hostlist = flux_attr_get (monitor->ctx->h, "hostlist"))
|| !(ranks = idset_create (monitor->size, 0))
|| idset_range_set (ranks, 0, monitor->size - 1) < 0
|| !(ranks_str = idset_encode (ranks, IDSET_FLAG_RANGE))
|| !(online_str = idset_encode (monitor->up, IDSET_FLAG_RANGE))
|| reslog_post_pack (monitor->ctx->reslog,
NULL,
0.,
"restart",
EVENT_NO_COMMIT,
"{s:s s:s s:s}",
"ranks", ranks_str,
"online", online_str,
"nodelist", hostlist) < 0)
goto done;

Check warning on line 137 in src/modules/resource/monitor.c

View check run for this annotation

Codecov / codecov/patch

src/modules/resource/monitor.c#L137

Added line #L137 was not covered by tests
rc = 0;
done:
ERRNO_SAFE_WRAP (free, online_str);
ERRNO_SAFE_WRAP (free, ranks_str);
idset_destroy (ranks);
return rc;
}

/* Post event 'name' with a context containing idset:s, where 's' is
* the string encoding of 'ids'. The event is not propagated to the KVS.
*/
Expand Down Expand Up @@ -361,6 +392,8 @@
monitor) < 0)
goto error;
}
if (post_restart_event (monitor) < 0)
goto error;

Check warning on line 396 in src/modules/resource/monitor.c

View check run for this annotation

Codecov / codecov/patch

src/modules/resource/monitor.c#L396

Added line #L396 was not covered by tests
done:
return monitor;
error:
Expand Down
Loading
Loading