Skip to content

Commit

Permalink
feat: add support for Pebble check-failed and check-recovered events (#…
Browse files Browse the repository at this point in the history
…1281)

Two new event types are supported (actually getting these events requires Juju 3.6b2 or higher):

* `PebbleChangeFailedEvent` - emitted when a Pebble check in a container
hits the failure threshold
* `PebbleChangeRecoveredEvent` - emitted when a Pebble check in a
container passes after previously hitting the failure threshold

Each event type has a single new attribute (in addition to those
inherited from `WorkloadEvent`):

* `info` - this is a shortcut to Pebble's `get_check` method (not
otherwise accessible from the charm), but will only query Pebble if more
than the `name` is requested, and will cache the result on the event
instance.

Two new enums are added to Pebble, to make it easier to work with these
events, particularly with Harness:

* `ChangeStatus` - the possible values for `ops.pebble.Change.status`
(note that this is the full list, but we only expect charms to see
Doing, Done, and Error for now).
* `ChangeKind` - the possible values for `ops.pebble.Change.kind`
(again, this is the current full list, but we only expect charms to see
perform-check and recover-check at the moment).

`Harness.pebble_notify` is extended to support emitting the two new
events. This requires implementation knowledge (that the check events
are triggered by a Pebble notice of type change-updated), but provides
minimal support for testing with Harness, and will be supplemented by
user-friendly testing in Scenario.

The testing Pebble backend gains an implementation for `get_change` and
`get_checks`. These are not used directly here, but will be used in
Scenario, and it makes more sense to have the implementation in ops.

This is the ops portion of
[OP046](https://docs.google.com/document/d/13ItH8l5ZSmmv9WqZXpqjV8EifZU5e3zxINiNLubnC68/edit).
It requires a [corresponding change to
Pebble](canonical/pebble#444) and [change to
Juju](juju/juju#17655).
  • Loading branch information
tonyandrewmeyer authored Jul 17, 2024
1 parent 5a21cd2 commit e3eae86
Show file tree
Hide file tree
Showing 10 changed files with 360 additions and 11 deletions.
8 changes: 8 additions & 0 deletions ops/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@
'LeaderSettingsChangedEvent',
'MetadataLinks',
'PayloadMeta',
'PebbleCheckEvent',
'PebbleCheckFailedEvent',
'PebbleCheckRecoveredEvent',
'PebbleCustomNoticeEvent',
'PebbleNoticeEvent',
'PebbleReadyEvent',
Expand Down Expand Up @@ -132,6 +135,7 @@
'ContainerMapping',
'ErrorStatus',
'InvalidStatusError',
'LazyCheckInfo',
'LazyMapping',
'LazyNotice',
'MaintenanceStatus',
Expand Down Expand Up @@ -204,6 +208,9 @@
LeaderSettingsChangedEvent,
MetadataLinks,
PayloadMeta,
PebbleCheckEvent,
PebbleCheckFailedEvent,
PebbleCheckRecoveredEvent,
PebbleCustomNoticeEvent,
PebbleNoticeEvent,
PebbleReadyEvent,
Expand Down Expand Up @@ -275,6 +282,7 @@
ContainerMapping,
ErrorStatus,
InvalidStatusError,
LazyCheckInfo,
LazyMapping,
LazyNotice,
MaintenanceStatus,
Expand Down
58 changes: 58 additions & 0 deletions ops/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -831,6 +831,60 @@ class PebbleCustomNoticeEvent(PebbleNoticeEvent):
"""Event triggered when a Pebble notice of type "custom" is created or repeats."""


class PebbleCheckEvent(WorkloadEvent):
"""Base class for Pebble check events."""

info: model.LazyCheckInfo
"""Provide access to the check's current state."""

def __init__(
self,
handle: Handle,
workload: model.Container,
check_name: str,
):
super().__init__(handle, workload)
self.info = model.LazyCheckInfo(workload, check_name)

def snapshot(self) -> Dict[str, Any]:
"""Used by the framework to serialize the event to disk.
Not meant to be called by charm code.
"""
d = super().snapshot()
d['check_name'] = self.info.name
return d

def restore(self, snapshot: Dict[str, Any]):
"""Used by the framework to deserialize the event from disk.
Not meant to be called by charm code.
"""
check_name = snapshot.pop('check_name')
super().restore(snapshot)
self.info = model.LazyCheckInfo(self.workload, check_name)


class PebbleCheckFailedEvent(PebbleCheckEvent):
"""Event triggered when a Pebble check exceeds the configured failure threshold.
Note that the check may have started passing by the time this event is
emitted (which will mean that a :class:`PebbleCheckRecoveredEvent` will be
emitted next). If the handler is executing code that should only be done
if the check is currently failing, check the current status with
``event.info.status == ops.pebble.CheckStatus.DOWN``.
"""


class PebbleCheckRecoveredEvent(PebbleCheckEvent):
"""Event triggered when a Pebble check recovers.
This event is only triggered when the check has previously reached a failure
state (not simply failed, but failed at least as many times as the
configured threshold).
"""


class SecretEvent(HookEvent):
"""Base class for all secret events."""

Expand Down Expand Up @@ -1219,6 +1273,10 @@ def __init__(self, framework: Framework):
container_name = container_name.replace('-', '_')
self.on.define_event(f'{container_name}_pebble_ready', PebbleReadyEvent)
self.on.define_event(f'{container_name}_pebble_custom_notice', PebbleCustomNoticeEvent)
self.on.define_event(f'{container_name}_pebble_check_failed', PebbleCheckFailedEvent)
self.on.define_event(
f'{container_name}_pebble_check_recovered', PebbleCheckRecoveredEvent
)

@property
def app(self) -> model.Application:
Expand Down
3 changes: 3 additions & 0 deletions ops/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,9 @@ def _get_event_args(
notice_type = os.environ['JUJU_NOTICE_TYPE']
notice_key = os.environ['JUJU_NOTICE_KEY']
args.extend([notice_id, notice_type, notice_key])
elif issubclass(event_type, ops.charm.PebbleCheckEvent):
check_name = os.environ['JUJU_PEBBLE_CHECK_NAME']
args.append(check_name)
return args, {}
elif issubclass(event_type, ops.charm.SecretEvent):
args: List[Any] = [
Expand Down
34 changes: 34 additions & 0 deletions ops/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3792,6 +3792,40 @@ def _ensure_loaded(self):
assert self._notice.key == self.key


class LazyCheckInfo:
"""Provide lazily-loaded access to a Pebble check's info.
The attributes provided by this class are the same as those of
:class:`ops.pebble.CheckInfo`, however, the notice details are only fetched
from Pebble if necessary (and cached on the instance).
"""

name: str
level: Optional[Union[pebble.CheckLevel, str]]
status: Union[pebble.CheckStatus, str]
failures: int
threshold: int
change_id: Optional[pebble.ChangeID]

def __init__(self, container: Container, name: str):
self._container = container
self.name = name
self._info: Optional[ops.pebble.CheckInfo] = None

def __repr__(self):
return f'LazyCheckInfo(name={self.name!r})'

def __getattr__(self, item: str):
# Note: not called for defined attribute `name`.
self._ensure_loaded()
return getattr(self._info, item)

def _ensure_loaded(self):
if self._info is not None:
return
self._info = self._container.get_check(self.name)


@dataclasses.dataclass(frozen=True)
class CloudCredential:
"""Credentials for cloud.
Expand Down
54 changes: 54 additions & 0 deletions ops/pebble.py
Original file line number Diff line number Diff line change
Expand Up @@ -1471,6 +1471,15 @@ class NoticeType(enum.Enum):
"""Enum of notice types."""

CUSTOM = 'custom'
"""A custom notice reported via the Pebble client API or ``pebble notify``.
The key and data fields are provided by the user. The key must be in
the format ``example.com/path`` to ensure well-namespaced notice keys.
"""

CHANGE_UPDATE = 'change-update'
"""Recorded whenever a change's status is updated. The key for change-update
notices is the change ID.
"""


class NoticesUsers(enum.Enum):
Expand All @@ -1483,6 +1492,51 @@ class NoticesUsers(enum.Enum):
"""


class ChangeKind(enum.Enum):
"""Enum of Pebble Change kinds."""

START = 'start'
STOP = 'stop'
RESTART = 'restart'
REPLAN = 'replan'
EXEC = 'exec'
RECOVER_CHECK = 'recover-check'
PERFORM_CHECK = 'perform-check'


class ChangeStatus(enum.Enum):
"""Enum of Pebble Change statuses."""

HOLD = 'Hold'
"""Hold status means the task should not run for the moment, perhaps as a
consequence of an error on another task."""

DO = 'Do'
"""Do status means the change or task is ready to start."""

DOING = 'Doing'
"""Doing status means the change or task is running or an attempt was made to run it."""

DONE = 'Done'
"""Done status means the change or task was accomplished successfully."""

ABORT = 'Abort'
"""Abort status means the task should stop doing its activities and then undo."""

UNDO = 'Undo'
"""Undo status means the change or task should be undone, probably due to an error."""

UNDOING = 'Undoing'
"""UndoingStatus means the change or task is being undone or an attempt was made to undo it."""

ERROR = 'Error'
"""Error status means the change or task has errored out while running or being undone."""

WAIT = 'Wait'
"""Wait status means the task was accomplished successfully but some
external event needs to happen before work can progress further."""


@dataclasses.dataclass(frozen=True)
class Notice:
"""Information about a single notice."""
Expand Down
47 changes: 37 additions & 10 deletions ops/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1218,8 +1218,24 @@ def pebble_notify(

id, new_or_repeated = client._notify(type, key, data=data, repeat_after=repeat_after)

if self._charm is not None and type == pebble.NoticeType.CUSTOM and new_or_repeated:
self.charm.on[container_name].pebble_custom_notice.emit(container, id, type.value, key)
if self._charm is not None and new_or_repeated:
if type == pebble.NoticeType.CUSTOM:
self.charm.on[container_name].pebble_custom_notice.emit(
container, id, type.value, key
)
elif type == pebble.NoticeType.CHANGE_UPDATE and data:
kind = pebble.ChangeKind(data.get('kind'))
status = pebble.ChangeStatus(client.get_change(key).status)
if kind == pebble.ChangeKind.PERFORM_CHECK and status == pebble.ChangeStatus.ERROR:
self.charm.on[container_name].pebble_check_failed.emit(
container, data['check-name']
)
elif (
kind == pebble.ChangeKind.RECOVER_CHECK and status == pebble.ChangeStatus.DONE
):
self.charm.on[container_name].pebble_check_recovered.emit(
container, data['check-name']
)

return id

Expand Down Expand Up @@ -3023,6 +3039,8 @@ def __init__(self, backend: _TestingModelBackend, container_root: pathlib.Path):
self._exec_handlers: Dict[Tuple[str, ...], ExecHandler] = {}
self._notices: Dict[Tuple[str, str], pebble.Notice] = {}
self._last_notice_id = 0
self._changes: Dict[str, pebble.Change] = {}
self._check_infos: Dict[str, pebble.CheckInfo] = {}

def _handle_exec(self, command_prefix: Sequence[str], handler: ExecHandler):
prefix = tuple(command_prefix)
Expand Down Expand Up @@ -3056,8 +3074,13 @@ def get_changes(
) -> List[pebble.Change]:
raise NotImplementedError(self.get_changes)

def get_change(self, change_id: pebble.ChangeID) -> pebble.Change:
raise NotImplementedError(self.get_change)
def get_change(self, change_id: str) -> pebble.Change:
self._check_connection()
try:
return self._changes[change_id]
except KeyError:
message = f'cannot find change with id "{change_id}"'
raise self._api_error(404, message) from None

def abort_change(self, change_id: pebble.ChangeID) -> pebble.Change:
raise NotImplementedError(self.abort_change)
Expand Down Expand Up @@ -3632,8 +3655,16 @@ def send_signal(self, sig: Union[int, str], service_names: Iterable[str]):
message = f'cannot send signal to "{first_service}": invalid signal name "{sig}"'
raise self._api_error(500, message) from None

def get_checks(self, level=None, names=None): # type:ignore
raise NotImplementedError(self.get_checks) # type:ignore
def get_checks(
self, level: Optional[pebble.CheckLevel] = None, names: Optional[Iterable[str]] = None
) -> List[pebble.CheckInfo]:
if names is not None:
names = frozenset(names)
return [
info
for info in self._check_infos.values()
if (level is None or level == info.level) and (names is None or info.name in names)
]

def notify(
self,
Expand All @@ -3658,10 +3689,6 @@ def _notify(
Return a tuple of (notice_id, new_or_repeated).
"""
if type != pebble.NoticeType.CUSTOM:
message = f'invalid type "{type.value}" (can only add "custom" notices)'
raise self._api_error(400, message)

# The shape of the code below is taken from State.AddNotice in Pebble.
now = datetime.datetime.now(tz=datetime.timezone.utc)

Expand Down
20 changes: 20 additions & 0 deletions test/charms/test_main/src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ def __init__(self, *args: typing.Any):
on_collect_metrics=[],
on_test_pebble_ready=[],
on_test_pebble_custom_notice=[],
on_test_pebble_check_failed=[],
on_test_pebble_check_recovered=[],
on_log_critical_action=[],
on_log_error_action=[],
on_log_warning_action=[],
Expand Down Expand Up @@ -88,6 +90,10 @@ def __init__(self, *args: typing.Any):
self.framework.observe(
self.on.test_pebble_custom_notice, self._on_test_pebble_custom_notice
)
self.framework.observe(self.on.test_pebble_check_failed, self._on_test_pebble_check_failed)
self.framework.observe(
self.on.test_pebble_check_recovered, self._on_test_pebble_check_recovered
)

self.framework.observe(self.on.secret_remove, self._on_secret_remove)
self.framework.observe(self.on.secret_rotate, self._on_secret_rotate)
Expand Down Expand Up @@ -197,6 +203,20 @@ def _on_test_pebble_custom_notice(self, event: ops.PebbleCustomNoticeEvent):
self._stored.observed_event_types.append(type(event).__name__)
self._stored.test_pebble_custom_notice_data = event.snapshot()

def _on_test_pebble_check_failed(self, event: ops.PebbleCheckFailedEvent):
assert event.workload is not None, 'workload events must have a reference to a container'
assert isinstance(event.info, ops.LazyCheckInfo)
self._stored.on_test_pebble_check_failed.append(type(event).__name__)
self._stored.observed_event_types.append(type(event).__name__)
self._stored.test_pebble_check_failed_data = event.snapshot()

def _on_test_pebble_check_recovered(self, event: ops.PebbleCheckRecoveredEvent):
assert event.workload is not None, 'workload events must have a reference to a container'
assert isinstance(event.info, ops.LazyCheckInfo)
self._stored.on_test_pebble_check_recovered.append(type(event).__name__)
self._stored.observed_event_types.append(type(event).__name__)
self._stored.test_pebble_check_recovered_data = event.snapshot()

def _on_start_action(self, event: ops.ActionEvent):
assert (
event.handle.kind == 'start_action'
Expand Down
Loading

0 comments on commit e3eae86

Please sign in to comment.