diff --git a/.chloggen/opamp-extension-reportshealth.yaml b/.chloggen/opamp-extension-reportshealth.yaml
new file mode 100644
index 000000000000..c58f185a6628
--- /dev/null
+++ b/.chloggen/opamp-extension-reportshealth.yaml
@@ -0,0 +1,27 @@
+# Use this changelog template to create an entry for release notes.
+
+# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
+change_type: enhancement
+
+# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
+component: opampextension
+
+# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
+note: Implement `ReportsHealth` capability in OpAMP extension
+
+# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
+issues: [35433]
+
+# (Optional) One or more lines of additional information to render under the primary note.
+# These lines will be padded with 2 spaces and then inserted directly into the document.
+# Use pipe (|) for multiline entries.
+subtext:
+
+# If your change doesn't affect end users or the exported elements of any package,
+# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
+# Optional: The change log or logs in which this entry should be included.
+# e.g. '[user]' or '[user, api]'
+# Include 'user' if the change is relevant to end users.
+# Include 'api' if there is a change to a library API.
+# Default: '[user]'
+change_logs: []
diff --git a/extension/opampextension/README.md b/extension/opampextension/README.md
index 59d7284fc643..4d5227c2e276 100644
--- a/extension/opampextension/README.md
+++ b/extension/opampextension/README.md
@@ -43,6 +43,7 @@ The following settings are optional for both transports:
   instance UID remains constant across process restarts.
 - `capabilities`: Keys with boolean true/false values that enable a particular OpAMP capability.
   - `reports_effective_config`: Whether to enable the OpAMP ReportsEffectiveConfig capability. Default is `true`.
+  - `reports_health`: Whether to enable the OpAMP ReportsHealth capability. Default is `true`.
 - `agent_description`: Setting that modifies the agent description reported to the OpAMP server.
   - `non_identifying_attributes`: A map of key value pairs that will be added to the [non-identifying attributes](https://github.com/open-telemetry/opamp-spec/blob/main/specification.md#agentdescriptionnon_identifying_attributes) reported to the OpAMP server. If an attribute collides with the default non-identifying attributes that are automatically added, the ones specified here take precedence.
 - `ppid`: An optional process ID to monitor. When this process is no longer running, the extension will emit a fatal error, causing the collector to exit. This is meant to be set by the Supervisor or some other parent process, and should not be configured manually.
diff --git a/extension/opampextension/config.go b/extension/opampextension/config.go
index a06adee4fab8..e47ae1894ed0 100644
--- a/extension/opampextension/config.go
+++ b/extension/opampextension/config.go
@@ -54,6 +54,8 @@ type AgentDescription struct {
 type Capabilities struct {
 	// ReportsEffectiveConfig enables the OpAMP ReportsEffectiveConfig Capability. (default: true)
 	ReportsEffectiveConfig bool `mapstructure:"reports_effective_config"`
+	// ReportsHealth enables the OpAMP ReportsHealth Capability. (default: true)
+	ReportsHealth bool `mapstructure:"reports_health"`
 }
 
 func (caps Capabilities) toAgentCapabilities() protobufs.AgentCapabilities {
@@ -63,6 +65,9 @@ func (caps Capabilities) toAgentCapabilities() protobufs.AgentCapabilities {
 	if caps.ReportsEffectiveConfig {
 		agentCapabilities |= protobufs.AgentCapabilities_AgentCapabilities_ReportsEffectiveConfig
 	}
+	if caps.ReportsHealth {
+		agentCapabilities |= protobufs.AgentCapabilities_AgentCapabilities_ReportsHealth
+	}
 
 	return agentCapabilities
 }
diff --git a/extension/opampextension/config_test.go b/extension/opampextension/config_test.go
index bbccff4d91aa..7f50970f3184 100644
--- a/extension/opampextension/config_test.go
+++ b/extension/opampextension/config_test.go
@@ -8,6 +8,7 @@ import (
 	"testing"
 	"time"
 
+	"github.com/open-telemetry/opamp-go/protobufs"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 	"go.opentelemetry.io/collector/config/configopaque"
@@ -39,6 +40,7 @@ func TestUnmarshalConfig(t *testing.T) {
 			InstanceUID: "01BX5ZZKBKACTAV9WEVGEMMVRZ",
 			Capabilities: Capabilities{
 				ReportsEffectiveConfig: true,
+				ReportsHealth:          true,
 			},
 			PPIDPollInterval: 5 * time.Second,
 		}, cfg)
@@ -63,6 +65,7 @@ func TestUnmarshalHttpConfig(t *testing.T) {
 			InstanceUID: "01BX5ZZKBKACTAV9WEVGEMMVRZ",
 			Capabilities: Capabilities{
 				ReportsEffectiveConfig: true,
+				ReportsHealth:          true,
 			},
 			PPIDPollInterval: 5 * time.Second,
 		}, cfg)
@@ -286,3 +289,39 @@ func TestConfig_Validate(t *testing.T) {
 		})
 	}
 }
+
+func TestCapabilities_toAgentCapabilities(t *testing.T) {
+	// Each capability is toggled on its own as well as together, so a flag wired
+	// to the wrong protobuf bit cannot go unnoticed.
+	tests := []struct {
+		name string
+		caps Capabilities
+		want protobufs.AgentCapabilities
+	}{
+		{
+			name: "no optional capabilities enabled",
+			caps: Capabilities{},
+			want: protobufs.AgentCapabilities_AgentCapabilities_ReportsStatus,
+		},
+		{
+			name: "only ReportsEffectiveConfig enabled",
+			caps: Capabilities{ReportsEffectiveConfig: true},
+			want: protobufs.AgentCapabilities_AgentCapabilities_ReportsStatus | protobufs.AgentCapabilities_AgentCapabilities_ReportsEffectiveConfig,
+		},
+		{
+			name: "only ReportsHealth enabled",
+			caps: Capabilities{ReportsHealth: true},
+			want: protobufs.AgentCapabilities_AgentCapabilities_ReportsStatus | protobufs.AgentCapabilities_AgentCapabilities_ReportsHealth,
+		},
+		{
+			name: "all supported capabilities enabled",
+			caps: Capabilities{ReportsEffectiveConfig: true, ReportsHealth: true},
+			want: protobufs.AgentCapabilities_AgentCapabilities_ReportsStatus | protobufs.AgentCapabilities_AgentCapabilities_ReportsEffectiveConfig | protobufs.AgentCapabilities_AgentCapabilities_ReportsHealth,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			assert.Equalf(t, tt.want, tt.caps.toAgentCapabilities(), "toAgentCapabilities()")
+		})
+	}
+}
diff --git a/extension/opampextension/factory.go b/extension/opampextension/factory.go
index ea4ea23a22d6..868c3bc85c65 100644
--- a/extension/opampextension/factory.go
+++ b/extension/opampextension/factory.go
@@ -27,6 +27,7 @@ func createDefaultConfig() component.Config {
 		Server: &OpAMPServer{},
 		Capabilities: Capabilities{
 			ReportsEffectiveConfig: true,
+			ReportsHealth:          true,
 		},
 		PPIDPollInterval: 5 * time.Second,
 	}
diff --git a/extension/opampextension/opamp_agent.go b/extension/opampextension/opamp_agent.go
index db1ef789e738..f984974c6c88 100644
--- a/extension/opampextension/opamp_agent.go
+++ b/extension/opampextension/opamp_agent.go
@@ -32,6 +32,8 @@ import (
 	"github.com/open-telemetry/opentelemetry-collector-contrib/extension/opampcustommessages"
 )
 
+var _ extensioncapabilities.PipelineWatcher = (*opampAgent)(nil)
+
 type opampAgent struct {
 	cfg    *Config
 	logger *zap.Logger
@@ -121,6 +123,8 @@ func (o *opampAgent) Start(ctx context.Context, host component.Host) error {
 		return err
 	}
 
+	o.setHealth(&protobufs.ComponentHealth{Healthy: false})
+
 	o.logger.Debug("Starting OpAMP client...")
 
 	if err := o.opampClient.Start(context.Background(), settings); err != nil {
@@ -141,6 +145,7 @@ func (o *opampAgent) Shutdown(ctx context.Context) error {
 	if o.opampClient == nil {
 		return nil
 	}
+
 	o.logger.Debug("Stopping OpAMP client...")
 	err := o.opampClient.Stop(ctx)
 	// Opamp-go considers this an error, but the collector does not.
@@ -178,6 +183,18 @@ func (o *opampAgent) Register(capability string, opts ...opampcustommessages.Cus
 	return o.customCapabilityRegistry.Register(capability, opts...)
 }
 
+// Ready marks the agent as healthy through setHealth. It always returns nil;
+// a failed health report is only logged.
+func (o *opampAgent) Ready() error {
+	o.setHealth(&protobufs.ComponentHealth{Healthy: true})
+	return nil
+}
+
+// NotReady marks the agent as unhealthy through setHealth. It always returns
+// nil; a failed health report is only logged.
+func (o *opampAgent) NotReady() error {
+	o.setHealth(&protobufs.ComponentHealth{Healthy: false})
+	return nil
+}
+
 func (o *opampAgent) updateEffectiveConfig(conf *confmap.Conf) {
 	o.eclk.Lock()
 	defer o.eclk.Unlock()
@@ -344,3 +361,17 @@ func (o *opampAgent) onMessage(_ context.Context, msg *types.MessageData) {
 		o.customCapabilityRegistry.ProcessMessage(msg.CustomMessage)
 	}
 }
+
+// setHealth hands ch to the OpAMP client when the ReportsHealth capability is
+// enabled and a client exists; otherwise it does nothing. A SetHealth error is
+// logged rather than returned.
+func (o *opampAgent) setHealth(ch *protobufs.ComponentHealth) {
+	if !o.capabilities.ReportsHealth || o.opampClient == nil {
+		return
+	}
+
+	err := o.opampClient.SetHealth(ch)
+	if err != nil {
+		o.logger.Error("Could not report health to OpAMP server", zap.Error(err))
+	}
+}
diff --git a/extension/opampextension/opamp_agent_test.go b/extension/opampextension/opamp_agent_test.go
index e2013d1d45eb..fd72d346492c 100644
--- a/extension/opampextension/opamp_agent_test.go
+++ b/extension/opampextension/opamp_agent_test.go
@@ -31,6 +31,7 @@ func TestNewOpampAgent(t *testing.T) {
 	assert.Equal(t, "test version", o.agentVersion)
 	assert.NotEmpty(t, o.instanceID.String())
 	assert.True(t, o.capabilities.ReportsEffectiveConfig)
+	assert.True(t, o.capabilities.ReportsHealth)
 	assert.Empty(t, o.effectiveConfig)
 	assert.Nil(t, o.agentDescription)
 }