Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 8032: make node agent configMap name configurable #8097

Merged
merged 1 commit into from
Aug 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelogs/unreleased/8097-Lyndon-Li
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix issue #8032, make node-agent configMap name configurable
12 changes: 7 additions & 5 deletions pkg/cmd/cli/nodeagent/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@
metricsAddress string
resourceTimeout time.Duration
dataMoverPrepareTimeout time.Duration
nodeAgentConfig string
}

func NewServerCommand(f client.Factory) *cobra.Command {
Expand Down Expand Up @@ -120,6 +121,7 @@
command.Flags().DurationVar(&config.resourceTimeout, "resource-timeout", config.resourceTimeout, "How long to wait for resource processes which are not covered by other specific timeout parameters. Default is 10 minutes.")
command.Flags().DurationVar(&config.dataMoverPrepareTimeout, "data-mover-prepare-timeout", config.dataMoverPrepareTimeout, "How long to wait for preparing a DataUpload/DataDownload. Default is 30 minutes.")
command.Flags().StringVar(&config.metricsAddress, "metrics-address", config.metricsAddress, "The address to expose prometheus metrics")
command.Flags().StringVar(&config.nodeAgentConfig, "node-agent-config", config.nodeAgentConfig, "The name of configMap containing node-agent configurations.")

Check warning on line 124 in pkg/cmd/cli/nodeagent/server.go

View check run for this annotation

Codecov / codecov/patch

pkg/cmd/cli/nodeagent/server.go#L124

Added line #L124 was not covered by tests

return command
}
Expand Down Expand Up @@ -463,14 +465,14 @@
var getConfigsFunc = nodeagent.GetConfigs

func (s *nodeAgentServer) getDataPathConfigs() {
configs, err := getConfigsFunc(s.ctx, s.namespace, s.kubeClient)
if err != nil {
s.logger.WithError(err).Warn("Failed to get node agent configs")
if s.config.nodeAgentConfig == "" {
s.logger.Info("No node-agent configMap is specified")
return
}

if configs == nil {
s.logger.Infof("Node agent configs are not found")
configs, err := getConfigsFunc(s.ctx, s.namespace, s.kubeClient, s.config.nodeAgentConfig)
if err != nil {
s.logger.WithError(err).Warnf("Failed to get node agent configs from configMap %s, ignore it", s.config.nodeAgentConfig)
return
}

Expand Down
33 changes: 22 additions & 11 deletions pkg/cmd/cli/nodeagent/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@ package nodeagent

import (
"context"
"errors"
"fmt"
"os"
"path/filepath"
"testing"

"github.com/pkg/errors"
"github.com/stretchr/testify/assert"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -122,28 +122,36 @@ func Test_getDataPathConfigs(t *testing.T) {

tests := []struct {
name string
getFunc func(context.Context, string, kubernetes.Interface) (*nodeagent.Configs, error)
getFunc func(context.Context, string, kubernetes.Interface, string) (*nodeagent.Configs, error)
configMapName string
expectConfigs *nodeagent.Configs
expectLog string
}{
{
name: "failed to get configs",
getFunc: func(context.Context, string, kubernetes.Interface) (*nodeagent.Configs, error) {
name: "no config specified",
expectLog: "No node-agent configMap is specified",
},
{
name: "failed to get configs",
configMapName: "node-agent-config",
getFunc: func(context.Context, string, kubernetes.Interface, string) (*nodeagent.Configs, error) {
return nil, errors.New("fake-get-error")
},
expectLog: "Failed to get node agent configs",
expectLog: "Failed to get node agent configs from configMap node-agent-config, ignore it",
},
{
name: "configs cm not found",
getFunc: func(context.Context, string, kubernetes.Interface) (*nodeagent.Configs, error) {
return nil, nil
name: "configs cm not found",
configMapName: "node-agent-config",
getFunc: func(context.Context, string, kubernetes.Interface, string) (*nodeagent.Configs, error) {
return nil, errors.New("fake-not-found-error")
},
expectLog: "Node agent configs are not found",
expectLog: "Failed to get node agent configs from configMap node-agent-config, ignore it",
},

{
name: "succeed",
getFunc: func(context.Context, string, kubernetes.Interface) (*nodeagent.Configs, error) {
name: "succeed",
configMapName: "node-agent-config",
getFunc: func(context.Context, string, kubernetes.Interface, string) (*nodeagent.Configs, error) {
return configs, nil
},
expectConfigs: configs,
Expand All @@ -155,6 +163,9 @@ func Test_getDataPathConfigs(t *testing.T) {
logBuffer := ""

s := &nodeAgentServer{
config: nodeAgentServerConfig{
nodeAgentConfig: test.configMapName,
},
logger: testutil.NewSingleLogger(&logBuffer),
}

Expand Down
11 changes: 3 additions & 8 deletions pkg/nodeagent/node_agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ import (

const (
// daemonSet is the name of the Velero node agent daemonset.
daemonSet = "node-agent"
configName = "node-agent-config"
daemonSet = "node-agent"
)

var (
Expand Down Expand Up @@ -121,14 +120,10 @@ func GetPodSpec(ctx context.Context, kubeClient kubernetes.Interface, namespace
return &ds.Spec.Template.Spec, nil
}

func GetConfigs(ctx context.Context, namespace string, kubeClient kubernetes.Interface) (*Configs, error) {
func GetConfigs(ctx context.Context, namespace string, kubeClient kubernetes.Interface, configName string) (*Configs, error) {
cm, err := kubeClient.CoreV1().ConfigMaps(namespace).Get(ctx, configName, metav1.GetOptions{})
if err != nil {
if apierrors.IsNotFound(err) {
return nil, nil
} else {
return nil, errors.Wrapf(err, "error to get node agent configs %s", configName)
}
return nil, errors.Wrapf(err, "error to get node agent configs %s", configName)
}

if cm.Data == nil {
Expand Down
6 changes: 1 addition & 5 deletions pkg/nodeagent/node_agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,10 +254,6 @@ func TestGetConfigs(t *testing.T) {
expectResult *Configs
expectErr string
}{
{
name: "cm is not found",
namespace: "fake-ns",
},
{
name: "cm get error",
namespace: "fake-ns",
Expand Down Expand Up @@ -318,7 +314,7 @@ func TestGetConfigs(t *testing.T) {
fakeKubeClient.Fake.PrependReactor(reactor.verb, reactor.resource, reactor.reactorFunc)
}

result, err := GetConfigs(context.TODO(), test.namespace, fakeKubeClient)
result, err := GetConfigs(context.TODO(), test.namespace, fakeKubeClient, "node-agent-config")
if test.expectErr == "" {
assert.NoError(t, err)

Expand Down
21 changes: 18 additions & 3 deletions site/content/docs/main/data-movement-backup-node-selection.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ Velero data movement backup supports to constrain the nodes where it runs. This
- Constrain the data movement backup to run in specific nodes because these nodes have more resources than others
- Constrain the data movement backup to run in specific nodes because the storage allows volume/snapshot provisions in these nodes only

Velero introduces a new section in ```node-agent-config``` configMap, called ```loadAffinity```, through which you can specify the nodes to/not to run data movement backups, in the affinity and anti-affinity flavors.
If it is not there, ```node-agent-config``` should be created manually. The configMap should be in the same namespace where Velero is installed. If multiple Velero instances are installed in different namespaces, there should be one configMap in each namespace which applies to node-agent in that namespace only.
Velero introduces a new section in the node-agent configMap, called ```loadAffinity```, through which you can specify the nodes where data movement backups should or should not run, using affinity and anti-affinity rules.
If the configMap does not exist, it should be created manually. The configMap should be in the same namespace where Velero is installed. If multiple Velero instances are installed in different namespaces, there should be one configMap in each namespace which applies to node-agent in that namespace only. The name of the configMap should be specified in the node-agent server parameter ```--node-agent-config```.
Node-agent server checks these configurations at startup time. Therefore, you could edit this configMap any time, but in order to make the changes effective, node-agent server needs to be restarted.

### Sample
Here is a sample of the ```node-agent-config``` configMap with ```loadAffinity```:
Here is a sample of the configMap with ```loadAffinity```:
```json
{
"loadAffinity": [
Expand Down Expand Up @@ -50,6 +50,21 @@ To create the configMap, save something like the above sample to a json file and
kubectl create cm node-agent-config -n velero --from-file=<json file name>
```

To provide the configMap to node-agent, edit the node-agent daemonset and add the ```- --node-agent-config``` argument to the spec:
1. Open the node-agent daemonset spec
```
kubectl edit ds node-agent -n velero
```
2. Add ```- --node-agent-config=<configMap name>``` to the ```args``` of the node-agent container under ```spec.template.spec.containers```
```
spec:
template:
spec:
containers:
- args:
- --node-agent-config=<configMap name>
```

### Affinity
Affinity configuration means allowing the data movement backup to run in the nodes specified. There are two ways to define it:
- It could be defined by `MatchLabels`. The labels defined in `MatchLabels` mean a `LabelSelectorOpIn` operation by default, so in the current context, they will be treated as affinity rules. The above sample specifies that data movement backups run in nodes with label `beta.kubernetes.io/instance-type` of value `Standard_B4ms` (Run data movement backups in `Standard_B4ms` nodes only).
Expand Down
18 changes: 16 additions & 2 deletions site/content/docs/main/node-agent-concurrency.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Varying from the data size, data complexity, resource availability, the tasks ma

Node-agent concurrency configurations allow you to configure the concurrent number of node-agent loads per node. When the resources are sufficient in nodes, you can set a large concurrent number, so as to reduce the backup/restore time; otherwise, the concurrency should be reduced, or the backup/restore may encounter problems, e.g., time lagging, hang or OOM kill.

To set Node-agent concurrency configurations, a configMap named ```node-agent-config``` should be created manually. The configMap should be in the same namespace where Velero is installed. If multiple Velero instances are installed in different namespaces, there should be one configMap in each namespace which applies to node-agent in that namespace only.
To set Node-agent concurrency configurations, a configMap should be created manually. The configMap should be in the same namespace where Velero is installed. If multiple Velero instances are installed in different namespaces, there should be one configMap in each namespace which applies to node-agent in that namespace only. The name of the configMap should be specified in the node-agent server parameter ```--node-agent-config```.
Node-agent server checks these configurations at startup time. Therefore, you could edit this configMap any time, but in order to make the changes effective, node-agent server needs to be restarted.

### Global concurrent number
Expand All @@ -32,7 +32,7 @@ At least one node is expected to have a label with the specified ```RuledConfigs
If one node falls into more than one rule, e.g., if node1 also has the label ```beta.kubernetes.io/instance-type=Standard_B4ms```, the smallest number (3) will be used.

### Sample
A sample of the complete ```node-agent-config``` configMap is as below:
A sample of the complete configMap is as below:
Lyndon-Li marked this conversation as resolved.
Show resolved Hide resolved
```json
{
"loadConcurrency": {
Expand Down Expand Up @@ -62,5 +62,19 @@ To create the configMap, save something like the above sample to a json file and
```
kubectl create cm node-agent-config -n velero --from-file=<json file name>
```
To provide the configMap to node-agent, edit the node-agent daemonset and add the ```- --node-agent-config``` argument to the spec:
1. Open the node-agent daemonset spec
```
kubectl edit ds node-agent -n velero
```
2. Add ```- --node-agent-config=<configMap name>``` to the ```args``` of the node-agent container under ```spec.template.spec.containers```
```
spec:
template:
spec:
containers:
- args:
- --node-agent-config=<configMap name>
```


Loading