Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
117988: roachtest: admission/follower-overload test improvements r=sumeerbhola a=aadityasondhi

[roachtest: fix zone config syntax in ac follower overload test](cockroachdb@c80491e) 

Informs cockroachdb#82508.

Release note: None

---

[roachtest: add bandwidth overload test in admission/follower-overload](cockroachdb@d0dc296) 

This aims to simulate read-bandwidth-induced overload by running a large
kv0 workload on a 3-node cluster while all the leases are owned by n1 and
n2.

Informs cockroachdb#82508.

Release note: None

Co-authored-by: Aaditya Sondhi <[email protected]>
  • Loading branch information
craig[bot] and aadityasondhi committed Feb 6, 2024
2 parents 17f1eac + 5708c68 commit 3ff9177
Showing 1 changed file with 60 additions and 18 deletions.
78 changes: 60 additions & 18 deletions pkg/cmd/roachtest/tests/admission_control_follower_overload.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ package tests

import (
"context"
"fmt"
"strings"
"time"

Expand All @@ -31,8 +32,8 @@ func registerFollowerOverload(r registry.Registry) {
return registry.TestSpec{
Name: "admission/follower-overload/" + subtest,
Owner: registry.OwnerAdmissionControl,
Timeout: 3 * time.Hour,
CompatibleClouds: registry.AllExceptAWS,
Timeout: 6 * time.Hour,
CompatibleClouds: registry.AllClouds,
Suites: registry.ManualOnly,
// TODO(aaditya): Revisit this as part of #111614.
//Suites: registry.Suites(registry.Weekly),
Expand All @@ -43,7 +44,14 @@ func registerFollowerOverload(r registry.Registry) {
// NB: use 16vcpu machines to avoid getting anywhere close to EBS
// bandwidth limits on AWS, see:
// https://github.com/cockroachdb/cockroach/issues/82109#issuecomment-1154049976
Cluster: r.MakeClusterSpec(4, spec.CPU(4), spec.ReuseNone()),
Cluster: func() spec.ClusterSpec {
c := r.MakeClusterSpec(4, spec.CPU(4), spec.ReuseNone(), spec.DisableLocalSSD())
c.AWS.MachineType = cfg.cloudConfig.AWSInstanceType
c.AWS.Zones = cfg.cloudConfig.AWSRegion
c.GCE.MachineType = cfg.cloudConfig.GCEInstanceType
c.GCE.Zones = cfg.cloudConfig.GCERegion
return c
}(),
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runAdmissionControlFollowerOverload(ctx, t, c, cfg)
},
Expand Down Expand Up @@ -95,15 +103,38 @@ func registerFollowerOverload(r registry.Registry) {
kv50N3: true,
}))

// Similar to presplit-no-leases, but using specific instance type and
// increased kv0 writes to isolate for bandwidth induced overload.
//
// NB: As of Jan 2024, this test is specific to AWS only.
r.Add(spec("bandwidth", admissionControlFollowerOverloadOpts{
kv0N12: true,
kvN12ExtraArgs: "--splits 100",
kv0N12BlockBytes: "10000",
kv0N12Rate: "600",
kv50N3: true,
cloudConfig: followerOverloadTestCloudConfig{AWSInstanceType: "c5.xlarge", AWSRegion: "us-east-1a"},
}))

// TODO(irfansharif,aaditya): Add variants that enable regular traffic flow
// control. Run variants without follower pausing too.
}

type admissionControlFollowerOverloadOpts struct {
ioNemesis bool // limit write throughput on s3 (n3) to 20MiB/s
kvN12ExtraArgs string
kv0N12 bool // run kv0 on n1 and n2
kv50N3 bool // run kv50 on n3
ioNemesis bool // limit write throughput on s3 (n3) to 20MiB/s
kvN12ExtraArgs string
kv0N12 bool // run kv0 on n1 and n2
kv0N12BlockBytes string // [optional] block bytes for kv0 on n1 and n2, default=5000
kv0N12Rate string // [optional] rate limit for kv0 on n1 and n2, default=400
kv50N3 bool // run kv50 on n3
cloudConfig followerOverloadTestCloudConfig // optional
}

// followerOverloadTestCloudConfig carries the per-cloud machine type and
// placement settings that a follower-overload subtest can supply. When the
// test is registered, these values are copied onto the AWS and GCE sections
// of the cluster spec.
type followerOverloadTestCloudConfig struct {
	// AWSInstanceType is assigned to the cluster spec's AWS machine type.
	// AWSRegion is assigned to the cluster spec's AWS zones; the "bandwidth"
	// subtest sets it to "us-east-1a".
	AWSInstanceType, AWSRegion string
	// GCEInstanceType is assigned to the cluster spec's GCE machine type.
	// GCERegion is assigned to the cluster spec's GCE zones.
	GCEInstanceType, GCERegion string
}

func runAdmissionControlFollowerOverload(
Expand Down Expand Up @@ -143,7 +174,7 @@ func runAdmissionControlFollowerOverload(
defer cleanupFunc()
}

phaseDuration := time.Hour
phaseDuration := 3 * time.Hour

nodes := c.Range(1, 3)
c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings(), nodes)
Expand All @@ -170,7 +201,7 @@ func runAdmissionControlFollowerOverload(
for _, row := range runner.QueryStr(
t, `SELECT target FROM [ SHOW ZONE CONFIGURATIONS ]`,
) {
q := `ALTER ` + row[0] + ` CONFIGURE ZONE USING lease_preferences = '[[-node3]]'`
q := `ALTER ` + row[0] + ` CONFIGURE ZONE USING lease_preferences = '[[-node3]]', constraints = COPY FROM PARENT`
t.L().Printf("%s", q)
_, err := db.Exec(q)
require.NoError(t, err)
Expand All @@ -185,8 +216,8 @@ func runAdmissionControlFollowerOverload(
var attempts int
for ctx.Err() == nil {
attempts++
m1 := runner.QueryStr(t, `SELECT range_id FROM crdb_internal.ranges WHERE lease_holder=3 AND database_name != 'kvn3'`)
m2 := runner.QueryStr(t, `SELECT range_id FROM crdb_internal.ranges WHERE lease_holder!=3 AND database_name = 'kvn3'`)
m1 := runner.QueryStr(t, `SELECT DISTINCT range_id FROM [SHOW CLUSTER RANGES WITH TABLES, DETAILS] WHERE lease_holder=3 AND database_name != 'kvn3'`)
m2 := runner.QueryStr(t, `SELECT DISTINCT range_id FROM [SHOW CLUSTER RANGES WITH TABLES, DETAILS] WHERE lease_holder!=3 AND database_name = 'kvn3'`)
if len(m1)+len(m2) == 0 {
t.L().Printf("done waiting for lease movement")
break
Expand Down Expand Up @@ -226,13 +257,24 @@ func runAdmissionControlFollowerOverload(
// to EBS, see:
//
// https://github.com/cockroachdb/cockroach/issues/82109#issuecomment-1154049976
deployWorkload := `
mkdir -p logs && \
sudo systemd-run --property=Type=exec \
--property=StandardOutput=file:/home/ubuntu/logs/kv-n12.stdout.log \
--property=StandardError=file:/home/ubuntu/logs/kv-n12.stderr.log \
--remain-after-exit --unit kv-n12 -- ./cockroach workload run kv --read-percent 0 \
--max-rate 400 --concurrency 400 --min-block-bytes 5000 --max-block-bytes 5000 --tolerate-errors {pgurl:1-2}`

// We override the values, if specified, otherwise we use defaults as explained above.
maxRate := cfg.kv0N12Rate
if maxRate == "" {
maxRate = "400"
}
maxBytes := cfg.kv0N12BlockBytes
if maxBytes == "" {
maxBytes = "5000"
}
deployWorkload := fmt.Sprintf("mkdir -p logs && sudo systemd-run --property=Type=exec "+
"--property=StandardOutput=file:/home/ubuntu/logs/kv-n12.stdout.log "+
"--property=StandardError=file:/home/ubuntu/logs/kv-n12.stderr.log "+
"--remain-after-exit --unit kv-n12 -- ./cockroach workload run kv --read-percent 0 "+
"--max-rate %s --concurrency 400 --min-block-bytes %s --max-block-bytes %s --tolerate-errors {pgurl:1-2}",
maxRate, maxBytes, maxBytes,
)

c.Run(ctx, option.WithNodes(c.Node(4)), deployWorkload)
}
if cfg.kv50N3 {
Expand Down

0 comments on commit 3ff9177

Please sign in to comment.