From fe60ef09c092c064f49dce2c65ae6ba3efd2b905 Mon Sep 17 00:00:00 2001
From: SrishT
Date: Fri, 10 Jul 2020 02:10:18 +0530
Subject: [PATCH 1/2] Issue 421: Adding manifests for various cluster installation flavours

Signed-off-by: SrishT
---
 charts/pravega/README.md            | 12 ++--
 charts/pravega/values.yaml          | 76 ++++++++++++++++++++----
 charts/pravega/values/large.yaml    | 92 +++++++++++++++++++++++++++++
 charts/pravega/values/medium.yaml   | 92 +++++++++++++++++++++++++++++
 charts/pravega/values/minikube.yaml | 26 ++++++++
 charts/pravega/values/small.yaml    | 92 +++++++++++++++++++++++++++++
 6 files changed, 373 insertions(+), 17 deletions(-)
 create mode 100644 charts/pravega/values/large.yaml
 create mode 100644 charts/pravega/values/medium.yaml
 create mode 100644 charts/pravega/values/minikube.yaml
 create mode 100644 charts/pravega/values/small.yaml

diff --git a/charts/pravega/README.md b/charts/pravega/README.md
index fefb06bcf..bcb4a4e1b 100644
--- a/charts/pravega/README.md
+++ b/charts/pravega/README.md
@@ -54,20 +54,20 @@ The following table lists the configurable parameters of the Pravega chart and t
 | `debugLogging` | Enable debug logging | `false` |
 | `serviceAccount.name` | Service account to be used | `pravega-components` |
 | `controller.replicas` | Number of controller replicas | `1` |
-| `controller.resources.requests.cpu` | CPU requests for controller | `1000m` |
+| `controller.resources.requests.cpu` | CPU requests for controller | `500m` |
 | `controller.resources.requests.memory` | Memory requests for controller | `1Gi` |
-| `controller.resources.limits.cpu` | CPU limits for controller | `2000m` |
-| `controller.resources.limits.memory` | Memory limits for controller | `3Gi` |
+| `controller.resources.limits.cpu` | CPU limits for controller | `1000m` |
+| `controller.resources.limits.memory` | Memory limits for controller | `2Gi` |
 | `controller.service.type` | Override the controller service type, if external access is enabled (LoadBalancer/NodePort) | |
 | `controller.service.annotations` | Annotations to add to the controller service, if external access is enabled | `{}` |
 | `controller.jvmOptions` | JVM Options for controller | `[]` |
 | `segmentStore.replicas` | Number of segmentStore replicas | `1` |
 | `segmentStore.secret` | Secret configuration for the segmentStore | `{}` |
 | `segmentStore.env` | Name of configmap containing environment variables to be added to the segmentStore | |
 | `segmentStore.resources.requests.cpu` | CPU requests for segmentStore | `1000m` |
-| `segmentStore.resources.requests.memory` | Memory requests for segmentStore | `3Gi` |
+| `segmentStore.resources.requests.memory` | Memory requests for segmentStore | `4Gi` |
 | `segmentStore.resources.limits.cpu` | CPU limits for segmentStore | `2000m` |
-| `segmentStore.resources.limits.memory` | Memory limits for segmentStore | `5Gi` |
+| `segmentStore.resources.limits.memory` | Memory limits for segmentStore | `4Gi` |
 | `segmentStore.service.type` | Override the segmentStore service type, if external access is enabled (LoadBalancer/NodePort) | |
 | `segmentStore.service.annotations` | Annotations to add to the segmentStore service, if external access is enabled | `{}` |
-| `segmentStore.jvmOptions` | JVM Options for segmentStore | `[]` |
+| `segmentStore.jvmOptions` | JVM Options for segmentStore | `["-Xmx2g", "-XX:MaxDirectMemorySize=2g"]` |
diff --git a/charts/pravega/values.yaml b/charts/pravega/values.yaml
index c29ee3e56..4b62a1c90 100644
--- a/charts/pravega/values.yaml
+++ b/charts/pravega/values.yaml
@@ -43,11
+43,11 @@ controller: replicas: 1 resources: requests: - cpu: 1000m + cpu: 500m memory: 1Gi limits: - cpu: 2000m - memory: 3Gi + cpu: 1000m + memory: 2Gi ## service type and annotations are ignored if external access is disabled service: ## used to override the service type for controller @@ -64,16 +64,16 @@ segmentStore: resources: requests: cpu: 1000m - memory: 3Gi + memory: 4Gi limits: cpu: 2000m - memory: 5Gi + memory: 4Gi ## service type and annotations are ignored if external access is disabled service: ## used to override the service type for segmentStore type: annotations: {} - jvmOptions: [] + jvmOptions: ["-Xmx2g", "-XX:MaxDirectMemorySize=2g"] storage: @@ -92,10 +92,10 @@ storage: ## ecs is used to configure a Dell EMC ECS system as the long term storage backend ## considered only if storage.longtermStorage.type = ecs ecs: {} - # configUri: "" - # bucket: "" - # prefix: "" - # credentials: "" + # configUri: https://object.ecstestdrive.com?namespace=namespace%26identity=user%26secretKey=password + # bucket: "bucket" + # prefix: "prefix" + # credentials: ecs-credentials ## hdfs is used to configure an HDFS system as long term storage backend ## considered only if storage.longtermStorage.type = hdfs @@ -109,4 +109,58 @@ storage: className: standard options: - # bookkeeper.ack.quorum.size: "3" + ## If you add more Bookies to the cluster and you want to exploit them for IO, then you need increase this number. The Bookkeeper ensemble defines + ## the number of Bookies that should be available or healthy to allow Bookkeeper to work. In production, we recommend this value to be: + ## bookkeeper.ensemble.size = bookieReplicas - F, + ## where F is the number of Bookie failures we can tolerate to continue working. For instance, if we instantiate 6 Bookies and we want to tolerate + ## up to F = 2 failures, then the bookkeeper.ensemble.size = 4. + bookkeeper.ensemble.size: "3" + ## By default, we want to keep 3 replicas of the data in Bookkeeper to ensure durability. + bookkeeper.write.quorum.size: "3" + ## By default, we set bookkeeper.write.quorum.size == bookkeeper.ack.quorum.size, so we wait for the ACK of all the Bookies on a write to proceed to the next one. This + ## has been proven as a factor that makes Pravega much more stable in the long run for IO heavy workloads, at the cost of sacrificing performance. + ## If we set bkWriteQuorumSize > bkAckQuorumSize, we can improve performance, but a temporarily "slow" Bookie may lead to OOM errors at both the + ## Segment Store or Bookkeeper sides, given that Pravega continues to write to the "slow" Bookie at the same pace as for the "fast" ones. For more + ## context, please see: https://github.com/pravega/pravega/issues/4058 and https://github.com/pravega/pravega/issues/4057 + bookkeeper.ack.quorum.size: "3" + ## We need to increase this timeout to tolerate environments in which Tier 1 storage gets really saturated and Bookkeeper exhibits high latency. If + ## not, an IO workload that saturates Bookkeeper can induce the Segment Store into an state of unstability. + bookkeeper.write.timeout.milliseconds: "60000" + ## This parameter will start inducing the max delay to Tier 1 operations if the Segment Store reaches 32MB of queued data waiting to be written to + ## Bookkeeper. We have tested that in a slow IO infrastructure like vSAN, this setting helps to protect the system from unstability. + bookkeeper.write.outstanding.bytes.max: "33554432" + ## In Pravega +0.7 we use an in-memory cache. 
We need to be sure that the size of the cache is always lower than the direct memory set in JVM options. + pravegaservice.cache.size.max: "1073741824" + pravegaservice.cache.time.seconds.max: "600" + hdfs.block.size: "67108864" + ## Despite AWS support team has suggested to set 1MB block sizes to write to Tier 2 to improve performance when using NFS, we found that a larger block + ## size significantly improves the throughput of a single segment against EFS. + writer.flush.threshold.bytes: "67108864" + writer.flush.size.bytes.max: "67108864" + ## The number of containers depends on the Segment Stores in the cluster (or vice-versa). To distributed the load of Segment Containers across Segment + ## Stores, we recommend to set 4 to 8 Segment Containers per Segment Store. Clearly, determining this number will depend on a provisioning decision. + ## Note that both containerCount values should be always the same between the Segment Store and the Controller service. + pravegaservice.container.count: "8" + controller.container.count: "8" + ## Increase the default Controller bucket count to distribute load across multiple instances. + controller.retention.bucket.count: "4" + controller.service.asyncTaskPool.size: "20" + controller.retention.thread.count: "4" + log.level: "INFO" + ## The following parameters are only useful if you are going to deploy metrics in this cluster. + # metrics.dynamicCache.size: "100000" + # metrics.statistics.enable: "true" + # metrics.statsD.reporter.enable: "false" + # metrics.statsD.connect.host: "telegraph.default" + # metrics.statsD.connect.port: "8125" + # metrics.influxDB.reporter.enable: "true" + # metrics.output.frequency.seconds: "10" + # metrics.influxDB.connect.uri: "http://INFLUXDB-IP:8086" + # controller.metrics.dynamicCache.size: "100000" + # controller.metrics.statistics.enable: "true" + # controller.metrics.statsD.reporter.enable: "false" + # controller.metrics.statsD.connect.host: "telegraph.default" + # controller.metrics.statsD.connect.port: "8125" + # controller.metrics.influxDB.reporter.enable: "true" + # controller.metrics.output.frequency.seconds: "10" + # controller.metrics.influxDB.connect.uri: "http://INFLUXDB-IP:8086" diff --git a/charts/pravega/values/large.yaml b/charts/pravega/values/large.yaml new file mode 100644 index 000000000..291f90ed8 --- /dev/null +++ b/charts/pravega/values/large.yaml @@ -0,0 +1,92 @@ +controller: + replicas: 2 + resources: + requests: + cpu: 500m + memory: 1Gi + limits: + cpu: 2000m + memory: 4Gi + ## service type and annotations are ignored if external access is disabled + service: + ## used to override the service type for controller + type: + annotations: {} + jvmOptions: [] + +segmentStore: + replicas: 6 + secret: {} + # name: + # path: + env: + resources: + requests: + cpu: 2000m + memory: 16Gi + limits: + cpu: 4000m + memory: 16Gi + ## service type and annotations are ignored if external access is disabled + service: + ## used to override the service type for segmentStore + type: + annotations: {} + jvmOptions: ["-Xmx4g", "-XX:MaxDirectMemorySize=12g"] + +options: + ## If you add more Bookies to the cluster and you want to exploit them for IO, then you need increase this number. The Bookkeeper ensemble defines + ## the number of Bookies that should be available or healthy to allow Bookkeeper to work. In production, we recommend this value to be: + ## bookkeeper.ensemble.size = bookieReplicas - F, + ## where F is the number of Bookie failures we can tolerate to continue working. 
For instance, if we instantiate 6 Bookies and we want to tolerate + ## up to F = 2 failures, then the bookkeeper.ensemble.size = 4. + bookkeeper.ensemble.size: "3" + ## By default, we want to keep 3 replicas of the data in Bookkeeper to ensure durability. + bookkeeper.write.quorum.size: "3" + ## By default, we set bookkeeper.write.quorum.size == bookkeeper.ack.quorum.size, so we wait for the ACK of all the Bookies on a write to proceed to the next one. This + ## has been proven as a factor that makes Pravega much more stable in the long run for IO heavy workloads, at the cost of sacrificing performance. + ## If we set bkWriteQuorumSize > bkAckQuorumSize, we can improve performance, but a temporarily "slow" Bookie may lead to OOM errors at both the + ## Segment Store or Bookkeeper sides, given that Pravega continues to write to the "slow" Bookie at the same pace as for the "fast" ones. For more + ## context, please see: https://github.com/pravega/pravega/issues/4058 and https://github.com/pravega/pravega/issues/4057 + bookkeeper.ack.quorum.size: "3" + ## We need to increase this timeout to tolerate environments in which Tier 1 storage gets really saturated and Bookkeeper exhibits high latency. If + ## not, an IO workload that saturates Bookkeeper can induce the Segment Store into an state of unstability. + bookkeeper.write.timeout.milliseconds: "60000" + ## This parameter will start inducing the max delay to Tier 1 operations if the Segment Store reaches 32MB of queued data waiting to be written to + ## Bookkeeper. We have tested that in a slow IO infrastructure like vSAN, this setting helps to protect the system from unstability. + bookkeeper.write.outstanding.bytes.max: "33554432" + ## In Pravega +0.7 we use an in-memory cache. We need to be sure that the size of the cache is always lower than the direct memory set in JVM options. + pravegaservice.cache.size.max: "11811160064" + pravegaservice.cache.time.seconds.max: "600" + hdfs.block.size: "67108864" + ## Despite AWS support team has suggested to set 1MB block sizes to write to Tier 2 to improve performance when using NFS, we found that a larger block + ## size significantly improves the throughput of a single segment against EFS. + writer.flush.threshold.bytes: "67108864" + writer.flush.size.bytes.max: "67108864" + ## The number of containers depends on the Segment Stores in the cluster (or vice-versa). To distributed the load of Segment Containers across Segment + ## Stores, we recommend to set 4 to 8 Segment Containers per Segment Store. Clearly, determining this number will depend on a provisioning decision. + ## Note that both containerCount values should be always the same between the Segment Store and the Controller service. + pravegaservice.container.count: "48" + controller.container.count: "48" + ## Increase the default Controller bucket count to distribute load across multiple instances. + controller.retention.bucket.count: "10" + controller.service.asyncTaskPool.size: "20" + controller.retention.thread.count: "4" + log.level: "INFO" + ## The following parameters are only useful if you are going to deploy metrics in this cluster. 
+ # metrics.dynamicCache.size: "100000" + # metrics.statistics.enable: "true" + # metrics.statsD.reporter.enable: "false" + # metrics.statsD.connect.host: "telegraph.default" + # metrics.statsD.connect.port: "8125" + # metrics.influxDB.reporter.enable: "true" + # metrics.output.frequency.seconds: "10" + # metrics.influxDB.connect.uri: "http://INFLUXDB-IP:8086" + # controller.metrics.dynamicCache.size: "100000" + # controller.metrics.statistics.enable: "true" + # controller.metrics.statsD.reporter.enable: "false" + # controller.metrics.statsD.connect.host: "telegraph.default" + # controller.metrics.statsD.connect.port: "8125" + # controller.metrics.influxDB.reporter.enable: "true" + # controller.metrics.output.frequency.seconds: "10" + # controller.metrics.influxDB.connect.uri: "http://INFLUXDB-IP:8086" diff --git a/charts/pravega/values/medium.yaml b/charts/pravega/values/medium.yaml new file mode 100644 index 000000000..406a9ab84 --- /dev/null +++ b/charts/pravega/values/medium.yaml @@ -0,0 +1,92 @@ +controller: + replicas: 2 + resources: + requests: + cpu: 500m + memory: 1Gi + limits: + cpu: 1000m + memory: 2Gi + ## service type and annotations are ignored if external access is disabled + service: + ## used to override the service type for controller + type: + annotations: {} + jvmOptions: [] + +segmentStore: + replicas: 2 + secret: {} + # name: + # path: + env: + resources: + requests: + cpu: 2000m + memory: 8Gi + limits: + cpu: 4000m + memory: 8Gi + ## service type and annotations are ignored if external access is disabled + service: + ## used to override the service type for segmentStore + type: + annotations: {} + jvmOptions: ["-Xmx4g", "-XX:MaxDirectMemorySize=4g"] + +options: + ## If you add more Bookies to the cluster and you want to exploit them for IO, then you need increase this number. The Bookkeeper ensemble defines + ## the number of Bookies that should be available or healthy to allow Bookkeeper to work. In production, we recommend this value to be: + ## bookkeeper.ensemble.size = bookieReplicas - F, + ## where F is the number of Bookie failures we can tolerate to continue working. For instance, if we instantiate 6 Bookies and we want to tolerate + ## up to F = 2 failures, then the bookkeeper.ensemble.size = 4. + bookkeeper.ensemble.size: "3" + ## By default, we want to keep 3 replicas of the data in Bookkeeper to ensure durability. + bookkeeper.write.quorum.size: "3" + ## By default, we set bookkeeper.write.quorum.size == bookkeeper.ack.quorum.size, so we wait for the ACK of all the Bookies on a write to proceed to the next one. This + ## has been proven as a factor that makes Pravega much more stable in the long run for IO heavy workloads, at the cost of sacrificing performance. + ## If we set bkWriteQuorumSize > bkAckQuorumSize, we can improve performance, but a temporarily "slow" Bookie may lead to OOM errors at both the + ## Segment Store or Bookkeeper sides, given that Pravega continues to write to the "slow" Bookie at the same pace as for the "fast" ones. For more + ## context, please see: https://github.com/pravega/pravega/issues/4058 and https://github.com/pravega/pravega/issues/4057 + bookkeeper.ack.quorum.size: "3" + ## We need to increase this timeout to tolerate environments in which Tier 1 storage gets really saturated and Bookkeeper exhibits high latency. If + ## not, an IO workload that saturates Bookkeeper can induce the Segment Store into an state of unstability. 
+ bookkeeper.write.timeout.milliseconds: "60000" + ## This parameter will start inducing the max delay to Tier 1 operations if the Segment Store reaches 32MB of queued data waiting to be written to + ## Bookkeeper. We have tested that in a slow IO infrastructure like vSAN, this setting helps to protect the system from unstability. + bookkeeper.write.outstanding.bytes.max: "33554432" + ## In Pravega +0.7 we use an in-memory cache. We need to be sure that the size of the cache is always lower than the direct memory set in JVM options. + pravegaservice.cache.size.max: "3221225472" + pravegaservice.cache.time.seconds.max: "600" + hdfs.block.size: "67108864" + ## Despite AWS support team has suggested to set 1MB block sizes to write to Tier 2 to improve performance when using NFS, we found that a larger block + ## size significantly improves the throughput of a single segment against EFS. + writer.flush.threshold.bytes: "67108864" + writer.flush.size.bytes.max: "67108864" + ## The number of containers depends on the Segment Stores in the cluster (or vice-versa). To distributed the load of Segment Containers across Segment + ## Stores, we recommend to set 4 to 8 Segment Containers per Segment Store. Clearly, determining this number will depend on a provisioning decision. + ## Note that both containerCount values should be always the same between the Segment Store and the Controller service. + pravegaservice.container.count: "16" + controller.container.count: "16" + ## Increase the default Controller bucket count to distribute load across multiple instances. + controller.retention.bucket.count: "10" + controller.service.asyncTaskPool.size: "20" + controller.retention.thread.count: "4" + log.level: "INFO" + ## The following parameters are only useful if you are going to deploy metrics in this cluster. 
+ # metrics.dynamicCache.size: "100000" + # metrics.statistics.enable: "true" + # metrics.statsD.reporter.enable: "false" + # metrics.statsD.connect.host: "telegraph.default" + # metrics.statsD.connect.port: "8125" + # metrics.influxDB.reporter.enable: "true" + # metrics.output.frequency.seconds: "10" + # metrics.influxDB.connect.uri: "http://INFLUXDB-IP:8086" + # controller.metrics.dynamicCache.size: "100000" + # controller.metrics.statistics.enable: "true" + # controller.metrics.statsD.reporter.enable: "false" + # controller.metrics.statsD.connect.host: "telegraph.default" + # controller.metrics.statsD.connect.port: "8125" + # controller.metrics.influxDB.reporter.enable: "true" + # controller.metrics.output.frequency.seconds: "10" + # controller.metrics.influxDB.connect.uri: "http://INFLUXDB-IP:8086" diff --git a/charts/pravega/values/minikube.yaml b/charts/pravega/values/minikube.yaml new file mode 100644 index 000000000..137f78e83 --- /dev/null +++ b/charts/pravega/values/minikube.yaml @@ -0,0 +1,26 @@ +controller: + replicas: 1 + resources: + requests: + cpu: 250m + memory: 512Mi + limits: + cpu: 500m + memory: 1Gi + jvmOptions: [] + +segmentStore: + replicas: 1 + resources: + requests: + cpu: 500m + memory: 1Gi + limits: + cpu: 500m + memory: 2Gi + jvmOptions: [] + +options: + bookkeeper.ack.quorum.size: "1" + bookkeeper.write.quorum.size: "1" + bookkeeper.ensemble.size: "1" diff --git a/charts/pravega/values/small.yaml b/charts/pravega/values/small.yaml new file mode 100644 index 000000000..fc1893c2a --- /dev/null +++ b/charts/pravega/values/small.yaml @@ -0,0 +1,92 @@ +controller: + replicas: 1 + resources: + requests: + cpu: 500m + memory: 1Gi + limits: + cpu: 1000m + memory: 2Gi + ## service type and annotations are ignored if external access is disabled + service: + ## used to override the service type for controller + type: + annotations: {} + jvmOptions: [] + +segmentStore: + replicas: 1 + secret: {} + # name: + # path: + env: + resources: + requests: + cpu: 1000m + memory: 4Gi + limits: + cpu: 2000m + memory: 4Gi + ## service type and annotations are ignored if external access is disabled + service: + ## used to override the service type for segmentStore + type: + annotations: {} + jvmOptions: ["-Xmx2g", "-XX:MaxDirectMemorySize=2g"] + +options: + ## If you add more Bookies to the cluster and you want to exploit them for IO, then you need increase this number. The Bookkeeper ensemble defines + ## the number of Bookies that should be available or healthy to allow Bookkeeper to work. In production, we recommend this value to be: + ## bookkeeper.ensemble.size = bookieReplicas - F, + ## where F is the number of Bookie failures we can tolerate to continue working. For instance, if we instantiate 6 Bookies and we want to tolerate + ## up to F = 2 failures, then the bookkeeper.ensemble.size = 4. + bookkeeper.ensemble.size: "3" + ## By default, we want to keep 3 replicas of the data in Bookkeeper to ensure durability. + bookkeeper.write.quorum.size: "3" + ## By default, we set bookkeeper.write.quorum.size == bookkeeper.ack.quorum.size, so we wait for the ACK of all the Bookies on a write to proceed to the next one. This + ## has been proven as a factor that makes Pravega much more stable in the long run for IO heavy workloads, at the cost of sacrificing performance. 
+ ## If we set bkWriteQuorumSize > bkAckQuorumSize, we can improve performance, but a temporarily "slow" Bookie may lead to OOM errors at both the + ## Segment Store or Bookkeeper sides, given that Pravega continues to write to the "slow" Bookie at the same pace as for the "fast" ones. For more + ## context, please see: https://github.com/pravega/pravega/issues/4058 and https://github.com/pravega/pravega/issues/4057 + bookkeeper.ack.quorum.size: "3" + ## We need to increase this timeout to tolerate environments in which Tier 1 storage gets really saturated and Bookkeeper exhibits high latency. If + ## not, an IO workload that saturates Bookkeeper can induce the Segment Store into an state of unstability. + bookkeeper.write.timeout.milliseconds: "60000" + ## This parameter will start inducing the max delay to Tier 1 operations if the Segment Store reaches 32MB of queued data waiting to be written to + ## Bookkeeper. We have tested that in a slow IO infrastructure like vSAN, this setting helps to protect the system from unstability. + bookkeeper.write.outstanding.bytes.max: "33554432" + ## In Pravega +0.7 we use an in-memory cache. We need to be sure that the size of the cache is always lower than the direct memory set in JVM options. + pravegaservice.cache.size.max: "1073741824" + pravegaservice.cache.time.seconds.max: "600" + hdfs.block.size: "67108864" + ## Despite AWS support team has suggested to set 1MB block sizes to write to Tier 2 to improve performance when using NFS, we found that a larger block + ## size significantly improves the throughput of a single segment against EFS. + writer.flush.threshold.bytes: "67108864" + writer.flush.size.bytes.max: "67108864" + ## The number of containers depends on the Segment Stores in the cluster (or vice-versa). To distributed the load of Segment Containers across Segment + ## Stores, we recommend to set 4 to 8 Segment Containers per Segment Store. Clearly, determining this number will depend on a provisioning decision. + ## Note that both containerCount values should be always the same between the Segment Store and the Controller service. + pravegaservice.container.count: "8" + controller.container.count: "8" + ## Increase the default Controller bucket count to distribute load across multiple instances. + controller.retention.bucket.count: "4" + controller.service.asyncTaskPool.size: "20" + controller.retention.thread.count: "4" + log.level: "INFO" + ## The following parameters are only useful if you are going to deploy metrics in this cluster. 
+ # metrics.dynamicCache.size: "100000" + # metrics.statistics.enable: "true" + # metrics.statsD.reporter.enable: "false" + # metrics.statsD.connect.host: "telegraph.default" + # metrics.statsD.connect.port: "8125" + # metrics.influxDB.reporter.enable: "true" + # metrics.output.frequency.seconds: "10" + # metrics.influxDB.connect.uri: "http://INFLUXDB-IP:8086" + # controller.metrics.dynamicCache.size: "100000" + # controller.metrics.statistics.enable: "true" + # controller.metrics.statsD.reporter.enable: "false" + # controller.metrics.statsD.connect.host: "telegraph.default" + # controller.metrics.statsD.connect.port: "8125" + # controller.metrics.influxDB.reporter.enable: "true" + # controller.metrics.output.frequency.seconds: "10" + # controller.metrics.influxDB.connect.uri: "http://INFLUXDB-IP:8086" From da241feed69646f2359a592d4c34c93e709c0fff Mon Sep 17 00:00:00 2001 From: SrishT Date: Mon, 13 Jul 2020 16:59:42 +0530 Subject: [PATCH 2/2] Issue 421: Addressing review comments Signed-off-by: SrishT --- charts/pravega/values.yaml | 22 ---------------------- charts/pravega/values/large.yaml | 22 ---------------------- charts/pravega/values/medium.yaml | 22 ---------------------- charts/pravega/values/small.yaml | 22 ---------------------- 4 files changed, 88 deletions(-) diff --git a/charts/pravega/values.yaml b/charts/pravega/values.yaml index 4b62a1c90..e940b640c 100644 --- a/charts/pravega/values.yaml +++ b/charts/pravega/values.yaml @@ -109,40 +109,18 @@ storage: className: standard options: - ## If you add more Bookies to the cluster and you want to exploit them for IO, then you need increase this number. The Bookkeeper ensemble defines - ## the number of Bookies that should be available or healthy to allow Bookkeeper to work. In production, we recommend this value to be: - ## bookkeeper.ensemble.size = bookieReplicas - F, - ## where F is the number of Bookie failures we can tolerate to continue working. For instance, if we instantiate 6 Bookies and we want to tolerate - ## up to F = 2 failures, then the bookkeeper.ensemble.size = 4. bookkeeper.ensemble.size: "3" - ## By default, we want to keep 3 replicas of the data in Bookkeeper to ensure durability. bookkeeper.write.quorum.size: "3" - ## By default, we set bookkeeper.write.quorum.size == bookkeeper.ack.quorum.size, so we wait for the ACK of all the Bookies on a write to proceed to the next one. This - ## has been proven as a factor that makes Pravega much more stable in the long run for IO heavy workloads, at the cost of sacrificing performance. - ## If we set bkWriteQuorumSize > bkAckQuorumSize, we can improve performance, but a temporarily "slow" Bookie may lead to OOM errors at both the - ## Segment Store or Bookkeeper sides, given that Pravega continues to write to the "slow" Bookie at the same pace as for the "fast" ones. For more - ## context, please see: https://github.com/pravega/pravega/issues/4058 and https://github.com/pravega/pravega/issues/4057 bookkeeper.ack.quorum.size: "3" - ## We need to increase this timeout to tolerate environments in which Tier 1 storage gets really saturated and Bookkeeper exhibits high latency. If - ## not, an IO workload that saturates Bookkeeper can induce the Segment Store into an state of unstability. bookkeeper.write.timeout.milliseconds: "60000" - ## This parameter will start inducing the max delay to Tier 1 operations if the Segment Store reaches 32MB of queued data waiting to be written to - ## Bookkeeper. 
We have tested that in a slow IO infrastructure like vSAN, this setting helps to protect the system from unstability. bookkeeper.write.outstanding.bytes.max: "33554432" - ## In Pravega +0.7 we use an in-memory cache. We need to be sure that the size of the cache is always lower than the direct memory set in JVM options. pravegaservice.cache.size.max: "1073741824" pravegaservice.cache.time.seconds.max: "600" hdfs.block.size: "67108864" - ## Despite AWS support team has suggested to set 1MB block sizes to write to Tier 2 to improve performance when using NFS, we found that a larger block - ## size significantly improves the throughput of a single segment against EFS. writer.flush.threshold.bytes: "67108864" writer.flush.size.bytes.max: "67108864" - ## The number of containers depends on the Segment Stores in the cluster (or vice-versa). To distributed the load of Segment Containers across Segment - ## Stores, we recommend to set 4 to 8 Segment Containers per Segment Store. Clearly, determining this number will depend on a provisioning decision. - ## Note that both containerCount values should be always the same between the Segment Store and the Controller service. pravegaservice.container.count: "8" controller.container.count: "8" - ## Increase the default Controller bucket count to distribute load across multiple instances. controller.retention.bucket.count: "4" controller.service.asyncTaskPool.size: "20" controller.retention.thread.count: "4" diff --git a/charts/pravega/values/large.yaml b/charts/pravega/values/large.yaml index 291f90ed8..3193bffdb 100644 --- a/charts/pravega/values/large.yaml +++ b/charts/pravega/values/large.yaml @@ -35,40 +35,18 @@ segmentStore: jvmOptions: ["-Xmx4g", "-XX:MaxDirectMemorySize=12g"] options: - ## If you add more Bookies to the cluster and you want to exploit them for IO, then you need increase this number. The Bookkeeper ensemble defines - ## the number of Bookies that should be available or healthy to allow Bookkeeper to work. In production, we recommend this value to be: - ## bookkeeper.ensemble.size = bookieReplicas - F, - ## where F is the number of Bookie failures we can tolerate to continue working. For instance, if we instantiate 6 Bookies and we want to tolerate - ## up to F = 2 failures, then the bookkeeper.ensemble.size = 4. bookkeeper.ensemble.size: "3" - ## By default, we want to keep 3 replicas of the data in Bookkeeper to ensure durability. bookkeeper.write.quorum.size: "3" - ## By default, we set bookkeeper.write.quorum.size == bookkeeper.ack.quorum.size, so we wait for the ACK of all the Bookies on a write to proceed to the next one. This - ## has been proven as a factor that makes Pravega much more stable in the long run for IO heavy workloads, at the cost of sacrificing performance. - ## If we set bkWriteQuorumSize > bkAckQuorumSize, we can improve performance, but a temporarily "slow" Bookie may lead to OOM errors at both the - ## Segment Store or Bookkeeper sides, given that Pravega continues to write to the "slow" Bookie at the same pace as for the "fast" ones. For more - ## context, please see: https://github.com/pravega/pravega/issues/4058 and https://github.com/pravega/pravega/issues/4057 bookkeeper.ack.quorum.size: "3" - ## We need to increase this timeout to tolerate environments in which Tier 1 storage gets really saturated and Bookkeeper exhibits high latency. If - ## not, an IO workload that saturates Bookkeeper can induce the Segment Store into an state of unstability. 
bookkeeper.write.timeout.milliseconds: "60000" - ## This parameter will start inducing the max delay to Tier 1 operations if the Segment Store reaches 32MB of queued data waiting to be written to - ## Bookkeeper. We have tested that in a slow IO infrastructure like vSAN, this setting helps to protect the system from unstability. bookkeeper.write.outstanding.bytes.max: "33554432" - ## In Pravega +0.7 we use an in-memory cache. We need to be sure that the size of the cache is always lower than the direct memory set in JVM options. pravegaservice.cache.size.max: "11811160064" pravegaservice.cache.time.seconds.max: "600" hdfs.block.size: "67108864" - ## Despite AWS support team has suggested to set 1MB block sizes to write to Tier 2 to improve performance when using NFS, we found that a larger block - ## size significantly improves the throughput of a single segment against EFS. writer.flush.threshold.bytes: "67108864" writer.flush.size.bytes.max: "67108864" - ## The number of containers depends on the Segment Stores in the cluster (or vice-versa). To distributed the load of Segment Containers across Segment - ## Stores, we recommend to set 4 to 8 Segment Containers per Segment Store. Clearly, determining this number will depend on a provisioning decision. - ## Note that both containerCount values should be always the same between the Segment Store and the Controller service. pravegaservice.container.count: "48" controller.container.count: "48" - ## Increase the default Controller bucket count to distribute load across multiple instances. controller.retention.bucket.count: "10" controller.service.asyncTaskPool.size: "20" controller.retention.thread.count: "4" diff --git a/charts/pravega/values/medium.yaml b/charts/pravega/values/medium.yaml index 406a9ab84..b8ceb850f 100644 --- a/charts/pravega/values/medium.yaml +++ b/charts/pravega/values/medium.yaml @@ -35,40 +35,18 @@ segmentStore: jvmOptions: ["-Xmx4g", "-XX:MaxDirectMemorySize=4g"] options: - ## If you add more Bookies to the cluster and you want to exploit them for IO, then you need increase this number. The Bookkeeper ensemble defines - ## the number of Bookies that should be available or healthy to allow Bookkeeper to work. In production, we recommend this value to be: - ## bookkeeper.ensemble.size = bookieReplicas - F, - ## where F is the number of Bookie failures we can tolerate to continue working. For instance, if we instantiate 6 Bookies and we want to tolerate - ## up to F = 2 failures, then the bookkeeper.ensemble.size = 4. bookkeeper.ensemble.size: "3" - ## By default, we want to keep 3 replicas of the data in Bookkeeper to ensure durability. bookkeeper.write.quorum.size: "3" - ## By default, we set bookkeeper.write.quorum.size == bookkeeper.ack.quorum.size, so we wait for the ACK of all the Bookies on a write to proceed to the next one. This - ## has been proven as a factor that makes Pravega much more stable in the long run for IO heavy workloads, at the cost of sacrificing performance. - ## If we set bkWriteQuorumSize > bkAckQuorumSize, we can improve performance, but a temporarily "slow" Bookie may lead to OOM errors at both the - ## Segment Store or Bookkeeper sides, given that Pravega continues to write to the "slow" Bookie at the same pace as for the "fast" ones. 
For more - ## context, please see: https://github.com/pravega/pravega/issues/4058 and https://github.com/pravega/pravega/issues/4057 bookkeeper.ack.quorum.size: "3" - ## We need to increase this timeout to tolerate environments in which Tier 1 storage gets really saturated and Bookkeeper exhibits high latency. If - ## not, an IO workload that saturates Bookkeeper can induce the Segment Store into an state of unstability. bookkeeper.write.timeout.milliseconds: "60000" - ## This parameter will start inducing the max delay to Tier 1 operations if the Segment Store reaches 32MB of queued data waiting to be written to - ## Bookkeeper. We have tested that in a slow IO infrastructure like vSAN, this setting helps to protect the system from unstability. bookkeeper.write.outstanding.bytes.max: "33554432" - ## In Pravega +0.7 we use an in-memory cache. We need to be sure that the size of the cache is always lower than the direct memory set in JVM options. pravegaservice.cache.size.max: "3221225472" pravegaservice.cache.time.seconds.max: "600" hdfs.block.size: "67108864" - ## Despite AWS support team has suggested to set 1MB block sizes to write to Tier 2 to improve performance when using NFS, we found that a larger block - ## size significantly improves the throughput of a single segment against EFS. writer.flush.threshold.bytes: "67108864" writer.flush.size.bytes.max: "67108864" - ## The number of containers depends on the Segment Stores in the cluster (or vice-versa). To distributed the load of Segment Containers across Segment - ## Stores, we recommend to set 4 to 8 Segment Containers per Segment Store. Clearly, determining this number will depend on a provisioning decision. - ## Note that both containerCount values should be always the same between the Segment Store and the Controller service. pravegaservice.container.count: "16" controller.container.count: "16" - ## Increase the default Controller bucket count to distribute load across multiple instances. controller.retention.bucket.count: "10" controller.service.asyncTaskPool.size: "20" controller.retention.thread.count: "4" diff --git a/charts/pravega/values/small.yaml b/charts/pravega/values/small.yaml index fc1893c2a..c027b9277 100644 --- a/charts/pravega/values/small.yaml +++ b/charts/pravega/values/small.yaml @@ -35,40 +35,18 @@ segmentStore: jvmOptions: ["-Xmx2g", "-XX:MaxDirectMemorySize=2g"] options: - ## If you add more Bookies to the cluster and you want to exploit them for IO, then you need increase this number. The Bookkeeper ensemble defines - ## the number of Bookies that should be available or healthy to allow Bookkeeper to work. In production, we recommend this value to be: - ## bookkeeper.ensemble.size = bookieReplicas - F, - ## where F is the number of Bookie failures we can tolerate to continue working. For instance, if we instantiate 6 Bookies and we want to tolerate - ## up to F = 2 failures, then the bookkeeper.ensemble.size = 4. bookkeeper.ensemble.size: "3" - ## By default, we want to keep 3 replicas of the data in Bookkeeper to ensure durability. bookkeeper.write.quorum.size: "3" - ## By default, we set bookkeeper.write.quorum.size == bookkeeper.ack.quorum.size, so we wait for the ACK of all the Bookies on a write to proceed to the next one. This - ## has been proven as a factor that makes Pravega much more stable in the long run for IO heavy workloads, at the cost of sacrificing performance. 
- ## If we set bkWriteQuorumSize > bkAckQuorumSize, we can improve performance, but a temporarily "slow" Bookie may lead to OOM errors at both the - ## Segment Store or Bookkeeper sides, given that Pravega continues to write to the "slow" Bookie at the same pace as for the "fast" ones. For more - ## context, please see: https://github.com/pravega/pravega/issues/4058 and https://github.com/pravega/pravega/issues/4057 bookkeeper.ack.quorum.size: "3" - ## We need to increase this timeout to tolerate environments in which Tier 1 storage gets really saturated and Bookkeeper exhibits high latency. If - ## not, an IO workload that saturates Bookkeeper can induce the Segment Store into an state of unstability. bookkeeper.write.timeout.milliseconds: "60000" - ## This parameter will start inducing the max delay to Tier 1 operations if the Segment Store reaches 32MB of queued data waiting to be written to - ## Bookkeeper. We have tested that in a slow IO infrastructure like vSAN, this setting helps to protect the system from unstability. bookkeeper.write.outstanding.bytes.max: "33554432" - ## In Pravega +0.7 we use an in-memory cache. We need to be sure that the size of the cache is always lower than the direct memory set in JVM options. pravegaservice.cache.size.max: "1073741824" pravegaservice.cache.time.seconds.max: "600" hdfs.block.size: "67108864" - ## Despite AWS support team has suggested to set 1MB block sizes to write to Tier 2 to improve performance when using NFS, we found that a larger block - ## size significantly improves the throughput of a single segment against EFS. writer.flush.threshold.bytes: "67108864" writer.flush.size.bytes.max: "67108864" - ## The number of containers depends on the Segment Stores in the cluster (or vice-versa). To distributed the load of Segment Containers across Segment - ## Stores, we recommend to set 4 to 8 Segment Containers per Segment Store. Clearly, determining this number will depend on a provisioning decision. - ## Note that both containerCount values should be always the same between the Segment Store and the Controller service. pravegaservice.container.count: "8" controller.container.count: "8" - ## Increase the default Controller bucket count to distribute load across multiple instances. controller.retention.bucket.count: "4" controller.service.asyncTaskPool.size: "20" controller.retention.thread.count: "4"
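
These profiles are intended to be layered over the chart defaults at install time. A minimal usage sketch, where the release name, chart location and namespace are illustrative assumptions rather than part of this patch:

    # hypothetical install using the "small" profile; storage, ZooKeeper and other
    # required settings still have to be supplied as they would be for values.yaml
    helm install pravega charts/pravega \
      --namespace pravega \
      -f charts/pravega/values/small.yaml

For minikube, passing -f charts/pravega/values/minikube.yaml additionally drops bookkeeper.ensemble.size, bookkeeper.write.quorum.size and bookkeeper.ack.quorum.size to "1", so a single bookie is enough for Tier 1.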
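
The ensemble and quorum settings repeated in each profile follow the rule spelled out in the comments: bookkeeper.ensemble.size = bookieReplicas - F, where F is the number of bookie failures to tolerate. A worked sketch, assuming a hypothetical cluster of 5 bookies that should keep accepting writes with up to 2 bookies down:

    options:
      ## ensemble = 5 bookies - 2 tolerated failures = 3
      bookkeeper.ensemble.size: "3"
      ## keep three copies of every entry for durability
      bookkeeper.write.quorum.size: "3"
      ## ack quorum kept equal to the write quorum: slower, but avoids the OOM risk
      ## around a lagging bookie described in pravega/pravega#4058 and #4057
      bookkeeper.ack.quorum.size: "3"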
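
The cache versus direct-memory constraint called out in the comments can be sanity-checked from the numbers in this patch:

    profile           -XX:MaxDirectMemorySize    pravegaservice.cache.size.max
    small / default    2g  (2147483648)           1073741824  (1 GiB)
    medium             4g  (4294967296)           3221225472  (3 GiB)
    large             12g (12884901888)          11811160064 (11 GiB)

In every profile the cache is sized a full gigabyte below the direct-memory ceiling, and -Xmx plus MaxDirectMemorySize in turn fits within the segmentStore memory limit (4Gi, 8Gi and 16Gi respectively).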
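
The container counts apply the 4 to 8 Segment Containers per Segment Store guideline in the same way:

    profile    segmentStore.replicas   containers per store   pravegaservice.container.count
    small      1                       8                       "8"
    medium     2                       8                       "16"
    large      6                       8                       "48"

controller.container.count is kept identical to pravegaservice.container.count in every profile, as the comments require.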