Issue 421: Adding manifests for various cluster installation flavours #422
Merged
@@ -43,11 +43,11 @@ controller:
   replicas: 1
   resources:
     requests:
-      cpu: 1000m
+      cpu: 500m
       memory: 1Gi
     limits:
-      cpu: 2000m
-      memory: 3Gi
+      cpu: 1000m
+      memory: 2Gi
   ## service type and annotations are ignored if external access is disabled
   service:
     ## used to override the service type for controller
@@ -64,16 +64,16 @@ segmentStore:
   resources:
     requests:
       cpu: 1000m
-      memory: 3Gi
+      memory: 4Gi
     limits:
       cpu: 2000m
-      memory: 5Gi
+      memory: 4Gi
   ## service type and annotations are ignored if external access is disabled
   service:
     ## used to override the service type for segmentStore
     type:
     annotations: {}
-  jvmOptions: []
+  jvmOptions: ["-Xmx2g", "-XX:MaxDirectMemorySize=2g"]

 storage:

@@ -92,10 +92,10 @@ storage:
     ## ecs is used to configure a Dell EMC ECS system as the long term storage backend
     ## considered only if storage.longtermStorage.type = ecs
     ecs: {}
-      # configUri: ""
-      # bucket: ""
-      # prefix: ""
-      # credentials: ""
+      # configUri: https://object.ecstestdrive.com?namespace=namespace%26identity=user%26secretKey=password
+      # bucket: "bucket"
+      # prefix: "prefix"
+      # credentials: ecs-credentials

     ## hdfs is used to configure an HDFS system as long term storage backend
     ## considered only if storage.longtermStorage.type = hdfs
@@ -109,4 +109,58 @@ storage:
     className: standard

 options:
-  # bookkeeper.ack.quorum.size: "3"
+  ## If you add more Bookies to the cluster and you want to exploit them for IO, then you need to increase this number. The Bookkeeper ensemble defines
+  ## the number of Bookies that should be available or healthy for Bookkeeper to work. In production, we recommend this value to be:
+  ## bookkeeper.ensemble.size = bookieReplicas - F,
+  ## where F is the number of Bookie failures we can tolerate and keep working. For instance, if we instantiate 6 Bookies and we want to tolerate
+  ## up to F = 2 failures, then bookkeeper.ensemble.size = 4.
+  bookkeeper.ensemble.size: "3"
+  ## By default, we keep 3 replicas of the data in Bookkeeper to ensure durability.
+  bookkeeper.write.quorum.size: "3"
+  ## By default, we set bookkeeper.write.quorum.size == bookkeeper.ack.quorum.size, so we wait for the ACK of all the Bookies on a write before proceeding to the next one.
+  ## This has proven to make Pravega much more stable in the long run for IO-heavy workloads, at the cost of some performance.
+  ## If we set bkWriteQuorumSize > bkAckQuorumSize, we can improve performance, but a temporarily "slow" Bookie may lead to OOM errors on both the
+  ## Segment Store and Bookkeeper sides, given that Pravega continues to write to the "slow" Bookie at the same pace as to the "fast" ones. For more
+  ## context, please see: https://github.com/pravega/pravega/issues/4058 and https://github.com/pravega/pravega/issues/4057
+  bookkeeper.ack.quorum.size: "3"

(Inline review comment on the explanations above: "Same here, unnecessary explanations." Author reply: "done")

+  ## We need to increase this timeout to tolerate environments in which Tier 1 storage gets heavily saturated and Bookkeeper exhibits high latency.
+  ## Otherwise, an IO workload that saturates Bookkeeper can drive the Segment Store into an unstable state.
+  bookkeeper.write.timeout.milliseconds: "60000"
+  ## This parameter starts inducing the maximum delay on Tier 1 operations once the Segment Store accumulates 32MB of queued data waiting to be written to
+  ## Bookkeeper. We have verified that on slow IO infrastructure such as vSAN, this setting helps protect the system from instability.
+  bookkeeper.write.outstanding.bytes.max: "33554432"
+  ## In Pravega 0.7+ we use an in-memory cache. We need to make sure that the cache size is always lower than the direct memory set in the JVM options.
+  pravegaservice.cache.size.max: "1073741824"
+  pravegaservice.cache.time.seconds.max: "600"
+  hdfs.block.size: "67108864"
+  ## Although the AWS support team has suggested setting 1MB block sizes for Tier 2 writes to improve performance when using NFS, we found that a larger block
+  ## size significantly improves the throughput of a single segment against EFS.
+  writer.flush.threshold.bytes: "67108864"
+  writer.flush.size.bytes.max: "67108864"
+  ## The number of containers depends on the number of Segment Stores in the cluster (or vice versa). To distribute the load of Segment Containers across Segment
+  ## Stores, we recommend setting 4 to 8 Segment Containers per Segment Store. Ultimately, determining this number is a provisioning decision.
+  ## Note that both containerCount values should always be the same for the Segment Store and the Controller service.
+  pravegaservice.container.count: "8"
+  controller.container.count: "8"
+  ## Increase the default Controller bucket count to distribute load across multiple instances.
+  controller.retention.bucket.count: "4"
+  controller.service.asyncTaskPool.size: "20"
+  controller.retention.thread.count: "4"
+  log.level: "INFO"
+  ## The following parameters are only useful if you are going to deploy metrics in this cluster.
+  # metrics.dynamicCache.size: "100000"
+  # metrics.statistics.enable: "true"
+  # metrics.statsD.reporter.enable: "false"
+  # metrics.statsD.connect.host: "telegraph.default"
+  # metrics.statsD.connect.port: "8125"
+  # metrics.influxDB.reporter.enable: "true"
+  # metrics.output.frequency.seconds: "10"
+  # metrics.influxDB.connect.uri: "http://INFLUXDB-IP:8086"
+  # controller.metrics.dynamicCache.size: "100000"
+  # controller.metrics.statistics.enable: "true"
+  # controller.metrics.statsD.reporter.enable: "false"
+  # controller.metrics.statsD.connect.host: "telegraph.default"
+  # controller.metrics.statsD.connect.port: "8125"
+  # controller.metrics.influxDB.reporter.enable: "true"
+  # controller.metrics.output.frequency.seconds: "10"
+  # controller.metrics.influxDB.connect.uri: "http://INFLUXDB-IP:8086"
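As a quick cross-check of the sizing in this flavour (my arithmetic, not part of the chart): the Segment Store heap plus direct memory adds up to the 4Gi pod limit, and the cache stays below the direct memory as the comments require. The second block applies the ensemble formula from the comments to the hypothetical 6-Bookie example given there.

## Sizing cross-check for this flavour (illustrative only):
##   segmentStore memory limit        = 4Gi
##   -Xmx2g heap + 2g direct memory   = 4Gi, equal to the pod limit
##   pravegaservice.cache.size.max    = 1073741824 bytes = 1Gi, below the 2g direct memory
##
## Hypothetical quorum settings for 6 Bookies tolerating F = 2 failures,
## following bookkeeper.ensemble.size = bookieReplicas - F:
# options:
#   bookkeeper.ensemble.size: "4"
#   bookkeeper.write.quorum.size: "3"
#   bookkeeper.ack.quorum.size: "3"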
@@ -0,0 +1,92 @@ (new file)
controller:
  replicas: 2
  resources:
    requests:
      cpu: 500m
      memory: 1Gi
    limits:
      cpu: 2000m
      memory: 4Gi
  ## service type and annotations are ignored if external access is disabled
  service:
    ## used to override the service type for controller
    type:
    annotations: {}
  jvmOptions: []

segmentStore:
  replicas: 6
  secret: {}
  # name:
  # path:
  env:
  resources:
    requests:
      cpu: 2000m
      memory: 16Gi
    limits:
      cpu: 4000m
      memory: 16Gi
  ## service type and annotations are ignored if external access is disabled
  service:
    ## used to override the service type for segmentStore
    type:
    annotations: {}
  jvmOptions: ["-Xmx4g", "-XX:MaxDirectMemorySize=12g"]

options:
  ## If you add more Bookies to the cluster and you want to exploit them for IO, then you need to increase this number. The Bookkeeper ensemble defines
  ## the number of Bookies that should be available or healthy for Bookkeeper to work. In production, we recommend this value to be:
  ## bookkeeper.ensemble.size = bookieReplicas - F,
  ## where F is the number of Bookie failures we can tolerate and keep working. For instance, if we instantiate 6 Bookies and we want to tolerate
  ## up to F = 2 failures, then bookkeeper.ensemble.size = 4.
  bookkeeper.ensemble.size: "3"
  ## By default, we keep 3 replicas of the data in Bookkeeper to ensure durability.
  bookkeeper.write.quorum.size: "3"
  ## By default, we set bookkeeper.write.quorum.size == bookkeeper.ack.quorum.size, so we wait for the ACK of all the Bookies on a write before proceeding to the next one.
  ## This has proven to make Pravega much more stable in the long run for IO-heavy workloads, at the cost of some performance.
  ## If we set bkWriteQuorumSize > bkAckQuorumSize, we can improve performance, but a temporarily "slow" Bookie may lead to OOM errors on both the
  ## Segment Store and Bookkeeper sides, given that Pravega continues to write to the "slow" Bookie at the same pace as to the "fast" ones. For more
  ## context, please see: https://github.com/pravega/pravega/issues/4058 and https://github.com/pravega/pravega/issues/4057
  bookkeeper.ack.quorum.size: "3"
  ## We need to increase this timeout to tolerate environments in which Tier 1 storage gets heavily saturated and Bookkeeper exhibits high latency.
  ## Otherwise, an IO workload that saturates Bookkeeper can drive the Segment Store into an unstable state.
  bookkeeper.write.timeout.milliseconds: "60000"
  ## This parameter starts inducing the maximum delay on Tier 1 operations once the Segment Store accumulates 32MB of queued data waiting to be written to
  ## Bookkeeper. We have verified that on slow IO infrastructure such as vSAN, this setting helps protect the system from instability.
  bookkeeper.write.outstanding.bytes.max: "33554432"
  ## In Pravega 0.7+ we use an in-memory cache. We need to make sure that the cache size is always lower than the direct memory set in the JVM options.
  pravegaservice.cache.size.max: "11811160064"
  pravegaservice.cache.time.seconds.max: "600"
  hdfs.block.size: "67108864"
  ## Although the AWS support team has suggested setting 1MB block sizes for Tier 2 writes to improve performance when using NFS, we found that a larger block
  ## size significantly improves the throughput of a single segment against EFS.
  writer.flush.threshold.bytes: "67108864"
  writer.flush.size.bytes.max: "67108864"
  ## The number of containers depends on the number of Segment Stores in the cluster (or vice versa). To distribute the load of Segment Containers across Segment
  ## Stores, we recommend setting 4 to 8 Segment Containers per Segment Store. Ultimately, determining this number is a provisioning decision.
  ## Note that both containerCount values should always be the same for the Segment Store and the Controller service.
  pravegaservice.container.count: "48"
  controller.container.count: "48"
  ## Increase the default Controller bucket count to distribute load across multiple instances.
  controller.retention.bucket.count: "10"
  controller.service.asyncTaskPool.size: "20"
  controller.retention.thread.count: "4"
  log.level: "INFO"
  ## The following parameters are only useful if you are going to deploy metrics in this cluster.
  # metrics.dynamicCache.size: "100000"
  # metrics.statistics.enable: "true"
  # metrics.statsD.reporter.enable: "false"
  # metrics.statsD.connect.host: "telegraph.default"
  # metrics.statsD.connect.port: "8125"
  # metrics.influxDB.reporter.enable: "true"
  # metrics.output.frequency.seconds: "10"
  # metrics.influxDB.connect.uri: "http://INFLUXDB-IP:8086"
  # controller.metrics.dynamicCache.size: "100000"
  # controller.metrics.statistics.enable: "true"
  # controller.metrics.statsD.reporter.enable: "false"
  # controller.metrics.statsD.connect.host: "telegraph.default"
  # controller.metrics.statsD.connect.port: "8125"
  # controller.metrics.influxDB.reporter.enable: "true"
  # controller.metrics.output.frequency.seconds: "10"
  # controller.metrics.influxDB.connect.uri: "http://INFLUXDB-IP:8086"
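The numbers in this flavour follow the rules spelled out in the comments above; a brief worked check (my arithmetic, not additional guidance from the PR):

## Sizing cross-check for this flavour (illustrative only):
##   segmentStore memory limit         = 16Gi
##   -Xmx4g heap + 12g direct memory   = 16Gi, equal to the pod limit
##   pravegaservice.cache.size.max     = 11811160064 bytes = 11Gi, below the 12g direct memory
##   pravegaservice.container.count    = 48 = 6 Segment Store replicas x 8 containers per store
##   controller.container.count        = 48, kept equal to pravegaservice.container.count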
@@ -0,0 +1,92 @@ (new file)
controller:
  replicas: 2
  resources:
    requests:
      cpu: 500m
      memory: 1Gi
    limits:
      cpu: 1000m
      memory: 2Gi
  ## service type and annotations are ignored if external access is disabled
  service:
    ## used to override the service type for controller
    type:
    annotations: {}
  jvmOptions: []

segmentStore:
  replicas: 2
  secret: {}
  # name:
  # path:
  env:
  resources:
    requests:
      cpu: 2000m
      memory: 8Gi
    limits:
      cpu: 4000m
      memory: 8Gi
  ## service type and annotations are ignored if external access is disabled
  service:
    ## used to override the service type for segmentStore
    type:
    annotations: {}
  jvmOptions: ["-Xmx4g", "-XX:MaxDirectMemorySize=4g"]

options:
  ## If you add more Bookies to the cluster and you want to exploit them for IO, then you need to increase this number. The Bookkeeper ensemble defines
  ## the number of Bookies that should be available or healthy for Bookkeeper to work. In production, we recommend this value to be:
  ## bookkeeper.ensemble.size = bookieReplicas - F,
  ## where F is the number of Bookie failures we can tolerate and keep working. For instance, if we instantiate 6 Bookies and we want to tolerate
  ## up to F = 2 failures, then bookkeeper.ensemble.size = 4.
  bookkeeper.ensemble.size: "3"
  ## By default, we keep 3 replicas of the data in Bookkeeper to ensure durability.
  bookkeeper.write.quorum.size: "3"
  ## By default, we set bookkeeper.write.quorum.size == bookkeeper.ack.quorum.size, so we wait for the ACK of all the Bookies on a write before proceeding to the next one.
  ## This has proven to make Pravega much more stable in the long run for IO-heavy workloads, at the cost of some performance.
  ## If we set bkWriteQuorumSize > bkAckQuorumSize, we can improve performance, but a temporarily "slow" Bookie may lead to OOM errors on both the
  ## Segment Store and Bookkeeper sides, given that Pravega continues to write to the "slow" Bookie at the same pace as to the "fast" ones. For more
  ## context, please see: https://github.com/pravega/pravega/issues/4058 and https://github.com/pravega/pravega/issues/4057
  bookkeeper.ack.quorum.size: "3"
  ## We need to increase this timeout to tolerate environments in which Tier 1 storage gets heavily saturated and Bookkeeper exhibits high latency.
  ## Otherwise, an IO workload that saturates Bookkeeper can drive the Segment Store into an unstable state.
  bookkeeper.write.timeout.milliseconds: "60000"
  ## This parameter starts inducing the maximum delay on Tier 1 operations once the Segment Store accumulates 32MB of queued data waiting to be written to
  ## Bookkeeper. We have verified that on slow IO infrastructure such as vSAN, this setting helps protect the system from instability.
  bookkeeper.write.outstanding.bytes.max: "33554432"
  ## In Pravega 0.7+ we use an in-memory cache. We need to make sure that the cache size is always lower than the direct memory set in the JVM options.
  pravegaservice.cache.size.max: "3221225472"
  pravegaservice.cache.time.seconds.max: "600"
  hdfs.block.size: "67108864"
  ## Although the AWS support team has suggested setting 1MB block sizes for Tier 2 writes to improve performance when using NFS, we found that a larger block
  ## size significantly improves the throughput of a single segment against EFS.
  writer.flush.threshold.bytes: "67108864"
  writer.flush.size.bytes.max: "67108864"
  ## The number of containers depends on the number of Segment Stores in the cluster (or vice versa). To distribute the load of Segment Containers across Segment
  ## Stores, we recommend setting 4 to 8 Segment Containers per Segment Store. Ultimately, determining this number is a provisioning decision.
  ## Note that both containerCount values should always be the same for the Segment Store and the Controller service.
  pravegaservice.container.count: "16"
  controller.container.count: "16"
  ## Increase the default Controller bucket count to distribute load across multiple instances.
  controller.retention.bucket.count: "10"
  controller.service.asyncTaskPool.size: "20"
  controller.retention.thread.count: "4"
  log.level: "INFO"
  ## The following parameters are only useful if you are going to deploy metrics in this cluster.
  # metrics.dynamicCache.size: "100000"
  # metrics.statistics.enable: "true"
  # metrics.statsD.reporter.enable: "false"
  # metrics.statsD.connect.host: "telegraph.default"
  # metrics.statsD.connect.port: "8125"
  # metrics.influxDB.reporter.enable: "true"
  # metrics.output.frequency.seconds: "10"
  # metrics.influxDB.connect.uri: "http://INFLUXDB-IP:8086"
  # controller.metrics.dynamicCache.size: "100000"
  # controller.metrics.statistics.enable: "true"
  # controller.metrics.statsD.reporter.enable: "false"
  # controller.metrics.statsD.connect.host: "telegraph.default"
  # controller.metrics.statsD.connect.port: "8125"
  # controller.metrics.influxDB.reporter.enable: "true"
  # controller.metrics.output.frequency.seconds: "10"
  # controller.metrics.influxDB.connect.uri: "http://INFLUXDB-IP:8086"
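A brief worked check of this flavour (my arithmetic): -Xmx4g plus 4g of direct memory matches the 8Gi pod limit, the 3221225472-byte (3Gi) cache stays below the 4g direct memory, and 16 containers correspond to 2 Segment Stores with 8 containers each. If one were to scale this flavour up, the rules in the comments would hypothetically translate as follows (a sketch, not part of the PR):

# segmentStore:
#   replicas: 4                           # hypothetical: grow from 2 to 4 Segment Stores
# options:
#   pravegaservice.container.count: "32"  # 4 stores x 8 containers per store
#   controller.container.count: "32"      # keep equal to pravegaservice.container.count
#   ## with more Bookies, bookkeeper.ensemble.size could also be raised following
#   ## bookkeeper.ensemble.size = bookieReplicas - F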
@@ -0,0 +1,26 @@ (new file)
controller:
  replicas: 1
  resources:
    requests:
      cpu: 250m
      memory: 512Mi
    limits:
      cpu: 500m
      memory: 1Gi
  jvmOptions: []

segmentStore:
  replicas: 1
  resources:
    requests:
      cpu: 500m
      memory: 1Gi
    limits:
      cpu: 500m
      memory: 2Gi
  jvmOptions: []

options:
  bookkeeper.ack.quorum.size: "1"
  bookkeeper.write.quorum.size: "1"
  bookkeeper.ensemble.size: "1"
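For context (my note, not stated in the PR): with a single Bookie all three BookKeeper quorum values collapse to 1, which still respects the usual ordering between them but provides no data replication, so this flavour only makes sense for development or functional testing. A minimal sketch of that constraint using the values above:

options:
  ## BookKeeper expects ensemble size >= write quorum >= ack quorum; here 1 >= 1 >= 1,
  ## i.e. every write lands on a single Bookie and is acknowledged by that Bookie alone.
  bookkeeper.ensemble.size: "1"
  bookkeeper.write.quorum.size: "1"
  bookkeeper.ack.quorum.size: "1"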
Review comment: Please remove some of these explanations; I don't think they are needed.
Author reply: done