From f51566e62205dc8fc8869a90a1f32425949b59e1 Mon Sep 17 00:00:00 2001 From: Dmitri Gekhtman <62982571+DmitriGekhtman@users.noreply.github.com> Date: Wed, 9 Feb 2022 18:59:50 -0800 Subject: [PATCH] Prep K8s operator for the Ray 1.11.0 release. (#22264) For consistency and safety, we fix an explicit 6379 port for all default and example configs for Ray on K8s. Documentation is updated to recommend matching Ray versions in operator and Ray cluster. --- deploy/charts/ray/templates/raycluster.yaml | 4 ++-- deploy/charts/ray/values.yaml | 13 ++++++------- deploy/components/example_cluster.yaml | 4 ++-- python/ray/autoscaler/kubernetes/defaults.yaml | 4 ++-- .../autoscaler/kubernetes/example-full-legacy.yaml | 4 ++-- python/ray/autoscaler/kubernetes/example-full.yaml | 2 +- .../ray/autoscaler/kubernetes/example-ingress.yaml | 4 +--- 7 files changed, 16 insertions(+), 19 deletions(-) diff --git a/deploy/charts/ray/templates/raycluster.yaml b/deploy/charts/ray/templates/raycluster.yaml index a8501534ec92..f24f60c70c9b 100644 --- a/deploy/charts/ray/templates/raycluster.yaml +++ b/deploy/charts/ray/templates/raycluster.yaml @@ -51,7 +51,7 @@ spec: - name: RAY_gcs_server_rpc_server_thread_num value: "1" ports: - - containerPort: 6379 # Redis port + - containerPort: 6379 # Redis port for Ray <= 1.10.0. GCS server port for Ray >= 1.11.0. - containerPort: 10001 # Used by Ray Client - containerPort: 8265 # Used by Ray Dashboard - containerPort: 8000 # Used by Ray Serve @@ -92,7 +92,7 @@ spec: # Note dashboard-host is set to 0.0.0.0 so that Kubernetes can port forward. headStartRayCommands: - ray stop - - ulimit -n 65536; ray start --head --no-monitor --dashboard-host 0.0.0.0 + - ulimit -n 65536; ray start --head --port=6379 --no-monitor --dashboard-host 0.0.0.0 # Commands to start Ray on worker nodes. You don't need to change this. 
workerStartRayCommands: - ray stop diff --git a/deploy/charts/ray/values.yaml b/deploy/charts/ray/values.yaml index d276913aade5..6c1416b5ff75 100644 --- a/deploy/charts/ray/values.yaml +++ b/deploy/charts/ray/values.yaml @@ -3,6 +3,8 @@ # RayCluster settings: # image is Ray image to use for the head and workers of this Ray cluster. +# It's recommended to build custom dependencies for your workload into this image, +# taking one of the official `rayproject/ray` images as base. image: rayproject/ray:latest # headPodType is the podType used for the Ray head node (as configured below). headPodType: rayHeadType @@ -96,11 +98,8 @@ namespacedOperator: false # in which to launch the operator. operatorNamespace: default # operatorImage - The image used in the operator deployment. +# It is recommended to use one of the official `rayproject/ray` images for the operator. +# It is recommended to use the same Ray version in the operator as in the Ray clusters managed +# by the operator. In other words, the images specified under the fields `operatorImage` and `image` +# should carry matching Ray versions. operatorImage: rayproject/ray:latest -# `rayproject/ray:latest` contains the latest official release version of Ray. -# `rayproject/ray:nightly` runs the current master version of Ray. -# For a particular official release version of Ray, use `rayproject/ray:1.x.y`. -# For a specific master commit, use the first 6 characters of the commit SHA, e.g. `rayproject/ray:050a07`. 
-# The operator and Ray cluster can use different Ray versions, provided both versions are >= 1.2.0 - - diff --git a/deploy/components/example_cluster.yaml b/deploy/components/example_cluster.yaml index 1513e8fde833..e27eb1be566c 100644 --- a/deploy/components/example_cluster.yaml +++ b/deploy/components/example_cluster.yaml @@ -54,7 +54,7 @@ spec: command: ["/bin/bash", "-c", "--"] args: ["trap : TERM INT; touch /tmp/raylogs; tail -f /tmp/raylogs; sleep infinity & wait;"] ports: - - containerPort: 6379 # Redis port + - containerPort: 6379 # Redis port for Ray <= 1.10.0. GCS server port for Ray >= 1.11.0. - containerPort: 10001 # Used by Ray Client - containerPort: 8265 # Used by Ray Dashboard - containerPort: 8000 # Used by Ray Serve @@ -129,7 +129,7 @@ spec: # Note dashboard-host is set to 0.0.0.0 so that Kubernetes can port forward. headStartRayCommands: - ray stop - - ulimit -n 65536; ray start --head --no-monitor --dashboard-host 0.0.0.0 &> /tmp/raylogs + - ulimit -n 65536; ray start --head --port=6379 --no-monitor --dashboard-host 0.0.0.0 &> /tmp/raylogs # Commands to start Ray on worker nodes. You don't need to change this. workerStartRayCommands: - ray stop diff --git a/python/ray/autoscaler/kubernetes/defaults.yaml b/python/ray/autoscaler/kubernetes/defaults.yaml index 4cf8a40481ba..0121e9ef8a24 100644 --- a/python/ray/autoscaler/kubernetes/defaults.yaml +++ b/python/ray/autoscaler/kubernetes/defaults.yaml @@ -171,7 +171,7 @@ available_node_types: command: ["/bin/bash", "-c", "--"] args: ['trap : TERM INT; sleep infinity & wait;'] ports: - - containerPort: 6379 # Redis port + - containerPort: 6379 # Redis port for Ray <= 1.10.0. GCS server port for Ray >= 1.11.0. - containerPort: 10001 # Used by Ray Client - containerPort: 8265 # Used by Ray Dashboard @@ -200,7 +200,7 @@ available_node_types: # Note dashboard-host is set to 0.0.0.0 so that kubernetes can port forward. 
head_start_ray_commands: - ray stop - - ulimit -n 65536; ray start --head --autoscaling-config=~/ray_bootstrap_config.yaml --dashboard-host 0.0.0.0 + - ulimit -n 65536; ray start --head --port=6379 --autoscaling-config=~/ray_bootstrap_config.yaml --dashboard-host 0.0.0.0 # Command to start ray on worker nodes. You don't need to change this. worker_start_ray_commands: diff --git a/python/ray/autoscaler/kubernetes/example-full-legacy.yaml b/python/ray/autoscaler/kubernetes/example-full-legacy.yaml index d85c1fa4a259..6d1558700467 100644 --- a/python/ray/autoscaler/kubernetes/example-full-legacy.yaml +++ b/python/ray/autoscaler/kubernetes/example-full-legacy.yaml @@ -137,7 +137,7 @@ head_node: command: ["/bin/bash", "-c", "--"] args: ["trap : TERM INT; sleep infinity & wait;"] ports: - - containerPort: 6379 # Redis port + - containerPort: 6379 # Redis port for Ray <= 1.10.0. GCS server port for Ray >= 1.11.0. - containerPort: 10001 # Used by Ray Client - containerPort: 8265 # Used by Ray Dashboard @@ -253,7 +253,7 @@ worker_setup_commands: [] # Note dashboard-host is set to 0.0.0.0 so that kubernetes can port forward. head_start_ray_commands: - ray stop - - ulimit -n 65536; ray start --head --autoscaling-config=~/ray_bootstrap_config.yaml --dashboard-host 0.0.0.0 + - ulimit -n 65536; ray start --head --port=6379 --autoscaling-config=~/ray_bootstrap_config.yaml --dashboard-host 0.0.0.0 # Command to start ray on worker nodes. You don't need to change this. worker_start_ray_commands: diff --git a/python/ray/autoscaler/kubernetes/example-full.yaml b/python/ray/autoscaler/kubernetes/example-full.yaml index 764fb8f1bff2..3af465890407 100644 --- a/python/ray/autoscaler/kubernetes/example-full.yaml +++ b/python/ray/autoscaler/kubernetes/example-full.yaml @@ -205,7 +205,7 @@ available_node_types: # Note dashboard-host is set to 0.0.0.0 so that kubernetes can port forward. 
head_start_ray_commands: - ray stop - - ulimit -n 65536; ray start --head --autoscaling-config=~/ray_bootstrap_config.yaml --dashboard-host 0.0.0.0 + - ulimit -n 65536; ray start --head --port=6379 --autoscaling-config=~/ray_bootstrap_config.yaml --dashboard-host 0.0.0.0 # Command to start ray on worker nodes. You don't need to change this. worker_start_ray_commands: diff --git a/python/ray/autoscaler/kubernetes/example-ingress.yaml b/python/ray/autoscaler/kubernetes/example-ingress.yaml index 2967261fb6db..cb3e82a08a77 100644 --- a/python/ray/autoscaler/kubernetes/example-ingress.yaml +++ b/python/ray/autoscaler/kubernetes/example-ingress.yaml @@ -152,9 +152,7 @@ head_node: command: ["/bin/bash", "-c", "--"] args: ["trap : TERM INT; sleep infinity & wait;"] ports: - - containerPort: 6379 # Redis port. - - containerPort: 6380 # Redis port. - - containerPort: 6381 # Redis port. + - containerPort: 6379 # Redis port for Ray <= 1.10.0. GCS server port for Ray >= 1.11.0 - containerPort: 22345 # Ray internal communication. - containerPort: 22346 # Ray internal communication.