From e9d6c3cb5f8caf5689d3aace7d26ca149c2bbb03 Mon Sep 17 00:00:00 2001 From: esoulard Date: Wed, 15 May 2024 18:25:27 +0200 Subject: [PATCH] Fix rolling restart by waiting for consul leave and after restart --- README.md | 4 ++-- defaults/main.yml | 1 + handlers/reload_consul_conf.yml | 2 +- tasks/leave_restart_consul.yml | 33 +++++++++++++++++++++++------ templates/consul_bsdinit.j2 | 2 +- templates/consul_launchctl.plist.j2 | 2 +- templates/consul_smf_manifest.j2 | 2 +- templates/consul_systemd.service.j2 | 4 ++-- templates/consul_sysvinit.j2 | 17 +++++++-------- 9 files changed, 43 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 4cc960c8..fbcbad09 100644 --- a/README.md +++ b/README.md @@ -839,8 +839,8 @@ _Consul Enterprise Only (requires that CONSUL_ENTERPRISE is set to true)_ - Restarts consul node one by one to avoid service interruption on existing cluster (Unix platforms only). - Default value: *false* -### `consul_rolling_restart_delay_sec` - - Adds a delay between node restart (Linux platforms only). + ### `consul_rolling_restart_delay_sec` + - Adds a delay between consul leave and node restart (Linux platforms only). - Default value: 5 #### Custom Configuration Section diff --git a/defaults/main.yml b/defaults/main.yml index 5ac472bd..5597d94c 100644 --- a/defaults/main.yml +++ b/defaults/main.yml @@ -50,6 +50,7 @@ consul_log_path: "{{ lookup('env', 'CONSUL_LOG_PATH') | default('/var/log/consul consul_log_file: "{{ lookup('env', 'CONSUL_LOG_FILE') | default('consul.log', true) }}" consul_run_path: /run/consul consul_binary: "{{ consul_bin_path }}/consul" +consul_pid_file: "{{ consul_run_path }}/consul.pid" ### System user and group consul_manage_user: true diff --git a/handlers/reload_consul_conf.yml b/handlers/reload_consul_conf.yml index fb2c6511..deaf1db3 100644 --- a/handlers/reload_consul_conf.yml +++ b/handlers/reload_consul_conf.yml @@ -3,6 +3,6 @@ # Cannot use `consul reload` because it requires the HTTP API to be bound to a non-loopback interface - name: Reload consul configuration on unix # noqa no-changed-when - ansible.builtin.command: pkill --pidfile "{{ consul_run_path }}/consul.pid" --signal SIGHUP + ansible.builtin.command: pkill --pidfile "{{ consul_pid_file }}" --signal SIGHUP when: ansible_os_family != "Windows" listen: reload consul configuration diff --git a/tasks/leave_restart_consul.yml b/tasks/leave_restart_consul.yml index ec8d7562..9dedd812 100644 --- a/tasks/leave_restart_consul.yml +++ b/tasks/leave_restart_consul.yml @@ -1,9 +1,23 @@ --- -- name: Consul leave +- name: Check if consul is running delegate_to: "{{ rolling_restart_host }}" - ansible.builtin.command: "{{ consul_binary }} leave {% if consul_acl_enable %} -token {{ consul_acl_master_token }} {% endif %} -http-addr {{ consul_addresses.http }}:{{ consul_ports.http }}" - changed_when: true + stat: + path: "{{ consul_pid_file }}" + register: consul_pid + +- block: + - name: Consul leave + delegate_to: "{{ rolling_restart_host }}" + ansible.builtin.command: + cmd: "{{ consul_binary }} leave {% if consul_acl_enable %} -token {{ consul_acl_master_token }} {% endif %} -http-addr {{ consul_addresses.http }}:{{ consul_ports.http }}" + changed_when: true + + - name: Give the cluster some time to settle + ansible.builtin.pause: + seconds: "{{ consul_rolling_restart_delay_sec }}" + when: consul_rolling_restart_delay_sec > 0 + when: consul_pid.stat.exists - name: Restart consul on Unix delegate_to: "{{ rolling_restart_host }}" @@ -13,7 +27,12 @@ # Needed to force SysV service manager on Docker for Molecule tests use: "{{ ansible_service_mgr }}" -- name: Wait for service availability - ansible.builtin.pause: - seconds: "{{ consul_rolling_restart_delay_sec }}" - when: consul_rolling_restart_delay_sec > 0 +- name: Assert that consul service is running + delegate_to: "{{ rolling_restart_host }}" + ansible.builtin.command: + cmd: "{{ consul_binary }} info {% if consul_acl_enable %} -token {{ consul_acl_master_token }} {% endif %} -http-addr {{ consul_addresses.http }}:{{ consul_ports.http }}" + changed_when: false + register: consul_info + retries: 20 + delay: 1 + until: "consul_info.rc == 0" diff --git a/templates/consul_bsdinit.j2 b/templates/consul_bsdinit.j2 index 46b934fc..9ef997c1 100644 --- a/templates/consul_bsdinit.j2 +++ b/templates/consul_bsdinit.j2 @@ -30,7 +30,7 @@ consul_start() { for user in ${consul_users}; do mkdir {{ consul_run_path }} chown -R "{{ consul_user }}:{{ consul_group }}" {{ consul_run_path }} - su -m "${user}" -c "{{ consul_bin_path }}/consul agent -config-file={{ consul_config_path }}/config.json -config-dir={{ consul_configd_path }} -pid-file={{ consul_run_path }}/consul.pid&" + su -m "${user}" -c "{{ consul_bin_path }}/consul agent -config-file={{ consul_config_path }}/config.json -config-dir={{ consul_configd_path }} -pid-file={{ consul_pid_file }}&" done } diff --git a/templates/consul_launchctl.plist.j2 b/templates/consul_launchctl.plist.j2 index ed3ddf62..dd031598 100644 --- a/templates/consul_launchctl.plist.j2 +++ b/templates/consul_launchctl.plist.j2 @@ -24,7 +24,7 @@ agent -config-file={{ consul_config_path }}/config.json -config-dir={{ consul_configd_path }} - -pid-file={{ consul_run_path }}/consul.pid + -pid-file={{ consul_pid_file }} RunAtLoad diff --git a/templates/consul_smf_manifest.j2 b/templates/consul_smf_manifest.j2 index 187c8dad..7b45c3ce 100644 --- a/templates/consul_smf_manifest.j2 +++ b/templates/consul_smf_manifest.j2 @@ -23,7 +23,7 @@ - + diff --git a/templates/consul_systemd.service.j2 b/templates/consul_systemd.service.j2 index 0a4883bb..6b7f3e85 100644 --- a/templates/consul_systemd.service.j2 +++ b/templates/consul_systemd.service.j2 @@ -16,7 +16,7 @@ After=network-online.target [Service] User={{ consul_user }} Group={{ consul_group }} -PIDFile={{ consul_run_path }}/consul.pid +PIDFile={{ consul_pid_file }} PermissionsStartOnly=true {% if consul_ui_legacy %} Environment=CONSUL_UI_LEGACY=true @@ -26,7 +26,7 @@ ExecStartPre=/bin/chown -R {{ consul_user }}:{{ consul_group }} {{ consul_run_pa ExecStart={{ consul_bin_path }}/consul agent \ -config-file={{ consul_config_path }}/config.json \ -config-dir={{ consul_configd_path}} \ - -pid-file={{ consul_run_path }}/consul.pid + -pid-file={{ consul_pid_file }} ExecReload=/bin/kill -HUP $MAINPID KillMode=process KillSignal=SIGTERM diff --git a/templates/consul_sysvinit.j2 b/templates/consul_sysvinit.j2 index 895e2b53..268ab52b 100644 --- a/templates/consul_sysvinit.j2 +++ b/templates/consul_sysvinit.j2 @@ -3,7 +3,7 @@ # chkconfig: 2345 95 95 # description: Consul service discovery framework # processname: consul -# pidfile: {{ consul_run_path }}/consul.pid +# pidfile: {{ consul_pid_file }} {% if ansible_distribution == "Ubuntu" %} . /lib/lsb/init-functions @@ -14,7 +14,6 @@ CONSUL={{ consul_bin_path }}/consul CONFIG={{ consul_config_path }}/config.json CONFIGD={{ consul_configd_path }} -PID_FILE={{ consul_run_path }}/consul.pid LOCK_FILE=/var/lock/subsys/consul {% if consul_ui_legacy %} CONSUL_UI_LEGACY=true @@ -29,13 +28,13 @@ mkrundir() { chown {{ consul_user }} {{ consul_run_path }} } -KILLPROC_OPT="-p ${PID_FILE}" +KILLPROC_OPT="-p {{ consul_pid_file }}" mkpidfile() { mkrundir - [ ! -f "${PID_FILE}" ] && pidofproc "${CONSUL}" > "${PID_FILE}" + [ ! -f "{{ consul_pid_file }}" ] && pidofproc "${CONSUL}" > "{{ consul_pid_file }}" chown -R {{ consul_user }} {{ consul_run_path }} if [ $? -ne 0 ] ; then - rm "${PID_FILE}" + rm "{{ consul_pid_file }}" KILLPROC_OPT="" fi } @@ -44,10 +43,10 @@ start() { echo -n "Starting consul: " mkrundir mkpidfile - # [ -f "${PID_FILE}" ] && rm "${PID_FILE}" + # [ -f "{{ consul_pid_file }}" ] && rm "{{ consul_pid_file }}" daemon --user={{ consul_user }} \ - --pidfile="${PID_FILE}" \ - "${CONSUL}" agent -config-file="${CONFIG}" -config-dir="${CONFIGD}" -pid-file="${PID_FILE}" & + --pidfile="{{ consul_pid_file }}" \ + "${CONSUL}" agent -config-file="${CONFIG}" -config-dir="${CONFIGD}" -pid-file="{{ consul_pid_file }}" & retcode=$? touch ${LOCK_FILE} return "${retcode}" @@ -63,7 +62,7 @@ stop() { killproc "${KILLPROC_OPT}" "${CONSUL}" -SIGTERM retcode=$? - rm -f "${LOCK_FILE}" "${PID_FILE}" + rm -f "${LOCK_FILE}" "{{ consul_pid_file }}" return "${retcode}" }