From 2b177fec2f181ef0bed51d59fa12fe988efd4f19 Mon Sep 17 00:00:00 2001 From: cbaxley Date: Thu, 10 Oct 2024 06:49:49 -0400 Subject: [PATCH 01/48] Remove some old TODOs --- testing/configure/azure_scripts/download_in_container.ps1 | 1 - 1 file changed, 1 deletion(-) diff --git a/testing/configure/azure_scripts/download_in_container.ps1 b/testing/configure/azure_scripts/download_in_container.ps1 index 33926357..28f5e4fe 100644 --- a/testing/configure/azure_scripts/download_in_container.ps1 +++ b/testing/configure/azure_scripts/download_in_container.ps1 @@ -72,7 +72,6 @@ if ($Os -eq "linux") { $DestinationPath = "/home/$UserName/lme/$DestinationFileName" # Create the lme directory if it doesn't exist $DirectoryCreationScript = "mkdir -p '/home/$UserName/lme'" - # TODO: We don't want to output this until we fix it so we can put all of the output from thw whole script into one json object # We are just ignoring the output for now $CreateDirectoryResponse = az vm run-command invoke ` --command-id RunShellScript ` From 3b8db23405bda38ae338894ea576033ebe3806a9 Mon Sep 17 00:00:00 2001 From: cbaxley Date: Thu, 10 Oct 2024 06:54:28 -0400 Subject: [PATCH 02/48] Don't remove the Azure resources at the end of the Linux only tests --- .github/workflows/linux_only.yml | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/linux_only.yml b/.github/workflows/linux_only.yml index 54bab48d..637c535e 100644 --- a/.github/workflows/linux_only.yml +++ b/.github/workflows/linux_only.yml @@ -127,19 +127,19 @@ jobs: pytest -v api_tests/linux_only/ selenium_tests/linux_only/' " - - name: Cleanup Azure resources - if: always() - env: - AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} - AZURE_SECRET: ${{ secrets.AZURE_SECRET }} - AZURE_TENANT: ${{ secrets.AZURE_TENANT }} - AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }} - run: | - cd testing/v2/development - docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c " - az login --service-principal -u $AZURE_CLIENT_ID -p $AZURE_SECRET --tenant $AZURE_TENANT - az group delete --name pipe-${{ env.UNIQUE_ID }} --yes --no-wait - " + # - name: Cleanup Azure resources + # if: always() + # env: + # AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} + # AZURE_SECRET: ${{ secrets.AZURE_SECRET }} + # AZURE_TENANT: ${{ secrets.AZURE_TENANT }} + # AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + # run: | + # cd testing/v2/development + # docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c " + # az login --service-principal -u $AZURE_CLIENT_ID -p $AZURE_SECRET --tenant $AZURE_TENANT + # az group delete --name pipe-${{ env.UNIQUE_ID }} --yes --no-wait + # " - name: Stop and remove containers if: always() From bcf92c5347a0cc850310081a2f6fec26bde0336c Mon Sep 17 00:00:00 2001 From: cbaxley Date: Fri, 11 Oct 2024 05:16:24 -0400 Subject: [PATCH 03/48] Change the variables to work with the 2.0 pipeline --- testing/tests/.env_example | 2 +- .../tests/api_tests/linux_only/test_server.py | 88 ++++++++++--------- .../tests/selenium_tests/cluster/conftest.py | 2 +- .../selenium_tests/linux_only/conftest.py | 2 +- .../linux_only/test_basic_loading.py | 2 +- .../azure/build_azure_linux_network.py | 12 +-- 6 files changed, 55 insertions(+), 53 deletions(-) diff --git a/testing/tests/.env_example b/testing/tests/.env_example index 65efa408..70c924e8 100644 --- a/testing/tests/.env_example +++ b/testing/tests/.env_example @@ -9,7 +9,7 @@ export ES_HOST="lme" # When running in docker and connecting 
from dev container export KIBANA_HOST=lme # When running in docker and connecting from dev container # export KIBANA_HOST=localhost # When running the tests inside of the lme container # export KIBANA_HOST=xx.xx.xx.xxx # When you have a cluser installed in azure -export KIBANA_PORT=443 +export KIBANA_PORT=5601 export KIBANA_USER=elastic export SELENIUM_TIMEOUT=60 # debug, detached, headless diff --git a/testing/tests/api_tests/linux_only/test_server.py b/testing/tests/api_tests/linux_only/test_server.py index cc945b8f..a283d0ae 100644 --- a/testing/tests/api_tests/linux_only/test_server.py +++ b/testing/tests/api_tests/linux_only/test_server.py @@ -67,46 +67,48 @@ def test_elastic_indices(es_host, es_port, username, password): response = make_request(url, username, password) assert response.status_code == 200, f"Expected 200, got {response.status_code}" - assert ("yellow open .ds-metrics-fleet_server.agent_versions-default" in response.text) - assert ("yellow open .ds-logs-endpoint.events.process-default" in response.text) - assert ("yellow open .ds-metrics-system.network-default" in response.text) - assert ("green open .internal.alerts-ml.anomaly-detection.alerts-default" in response.text) - assert ("green open wazuh-alerts-4.x" in response.text) - assert ("green open .internal.alerts-observability.slo.alerts-default" in response.text) - assert ("yellow open .ds-logs-elastic_agent.endpoint_security-default" in response.text) - assert ("green open .internal.alerts-observability.apm.alerts-default" in response.text) - assert ("yellow open .ds-metrics-system.process.summary-default" in response.text) - assert ("yellow open .ds-logs-elastic_agent.filebeat-default" in response.text) - assert ("yellow open .ds-logs-endpoint.events.file-default" in response.text) - assert ("green open .internal.alerts-observability.metrics.alerts-default" in response.text) - assert ("yellow open .ds-metrics-endpoint.metadata-default" in response.text) - assert ("yellow open .ds-logs-system.syslog-default" in response.text) - assert ("yellow open .ds-logs-elastic_agent.fleet_server-default" in response.text) - assert ("green open .internal.alerts-security.alerts-default" in response.text) - assert ("yellow open .ds-metrics-system.uptime-default" in response.text) - assert ("green open .internal.alerts-stack.alerts-default" in response.text) - assert ("yellow open .ds-metrics-system.memory-default" in response.text) - assert ("green open .internal.alerts-observability.logs.alerts" in response.text) - assert ("yellow open .ds-metrics-endpoint.policy-default" in response.text) - assert ("yellow open .ds-metrics-system.cpu-default" in response.text) - assert ("green open .internal.alerts-observability.uptime.alerts-default" in response.text) - assert ("yellow open .ds-metrics-system.process-default" in response.text) - assert ("yellow open .ds-metrics-elastic_agent.elastic_agent-default" in response.text) - assert ("yellow open .ds-metrics-elastic_agent.fleet_server-default" in response.text) - assert ("yellow open .ds-metrics-elastic_agent.metricbeat-default" in response.text) - assert ("yellow open .ds-metrics-system.load-default" in response.text) - assert ("yellow open .ds-logs-endpoint.events.network-default" in response.text) - assert ("yellow open .ds-metrics-fleet_server.agent_status-default" in response.text) - assert ("green open metrics-endpoint.metadata_current_default" in response.text) - assert ("yellow open .ds-logs-elastic_agent.metricbeat-default" in response.text) - assert ("green open 
.kibana-observability-ai-assistant-conversations" in response.text) - assert ("yellow open .ds-logs-elastic_agent-default" in response.text) - assert ("yellow open .ds-metrics-system.fsstat-default" in response.text) - assert ("yellow open .ds-metrics-elastic_agent.filebeat-default" in response.text) - assert ("green open .internal.alerts-observability.threshold.alerts-default" in response.text) - assert ("yellow open .ds-logs-system.auth-default" in response.text) - assert ("yellow open .ds-metrics-system.diskio-default" in response.text) - assert ("yellow open .ds-metrics-system.filesystem-default" in response.text) - assert ("green open .kibana-observability-ai-assistant-kb" in response.text) - assert ("yellow open .ds-metrics-system.socket_summary-default" in response.text) - assert ("yellow open .ds-metrics-endpoint.metrics-default" in response.text) + assert ("open .internal.alerts-observability.logs.alerts" in response.text) + assert ("open .internal.alerts-observability.uptime.alerts-default" in response.text) + assert ("open .internal.alerts-ml.anomaly-detection.alerts-default" in response.text) + assert ("open .internal.alerts-observability.slo.alerts-default" in response.text) + assert ("open .internal.alerts-observability.apm.alerts-default" in response.text) + assert ("open .internal.alerts-observability.metrics.alerts-default" in response.text) + assert ("open wazuh-alerts-4.x" in response.text) + assert ("open .internal.alerts-observability.threshold.alerts-default" in response.text) + assert ("open .internal.alerts-security.alerts-default" in response.text) + assert ("open .internal.alerts-stack.alerts-default" in response.text) + assert ("open .kibana-observability-ai-assistant-conversations" in response.text) + assert ("open .kibana-observability-ai-assistant-kb" in response.text) + + +# assert ("open .ds-metrics-fleet_server.agent_versions-default" in response.text) +# assert ("open .ds-logs-endpoint.events.process-default" in response.text) +# assert ("open .ds-metrics-system.network-default" in response.text) +# assert ("open .ds-logs-elastic_agent.endpoint_security-default" in response.text) +# assert ("open .ds-metrics-system.process.summary-default" in response.text) +# assert ("open .ds-logs-elastic_agent.filebeat-default" in response.text) +# assert ("open .ds-logs-endpoint.events.file-default" in response.text) +# assert ("open .ds-metrics-endpoint.metadata-default" in response.text) +# assert ("open .ds-logs-system.syslog-default" in response.text) +# assert ("open .ds-logs-elastic_agent.fleet_server-default" in response.text) +# assert ("open .ds-metrics-system.uptime-default" in response.text) +# assert ("open .ds-metrics-system.memory-default" in response.text) +# assert ("open .ds-metrics-endpoint.policy-default" in response.text) +# assert ("open .ds-metrics-system.cpu-default" in response.text) +# assert ("open .ds-metrics-system.process-default" in response.text) +# assert ("open .ds-metrics-elastic_agent.elastic_agent-default" in response.text) +# assert ("open .ds-metrics-elastic_agent.fleet_server-default" in response.text) +# assert ("open .ds-metrics-elastic_agent.metricbeat-default" in response.text) +# assert ("open .ds-metrics-system.load-default" in response.text) +# assert ("open .ds-logs-endpoint.events.network-default" in response.text) +# assert ("open .ds-metrics-fleet_server.agent_status-default" in response.text) +# assert ("open metrics-endpoint.metadata_current_default" in response.text) +# assert ("open 
.ds-logs-elastic_agent.metricbeat-default" in response.text) +# assert ("open .ds-logs-elastic_agent-default" in response.text) +# assert ("open .ds-metrics-system.fsstat-default" in response.text) +# assert ("open .ds-metrics-elastic_agent.filebeat-default" in response.text) +# assert ("open .ds-logs-system.auth-default" in response.text) +# assert ("open .ds-metrics-system.diskio-default" in response.text) +# assert ("open .ds-metrics-system.filesystem-default" in response.text) +# assert ("open .ds-metrics-system.socket_summary-default" in response.text) +# assert ("open .ds-metrics-endpoint.metrics-default" in response.text) diff --git a/testing/tests/selenium_tests/cluster/conftest.py b/testing/tests/selenium_tests/cluster/conftest.py index aa8b515c..542be835 100644 --- a/testing/tests/selenium_tests/cluster/conftest.py +++ b/testing/tests/selenium_tests/cluster/conftest.py @@ -15,7 +15,7 @@ def kibana_host(): @pytest.fixture(scope="session") def kibana_port(): - return int(os.getenv("KIBANA_PORT", 443)) + return int(os.getenv("KIBANA_PORT", 5601)) @pytest.fixture(scope="session") def kibana_user(): diff --git a/testing/tests/selenium_tests/linux_only/conftest.py b/testing/tests/selenium_tests/linux_only/conftest.py index 792722c6..9dd12a46 100644 --- a/testing/tests/selenium_tests/linux_only/conftest.py +++ b/testing/tests/selenium_tests/linux_only/conftest.py @@ -15,7 +15,7 @@ def kibana_host(): @pytest.fixture(scope="session") def kibana_port(): - return int(os.getenv("KIBANA_PORT", 443)) + return int(os.getenv("KIBANA_PORT", 5601)) @pytest.fixture(scope="session") def kibana_user(): diff --git a/testing/tests/selenium_tests/linux_only/test_basic_loading.py b/testing/tests/selenium_tests/linux_only/test_basic_loading.py index b490a209..71ecffa0 100644 --- a/testing/tests/selenium_tests/linux_only/test_basic_loading.py +++ b/testing/tests/selenium_tests/linux_only/test_basic_loading.py @@ -17,7 +17,7 @@ def setup_login(self, driver, login): # driver.quit() # Clean up the browser (driver) here - @pytest.mark.skip(reason="This test isn't working for 2.0 yet") + # @pytest.mark.skip(reason="This test isn't working for 2.0 yet") def test_title(self, setup_login, kibana_url, timeout): driver = setup_login driver.get(f"{kibana_url}/app/dashboards") diff --git a/testing/v2/installers/azure/build_azure_linux_network.py b/testing/v2/installers/azure/build_azure_linux_network.py index bf2a1cb9..bd0a0068 100755 --- a/testing/v2/installers/azure/build_azure_linux_network.py +++ b/testing/v2/installers/azure/build_azure_linux_network.py @@ -639,23 +639,23 @@ def main( "--ports", type=int, nargs="+", - default=[22, 443], - help="Ports to open. Default: [22, 443]", + default=[22, 443, 5601, 9200], + help="Ports to open. Default: [22, 443, 5601, 9200]", ) parser.add_argument( "-pr", "--priorities", type=int, nargs="+", - default=[1001, 1002], - help="Priorities for the ports. Default: [1001, 1002]", + default=[1001, 1002, 1003, 1004], + help="Priorities for the ports. Default: [1001, 1002, 1003, 1004]", ) parser.add_argument( "-pt", "--protocols", nargs="+", - default=["Tcp", "Tcp"], - help="Protocols for the ports. Default: ['Tcp']", + default=["Tcp", "Tcp", "Tcp", "Tcp"], + help="Protocols for the ports. 
Default: ['Tcp', 'Tcp', 'Tcp', 'Tcp']", ) parser.add_argument( "-vs", From df108a13f76be8c4dcefd1cac3bfe663740b4a59 Mon Sep 17 00:00:00 2001 From: cbaxley Date: Fri, 11 Oct 2024 06:02:03 -0400 Subject: [PATCH 04/48] Run the cluster run workflow to debug the tests --- .github/workflows/cluster.yml | 32 ++++++++++++++++---------------- .github/workflows/linux_only.yml | 32 ++++++++++++++++---------------- 2 files changed, 32 insertions(+), 32 deletions(-) diff --git a/.github/workflows/cluster.yml b/.github/workflows/cluster.yml index 2ea477c4..c3fca139 100644 --- a/.github/workflows/cluster.yml +++ b/.github/workflows/cluster.yml @@ -2,9 +2,9 @@ name: Cluster Run - Minimega on: workflow_dispatch: - # pull_request: - # branches: - # - '*' + pull_request: + branches: + - '*' jobs: build-and-test-cluster: @@ -292,19 +292,19 @@ jobs: pytest -v selenium_tests/' " - - name: Cleanup Azure resources - if: always() - env: - AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} - AZURE_SECRET: ${{ secrets.AZURE_SECRET }} - AZURE_TENANT: ${{ secrets.AZURE_TENANT }} - AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }} - run: | - cd testing/v2/development - docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c " - az login --service-principal -u $AZURE_CLIENT_ID -p $AZURE_SECRET --tenant $AZURE_TENANT - az group delete --name pipe-${{ env.UNIQUE_ID }} --yes --no-wait - " + # - name: Cleanup Azure resources + # if: always() + # env: + # AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} + # AZURE_SECRET: ${{ secrets.AZURE_SECRET }} + # AZURE_TENANT: ${{ secrets.AZURE_TENANT }} + # AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + # run: | + # cd testing/v2/development + # docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c " + # az login --service-principal -u $AZURE_CLIENT_ID -p $AZURE_SECRET --tenant $AZURE_TENANT + # az group delete --name pipe-${{ env.UNIQUE_ID }} --yes --no-wait + # " - name: Stop and remove containers if: always() diff --git a/.github/workflows/linux_only.yml b/.github/workflows/linux_only.yml index 637c535e..34652b48 100644 --- a/.github/workflows/linux_only.yml +++ b/.github/workflows/linux_only.yml @@ -2,9 +2,9 @@ name: Linux Only on: workflow_dispatch: - pull_request: - branches: - - '*' + # pull_request: + # branches: + # - '*' jobs: build-and-test-linux-only: @@ -127,19 +127,19 @@ jobs: pytest -v api_tests/linux_only/ selenium_tests/linux_only/' " - # - name: Cleanup Azure resources - # if: always() - # env: - # AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} - # AZURE_SECRET: ${{ secrets.AZURE_SECRET }} - # AZURE_TENANT: ${{ secrets.AZURE_TENANT }} - # AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }} - # run: | - # cd testing/v2/development - # docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c " - # az login --service-principal -u $AZURE_CLIENT_ID -p $AZURE_SECRET --tenant $AZURE_TENANT - # az group delete --name pipe-${{ env.UNIQUE_ID }} --yes --no-wait - # " + - name: Cleanup Azure resources + if: always() + env: + AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} + AZURE_SECRET: ${{ secrets.AZURE_SECRET }} + AZURE_TENANT: ${{ secrets.AZURE_TENANT }} + AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + run: | + cd testing/v2/development + docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c " + az login --service-principal -u $AZURE_CLIENT_ID -p $AZURE_SECRET --tenant $AZURE_TENANT + az group delete --name pipe-${{ env.UNIQUE_ID }} --yes --no-wait + " - name: Stop and remove containers if: 
always() From 0e3b95d6f3467eecd2c82d486c3456a4a0b7da04 Mon Sep 17 00:00:00 2001 From: cbaxley Date: Fri, 11 Oct 2024 07:38:45 -0400 Subject: [PATCH 05/48] Run ansible playbook to set fleet --- .github/workflows/cluster.yml | 26 +-- scripts/set_fleet.yml | 179 ++++++++++++++++++++ testing/v2/installers/install_v2/install.sh | 4 +- 3 files changed, 194 insertions(+), 15 deletions(-) create mode 100644 scripts/set_fleet.yml diff --git a/.github/workflows/cluster.yml b/.github/workflows/cluster.yml index c3fca139..4aa9aafc 100644 --- a/.github/workflows/cluster.yml +++ b/.github/workflows/cluster.yml @@ -292,19 +292,19 @@ jobs: pytest -v selenium_tests/' " - # - name: Cleanup Azure resources - # if: always() - # env: - # AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} - # AZURE_SECRET: ${{ secrets.AZURE_SECRET }} - # AZURE_TENANT: ${{ secrets.AZURE_TENANT }} - # AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }} - # run: | - # cd testing/v2/development - # docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c " - # az login --service-principal -u $AZURE_CLIENT_ID -p $AZURE_SECRET --tenant $AZURE_TENANT - # az group delete --name pipe-${{ env.UNIQUE_ID }} --yes --no-wait - # " + - name: Cleanup Azure resources + if: always() + env: + AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} + AZURE_SECRET: ${{ secrets.AZURE_SECRET }} + AZURE_TENANT: ${{ secrets.AZURE_TENANT }} + AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + run: | + cd testing/v2/development + docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c " + az login --service-principal -u $AZURE_CLIENT_ID -p $AZURE_SECRET --tenant $AZURE_TENANT + az group delete --name pipe-${{ env.UNIQUE_ID }} --yes --no-wait + " - name: Stop and remove containers if: always() diff --git a/scripts/set_fleet.yml b/scripts/set_fleet.yml new file mode 100644 index 00000000..46fd1bd2 --- /dev/null +++ b/scripts/set_fleet.yml @@ -0,0 +1,179 @@ +--- +- name: Set up Fleet + hosts: localhost + become: yes + gather_facts: no + + vars: + headers: + kbn-version: "8.12.2" + kbn-xsrf: "kibana" + Content-Type: "application/json" + + tasks: + - name: Source environment file and extract secrets + shell: | + source /opt/lme/lme-environment.env + set -a + . 
{{ playbook_dir }}/extract_secrets.sh -q + args: + executable: /bin/bash + changed_when: false + + - name: Set variables from environment + set_fact: + elastic_password: "{{ lookup('env', 'elastic') }}" + local_kbn_url: "{{ lookup('env', 'LOCAL_KBN_URL') }}" + ipvar: "{{ lookup('env', 'IPVAR') }}" + fleet_port: "{{ lookup('env', 'FLEET_PORT') }}" + no_log: true + + - name: Wait for Fleet API to be ready + uri: + url: "{{ local_kbn_url }}/api/fleet/settings" + method: GET + user: elastic + password: "{{ elastic_password }}" + validate_certs: no + headers: "{{ headers }}" + status_code: 200 + register: fleet_status + until: fleet_status.status == 200 + retries: 60 + delay: 10 + no_log: true + + - name: Get CA fingerprint + command: > + /nix/var/nix/profiles/default/bin/podman exec -w /usr/share/elasticsearch/config/certs/ca + lme-elasticsearch cat ca.crt | openssl x509 -nout -fingerprint -sha256 | cut -d "=" -f 2 | tr -d : | head -n1 + register: ca_fingerprint + changed_when: false + + - name: Set Fleet server hosts + uri: + url: "{{ local_kbn_url }}/api/fleet/settings" + method: PUT + user: elastic + password: "{{ elastic_password }}" + validate_certs: no + headers: "{{ headers }}" + body_format: json + body: + fleet_server_hosts: ["https://{{ ipvar }}:{{ fleet_port }}"] + register: fleet_server_hosts_result + no_log: true + + - name: Set Fleet default output hosts + uri: + url: "{{ local_kbn_url }}/api/fleet/outputs/fleet-default-output" + method: PUT + user: elastic + password: "{{ elastic_password }}" + validate_certs: no + headers: "{{ headers }}" + body_format: json + body: + hosts: ["https://{{ ipvar }}:9200"] + register: fleet_output_hosts_result + no_log: true + + - name: Set Fleet default output CA trusted fingerprint + uri: + url: "{{ local_kbn_url }}/api/fleet/outputs/fleet-default-output" + method: PUT + user: elastic + password: "{{ elastic_password }}" + validate_certs: no + headers: "{{ headers }}" + body_format: json + body: + ca_trusted_fingerprint: "{{ ca_fingerprint.stdout }}" + register: fleet_output_fingerprint_result + no_log: true + + - name: Set Fleet default output SSL verification mode + uri: + url: "{{ local_kbn_url }}/api/fleet/outputs/fleet-default-output" + method: PUT + user: elastic + password: "{{ elastic_password }}" + validate_certs: no + headers: "{{ headers }}" + body_format: json + body: + config_yaml: "ssl.verification_mode: certificate" + register: fleet_output_ssl_result + no_log: true + + - name: Create Endpoint Policy + uri: + url: "{{ local_kbn_url }}/api/fleet/agent_policies?sys_monitoring=true" + method: POST + user: elastic + password: "{{ elastic_password }}" + validate_certs: no + headers: "{{ headers }}" + body_format: json + body: + name: "Endpoint Policy" + description: "" + namespace: "default" + monitoring_enabled: ["logs", "metrics"] + inactivity_timeout: 1209600 + register: endpoint_policy_result + no_log: true + + - name: Get Endpoint package version + uri: + url: "{{ local_kbn_url }}/api/fleet/epm/packages/endpoint" + method: GET + user: elastic + password: "{{ elastic_password }}" + validate_certs: no + headers: "{{ headers }}" + register: endpoint_package_result + no_log: true + + - name: Create Elastic Defend package policy + uri: + url: "{{ local_kbn_url }}/api/fleet/package_policies" + method: POST + user: elastic + password: "{{ elastic_password }}" + validate_certs: no + headers: "{{ headers }}" + body_format: json + body: + name: "Elastic Defend" + description: "" + namespace: "default" + policy_id: "{{ 
endpoint_policy_result.json.item.id }}" + enabled: true + inputs: + - enabled: true + streams: [] + type: "ENDPOINT_INTEGRATION_CONFIG" + config: + _config: + value: + type: "endpoint" + endpointConfig: + preset: "EDRComplete" + package: + name: "endpoint" + title: "Elastic Defend" + version: "{{ endpoint_package_result.json.item.version }}" + register: elastic_defend_policy_result + no_log: true + + - name: Display results + debug: + var: "{{ item }}" + loop: + - fleet_server_hosts_result + - fleet_output_hosts_result + - fleet_output_fingerprint_result + - fleet_output_ssl_result + - endpoint_policy_result + - elastic_defend_policy_result \ No newline at end of file diff --git a/testing/v2/installers/install_v2/install.sh b/testing/v2/installers/install_v2/install.sh index e6643c2e..316b8414 100755 --- a/testing/v2/installers/install_v2/install.sh +++ b/testing/v2/installers/install_v2/install.sh @@ -95,8 +95,8 @@ echo "Running check-fleet script" ssh -o StrictHostKeyChecking=no $user@$hostname "sudo -E bash -c 'source /opt/lme/lme-environment.env && su $user -c \". ~/.bashrc && cd ~/LME && ./testing/v2/installers/lib/check_fleet.sh\"'" echo "Running set-fleet script" -#ssh -o StrictHostKeyChecking=no $user@$hostname "sudo -E bash -c 'source /opt/lme/lme-environment.env && su $user -c \". ~/.bashrc && cd ~/LME && ./scripts/set-fleet.sh\"'" -ssh -o StrictHostKeyChecking=no $user@$hostname "sudo -E bash -c 'cd ~/LME && ./scripts/set-fleet.sh'" +#ssh -o StrictHostKeyChecking=no $user@$hostname "sudo -E bash -c 'cd ~/LME && ./scripts/set-fleet.sh'" +ssh -o StrictHostKeyChecking=no $user@$hostname "sudo -E bash -c 'cd ~/LME/scripts && ansible-playbook set_fleet.yml'" echo "Installation and configuration completed successfully." From f9e986c20ef037a72455db86a614d38f787af760 Mon Sep 17 00:00:00 2001 From: cbaxley Date: Fri, 11 Oct 2024 08:14:10 -0400 Subject: [PATCH 06/48] Log secrets in set_fleet.yml --- scripts/set_fleet.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/set_fleet.yml b/scripts/set_fleet.yml index 46fd1bd2..966000ca 100644 --- a/scripts/set_fleet.yml +++ b/scripts/set_fleet.yml @@ -26,7 +26,7 @@ local_kbn_url: "{{ lookup('env', 'LOCAL_KBN_URL') }}" ipvar: "{{ lookup('env', 'IPVAR') }}" fleet_port: "{{ lookup('env', 'FLEET_PORT') }}" - no_log: true + no_log: false - name: Wait for Fleet API to be ready uri: @@ -62,7 +62,7 @@ body: fleet_server_hosts: ["https://{{ ipvar }}:{{ fleet_port }}"] register: fleet_server_hosts_result - no_log: true + no_log: false - name: Set Fleet default output hosts uri: @@ -76,7 +76,7 @@ body: hosts: ["https://{{ ipvar }}:9200"] register: fleet_output_hosts_result - no_log: true + no_log: false - name: Set Fleet default output CA trusted fingerprint uri: From adf85571b5e1fd0a5e88a50ad6ef724924de0ce7 Mon Sep 17 00:00:00 2001 From: cbaxley Date: Fri, 11 Oct 2024 08:37:44 -0400 Subject: [PATCH 07/48] Debug setting the environment variables in set_fleet.yml --- scripts/set_fleet.yml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/scripts/set_fleet.yml b/scripts/set_fleet.yml index 966000ca..c127f7c2 100644 --- a/scripts/set_fleet.yml +++ b/scripts/set_fleet.yml @@ -28,6 +28,14 @@ fleet_port: "{{ lookup('env', 'FLEET_PORT') }}" no_log: false + - name: Debug - Display set variables + debug: + msg: + - "local_kbn_url: {{ local_kbn_url }}" + - "ipvar: {{ ipvar }}" + - "fleet_port: {{ fleet_port }}" + - "elastic_password is set: {{ elastic_password | length > 0 }}" + - name: Wait for Fleet API to 
be ready uri: url: "{{ local_kbn_url }}/api/fleet/settings" @@ -41,7 +49,7 @@ until: fleet_status.status == 200 retries: 60 delay: 10 - no_log: true + no_log: false - name: Get CA fingerprint command: > From 084b027d409a3bbb91e0138f49d044e610149aef Mon Sep 17 00:00:00 2001 From: cbaxley Date: Fri, 11 Oct 2024 09:02:42 -0400 Subject: [PATCH 08/48] Set the debug mode in set_fleet.yml --- scripts/set_fleet.yml | 20 ++++++++++++-------- testing/v2/installers/install_v2/install.sh | 2 +- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/scripts/set_fleet.yml b/scripts/set_fleet.yml index c127f7c2..37849838 100644 --- a/scripts/set_fleet.yml +++ b/scripts/set_fleet.yml @@ -11,30 +11,34 @@ Content-Type: "application/json" tasks: - - name: Source environment file and extract secrets + - name: Source environment file, extract secrets, and set variables shell: | source /opt/lme/lme-environment.env set -a . {{ playbook_dir }}/extract_secrets.sh -q + echo "{\"elastic_password\": \"$elastic\", \"local_kbn_url\": \"$LOCAL_KBN_URL\", \"ipvar\": \"$IPVAR\", \"fleet_port\": \"$FLEET_PORT\"}" args: executable: /bin/bash + register: env_vars changed_when: false + no_log: true - name: Set variables from environment set_fact: - elastic_password: "{{ lookup('env', 'elastic') }}" - local_kbn_url: "{{ lookup('env', 'LOCAL_KBN_URL') }}" - ipvar: "{{ lookup('env', 'IPVAR') }}" - fleet_port: "{{ lookup('env', 'FLEET_PORT') }}" - no_log: false + elastic_password: "{{ (env_vars.stdout | from_json).elastic_password }}" + local_kbn_url: "{{ (env_vars.stdout | from_json).local_kbn_url }}" + ipvar: "{{ (env_vars.stdout | from_json).ipvar }}" + fleet_port: "{{ (env_vars.stdout | from_json).fleet_port }}" + no_log: true - - name: Debug - Display set variables + - name: Debug - Display set variables (comment out in production) debug: msg: - "local_kbn_url: {{ local_kbn_url }}" - "ipvar: {{ ipvar }}" - "fleet_port: {{ fleet_port }}" - "elastic_password is set: {{ elastic_password | length > 0 }}" + when: debug_mode | default(false) | bool - name: Wait for Fleet API to be ready uri: @@ -49,7 +53,7 @@ until: fleet_status.status == 200 retries: 60 delay: 10 - no_log: false + no_log: true - name: Get CA fingerprint command: > diff --git a/testing/v2/installers/install_v2/install.sh b/testing/v2/installers/install_v2/install.sh index 316b8414..4eac2ffa 100755 --- a/testing/v2/installers/install_v2/install.sh +++ b/testing/v2/installers/install_v2/install.sh @@ -96,7 +96,7 @@ ssh -o StrictHostKeyChecking=no $user@$hostname "sudo -E bash -c 'source /opt/lm echo "Running set-fleet script" #ssh -o StrictHostKeyChecking=no $user@$hostname "sudo -E bash -c 'cd ~/LME && ./scripts/set-fleet.sh'" -ssh -o StrictHostKeyChecking=no $user@$hostname "sudo -E bash -c 'cd ~/LME/scripts && ansible-playbook set_fleet.yml'" +ssh -o StrictHostKeyChecking=no $user@$hostname "sudo -E bash -c 'cd ~/LME/scripts && ansible-playbook set_fleet.yml -e \"debug_mode=true\"'" echo "Installation and configuration completed successfully." 
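
Note on the secret-handling pattern the patch above settles on: `lookup('env', ...)` reads the environment of the Ansible controller process itself, so variables sourced inside a task's subshell (as `/opt/lme/lme-environment.env` is here) are never visible to it. That is why the playbook switches to a single shell task that sources the file, prints one JSON object, and parses it once with `from_json` — and emitting one object also lets a single `no_log: true` keep the secret out of the task output. A minimal sketch of that pattern, for illustration only (not part of the series); the file path and the `IPVAR`/`FLEET_PORT` names are taken from the patches above:

    - name: Demonstrate sourcing an env file into Ansible facts
      hosts: localhost
      gather_facts: no
      tasks:
        - name: Source the env file and emit selected values as one JSON object
          shell: |
            set -a
            source /opt/lme/lme-environment.env
            echo "{\"ipvar\": \"$IPVAR\", \"fleet_port\": \"$FLEET_PORT\"}"
          args:
            executable: /bin/bash
          register: env_vars
          changed_when: false
          no_log: true   # secrets never appear in task output

        - name: Parse the JSON once into facts
          set_fact:
            ipvar: "{{ (env_vars.stdout | from_json).ipvar }}"
            fleet_port: "{{ (env_vars.stdout | from_json).fleet_port }}"
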
From f526d9e53f062996cae14b74119b18c5e15a81d5 Mon Sep 17 00:00:00 2001 From: cbaxley Date: Fri, 11 Oct 2024 09:34:39 -0400 Subject: [PATCH 09/48] Log the Fleet API call details in set_fleet.yml --- scripts/set_fleet.yml | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/scripts/set_fleet.yml b/scripts/set_fleet.yml index 37849838..11e1ee5f 100644 --- a/scripts/set_fleet.yml +++ b/scripts/set_fleet.yml @@ -49,11 +49,24 @@ validate_certs: no headers: "{{ headers }}" status_code: 200 + return_content: yes register: fleet_status until: fleet_status.status == 200 retries: 60 delay: 10 - no_log: true + no_log: false + + - name: Display Fleet API call details + debug: + msg: + - "Attempt: {{ ansible_loop.index }}" + - "URL: {{ fleet_status.url }}" + - "Status: {{ fleet_status.status }}" + - "Status Message: {{ fleet_status.msg }}" + - "Response Headers: {{ fleet_status.headers | to_nice_json }}" + - "Response Content: {{ fleet_status.content | to_nice_json }}" + when: fleet_status.changed + no_log: false - name: Get CA fingerprint command: > From a6d791cede85b4535ea9d56da6c06813bf7c1070 Mon Sep 17 00:00:00 2001 From: cbaxley Date: Fri, 11 Oct 2024 10:02:25 -0400 Subject: [PATCH 10/48] Loop through Fleet API calls in set_fleet.yml --- scripts/set_fleet.yml | 79 +++++++++++++++++++++++++++++-------------- 1 file changed, 53 insertions(+), 26 deletions(-) diff --git a/scripts/set_fleet.yml b/scripts/set_fleet.yml index 11e1ee5f..10e6bc9f 100644 --- a/scripts/set_fleet.yml +++ b/scripts/set_fleet.yml @@ -40,33 +40,60 @@ - "elastic_password is set: {{ elastic_password | length > 0 }}" when: debug_mode | default(false) | bool + - name: Initialize counter + set_fact: + attempt_counter: 0 + - name: Wait for Fleet API to be ready - uri: - url: "{{ local_kbn_url }}/api/fleet/settings" - method: GET - user: elastic - password: "{{ elastic_password }}" - validate_certs: no - headers: "{{ headers }}" - status_code: 200 - return_content: yes - register: fleet_status - until: fleet_status.status == 200 - retries: 60 - delay: 10 - no_log: false - - - name: Display Fleet API call details - debug: - msg: - - "Attempt: {{ ansible_loop.index }}" - - "URL: {{ fleet_status.url }}" - - "Status: {{ fleet_status.status }}" - - "Status Message: {{ fleet_status.msg }}" - - "Response Headers: {{ fleet_status.headers | to_nice_json }}" - - "Response Content: {{ fleet_status.content | to_nice_json }}" - when: fleet_status.changed - no_log: false + block: + - name: Attempt Fleet API call + uri: + url: "{{ local_kbn_url }}/api/fleet/settings" + method: GET + user: elastic + password: "{{ elastic_password }}" + validate_certs: no + headers: "{{ headers }}" + status_code: 200 + return_content: yes + register: fleet_status + ignore_errors: yes + + - name: Display Fleet API call details + debug: + msg: + - "Attempt: {{ attempt_counter }}" + - "URL: {{ fleet_status.url }}" + - "Status: {{ fleet_status.status | default('N/A') }}" + - "Status Message: {{ fleet_status.msg | default('N/A') }}" + - "Response Headers: {{ fleet_status.headers | default({}) | to_nice_json }}" + - "Response Content: {{ fleet_status.content | default('N/A') | to_nice_json }}" + + - name: Increment counter + set_fact: + attempt_counter: "{{ attempt_counter | int + 1 }}" + + - name: Check if max retries reached + fail: + msg: "Max retries reached. Fleet API is not ready." + when: attempt_counter | int >= 60 + + - name: Fail to force retry if not successful + fail: + msg: "Fleet API not ready, retrying..." 
+ when: fleet_status.status is not defined or fleet_status.status != 200 + + rescue: + - name: Retry after delay + wait_for: + timeout: 10 + when: attempt_counter | int < 60 + + - name: Retry Fleet API call + include_tasks: "{{ playbook_dir }}/scripts/set_fleet.yml" + when: attempt_counter | int < 60 + + until: fleet_status.status is defined and fleet_status.status == 200 - name: Get CA fingerprint command: > From e5c37d596622cae08e7fa7b3cabf0506f2ea83a3 Mon Sep 17 00:00:00 2001 From: cbaxley Date: Fri, 11 Oct 2024 10:29:28 -0400 Subject: [PATCH 11/48] Change the Fleet API call to loop through attempts in set_fleet.yml --- scripts/set_fleet.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/set_fleet.yml b/scripts/set_fleet.yml index 10e6bc9f..1a1adedf 100644 --- a/scripts/set_fleet.yml +++ b/scripts/set_fleet.yml @@ -93,7 +93,10 @@ include_tasks: "{{ playbook_dir }}/scripts/set_fleet.yml" when: attempt_counter | int < 60 - until: fleet_status.status is defined and fleet_status.status == 200 + - name: Confirm Fleet API is ready + debug: + msg: "Fleet API is ready" + when: fleet_status.status is defined and fleet_status.status == 200 - name: Get CA fingerprint command: > From 754cf6cdbf2586ed5969457098fb1d830dfb92d4 Mon Sep 17 00:00:00 2001 From: cbaxley Date: Fri, 11 Oct 2024 11:02:02 -0400 Subject: [PATCH 12/48] Try to set the Fleet API with retries in set_fleet.yml --- scripts/set_fleet.yml | 34 ++++++++-------------------------- 1 file changed, 8 insertions(+), 26 deletions(-) diff --git a/scripts/set_fleet.yml b/scripts/set_fleet.yml index 1a1adedf..2337358f 100644 --- a/scripts/set_fleet.yml +++ b/scripts/set_fleet.yml @@ -9,6 +9,8 @@ kbn-version: "8.12.2" kbn-xsrf: "kibana" Content-Type: "application/json" + max_retries: 60 + delay_seconds: 10 tasks: - name: Source environment file, extract secrets, and set variables @@ -40,10 +42,6 @@ - "elastic_password is set: {{ elastic_password | length > 0 }}" when: debug_mode | default(false) | bool - - name: Initialize counter - set_fact: - attempt_counter: 0 - - name: Wait for Fleet API to be ready block: - name: Attempt Fleet API call @@ -57,42 +55,26 @@ status_code: 200 return_content: yes register: fleet_status + retries: "{{ max_retries }}" + delay: "{{ delay_seconds }}" + until: fleet_status.status == 200 ignore_errors: yes - name: Display Fleet API call details debug: msg: - - "Attempt: {{ attempt_counter }}" + - "Attempt: {{ fleet_status.attempts }}" - "URL: {{ fleet_status.url }}" - "Status: {{ fleet_status.status | default('N/A') }}" - "Status Message: {{ fleet_status.msg | default('N/A') }}" - "Response Headers: {{ fleet_status.headers | default({}) | to_nice_json }}" - "Response Content: {{ fleet_status.content | default('N/A') | to_nice_json }}" - - name: Increment counter - set_fact: - attempt_counter: "{{ attempt_counter | int + 1 }}" - - - name: Check if max retries reached + - name: Fail if Fleet API is not ready fail: - msg: "Max retries reached. Fleet API is not ready." - when: attempt_counter | int >= 60 - - - name: Fail to force retry if not successful - fail: - msg: "Fleet API not ready, retrying..." + msg: "Fleet API is not ready after {{ max_retries }} attempts." 
when: fleet_status.status is not defined or fleet_status.status != 200 - rescue: - - name: Retry after delay - wait_for: - timeout: 10 - when: attempt_counter | int < 60 - - - name: Retry Fleet API call - include_tasks: "{{ playbook_dir }}/scripts/set_fleet.yml" - when: attempt_counter | int < 60 - - name: Confirm Fleet API is ready debug: msg: "Fleet API is ready" From a1965639cd2a84ac43c807c7ed5768967ecfe0d2 Mon Sep 17 00:00:00 2001 From: cbaxley Date: Fri, 11 Oct 2024 11:26:36 -0400 Subject: [PATCH 13/48] Attempt to output the Fleet API call details in set_fleet.yml --- scripts/set_fleet.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/set_fleet.yml b/scripts/set_fleet.yml index 2337358f..4c6bc377 100644 --- a/scripts/set_fleet.yml +++ b/scripts/set_fleet.yml @@ -69,6 +69,10 @@ - "Status Message: {{ fleet_status.msg | default('N/A') }}" - "Response Headers: {{ fleet_status.headers | default({}) | to_nice_json }}" - "Response Content: {{ fleet_status.content | default('N/A') | to_nice_json }}" + when: fleet_status is changed + loop: "{{ range(1, fleet_status.attempts|int + 1)|list }}" + loop_control: + label: "Attempt {{ item }}" - name: Fail if Fleet API is not ready fail: From 22ad2c17eab1dd327671aee4cd37ae2654656913 Mon Sep 17 00:00:00 2001 From: cbaxley Date: Fri, 11 Oct 2024 11:55:08 -0400 Subject: [PATCH 14/48] A new way to handle the Fleet API call in set_fleet.yml --- scripts/set_fleet.yml | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/scripts/set_fleet.yml b/scripts/set_fleet.yml index 4c6bc377..4974a1c8 100644 --- a/scripts/set_fleet.yml +++ b/scripts/set_fleet.yml @@ -44,6 +44,10 @@ - name: Wait for Fleet API to be ready block: + - name: Initialize attempt counter + set_fact: + attempt_counter: 1 + - name: Attempt Fleet API call uri: url: "{{ local_kbn_url }}/api/fleet/settings" @@ -55,30 +59,41 @@ status_code: 200 return_content: yes register: fleet_status - retries: "{{ max_retries }}" - delay: "{{ delay_seconds }}" - until: fleet_status.status == 200 ignore_errors: yes - name: Display Fleet API call details debug: msg: - - "Attempt: {{ fleet_status.attempts }}" + - "Attempt: {{ attempt_counter }}" - "URL: {{ fleet_status.url }}" - "Status: {{ fleet_status.status | default('N/A') }}" - "Status Message: {{ fleet_status.msg | default('N/A') }}" - "Response Headers: {{ fleet_status.headers | default({}) | to_nice_json }}" - "Response Content: {{ fleet_status.content | default('N/A') | to_nice_json }}" - when: fleet_status is changed - loop: "{{ range(1, fleet_status.attempts|int + 1)|list }}" - loop_control: - label: "Attempt {{ item }}" - - name: Fail if Fleet API is not ready + - name: Increment attempt counter + set_fact: + attempt_counter: "{{ attempt_counter | int + 1 }}" + + - name: Check if max retries reached fail: - msg: "Fleet API is not ready after {{ max_retries }} attempts." + msg: "Max retries reached. Fleet API is not ready." 
+ when: attempt_counter | int > max_retries + + - name: Wait before next attempt + wait_for: + timeout: "{{ delay_seconds }}" when: fleet_status.status is not defined or fleet_status.status != 200 + - name: Retry Fleet API call + include_tasks: "{{ lookup('env', 'PWD') }}/set_fleet.yml" + when: fleet_status.status is not defined or fleet_status.status != 200 + + rescue: + - name: Handle any unexpected errors + debug: + msg: "An unexpected error occurred: {{ ansible_failed_result }}" + - name: Confirm Fleet API is ready debug: msg: "Fleet API is ready" From de554ec4ac14050af72b10d2712e998c1a8607d7 Mon Sep 17 00:00:00 2001 From: cbaxley Date: Fri, 11 Oct 2024 12:59:13 -0400 Subject: [PATCH 15/48] Export the check_fleet_api.yml file in set_fleet.yml --- scripts/check_fleet_api.yml | 36 +++++++++++++++++++++++ scripts/set_fleet.yml | 57 ++++++++----------------------------- 2 files changed, 48 insertions(+), 45 deletions(-) create mode 100644 scripts/check_fleet_api.yml diff --git a/scripts/check_fleet_api.yml b/scripts/check_fleet_api.yml new file mode 100644 index 00000000..9da93039 --- /dev/null +++ b/scripts/check_fleet_api.yml @@ -0,0 +1,36 @@ +- name: Attempt Fleet API call + uri: + url: "{{ local_kbn_url }}/api/fleet/settings" + method: GET + user: elastic + password: "{{ elastic_password }}" + validate_certs: no + headers: "{{ headers }}" + status_code: 200 + return_content: yes + register: fleet_status + ignore_errors: yes + +- name: Display Fleet API call details + debug: + msg: + - "Attempt: {{ attempt_counter }}" + - "URL: {{ fleet_status.url }}" + - "Status: {{ fleet_status.status | default('N/A') }}" + - "Status Message: {{ fleet_status.msg | default('N/A') }}" + - "Response Headers: {{ fleet_status.headers | default({}) | to_nice_json }}" + - "Response Content: {{ fleet_status.content | default('N/A') | to_nice_json }}" + +- name: Increment attempt counter + set_fact: + attempt_counter: "{{ attempt_counter | int + 1 }}" + +- name: Check if max retries reached + fail: + msg: "Max retries reached. Fleet API is not ready." 
+ when: attempt_counter | int > max_retries + +- name: Wait before next attempt + wait_for: + timeout: "{{ delay_seconds }}" + when: fleet_status.status is not defined or fleet_status.status != 200 \ No newline at end of file diff --git a/scripts/set_fleet.yml b/scripts/set_fleet.yml index 4974a1c8..9ef66171 100644 --- a/scripts/set_fleet.yml +++ b/scripts/set_fleet.yml @@ -42,57 +42,24 @@ - "elastic_password is set: {{ elastic_password | length > 0 }}" when: debug_mode | default(false) | bool + - name: Initialize attempt counter + set_fact: + attempt_counter: 1 + - name: Wait for Fleet API to be ready block: - - name: Initialize attempt counter - set_fact: - attempt_counter: 1 - - - name: Attempt Fleet API call - uri: - url: "{{ local_kbn_url }}/api/fleet/settings" - method: GET - user: elastic - password: "{{ elastic_password }}" - validate_certs: no - headers: "{{ headers }}" - status_code: 200 - return_content: yes - register: fleet_status - ignore_errors: yes - - - name: Display Fleet API call details - debug: - msg: - - "Attempt: {{ attempt_counter }}" - - "URL: {{ fleet_status.url }}" - - "Status: {{ fleet_status.status | default('N/A') }}" - - "Status Message: {{ fleet_status.msg | default('N/A') }}" - - "Response Headers: {{ fleet_status.headers | default({}) | to_nice_json }}" - - "Response Content: {{ fleet_status.content | default('N/A') | to_nice_json }}" - - - name: Increment attempt counter - set_fact: - attempt_counter: "{{ attempt_counter | int + 1 }}" - - - name: Check if max retries reached - fail: - msg: "Max retries reached. Fleet API is not ready." - when: attempt_counter | int > max_retries - - - name: Wait before next attempt - wait_for: - timeout: "{{ delay_seconds }}" + - name: Include Fleet API check tasks + include_tasks: check_fleet_api.yml + - name: Retry Fleet API check + include_tasks: check_fleet_api.yml when: fleet_status.status is not defined or fleet_status.status != 200 - - - name: Retry Fleet API call - include_tasks: "{{ lookup('env', 'PWD') }}/set_fleet.yml" - when: fleet_status.status is not defined or fleet_status.status != 200 - + loop: "{{ range(2, max_retries + 1)|list }}" + loop_control: + loop_var: attempt_counter rescue: - name: Handle any unexpected errors debug: - msg: "An unexpected error occurred: {{ ansible_failed_result }}" + msg: "An unexpected error occurred during Fleet API check." 
- name: Confirm Fleet API is ready debug: From 832920d2be72da39c25872e980fee20535192ade Mon Sep 17 00:00:00 2001 From: cbaxley Date: Fri, 11 Oct 2024 14:35:15 -0400 Subject: [PATCH 16/48] Make sure the password is in the check_fleet_api.yml file --- scripts/check_fleet_api.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/scripts/check_fleet_api.yml b/scripts/check_fleet_api.yml index 9da93039..2ca595b4 100644 --- a/scripts/check_fleet_api.yml +++ b/scripts/check_fleet_api.yml @@ -1,3 +1,12 @@ +- name: Debug credentials + debug: + msg: + - "User: elastic" + - "Password is set: {{ elastic_password | default('') | length > 0 }}" + - "Password is set: {{ elastic_password }}" + - "URL: {{ local_kbn_url }}" + no_log: true + - name: Attempt Fleet API call uri: url: "{{ local_kbn_url }}/api/fleet/settings" @@ -8,6 +17,7 @@ headers: "{{ headers }}" status_code: 200 return_content: yes + force_basic_auth: yes register: fleet_status ignore_errors: yes From 0089796539ccf825e880210bf3740122d9d8d571 Mon Sep 17 00:00:00 2001 From: cbaxley Date: Fri, 11 Oct 2024 15:14:25 -0400 Subject: [PATCH 17/48] Exit the loop if the Fleet API call succeeds in check_fleet_api.yml --- scripts/check_fleet_api.yml | 15 +++++++-------- scripts/set_fleet.yml | 11 +++++------ 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/scripts/check_fleet_api.yml b/scripts/check_fleet_api.yml index 2ca595b4..35339631 100644 --- a/scripts/check_fleet_api.yml +++ b/scripts/check_fleet_api.yml @@ -3,7 +3,6 @@ msg: - "User: elastic" - "Password is set: {{ elastic_password | default('') | length > 0 }}" - - "Password is set: {{ elastic_password }}" - "URL: {{ local_kbn_url }}" no_log: true @@ -31,16 +30,16 @@ - "Response Headers: {{ fleet_status.headers | default({}) | to_nice_json }}" - "Response Content: {{ fleet_status.content | default('N/A') | to_nice_json }}" -- name: Increment attempt counter +- name: Set success flag if API call succeeded set_fact: - attempt_counter: "{{ attempt_counter | int + 1 }}" + api_success: true + when: fleet_status.status is defined and fleet_status.status == 200 -- name: Check if max retries reached - fail: - msg: "Max retries reached. Fleet API is not ready." 
- when: attempt_counter | int > max_retries +- name: Exit loop if API call succeeded + meta: end_play + when: api_success - name: Wait before next attempt wait_for: timeout: "{{ delay_seconds }}" - when: fleet_status.status is not defined or fleet_status.status != 200 \ No newline at end of file + when: not api_success \ No newline at end of file diff --git a/scripts/set_fleet.yml b/scripts/set_fleet.yml index 9ef66171..3554b1f6 100644 --- a/scripts/set_fleet.yml +++ b/scripts/set_fleet.yml @@ -42,20 +42,19 @@ - "elastic_password is set: {{ elastic_password | length > 0 }}" when: debug_mode | default(false) | bool - - name: Initialize attempt counter + - name: Initialize attempt counter and success flag set_fact: attempt_counter: 1 + api_success: false - name: Wait for Fleet API to be ready block: - name: Include Fleet API check tasks include_tasks: check_fleet_api.yml - - name: Retry Fleet API check - include_tasks: check_fleet_api.yml - when: fleet_status.status is not defined or fleet_status.status != 200 - loop: "{{ range(2, max_retries + 1)|list }}" + loop: "{{ range(1, max_retries + 1)|list }}" loop_control: loop_var: attempt_counter + when: not api_success rescue: - name: Handle any unexpected errors debug: @@ -64,7 +63,7 @@ - name: Confirm Fleet API is ready debug: msg: "Fleet API is ready" - when: fleet_status.status is defined and fleet_status.status == 200 + when: api_success - name: Get CA fingerprint command: > From a6630e8aeb52d58f15a80ba923f1257f7ff15f61 Mon Sep 17 00:00:00 2001 From: cbaxley Date: Tue, 15 Oct 2024 05:14:22 -0400 Subject: [PATCH 18/48] Wait a little longer for the results to be written to the index --- .github/workflows/cluster.yml | 28 ++++++++++++++-------------- scripts/check_fleet_api.yml | 4 ++-- scripts/set_fleet.yml | 5 ++--- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/.github/workflows/cluster.yml b/.github/workflows/cluster.yml index 4aa9aafc..7ca13b4b 100644 --- a/.github/workflows/cluster.yml +++ b/.github/workflows/cluster.yml @@ -251,7 +251,7 @@ jobs: env: ES_PASSWORD: ${{ env.ES_PASSWORD }} run: | - sleep 120 + sleep 240 cd testing/v2/development docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c " ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} \ @@ -292,19 +292,19 @@ jobs: pytest -v selenium_tests/' " - - name: Cleanup Azure resources - if: always() - env: - AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} - AZURE_SECRET: ${{ secrets.AZURE_SECRET }} - AZURE_TENANT: ${{ secrets.AZURE_TENANT }} - AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }} - run: | - cd testing/v2/development - docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c " - az login --service-principal -u $AZURE_CLIENT_ID -p $AZURE_SECRET --tenant $AZURE_TENANT - az group delete --name pipe-${{ env.UNIQUE_ID }} --yes --no-wait - " + # - name: Cleanup Azure resources + # if: always() + # env: + # AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} + # AZURE_SECRET: ${{ secrets.AZURE_SECRET }} + # AZURE_TENANT: ${{ secrets.AZURE_TENANT }} + # AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + # run: | + # cd testing/v2/development + # docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c " + # az login --service-principal -u $AZURE_CLIENT_ID -p $AZURE_SECRET --tenant $AZURE_TENANT + # az group delete --name pipe-${{ env.UNIQUE_ID }} --yes --no-wait + # " - name: Stop and remove containers if: always() diff --git a/scripts/check_fleet_api.yml b/scripts/check_fleet_api.yml index 35339631..165daa6e 
100644 --- a/scripts/check_fleet_api.yml +++ b/scripts/check_fleet_api.yml @@ -23,7 +23,7 @@ - name: Display Fleet API call details debug: msg: - - "Attempt: {{ attempt_counter }}" + - "Attempt: {{ current_attempt }}" - "URL: {{ fleet_status.url }}" - "Status: {{ fleet_status.status | default('N/A') }}" - "Status Message: {{ fleet_status.msg | default('N/A') }}" @@ -42,4 +42,4 @@ - name: Wait before next attempt wait_for: timeout: "{{ delay_seconds }}" - when: not api_success \ No newline at end of file + when: not api_success diff --git a/scripts/set_fleet.yml b/scripts/set_fleet.yml index 3554b1f6..5b5c5c75 100644 --- a/scripts/set_fleet.yml +++ b/scripts/set_fleet.yml @@ -44,7 +44,6 @@ - name: Initialize attempt counter and success flag set_fact: - attempt_counter: 1 api_success: false - name: Wait for Fleet API to be ready @@ -53,7 +52,7 @@ include_tasks: check_fleet_api.yml loop: "{{ range(1, max_retries + 1)|list }}" loop_control: - loop_var: attempt_counter + loop_var: current_attempt when: not api_success rescue: - name: Handle any unexpected errors @@ -198,4 +197,4 @@ - fleet_output_fingerprint_result - fleet_output_ssl_result - endpoint_policy_result - - elastic_defend_policy_result \ No newline at end of file + - elastic_defend_policy_result From 077cdfafd6caa351b7b03209e803ac546827b3e7 Mon Sep 17 00:00:00 2001 From: cbaxley Date: Tue, 15 Oct 2024 06:35:14 -0400 Subject: [PATCH 19/48] Update the cluster.yml workflow to wait a little longer for the results to be written to the index --- .github/workflows/cluster.yml | 26 +++++++++++++------------- scripts/check_fleet_api.yml | 11 +++++------ scripts/set_fleet.yml | 25 ++++++++++++++----------- 3 files changed, 32 insertions(+), 30 deletions(-) diff --git a/.github/workflows/cluster.yml b/.github/workflows/cluster.yml index 7ca13b4b..d3536cff 100644 --- a/.github/workflows/cluster.yml +++ b/.github/workflows/cluster.yml @@ -292,19 +292,19 @@ jobs: pytest -v selenium_tests/' " - # - name: Cleanup Azure resources - # if: always() - # env: - # AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} - # AZURE_SECRET: ${{ secrets.AZURE_SECRET }} - # AZURE_TENANT: ${{ secrets.AZURE_TENANT }} - # AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }} - # run: | - # cd testing/v2/development - # docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c " - # az login --service-principal -u $AZURE_CLIENT_ID -p $AZURE_SECRET --tenant $AZURE_TENANT - # az group delete --name pipe-${{ env.UNIQUE_ID }} --yes --no-wait - # " + - name: Cleanup Azure resources + if: always() + env: + AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} + AZURE_SECRET: ${{ secrets.AZURE_SECRET }} + AZURE_TENANT: ${{ secrets.AZURE_TENANT }} + AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + run: | + cd testing/v2/development + docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c " + az login --service-principal -u $AZURE_CLIENT_ID -p $AZURE_SECRET --tenant $AZURE_TENANT + az group delete --name pipe-${{ env.UNIQUE_ID }} --yes --no-wait + " - name: Stop and remove containers if: always() diff --git a/scripts/check_fleet_api.yml b/scripts/check_fleet_api.yml index 165daa6e..8fc28e9d 100644 --- a/scripts/check_fleet_api.yml +++ b/scripts/check_fleet_api.yml @@ -1,3 +1,7 @@ +- name: Wait before attempt + wait_for: + timeout: 5 + - name: Debug credentials debug: msg: @@ -32,12 +36,7 @@ - name: Set success flag if API call succeeded set_fact: - api_success: true - when: fleet_status.status is defined and fleet_status.status == 200 - -- name: Exit 
loop if API call succeeded - meta: end_play - when: api_success + api_success: "{{ fleet_status.status is defined and fleet_status.status == 200 }}" - name: Wait before next attempt wait_for: diff --git a/scripts/set_fleet.yml b/scripts/set_fleet.yml index 5b5c5c75..580d06e5 100644 --- a/scripts/set_fleet.yml +++ b/scripts/set_fleet.yml @@ -42,22 +42,25 @@ - "elastic_password is set: {{ elastic_password | length > 0 }}" when: debug_mode | default(false) | bool + - name: Wait for Kibana port to be available + wait_for: + host: "{{ ipvar }}" + port: 5601 + timeout: 300 + register: kibana_port_check + - name: Initialize attempt counter and success flag set_fact: api_success: false - name: Wait for Fleet API to be ready - block: - - name: Include Fleet API check tasks - include_tasks: check_fleet_api.yml - loop: "{{ range(1, max_retries + 1)|list }}" - loop_control: - loop_var: current_attempt - when: not api_success - rescue: - - name: Handle any unexpected errors - debug: - msg: "An unexpected error occurred during Fleet API check." + include_tasks: check_fleet_api.yml + loop: "{{ range(1, max_retries + 1)|list }}" + loop_control: + loop_var: current_attempt + until: api_success | default(false) | bool + retries: "{{ max_retries }}" + delay: "{{ delay_seconds }}" - name: Confirm Fleet API is ready debug: From ac39458565f4a554ef446df95273c795be15adb2 Mon Sep 17 00:00:00 2001 From: cbaxley Date: Tue, 15 Oct 2024 07:06:51 -0400 Subject: [PATCH 20/48] Delay in a different way --- scripts/check_fleet_api.yml | 5 ----- scripts/set_fleet.yml | 26 ++++++++++++++++++++++---- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/scripts/check_fleet_api.yml b/scripts/check_fleet_api.yml index 8fc28e9d..741aa156 100644 --- a/scripts/check_fleet_api.yml +++ b/scripts/check_fleet_api.yml @@ -37,8 +37,3 @@ - name: Set success flag if API call succeeded set_fact: api_success: "{{ fleet_status.status is defined and fleet_status.status == 200 }}" - -- name: Wait before next attempt - wait_for: - timeout: "{{ delay_seconds }}" - when: not api_success diff --git a/scripts/set_fleet.yml b/scripts/set_fleet.yml index 580d06e5..14455478 100644 --- a/scripts/set_fleet.yml +++ b/scripts/set_fleet.yml @@ -52,12 +52,30 @@ - name: Initialize attempt counter and success flag set_fact: api_success: false + current_attempt: 1 - name: Wait for Fleet API to be ready - include_tasks: check_fleet_api.yml - loop: "{{ range(1, max_retries + 1)|list }}" - loop_control: - loop_var: current_attempt + block: + - name: Include Fleet API check tasks + include_tasks: check_fleet_api.yml + + - name: Increment attempt counter + set_fact: + current_attempt: "{{ current_attempt | int + 1 }}" + + - name: Check if max retries reached + fail: + msg: "Max retries reached. Fleet API is not ready." + when: current_attempt | int > max_retries + + - name: Retry Fleet API check + wait_for: + timeout: "{{ delay_seconds }}" + when: not api_success + rescue: + - name: Handle any unexpected errors + debug: + msg: "An unexpected error occurred during Fleet API check." 
until: api_success | default(false) | bool retries: "{{ max_retries }}" delay: "{{ delay_seconds }}" From ff25324fe38243e7f2e9d7c1d56f4f79d17222cf Mon Sep 17 00:00:00 2001 From: cbaxley Date: Tue, 15 Oct 2024 07:42:17 -0400 Subject: [PATCH 21/48] Attempt a different looping method --- scripts/set_fleet.yml | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/scripts/set_fleet.yml b/scripts/set_fleet.yml index 14455478..43313ee0 100644 --- a/scripts/set_fleet.yml +++ b/scripts/set_fleet.yml @@ -66,19 +66,20 @@ - name: Check if max retries reached fail: msg: "Max retries reached. Fleet API is not ready." - when: current_attempt | int > max_retries - - - name: Retry Fleet API check - wait_for: - timeout: "{{ delay_seconds }}" - when: not api_success + when: current_attempt | int > max_retries and not api_success rescue: - name: Handle any unexpected errors debug: msg: "An unexpected error occurred during Fleet API check." - until: api_success | default(false) | bool - retries: "{{ max_retries }}" - delay: "{{ delay_seconds }}" + always: + - name: Retry Fleet API check + wait_for: + timeout: "{{ delay_seconds }}" + when: not api_success + when: not api_success + loop: "{{ range(1, max_retries + 1)|list }}" + loop_control: + loop_var: outer_attempt - name: Confirm Fleet API is ready debug: From 877e2680cbaf559532897e4705f3781c419f3e6f Mon Sep 17 00:00:00 2001 From: cbaxley Date: Tue, 15 Oct 2024 08:19:06 -0400 Subject: [PATCH 22/48] Remove the set-fleet script from the installer --- .github/workflows/cluster.yml | 26 ++++++++++----------- testing/v2/installers/install_v2/install.sh | 3 +-- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/.github/workflows/cluster.yml b/.github/workflows/cluster.yml index d3536cff..7ca13b4b 100644 --- a/.github/workflows/cluster.yml +++ b/.github/workflows/cluster.yml @@ -292,19 +292,19 @@ jobs: pytest -v selenium_tests/' " - - name: Cleanup Azure resources - if: always() - env: - AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} - AZURE_SECRET: ${{ secrets.AZURE_SECRET }} - AZURE_TENANT: ${{ secrets.AZURE_TENANT }} - AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }} - run: | - cd testing/v2/development - docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c " - az login --service-principal -u $AZURE_CLIENT_ID -p $AZURE_SECRET --tenant $AZURE_TENANT - az group delete --name pipe-${{ env.UNIQUE_ID }} --yes --no-wait - " + # - name: Cleanup Azure resources + # if: always() + # env: + # AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} + # AZURE_SECRET: ${{ secrets.AZURE_SECRET }} + # AZURE_TENANT: ${{ secrets.AZURE_TENANT }} + # AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + # run: | + # cd testing/v2/development + # docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c " + # az login --service-principal -u $AZURE_CLIENT_ID -p $AZURE_SECRET --tenant $AZURE_TENANT + # az group delete --name pipe-${{ env.UNIQUE_ID }} --yes --no-wait + # " - name: Stop and remove containers if: always() diff --git a/testing/v2/installers/install_v2/install.sh b/testing/v2/installers/install_v2/install.sh index 4eac2ffa..0b3a539f 100755 --- a/testing/v2/installers/install_v2/install.sh +++ b/testing/v2/installers/install_v2/install.sh @@ -95,8 +95,7 @@ echo "Running check-fleet script" ssh -o StrictHostKeyChecking=no $user@$hostname "sudo -E bash -c 'source /opt/lme/lme-environment.env && su $user -c \". 
~/.bashrc && cd ~/LME && ./testing/v2/installers/lib/check_fleet.sh\"'" echo "Running set-fleet script" -#ssh -o StrictHostKeyChecking=no $user@$hostname "sudo -E bash -c 'cd ~/LME && ./scripts/set-fleet.sh'" -ssh -o StrictHostKeyChecking=no $user@$hostname "sudo -E bash -c 'cd ~/LME/scripts && ansible-playbook set_fleet.yml -e \"debug_mode=true\"'" +#ssh -o StrictHostKeyChecking=no $user@$hostname "sudo -E bash -c 'cd ~/LME/scripts && ansible-playbook set_fleet.yml -e \"debug_mode=true\"'" echo "Installation and configuration completed successfully." From 2fefce64c2cb3960c95f1f3ac977c05925e5e68a Mon Sep 17 00:00:00 2001 From: cbaxley Date: Tue, 15 Oct 2024 09:06:01 -0400 Subject: [PATCH 23/48] Reverts to old loop method --- scripts/check_fleet_api.yml | 21 ++++++++++++++------- scripts/set_fleet.yml | 28 +++++++--------------------- 2 files changed, 21 insertions(+), 28 deletions(-) diff --git a/scripts/check_fleet_api.yml b/scripts/check_fleet_api.yml index 741aa156..2ca595b4 100644 --- a/scripts/check_fleet_api.yml +++ b/scripts/check_fleet_api.yml @@ -1,12 +1,9 @@ -- name: Wait before attempt - wait_for: - timeout: 5 - - name: Debug credentials debug: msg: - "User: elastic" - "Password is set: {{ elastic_password | default('') | length > 0 }}" + - "Password is set: {{ elastic_password }}" - "URL: {{ local_kbn_url }}" no_log: true @@ -27,13 +24,23 @@ - name: Display Fleet API call details debug: msg: - - "Attempt: {{ current_attempt }}" + - "Attempt: {{ attempt_counter }}" - "URL: {{ fleet_status.url }}" - "Status: {{ fleet_status.status | default('N/A') }}" - "Status Message: {{ fleet_status.msg | default('N/A') }}" - "Response Headers: {{ fleet_status.headers | default({}) | to_nice_json }}" - "Response Content: {{ fleet_status.content | default('N/A') | to_nice_json }}" -- name: Set success flag if API call succeeded +- name: Increment attempt counter set_fact: - api_success: "{{ fleet_status.status is defined and fleet_status.status == 200 }}" + attempt_counter: "{{ attempt_counter | int + 1 }}" + +- name: Check if max retries reached + fail: + msg: "Max retries reached. Fleet API is not ready." + when: attempt_counter | int > max_retries + +- name: Wait before next attempt + wait_for: + timeout: "{{ delay_seconds }}" + when: fleet_status.status is not defined or fleet_status.status != 200 \ No newline at end of file diff --git a/scripts/set_fleet.yml b/scripts/set_fleet.yml index 43313ee0..314463e9 100644 --- a/scripts/set_fleet.yml +++ b/scripts/set_fleet.yml @@ -35,7 +35,7 @@ - name: Debug - Display set variables (comment out in production) debug: - msg: + msg: - "local_kbn_url: {{ local_kbn_url }}" - "ipvar: {{ ipvar }}" - "fleet_port: {{ fleet_port }}" @@ -51,35 +51,21 @@ - name: Initialize attempt counter and success flag set_fact: + attempt_counter: 1 api_success: false - current_attempt: 1 - name: Wait for Fleet API to be ready block: - name: Include Fleet API check tasks include_tasks: check_fleet_api.yml - - - name: Increment attempt counter - set_fact: - current_attempt: "{{ current_attempt | int + 1 }}" - - - name: Check if max retries reached - fail: - msg: "Max retries reached. Fleet API is not ready." - when: current_attempt | int > max_retries and not api_success + loop: "{{ range(1, max_retries + 1)|list }}" + loop_control: + loop_var: attempt_counter + when: not api_success rescue: - name: Handle any unexpected errors debug: msg: "An unexpected error occurred during Fleet API check." 
- always: - - name: Retry Fleet API check - wait_for: - timeout: "{{ delay_seconds }}" - when: not api_success - when: not api_success - loop: "{{ range(1, max_retries + 1)|list }}" - loop_control: - loop_var: outer_attempt - name: Confirm Fleet API is ready debug: @@ -219,4 +205,4 @@ - fleet_output_fingerprint_result - fleet_output_ssl_result - endpoint_policy_result - - elastic_defend_policy_result + - elastic_defend_policy_result \ No newline at end of file From d6fc98fd04099b0df8c589e5ad402818a94d6e51 Mon Sep 17 00:00:00 2001 From: cbaxley Date: Tue, 15 Oct 2024 10:51:22 -0400 Subject: [PATCH 24/48] Check that fleet is ready in an external script --- scripts/check_fleet_api.sh | 59 +++++++++++++++++++++++++++++++++++++ scripts/check_fleet_api.yml | 46 ----------------------------- scripts/set_fleet.yml | 34 ++++++++++----------- 3 files changed, 75 insertions(+), 64 deletions(-) create mode 100755 scripts/check_fleet_api.sh delete mode 100644 scripts/check_fleet_api.yml diff --git a/scripts/check_fleet_api.sh b/scripts/check_fleet_api.sh new file mode 100755 index 00000000..80b41ed0 --- /dev/null +++ b/scripts/check_fleet_api.sh @@ -0,0 +1,59 @@ +#!/bin/bash + +get_script_path() { + local source="${BASH_SOURCE[0]}" + while [ -h "$source" ]; do + local dir="$(cd -P "$(dirname "$source")" && pwd)" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + echo "$(cd -P "$(dirname "$source")" && pwd)" +} + +SCRIPT_DIR="$(get_script_path)" + +HEADERS=( + -H "kbn-version: 8.12.2" + -H "kbn-xsrf: kibana" + -H 'Content-Type: application/json' +) + +# Function to check if Fleet API is ready +check_fleet_ready() { + local response + response=$(curl -k -s --user "elastic:${elastic}" \ + "${HEADERS[@]}" \ + "${LOCAL_KBN_URL}/api/fleet/settings") + + if [[ "$response" == *"Kibana server is not ready yet"* ]]; then + return 1 + else + return 0 + fi +} + +# Wait for Fleet API to be ready +wait_for_fleet() { + echo "Waiting for Fleet API to be ready..." + max_attempts=60 + attempt=1 + while ! check_fleet_ready; do + if [ $attempt -ge $max_attempts ]; then + echo "Fleet API did not become ready after $max_attempts attempts. Exiting." + exit 1 + fi + echo "Attempt $attempt: Fleet API not ready. Waiting 10 seconds..." + sleep 10 + attempt=$((attempt + 1)) + done + echo "Fleet API is ready. Proceeding with configuration..." +} + +#main: +source /opt/lme/lme-environment.env + +# Set the secrets values and export them (source instead of execute) +set -a +. 
$SCRIPT_DIR/extract_secrets.sh -q + +wait_for_fleet \ No newline at end of file diff --git a/scripts/check_fleet_api.yml b/scripts/check_fleet_api.yml deleted file mode 100644 index 2ca595b4..00000000 --- a/scripts/check_fleet_api.yml +++ /dev/null @@ -1,46 +0,0 @@ -- name: Debug credentials - debug: - msg: - - "User: elastic" - - "Password is set: {{ elastic_password | default('') | length > 0 }}" - - "Password is set: {{ elastic_password }}" - - "URL: {{ local_kbn_url }}" - no_log: true - -- name: Attempt Fleet API call - uri: - url: "{{ local_kbn_url }}/api/fleet/settings" - method: GET - user: elastic - password: "{{ elastic_password }}" - validate_certs: no - headers: "{{ headers }}" - status_code: 200 - return_content: yes - force_basic_auth: yes - register: fleet_status - ignore_errors: yes - -- name: Display Fleet API call details - debug: - msg: - - "Attempt: {{ attempt_counter }}" - - "URL: {{ fleet_status.url }}" - - "Status: {{ fleet_status.status | default('N/A') }}" - - "Status Message: {{ fleet_status.msg | default('N/A') }}" - - "Response Headers: {{ fleet_status.headers | default({}) | to_nice_json }}" - - "Response Content: {{ fleet_status.content | default('N/A') | to_nice_json }}" - -- name: Increment attempt counter - set_fact: - attempt_counter: "{{ attempt_counter | int + 1 }}" - -- name: Check if max retries reached - fail: - msg: "Max retries reached. Fleet API is not ready." - when: attempt_counter | int > max_retries - -- name: Wait before next attempt - wait_for: - timeout: "{{ delay_seconds }}" - when: fleet_status.status is not defined or fleet_status.status != 200 \ No newline at end of file diff --git a/scripts/set_fleet.yml b/scripts/set_fleet.yml index 314463e9..d27abc5a 100644 --- a/scripts/set_fleet.yml +++ b/scripts/set_fleet.yml @@ -49,28 +49,26 @@ timeout: 300 register: kibana_port_check - - name: Initialize attempt counter and success flag - set_fact: - attempt_counter: 1 - api_success: false - - name: Wait for Fleet API to be ready - block: - - name: Include Fleet API check tasks - include_tasks: check_fleet_api.yml - loop: "{{ range(1, max_retries + 1)|list }}" - loop_control: - loop_var: attempt_counter - when: not api_success - rescue: - - name: Handle any unexpected errors - debug: - msg: "An unexpected error occurred during Fleet API check." + become: yes + become_method: sudo + command: "{{ playbook_dir }}/check_fleet_api.sh" + register: fleet_api_check + changed_when: false + + - name: Display Fleet API check result + debug: + var: fleet_api_check.stdout_lines - name: Confirm Fleet API is ready debug: msg: "Fleet API is ready" - when: api_success + when: fleet_api_check.rc == 0 + + - name: Fail if Fleet API is not ready + fail: + msg: "Fleet API did not become ready. Please check the logs and try again." 
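With the readiness check factored out into a standalone script, it can also be exercised by hand before the playbook runs. A hypothetical invocation from the host; the checkout path is an assumption, and the script sources /opt/lme/lme-environment.env itself:

```bash
# Exits 0 once the Fleet API answers, 1 after 60 failed attempts.
sudo bash ~/LME/scripts/check_fleet_api.sh
echo "check_fleet_api exit code: $?"
```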
+ when: fleet_api_check.rc != 0 - name: Get CA fingerprint command: > @@ -205,4 +203,4 @@ - fleet_output_fingerprint_result - fleet_output_ssl_result - endpoint_policy_result - - elastic_defend_policy_result \ No newline at end of file + - elastic_defend_policy_result From f4cbd7a6777031a9a09e5a7a2c2511d6bb9b1a2f Mon Sep 17 00:00:00 2001 From: cbaxley Date: Tue, 15 Oct 2024 10:57:58 -0400 Subject: [PATCH 25/48] Call the ansible playbook from the install script --- testing/v2/installers/install_v2/install.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/v2/installers/install_v2/install.sh b/testing/v2/installers/install_v2/install.sh index 0b3a539f..464778c4 100755 --- a/testing/v2/installers/install_v2/install.sh +++ b/testing/v2/installers/install_v2/install.sh @@ -95,7 +95,7 @@ echo "Running check-fleet script" ssh -o StrictHostKeyChecking=no $user@$hostname "sudo -E bash -c 'source /opt/lme/lme-environment.env && su $user -c \". ~/.bashrc && cd ~/LME && ./testing/v2/installers/lib/check_fleet.sh\"'" echo "Running set-fleet script" -#ssh -o StrictHostKeyChecking=no $user@$hostname "sudo -E bash -c 'cd ~/LME/scripts && ansible-playbook set_fleet.yml -e \"debug_mode=true\"'" +ssh -o StrictHostKeyChecking=no $user@$hostname "sudo -E bash -c 'cd ~/LME/scripts && ansible-playbook set_fleet.yml -e \"debug_mode=true\"'" echo "Installation and configuration completed successfully." From 9ec854248a0b95d779af9e34074dd62464eee2ab Mon Sep 17 00:00:00 2001 From: cbaxley Date: Tue, 15 Oct 2024 11:33:49 -0400 Subject: [PATCH 26/48] Get the CA fingerprint from the Elasticsearch container --- scripts/set_fleet.yml | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/scripts/set_fleet.yml b/scripts/set_fleet.yml index d27abc5a..f88b5d4e 100644 --- a/scripts/set_fleet.yml +++ b/scripts/set_fleet.yml @@ -71,11 +71,22 @@ when: fleet_api_check.rc != 0 - name: Get CA fingerprint - command: > + shell: > /nix/var/nix/profiles/default/bin/podman exec -w /usr/share/elasticsearch/config/certs/ca - lme-elasticsearch cat ca.crt | openssl x509 -nout -fingerprint -sha256 | cut -d "=" -f 2 | tr -d : | head -n1 + lme-elasticsearch cat ca.crt | openssl x509 -noout -fingerprint -sha256 | cut -d "=" -f 2 | tr -d : | head -n1 register: ca_fingerprint changed_when: false + ignore_errors: true + + - name: Display CA fingerprint + debug: + var: ca_fingerprint + when: ca_fingerprint is defined + + - name: Fail if CA fingerprint retrieval failed + fail: + msg: "Failed to retrieve CA fingerprint. Please check the Elasticsearch container and certificates." 
+ when: ca_fingerprint is undefined or ca_fingerprint.rc != 0 - name: Set Fleet server hosts uri: From 03ac6a94118e15ffed37f92c485b6f9274f8599e Mon Sep 17 00:00:00 2001 From: cbaxley Date: Tue, 15 Oct 2024 12:34:38 -0400 Subject: [PATCH 27/48] Adds headers to the curl commands in the set_fleet.yml playbook --- scripts/set_fleet.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/scripts/set_fleet.yml b/scripts/set_fleet.yml index f88b5d4e..bb9a4b3b 100644 --- a/scripts/set_fleet.yml +++ b/scripts/set_fleet.yml @@ -101,6 +101,20 @@ fleet_server_hosts: ["https://{{ ipvar }}:{{ fleet_port }}"] register: fleet_server_hosts_result no_log: false + ignore_errors: yes + + - name: Debug Fleet server hosts result + debug: + var: fleet_server_hosts_result + when: fleet_server_hosts_result is defined + + - name: Debug authentication variables + debug: + msg: + - "local_kbn_url: {{ local_kbn_url }}" + - "elastic_password is set: {{ elastic_password | length > 0 }}" + - "headers: {{ headers }}" + no_log: true - name: Set Fleet default output hosts uri: From 5b603b9d03acd0c147786151e8b540ecc8c70337 Mon Sep 17 00:00:00 2001 From: cbaxley Date: Tue, 15 Oct 2024 18:27:40 -0400 Subject: [PATCH 28/48] Address the hosts and fleet API issues --- scripts/set_fleet.yml | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/scripts/set_fleet.yml b/scripts/set_fleet.yml index bb9a4b3b..d5562408 100644 --- a/scripts/set_fleet.yml +++ b/scripts/set_fleet.yml @@ -88,6 +88,16 @@ msg: "Failed to retrieve CA fingerprint. Please check the Elasticsearch container and certificates." when: ca_fingerprint is undefined or ca_fingerprint.rc != 0 + - name: Debug authentication variables + debug: + msg: + - "local_kbn_url: {{ local_kbn_url }}" + - "ipvar: {{ ipvar }}" + - "fleet_port: {{ fleet_port }}" + - "elastic_password is set: {{ elastic_password | length > 0 }}" + - "headers: {{ headers }}" + no_log: true + - name: Set Fleet server hosts uri: url: "{{ local_kbn_url }}/api/fleet/settings" @@ -108,14 +118,6 @@ var: fleet_server_hosts_result when: fleet_server_hosts_result is defined - - name: Debug authentication variables - debug: - msg: - - "local_kbn_url: {{ local_kbn_url }}" - - "elastic_password is set: {{ elastic_password | length > 0 }}" - - "headers: {{ headers }}" - no_log: true - - name: Set Fleet default output hosts uri: url: "{{ local_kbn_url }}/api/fleet/outputs/fleet-default-output" From 35e912198b12c332d09d6297597911f2d285ec32 Mon Sep 17 00:00:00 2001 From: cbaxley Date: Wed, 16 Oct 2024 07:20:55 -0400 Subject: [PATCH 29/48] Change the way we login to the Kibana API --- scripts/set_fleet.yml | 194 +++++++++++++++++++++++++++++------------- 1 file changed, 136 insertions(+), 58 deletions(-) diff --git a/scripts/set_fleet.yml b/scripts/set_fleet.yml index d5562408..4e04ad95 100644 --- a/scripts/set_fleet.yml +++ b/scripts/set_fleet.yml @@ -13,48 +13,114 @@ delay_seconds: 10 tasks: - - name: Source environment file, extract secrets, and set variables - shell: | - source /opt/lme/lme-environment.env + - name: Read lme-environment.env file + ansible.builtin.slurp: + src: /opt/lme/lme-environment.env + register: lme_env_content + + - name: Set environment variables + ansible.builtin.set_fact: + env_dict: "{{ env_dict | default({}) | combine({ item.split('=', 1)[0]: item.split('=', 1)[1] }) }}" + loop: "{{ (lme_env_content['content'] | b64decode).split('\n') }}" + when: item != '' and not item.startswith('#') + + - name: Display set environment variables + debug: + 
msg: "Set {{ item.key }}" + loop: "{{ env_dict | dict2items }}" + when: item.value | length > 0 + + - name: Source extract_secrets.sh + ansible.builtin.shell: | set -a . {{ playbook_dir }}/extract_secrets.sh -q - echo "{\"elastic_password\": \"$elastic\", \"local_kbn_url\": \"$LOCAL_KBN_URL\", \"ipvar\": \"$IPVAR\", \"fleet_port\": \"$FLEET_PORT\"}" + echo "elastic=$elastic" + echo "wazuh=$wazuh" + echo "kibana_system=$kibana_system" + echo "wazuh_api=$wazuh_api" args: executable: /bin/bash - register: env_vars - changed_when: false + register: extract_secrets_vars no_log: true - - name: Set variables from environment - set_fact: - elastic_password: "{{ (env_vars.stdout | from_json).elastic_password }}" - local_kbn_url: "{{ (env_vars.stdout | from_json).local_kbn_url }}" - ipvar: "{{ (env_vars.stdout | from_json).ipvar }}" - fleet_port: "{{ (env_vars.stdout | from_json).fleet_port }}" + - name: Set secret variables + ansible.builtin.set_fact: + env_dict: "{{ env_dict | combine({ item.split('=', 1)[0]: item.split('=', 1)[1] }) }}" + loop: "{{ extract_secrets_vars.stdout_lines }}" no_log: true - - name: Debug - Display set variables (comment out in production) + - name: Set playbook variables + ansible.builtin.set_fact: + ipvar: "{{ env_dict.IPVAR | default('') }}" + local_kbn_url: "{{ env_dict.LOCAL_KBN_URL | default('') }}" + local_es_url: "{{ env_dict.LOCAL_ES_URL | default('') }}" + stack_version: "{{ env_dict.STACK_VERSION | default('') }}" + cluster_name: "{{ env_dict.CLUSTER_NAME | default('') }}" + elastic_username: "{{ env_dict.ELASTIC_USERNAME | default('') }}" + elasticsearch_username: "{{ env_dict.ELASTICSEARCH_USERNAME | default('') }}" + kibana_fleet_username: "{{ env_dict.KIBANA_FLEET_USERNAME | default('') }}" + indexer_username: "{{ env_dict.INDEXER_USERNAME | default('') }}" + api_username: "{{ env_dict.API_USERNAME | default('') }}" + license: "{{ env_dict.LICENSE | default('') }}" + es_port: "{{ env_dict.ES_PORT | default('') }}" + kibana_port: "{{ env_dict.KIBANA_PORT | default('') }}" + fleet_port: "{{ env_dict.FLEET_PORT | default('') }}" + mem_limit: "{{ env_dict.MEM_LIMIT | default('') }}" + elastic_password: "{{ env_dict.elastic | default('') }}" + wazuh_password: "{{ env_dict.wazuh | default('') }}" + kibana_system_password: "{{ env_dict.kibana_system | default('') }}" + wazuh_api_password: "{{ env_dict.wazuh_api | default('') }}" + + - name: Debug - Display set variables (sensitive information redacted) debug: msg: - - "local_kbn_url: {{ local_kbn_url }}" - "ipvar: {{ ipvar }}" + - "local_kbn_url: {{ local_kbn_url }}" + - "local_es_url: {{ local_es_url }}" + - "stack_version: {{ stack_version }}" + - "cluster_name: {{ cluster_name }}" + - "elastic_username: {{ elastic_username }}" + - "elasticsearch_username: {{ elasticsearch_username }}" + - "kibana_fleet_username: {{ kibana_fleet_username }}" + - "indexer_username: {{ indexer_username }}" + - "api_username: {{ api_username }}" + - "license: {{ license }}" + - "es_port: {{ es_port }}" + - "kibana_port: {{ kibana_port }}" - "fleet_port: {{ fleet_port }}" - - "elastic_password is set: {{ elastic_password | length > 0 }}" - when: debug_mode | default(false) | bool + - "mem_limit: {{ mem_limit }}" + - "elastic password is set: {{ elastic_password | length > 0 }}" + - "wazuh password is set: {{ wazuh_password | length > 0 }}" + - "kibana_system password is set: {{ kibana_system_password | length > 0 }}" + - "wazuh_api password is set: {{ wazuh_api_password | length > 0 }}" - name: Wait for Kibana port to be available 
wait_for: host: "{{ ipvar }}" - port: 5601 + port: "{{ kibana_port | int }}" timeout: 300 register: kibana_port_check - name: Wait for Fleet API to be ready - become: yes - become_method: sudo - command: "{{ playbook_dir }}/check_fleet_api.sh" + ansible.builtin.shell: | + attempt=0 + max_attempts=30 + delay=10 + while [ $attempt -lt $max_attempts ]; do + response=$(curl -s -o /dev/null -w "%{http_code}" -k -u elastic:{{ elastic_password }} {{ local_kbn_url }}/api/fleet/agents/setup) + if [ "$response" = "200" ]; then + echo "Fleet API is ready. Proceeding with configuration..." + exit 0 + fi + echo "Waiting for Fleet API to be ready..." + sleep $delay + attempt=$((attempt+1)) + done + echo "Fleet API did not become ready within the expected time." + exit 1 register: fleet_api_check changed_when: false + no_log: true - name: Display Fleet API check result debug: @@ -63,54 +129,46 @@ - name: Confirm Fleet API is ready debug: msg: "Fleet API is ready" - when: fleet_api_check.rc == 0 + when: "'Fleet API is ready' in fleet_api_check.stdout" - name: Fail if Fleet API is not ready fail: - msg: "Fleet API did not become ready. Please check the logs and try again." - when: fleet_api_check.rc != 0 + msg: "Fleet API did not become ready within the expected time." + when: "'Fleet API is ready' not in fleet_api_check.stdout" - name: Get CA fingerprint - shell: > - /nix/var/nix/profiles/default/bin/podman exec -w /usr/share/elasticsearch/config/certs/ca - lme-elasticsearch cat ca.crt | openssl x509 -noout -fingerprint -sha256 | cut -d "=" -f 2 | tr -d : | head -n1 + ansible.builtin.shell: | + sudo bash -c ' + set -a + . /home/lme-user/LME/scripts/extract_secrets.sh -q + set +a + /nix/var/nix/profiles/default/bin/podman exec -w /usr/share/elasticsearch/config/certs/ca lme-elasticsearch cat ca.crt | openssl x509 -noout -fingerprint -sha256 | cut -d "=" -f 2 | tr -d : | head -n1 + ' register: ca_fingerprint changed_when: false - ignore_errors: true + become: yes + become_method: sudo + no_log: false - name: Display CA fingerprint debug: - var: ca_fingerprint - when: ca_fingerprint is defined - - - name: Fail if CA fingerprint retrieval failed - fail: - msg: "Failed to retrieve CA fingerprint. Please check the Elasticsearch container and certificates." 
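The uri tasks in this rewrite are the Ansible form of the curl calls the shell scripts earlier in the series make against Kibana. A hedged curl rendering of the first of them, with the URL, password, and fleet host as placeholders (8220 matches the agent port listed in the README):

```bash
curl -k -s -u "elastic:${ELASTIC_PASS}" -X PUT \
  -H "kbn-xsrf: kibana" -H "Content-Type: application/json" \
  "${KBN_URL}/api/fleet/settings" \
  -d '{"fleet_server_hosts":["https://<server-ip>:8220"]}'
```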
- when: ca_fingerprint is undefined or ca_fingerprint.rc != 0 - - - name: Debug authentication variables - debug: - msg: - - "local_kbn_url: {{ local_kbn_url }}" - - "ipvar: {{ ipvar }}" - - "fleet_port: {{ fleet_port }}" - - "elastic_password is set: {{ elastic_password | length > 0 }}" - - "headers: {{ headers }}" - no_log: true + var: ca_fingerprint.stdout + when: ca_fingerprint is defined and ca_fingerprint.stdout is defined - name: Set Fleet server hosts uri: url: "{{ local_kbn_url }}/api/fleet/settings" method: PUT - user: elastic + user: "{{ elastic_username }}" password: "{{ elastic_password }}" + force_basic_auth: yes validate_certs: no headers: "{{ headers }}" body_format: json body: fleet_server_hosts: ["https://{{ ipvar }}:{{ fleet_port }}"] register: fleet_server_hosts_result - no_log: false + no_log: false # Set to true in production ignore_errors: yes - name: Debug Fleet server hosts result @@ -118,54 +176,72 @@ var: fleet_server_hosts_result when: fleet_server_hosts_result is defined + - name: Debug - Display authentication variables + debug: + msg: + - "elastic_username: {{ elastic_username }}" + - "elastic_password is set: {{ elastic_password | length > 0 }}" + - "local_kbn_url: {{ local_kbn_url }}" + no_log: false + - name: Set Fleet default output hosts uri: url: "{{ local_kbn_url }}/api/fleet/outputs/fleet-default-output" method: PUT - user: elastic + user: "{{ elastic_username }}" password: "{{ elastic_password }}" + force_basic_auth: yes validate_certs: no headers: "{{ headers }}" body_format: json body: hosts: ["https://{{ ipvar }}:9200"] register: fleet_output_hosts_result - no_log: false + no_log: false + ignore_errors: yes + + - name: Debug Fleet default output hosts result + debug: + var: fleet_output_hosts_result + when: fleet_output_hosts_result is defined - name: Set Fleet default output CA trusted fingerprint uri: url: "{{ local_kbn_url }}/api/fleet/outputs/fleet-default-output" method: PUT - user: elastic + user: "{{ elastic_username }}" password: "{{ elastic_password }}" + force_basic_auth: yes validate_certs: no headers: "{{ headers }}" body_format: json body: ca_trusted_fingerprint: "{{ ca_fingerprint.stdout }}" register: fleet_output_fingerprint_result - no_log: true + no_log: false - name: Set Fleet default output SSL verification mode uri: url: "{{ local_kbn_url }}/api/fleet/outputs/fleet-default-output" method: PUT - user: elastic + user: "{{ elastic_username }}" password: "{{ elastic_password }}" + force_basic_auth: yes validate_certs: no headers: "{{ headers }}" body_format: json body: config_yaml: "ssl.verification_mode: certificate" register: fleet_output_ssl_result - no_log: true + no_log: false - name: Create Endpoint Policy uri: url: "{{ local_kbn_url }}/api/fleet/agent_policies?sys_monitoring=true" method: POST - user: elastic + user: "{{ elastic_username }}" password: "{{ elastic_password }}" + force_basic_auth: yes validate_certs: no headers: "{{ headers }}" body_format: json @@ -176,25 +252,27 @@ monitoring_enabled: ["logs", "metrics"] inactivity_timeout: 1209600 register: endpoint_policy_result - no_log: true + no_log: false - name: Get Endpoint package version uri: url: "{{ local_kbn_url }}/api/fleet/epm/packages/endpoint" method: GET - user: elastic + user: "{{ elastic_username }}" password: "{{ elastic_password }}" + force_basic_auth: yes validate_certs: no headers: "{{ headers }}" register: endpoint_package_result - no_log: true + no_log: false - name: Create Elastic Defend package policy uri: url: "{{ local_kbn_url 
}}/api/fleet/package_policies" method: POST - user: elastic + user: "{{ elastic_username }}" password: "{{ elastic_password }}" + force_basic_auth: yes validate_certs: no headers: "{{ headers }}" body_format: json @@ -219,7 +297,7 @@ title: "Elastic Defend" version: "{{ endpoint_package_result.json.item.version }}" register: elastic_defend_policy_result - no_log: true + no_log: false - name: Display results debug: From 519bcf830032110106c2480a4fe593cf5da27bcd Mon Sep 17 00:00:00 2001 From: cbaxley Date: Wed, 16 Oct 2024 07:54:53 -0400 Subject: [PATCH 30/48] Increase the timeout for the Endpoint Policy API calls --- scripts/set_fleet.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/set_fleet.yml b/scripts/set_fleet.yml index 4e04ad95..00415e30 100644 --- a/scripts/set_fleet.yml +++ b/scripts/set_fleet.yml @@ -251,6 +251,7 @@ namespace: "default" monitoring_enabled: ["logs", "metrics"] inactivity_timeout: 1209600 + timeout: 120 register: endpoint_policy_result no_log: false From c9de1e1391583a2f86791e0744d47e7ae9e901c5 Mon Sep 17 00:00:00 2001 From: cbaxley Date: Wed, 16 Oct 2024 09:08:23 -0400 Subject: [PATCH 31/48] Increase the timeout for the Endpoint Policy API calls --- scripts/set_fleet.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/set_fleet.yml b/scripts/set_fleet.yml index 00415e30..7f4e0c25 100644 --- a/scripts/set_fleet.yml +++ b/scripts/set_fleet.yml @@ -251,7 +251,7 @@ namespace: "default" monitoring_enabled: ["logs", "metrics"] inactivity_timeout: 1209600 - timeout: 120 + timeout: 600 register: endpoint_policy_result no_log: false From 05c5ad07543f5112e30dfdb403cf5069fa9d10e2 Mon Sep 17 00:00:00 2001 From: cbaxley Date: Wed, 16 Oct 2024 09:48:24 -0400 Subject: [PATCH 32/48] Increase the timeout for the Defend Policy API calls --- scripts/set_fleet.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/set_fleet.yml b/scripts/set_fleet.yml index 7f4e0c25..3515ab5e 100644 --- a/scripts/set_fleet.yml +++ b/scripts/set_fleet.yml @@ -277,6 +277,7 @@ validate_certs: no headers: "{{ headers }}" body_format: json + timeout: 600 body: name: "Elastic Defend" description: "" From 50e9cb5d3cec87877ba9e64eac8749a88c22030c Mon Sep 17 00:00:00 2001 From: cbaxley Date: Wed, 16 Oct 2024 10:26:49 -0400 Subject: [PATCH 33/48] Only print debug information if debug_mode is true --- scripts/set_fleet.yml | 41 +++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/scripts/set_fleet.yml b/scripts/set_fleet.yml index 3515ab5e..1a659238 100644 --- a/scripts/set_fleet.yml +++ b/scripts/set_fleet.yml @@ -11,6 +11,7 @@ Content-Type: "application/json" max_retries: 60 delay_seconds: 10 + debug_mode: false tasks: - name: Read lme-environment.env file @@ -41,13 +42,13 @@ args: executable: /bin/bash register: extract_secrets_vars - no_log: true + no_log: "{{ not debug_mode }}" - name: Set secret variables ansible.builtin.set_fact: env_dict: "{{ env_dict | combine({ item.split('=', 1)[0]: item.split('=', 1)[1] }) }}" loop: "{{ extract_secrets_vars.stdout_lines }}" - no_log: true + no_log: "{{ not debug_mode }}" - name: Set playbook variables ansible.builtin.set_fact: @@ -77,9 +78,9 @@ - "ipvar: {{ ipvar }}" - "local_kbn_url: {{ local_kbn_url }}" - "local_es_url: {{ local_es_url }}" + - "elastic_username: {{ elastic_username }}" - "stack_version: {{ stack_version }}" - "cluster_name: {{ cluster_name }}" - - "elastic_username: {{ elastic_username }}" - "elasticsearch_username: {{ elasticsearch_username }}" - 
"kibana_fleet_username: {{ kibana_fleet_username }}" - "indexer_username: {{ indexer_username }}" @@ -93,6 +94,7 @@ - "wazuh password is set: {{ wazuh_password | length > 0 }}" - "kibana_system password is set: {{ kibana_system_password | length > 0 }}" - "wazuh_api password is set: {{ wazuh_api_password | length > 0 }}" + when: debug_mode | bool - name: Wait for Kibana port to be available wait_for: @@ -120,7 +122,7 @@ exit 1 register: fleet_api_check changed_when: false - no_log: true + no_log: "{{ not debug_mode }}" - name: Display Fleet API check result debug: @@ -148,12 +150,15 @@ changed_when: false become: yes become_method: sudo - no_log: false + no_log: "{{ not debug_mode }}" - name: Display CA fingerprint debug: var: ca_fingerprint.stdout - when: ca_fingerprint is defined and ca_fingerprint.stdout is defined + when: + - ca_fingerprint is defined + - ca_fingerprint.stdout is defined + - debug_mode | bool - name: Set Fleet server hosts uri: @@ -168,21 +173,13 @@ body: fleet_server_hosts: ["https://{{ ipvar }}:{{ fleet_port }}"] register: fleet_server_hosts_result - no_log: false # Set to true in production + no_log: "{{ not debug_mode }}" ignore_errors: yes - name: Debug Fleet server hosts result debug: var: fleet_server_hosts_result - when: fleet_server_hosts_result is defined - - - name: Debug - Display authentication variables - debug: - msg: - - "elastic_username: {{ elastic_username }}" - - "elastic_password is set: {{ elastic_password | length > 0 }}" - - "local_kbn_url: {{ local_kbn_url }}" - no_log: false + when: fleet_server_hosts_result is defined and debug_mode | bool - name: Set Fleet default output hosts uri: @@ -197,7 +194,7 @@ body: hosts: ["https://{{ ipvar }}:9200"] register: fleet_output_hosts_result - no_log: false + no_log: "{{ not debug_mode }}" ignore_errors: yes - name: Debug Fleet default output hosts result @@ -218,7 +215,7 @@ body: ca_trusted_fingerprint: "{{ ca_fingerprint.stdout }}" register: fleet_output_fingerprint_result - no_log: false + no_log: "{{ not debug_mode }}" - name: Set Fleet default output SSL verification mode uri: @@ -233,7 +230,7 @@ body: config_yaml: "ssl.verification_mode: certificate" register: fleet_output_ssl_result - no_log: false + no_log: "{{ not debug_mode }}" - name: Create Endpoint Policy uri: @@ -253,7 +250,7 @@ inactivity_timeout: 1209600 timeout: 600 register: endpoint_policy_result - no_log: false + no_log: "{{ not debug_mode }}" - name: Get Endpoint package version uri: @@ -265,7 +262,7 @@ validate_certs: no headers: "{{ headers }}" register: endpoint_package_result - no_log: false + no_log: "{{ not debug_mode }}" - name: Create Elastic Defend package policy uri: @@ -299,7 +296,7 @@ title: "Elastic Defend" version: "{{ endpoint_package_result.json.item.version }}" register: elastic_defend_policy_result - no_log: false + no_log: "{{ not debug_mode }}" - name: Display results debug: From 3efbe3e785fdad18b3d03cba62709361b584c61f Mon Sep 17 00:00:00 2001 From: cbaxley Date: Wed, 16 Oct 2024 11:00:01 -0400 Subject: [PATCH 34/48] Keeps the azure resources on builds --- .github/workflows/cluster.yml | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/workflows/cluster.yml b/.github/workflows/cluster.yml index 7ca13b4b..a35d30b6 100644 --- a/.github/workflows/cluster.yml +++ b/.github/workflows/cluster.yml @@ -251,7 +251,7 @@ jobs: env: ES_PASSWORD: ${{ env.ES_PASSWORD }} run: | - sleep 240 + sleep 360 cd testing/v2/development docker compose -p ${{ env.UNIQUE_ID }} exec -T 
pipeline bash -c " ssh -o StrictHostKeyChecking=no lme-user@${{ env.AZURE_IP }} \ @@ -292,19 +292,19 @@ jobs: pytest -v selenium_tests/' " - # - name: Cleanup Azure resources - # if: always() - # env: - # AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} - # AZURE_SECRET: ${{ secrets.AZURE_SECRET }} - # AZURE_TENANT: ${{ secrets.AZURE_TENANT }} - # AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }} - # run: | - # cd testing/v2/development - # docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c " - # az login --service-principal -u $AZURE_CLIENT_ID -p $AZURE_SECRET --tenant $AZURE_TENANT - # az group delete --name pipe-${{ env.UNIQUE_ID }} --yes --no-wait - # " + - name: Cleanup Azure resources + if: always() + env: + AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} + AZURE_SECRET: ${{ secrets.AZURE_SECRET }} + AZURE_TENANT: ${{ secrets.AZURE_TENANT }} + AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + run: | + cd testing/v2/development + docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c " + az login --service-principal -u $AZURE_CLIENT_ID -p $AZURE_SECRET --tenant $AZURE_TENANT + az group delete --name pipe-${{ env.UNIQUE_ID }} --yes --no-wait + " - name: Stop and remove containers if: always() From d924ea10a2b0b259b4593a8304056f70ea3c65cc Mon Sep 17 00:00:00 2001 From: Michael Reeves Date: Wed, 16 Oct 2024 11:55:42 -0400 Subject: [PATCH 35/48] Fixing Error with certs where the permissions should only be on first generation! --- config/setup/init-setup.sh | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/config/setup/init-setup.sh b/config/setup/init-setup.sh index c5e9ccc2..9884d2c3 100644 --- a/config/setup/init-setup.sh +++ b/config/setup/init-setup.sh @@ -24,12 +24,13 @@ if [ ! -f "${CERTS_DIR}/certs.zip" ]; then elasticsearch-certutil cert --silent --pem --in "${INSTANCES_PATH}" --out "${CERTS_DIR}/certs.zip" --ca-cert "${CERTS_DIR}/ca/ca.crt" --ca-key "${CERTS_DIR}/ca/ca.key" unzip -o "${CERTS_DIR}/certs.zip" -d "${CERTS_DIR}" cat "${CERTS_DIR}/elasticsearch/elasticsearch.crt" "${CERTS_DIR}/ca/ca.crt" > "${CERTS_DIR}/elasticsearch/elasticsearch.chain.pem" -fi -echo "Setting file permissions... certs" -chown -R elasticsearch:elasticsearch "${CERTS_DIR}" -find "${CERTS_DIR}" -type d -exec chmod 755 {} \; -find "${CERTS_DIR}" -type f -exec chmod 644 {} \; + echo "Setting file permissions... certs" + chown -R elasticsearch:elasticsearch "${CERTS_DIR}" + find "${CERTS_DIR}" -type d -exec chmod 755 {} \; + find "${CERTS_DIR}" -type f -exec chmod 644 {} \; + + echo "Setting file permissions... data" + chown -R elasticsearch:elasticsearch "${DATA_DIR}" +fi -echo "Setting file permissions... 
data" -chown -R elasticsearch:elasticsearch "${DATA_DIR}" From 4dd138ae17780fae35728267081dbb8c546fa0ea Mon Sep 17 00:00:00 2001 From: Michael Reeves Date: Wed, 16 Oct 2024 14:44:56 -0400 Subject: [PATCH 36/48] Remove sysctl edits to lower privileged ports and add 443 to kibana container --- quadlet/lme-kibana.container | 2 +- scripts/set_sysctl_limits.sh | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/quadlet/lme-kibana.container b/quadlet/lme-kibana.container index 344631df..cbbba789 100644 --- a/quadlet/lme-kibana.container +++ b/quadlet/lme-kibana.container @@ -21,7 +21,7 @@ EnvironmentFile=/opt/lme/lme-environment.env Image=localhost/kibana:LME_LATEST Network=lme PodmanArgs=--memory 4gb --network-alias lme-kibana --requires lme-elasticsearch --health-interval=2s -PublishPort=5601:5601 +PublishPort=5601:5601,443:5601 Volume=lme_certs:/usr/share/kibana/config/certs Volume=lme_kibanadata:/usr/share/kibana/data Volume=/opt/lme/config/kibana.yml:/usr/share/kibana/config/kibana.yml diff --git a/scripts/set_sysctl_limits.sh b/scripts/set_sysctl_limits.sh index cd0b87fe..918e2abb 100755 --- a/scripts/set_sysctl_limits.sh +++ b/scripts/set_sysctl_limits.sh @@ -33,7 +33,6 @@ update_sysctl() { } # Update sysctl settings -update_sysctl "net.ipv4.ip_unprivileged_port_start" "80" update_sysctl "vm.max_map_count" "262144" update_sysctl "net.core.rmem_max" "7500000" update_sysctl "net.core.wmem_max" "7500000" @@ -60,4 +59,4 @@ sysctl vm.max_map_count sysctl net.core.rmem_max sysctl net.core.wmem_max -echo "Script execution completed." \ No newline at end of file +echo "Script execution completed." From 7eb4e5d100d8bee307d4d8991c48861217121dcc Mon Sep 17 00:00:00 2001 From: Michael Reeves Date: Wed, 16 Oct 2024 14:45:46 -0400 Subject: [PATCH 37/48] Add notes on starting vms via azure cli to testing v2 --- testing/v2/installers/README.md | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/testing/v2/installers/README.md b/testing/v2/installers/README.md index dd579b80..c042d3df 100644 --- a/testing/v2/installers/README.md +++ b/testing/v2/installers/README.md @@ -14,3 +14,33 @@ Quick Start #reminder activiate venv first: `source ~/LME/venv/bin/activate` ./azure/build_azure_linux_network.py -g lme-cbaxley-m1 -s 0.0.0.0 -vs Standard_D8_v4 -l westus -ast 00:00 -pub Canonical -io 0001-com-ubuntu-server-noble-daily -is 24_04-daily-lts-gen2 + +## creating clients: +Windows: +``` +az vm create ` + --resource-group xxxxxx ` + --nsg NSG1 ` + --image Win2019Datacenter ` + --admin-username admin-user ` + --admin-password xxxxxxxxxxxxxx ` + --vnet-name VNet1 ` + --subnet SNet1 ` + --public-ip-sku Standard ` + --name WINDOWS +``` + +Ubuntu: +``` +az vm create ` + --resource-group XXXXX ` + --nsg NSG1 ` + --image Ubuntu2204 ` + --admin-username admin-user ` + --admin-password XXXXXXXX ` + --vnet-name VNet1 ` + --subnet SNet1 ` + --public-ip-sku Standard ` + --name linux-client +``` + From 4a9109ed5ec6ce74645ee0f82e73cceaab59fb53 Mon Sep 17 00:00:00 2001 From: Michael Reeves Date: Wed, 16 Oct 2024 14:46:06 -0400 Subject: [PATCH 38/48] Fix ansible errors in checking for passwords that are created --- scripts/install_lme_local.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/install_lme_local.yml b/scripts/install_lme_local.yml index 5ebbff5f..d6e849c9 100644 --- a/scripts/install_lme_local.yml +++ b/scripts/install_lme_local.yml @@ -244,7 +244,6 @@ set_fact: ansible_env: "{{ ansible_env | combine({'PATH': ansible_env.PATH ~ 
':/nix/var/nix/profiles/default/bin'}) }}" - - name: Update PATH in user's profile lineinfile: path: "~/.profile" @@ -291,7 +290,9 @@ args: executable: /bin/bash ignore_errors: true - + #only fail on a real error + failed_when: result.rc != 0 and (result.rc == 1 and result.changed == false) + - name: Set podman secret passwords shell: | source /root/.profile @@ -306,7 +307,8 @@ - wazuh_api - wazuh become: yes - when: result is failed + ## only run this when + when: result.rc == 1 - name: Install Quadlets hosts: localhost From 8bf047722389266df49f269b1a7973feca685e2b Mon Sep 17 00:00:00 2001 From: Michael Reeves Date: Wed, 16 Oct 2024 14:46:41 -0400 Subject: [PATCH 39/48] Add debugging commands, and remove references to 443 for kibana from debug commands --- README.md | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 37470677..ecfc6b4d 100644 --- a/README.md +++ b/README.md @@ -20,10 +20,11 @@ Ubuntu 22.04 server running podman containers setup as podman quadlets controlle ### Required Ports: Ports required are as follows: - Elasticsearch: *9200* - - Kibana: 443 + - Kibana: 443,5601 - Wazuh: *1514,1515,1516,55000,514* - Agent: *8220* +**Kibana NOTE**: 5601 is the default port, and we've set kibana to listen on 443 as well ### Diagram: @@ -106,7 +107,7 @@ You can run this installer to run the total install in ansible. ```bash sudo apt update && sudo apt install -y ansible # cd ~/LME-PRIV/lme-2-arch # Or path to your clone of this repo -ansible-playbook install_lme_local.yml +ansible-playbook ./scripts/install_lme_local.yml ``` This assumes that you have the repo in `~/LME/`. @@ -116,7 +117,6 @@ ansible-playbook ./scripts/install_lme_local.yml -e "clone_dir=/path/to/clone/di ``` This also assumes your user can sudo without a password. If you need to input a password when you sudo, you can run it with the `-K` flag and it will prompt you for a password. -There is a step that will fail, this is expected, it is checking for podman secrets to see if they exist... on an intial install none will exist :) #### Steps performed in automated install: TODO finalize this with more words @@ -133,9 +133,7 @@ TODO finalize this with more words 1. `/opt/lme` will be owned by the lmed user, all lme services will run and execute as lmed, and this ensures least privilege in lmed's execution because lmed is a non-admin,unprivileged user. -3. [this script](/scripts/set_sysctl_limits.sh) is executed via ansible AND will change unprivileged ports to start at 80, to allow kibana to listen on 443 from a user run container. If this is not desired, we will be publishing steps to setup firewall rules using ufw//iptables to manage the firewall on this host at a later time. - -4. the master password will be stored at `/etc/lme/pass.sh` and owned by root, while service user passwords will be stored at `/etc/lme/vault/` +2. the master password will be stored at `/etc/lme/pass.sh` and owned by root, while service user passwords will be stored at `/etc/lme/vault/` ### Verification post install: @@ -156,15 +154,13 @@ sudo -i journalctl -xu lme.service #try resetting failed: sudo -i systemctl reset-failed lme* sudo -i systemctl restart lme.service -``` -2. 
Check you can connect to elasticsearch
```bash
#substitute your password below:
curl -k -u elastic:$(sudo -i ansible-vault view /etc/lme/vault/$(sudo -i podman secret ls | grep elastic | awk '{print $1}') | tr -d '\n') https://localhost:9200
```

-3. Check conatiners are running:
+2. Check containers are running and healthy:
 ```bash
 sudo -i podman ps --format "{{.Names}} {{.Status}}"
 ```
@@ -176,11 +172,19 @@
 lme-kibana Up 2 hours (healthy)
 lme-wazuh-manager Up About an hour
 lme-fleet-server Up 50 minutes
 ```
+We are working on getting health check commands for wazuh and fleet; currently they are not integrated.
+
+3. Check you can connect to elasticsearch
+```bash
+#substitute your password below:
+curl -k -u elastic:$(sudo -i ansible-vault view /etc/lme/vault/$(sudo -i podman secret ls | grep elastic | awk '{print $1}') | tr -d '\n') https://localhost:9200
+```
 
 4. Check you can connect to kibana
+You can use an SSH proxy to forward a local port to the remote Linux host.
 ```bash
-#connect via ssh
-ssh -L 8080:localhost:443 [YOUR-LINUX-SERVER]
+#connect via ssh if you need to
+ssh -L 8080:localhost:5601 [YOUR-LINUX-SERVER]
 #go to browser:
 #https://localhost:8080
 ```

From d06c3f34f865ef7770f1aefe1625a42c90dcc3bc Mon Sep 17 00:00:00 2001
From: cbaxley
Date: Wed, 16 Oct 2024 16:15:29 -0400
Subject: [PATCH 40/48] Update the cluster.yml file to use the new IP address
 for the Azure instance

---
 .github/workflows/cluster.yml | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/cluster.yml b/.github/workflows/cluster.yml
index a35d30b6..71b20473 100644
--- a/.github/workflows/cluster.yml
+++ b/.github/workflows/cluster.yml
@@ -82,7 +82,7 @@ jobs:
           cd /home/lme-user/LME/testing/v2/installers && \
           python3 ./azure/build_azure_linux_network.py \
             -g pipe-${{ env.UNIQUE_ID }} \
-            -s 0.0.0.0/0 \
+            -s ${{ env.AZURE_IP }}/32 \
             -vs Standard_D8_v4 \
             -l centralus \
             -ast 23:00 \
@@ -265,9 +265,7 @@ jobs:
       run: |
         cd testing/v2/development
         docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
-          cd /home/lme-user/LME/testing/v2/installers && \
-          IP_ADDRESS=\$(cat pipe-${{ env.UNIQUE_ID }}.ip.txt) && \
-          ssh lme-user@\$IP_ADDRESS 'cd /home/lme-user/LME/testing/tests && \
+          ssh lme-user@${{ env.AZURE_IP }} 'cd /home/lme-user/LME/testing/tests && \
           echo ELASTIC_PASSWORD=\"$ES_PASSWORD\" >> .env && \
           echo KIBANA_PASSWORD=\"$KIBANA_PASSWORD\" >> .env && \
           echo elastic=\"$ES_PASSWORD\" >> .env && \
@@ -282,14 +280,12 @@ jobs:
       run: |
         cd testing/v2/development
         docker compose -p ${{ env.UNIQUE_ID }} exec -T pipeline bash -c "
-          cd /home/lme-user/LME/testing/v2/installers && \
-          IP_ADDRESS=\$(cat pipe-${{ env.UNIQUE_ID }}.ip.txt) && \
-          ssh lme-user@\$IP_ADDRESS 'cd /home/lme-user/LME/testing/tests && \
+          ssh lme-user@${{ env.AZURE_IP }} 'cd /home/lme-user/LME/testing/tests && \
           echo ELASTIC_PASSWORD=\"$ES_PASSWORD\" >> .env && \
           echo KIBANA_PASSWORD=\"$KIBANA_PASSWORD\" >> .env && \
           echo elastic=\"$ES_PASSWORD\" >> .env && \
           source venv/bin/activate && \
-          pytest -v selenium_tests/' 
+          pytest -v selenium_tests/'
         "

       - name: Cleanup Azure resources
@@ -311,4 +307,4 @@ jobs:
       run: |
         cd testing/v2/development
         docker compose -p ${{ env.UNIQUE_ID }} down
-        docker system prune -af
\ No newline at end of file
+        docker system prune -af

From 3d2fdef493411e4c879ffdc548a5338d8df93c9f Mon Sep 17 00:00:00 2001
From: cbaxley
Date: Wed, 16 Oct 2024 16:16:16 -0400
Subject: 
[PATCH 41/48] Only allow the IP address of the host to connect to the Azure instance

---
 .github/workflows/cluster.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/cluster.yml b/.github/workflows/cluster.yml
index 71b20473..33b5566f 100644
--- a/.github/workflows/cluster.yml
+++ b/.github/workflows/cluster.yml
@@ -82,7 +82,7 @@ jobs:
           cd /home/lme-user/LME/testing/v2/installers && \
           python3 ./azure/build_azure_linux_network.py \
             -g pipe-${{ env.UNIQUE_ID }} \
-            -s ${{ env.AZURE_IP }}/32 \
+            -s ${{ env.IP_ADDRESS }}/32 \
             -vs Standard_D8_v4 \
             -l centralus \
             -ast 23:00 \

From b9801c6b70be1d98ea416a958cd6706b8bcc76c7 Mon Sep 17 00:00:00 2001
From: Michael Reeves
Date: Thu, 17 Oct 2024 00:00:06 -0400
Subject: [PATCH 42/48] Remove unnecessary script

---
 config/vault-pass.sh | 2 --
 1 file changed, 2 deletions(-)
 delete mode 100755 config/vault-pass.sh

diff --git a/config/vault-pass.sh b/config/vault-pass.sh
deleted file mode 100755
index b0f7b8b3..00000000
--- a/config/vault-pass.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-echo $LME_ANSIBLE_VAULT_PASS

From 18fb506a7e6a956135ec2f904b53c85ce98a8755 Mon Sep 17 00:00:00 2001
From: cbaxley
Date: Thu, 17 Oct 2024 09:42:47 -0400
Subject: [PATCH 43/48] Move ansible files to the ansible directory

---
 .github/workflows/cluster.yml               |  6 +++---
 .github/workflows/linux_only.yml            | 12 ++++++++----
 {scripts => ansible}/install_lme_local.yml  |  0
 {scripts => ansible}/set_fleet.yml          |  6 +++---
 testing/v2/installers/install_v2/install.sh |  4 ++--
 5 files changed, 16 insertions(+), 12 deletions(-)
 rename {scripts => ansible}/install_lme_local.yml (100%)
 rename {scripts => ansible}/set_fleet.yml (98%)

diff --git a/.github/workflows/cluster.yml b/.github/workflows/cluster.yml
index 33b5566f..f30cfe81 100644
--- a/.github/workflows/cluster.yml
+++ b/.github/workflows/cluster.yml
@@ -2,9 +2,9 @@ name: Cluster Run - Minimega

 on:
   workflow_dispatch:
-  pull_request:
-    branches:
-      - '*'
+  # pull_request:
+  #   branches:
+  #     - '*'

 jobs:
   build-and-test-cluster:
diff --git a/.github/workflows/linux_only.yml b/.github/workflows/linux_only.yml
index 34652b48..c5e5223e 100644
--- a/.github/workflows/linux_only.yml
+++ b/.github/workflows/linux_only.yml
@@ -2,9 +2,9 @@ name: Linux Only

 on:
   workflow_dispatch:
-  # pull_request:
-  #   branches:
-  #     - '*'
+  pull_request:
+    branches:
+      - '*'

 jobs:
   build-and-test-linux-only:
@@ -16,6 +16,7 @@ jobs:
       ES_PASSWORD: ""
       KIBANA_PASSWORD: ""
       AZURE_IP: ""
+      IP_ADDRESS: ""

     steps:
       - name: Checkout repository
@@ -26,6 +27,9 @@ jobs:
           cd testing/v2/development
           echo "HOST_UID=$(id -u)" > .env
           echo "HOST_GID=$(id -g)" >> .env
+          PUBLIC_IP=$(curl -s https://api.ipify.org)
+          echo "IP_ADDRESS=$PUBLIC_IP" >> $GITHUB_ENV
+

       - name: Start pipeline container
         run: |
@@ -57,7 +61,7 @@ jobs:
           cd /home/lme-user/LME/testing/v2/installers && \
           python3 ./azure/build_azure_linux_network.py \
             -g pipe-${{ env.UNIQUE_ID }} \
-            -s 0.0.0.0/0 \
+            -s ${{ env.IP_ADDRESS }}/32 \
             -vs Standard_E4d_v4 \
             -l westus \
             -ast 23:00 \
diff --git a/scripts/install_lme_local.yml b/ansible/install_lme_local.yml
similarity index 100%
rename from scripts/install_lme_local.yml
rename to ansible/install_lme_local.yml
diff --git a/scripts/set_fleet.yml b/ansible/set_fleet.yml
similarity index 98%
rename from scripts/set_fleet.yml
rename to ansible/set_fleet.yml
index 1a659238..d7839383 100644
--- a/scripts/set_fleet.yml
+++ b/ansible/set_fleet.yml
@@ -31,10 +31,10 @@
     loop: "{{ env_dict | dict2items }}"
     when: item.value | length > 0

-    - name: Source 
extract_secrets.sh + - name: Source extract_secrets ansible.builtin.shell: | set -a - . {{ playbook_dir }}/extract_secrets.sh -q + . {{ playbook_dir }}/../scripts/extract_secrets.sh -q echo "elastic=$elastic" echo "wazuh=$wazuh" echo "kibana_system=$kibana_system" @@ -142,7 +142,7 @@ ansible.builtin.shell: | sudo bash -c ' set -a - . /home/lme-user/LME/scripts/extract_secrets.sh -q + . {{ playbook_dir }}/../scripts/extract_secrets.sh -q set +a /nix/var/nix/profiles/default/bin/podman exec -w /usr/share/elasticsearch/config/certs/ca lme-elasticsearch cat ca.crt | openssl x509 -noout -fingerprint -sha256 | cut -d "=" -f 2 | tr -d : | head -n1 ' diff --git a/testing/v2/installers/install_v2/install.sh b/testing/v2/installers/install_v2/install.sh index 464778c4..d8183c03 100755 --- a/testing/v2/installers/install_v2/install.sh +++ b/testing/v2/installers/install_v2/install.sh @@ -42,7 +42,7 @@ ssh -o StrictHostKeyChecking=no $user@$hostname << EOF EOF echo "Running ansible installer" -ssh -o StrictHostKeyChecking=no $user@$hostname "cd ~/LME && ansible-playbook scripts/install_lme_local.yml" +ssh -o StrictHostKeyChecking=no $user@$hostname "cd ~/LME && ansible-playbook ansible/install_lme_local.yml" echo "Waiting for Kibana and Elasticsearch to start..." @@ -95,7 +95,7 @@ echo "Running check-fleet script" ssh -o StrictHostKeyChecking=no $user@$hostname "sudo -E bash -c 'source /opt/lme/lme-environment.env && su $user -c \". ~/.bashrc && cd ~/LME && ./testing/v2/installers/lib/check_fleet.sh\"'" echo "Running set-fleet script" -ssh -o StrictHostKeyChecking=no $user@$hostname "sudo -E bash -c 'cd ~/LME/scripts && ansible-playbook set_fleet.yml -e \"debug_mode=true\"'" +ssh -o StrictHostKeyChecking=no $user@$hostname "sudo -E bash -c 'cd ~/LME/ansible && ansible-playbook set_fleet.yml -e \"debug_mode=true\"'" echo "Installation and configuration completed successfully." From 2ea05680f8ea1e70ff45e97d24db1758a2b259da Mon Sep 17 00:00:00 2001 From: Michael Reeves Date: Thu, 17 Oct 2024 10:31:04 -0400 Subject: [PATCH 44/48] Moving Upgrade Readme into upgrade directory --- scripts/{ => upgrade}/README.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scripts/{ => upgrade}/README.md (100%) diff --git a/scripts/README.md b/scripts/upgrade/README.md similarity index 100% rename from scripts/README.md rename to scripts/upgrade/README.md From b3610b64c928d429660b5761468c26fe736b7224 Mon Sep 17 00:00:00 2001 From: Michael Reeves Date: Thu, 17 Oct 2024 10:48:22 -0400 Subject: [PATCH 45/48] Add upgrading docs and remove dev notes --- docs/markdown/maintenance/upgrading.md | 146 +--------------------- docs/markdown/reference/dev-notes.md | 163 ------------------------- 2 files changed, 2 insertions(+), 307 deletions(-) delete mode 100644 docs/markdown/reference/dev-notes.md diff --git a/docs/markdown/maintenance/upgrading.md b/docs/markdown/maintenance/upgrading.md index 5f48ea70..bb947a0e 100644 --- a/docs/markdown/maintenance/upgrading.md +++ b/docs/markdown/maintenance/upgrading.md @@ -1,148 +1,6 @@ # Upgrading -Please see https://github.com/cisagov/LME/releases/ for our latest release. +This page serves as a landing page for future upgrading when we release new versions. -Below you can find the upgrade paths that are currently supported and what steps are required for these upgrades. Note that major version upgrades tend to include significant changes, and so will require manual intervention and will not be automatically applied, even if auto-updates are enabled. 
-
-Applying these changes is automated for any new installations. But, if you have an existing installation, you need to conduct some extra steps. **Before performing any of these steps it is advised to take a backup of the current installation using the method described [here](/docs/markdown/maintenance/backups.md).**
-
-## 1. Finding your LME version (and the components versions)
-When reporting an issue or suggesting improvements, it is important to include the versions of all the components, where possible. This ensures that the issue has not already been fixed!
-
-### 1.1. Windows Server
-* Operating System: Press "Windows Key"+R and type ```winver```
-* WEC Config: Open EventViewer > Subscriptions > "LME" > Description should contain version number
-* Winlogbeat Config: At the top of the file C:\Program Files\lme\winlogbeat.yml there should be a version number.
-* Winlogbeat.exe version: Using PowerShell, navigate to the location of the Winlogbeat executable ("C:\Program Files\lme\winlogbeat-x.x.x-windows-x86_64") and run `.\winlogbeat version`.
-* Sysmon config: From either the top of the file or look at the status dashboard
-* Sysmon executable: Either run sysmon.exe or look at the status dashboard
-
-### 1.2. Linux Server
-* Docker: on the Linux server type ```docker --version```
-* Linux: on the Linux server type ```cat /etc/os-release```
-* Logstash config: on the Linux server type ```sudo docker config inspect logstash.conf --pretty```
-
-
-## 2. Upgrade from versions prior to v0.5
-LME does not support upgrading directly from versions prior to v0.5 to v1.0. Prior to switching to CISA's repo, first upgrade to the latest version of LME published by the NCSC (v0.5.1). Then follow the instructions above to upgrade to v1.0.
-
-
-## 3. Upgrade from v0.5 to v1.0.0
-
-Since LME's transition from the NCSC to CISA, the location of the LME repository has changed from `https://github.com/ukncsc/lme` to `https://github.com/cisagov/lme`. To obtain any further updates to LME on the ELK server, you will need to transition to the new git repository. Because vital configuration files are stored within the same folder as the git repo, it's simpler to copy the old LME folder to a different location, clone the new repo, copy the files and folders unique to your system, and then optionally delete the old folder. You can do this by running the following commands:
-
-
-```
-sudo mv /opt/lme /opt/lme_old
-sudo git clone https://github.com/cisagov/lme.git /opt/lme
-sudo cp -r /opt/lme_old/Chapter\ 3\ Files/certs/ /opt/lme/Chapter\ 3\ Files/
-sudo cp /opt/lme_old/Chapter\ 3\ Files/docker-compose-stack-live.yml /opt/lme/Chapter\ 3\ Files/
-sudo cp /opt/lme_old/Chapter\ 3\ Files/get-docker.sh /opt/lme/Chapter\ 3\ Files/
-sudo cp /opt/lme_old/Chapter\ 3\ Files/logstash.edited.conf /opt/lme/Chapter\ 3\ Files/
-sudo cp /opt/lme_old/files_for_windows.zip /opt/lme/
-sudo cp /opt/lme_old/lme.conf /opt/lme/
-sudo cp /opt/lme_old/lme_update.sh /opt/lme/
-```
-Finally, you'll need to grab your old dashboard_update password and add it into the new dashboard_update script:
-```
-OLD_Password=[OLD_PASSWORD_HERE]
-sudo cp /opt/lme/Chapter\ 3\ Files/dashboard_update.sh /opt/lme/
-sed -i "s/dashboardupdatepassword/$OLD_Password/g" /opt/lme/dashboard_update.sh
-```
-
-
-### 3.1. ELK Stack Update
-You can update the ELK stack portion of LME to v1.0 (including dashboards and ELK stack containers) by running the following on the Linux server:
-
-```
-cd /opt/lme/Chapter\ 3\ Files/
-sudo ./deploy.sh upgrade
-```
-**The last step of this script makes all files only readable by their owner in /opt/lme, so that all root owned files with passwords in them are only readable by root. This prevents a local unprivileged user from gaining access to the elastic stack.**
-
-Once the deploy update is finished, next update the dashboards that are provided alongside LME to the latest version. This can be done by running the below script, with more detailed instructions available [here](/docs/markdown/chapter4.md#411-import-initial-dashboards):
-
-\*\**NOTE:*\*\* *You may need to wait several minutes for Kibana to successfully initialize after the update before running this script during the upgrade process. If you encounter a "Failed to connect" error or an "Entity Too Large" error wait for several minutes before trying again.*
-
-##### Optional Substep: Clear out old dashboards
-**Skip this step if you don't want to clear out the old dashboards**
-
-The LME team will not be maintaining any old dashboards from the old NCSC LME version, so if you would like to clean up your LME you can remove the dashboards by navigating to: https:///app/management/kibana/objects
-
-From there select all the dashboards in the search: `type:(dashboard)` and delete them.
-Then you can re-import the new dashboards like above.
-
-If you have any custom dashboards you should download them manually and add them to the repo as discussed in the new dashboard's folder [README](/Chapter 4 Files/dashboards/Readme.md).
-
-Most data from the old LME should display just fine in the new dashboards, but there could be some issues, so please feel free to file an issue if there are problems.
-
-
-```
-sudo /opt/lme/dashboard_update.sh
-```
-
-The rules built-in to the Elastic SIEM can then be updated to the latest version by following the instructions listed in [Chapter 4](/docs/markdown/chapter4.md#42-enable-the-detection-engine) and selecting the option to update the prebuilt rules when prompted, before making sure all of the rules are activated:
-
-![Update Rules](/docs/imgs/update-rules.png)
-
-
-
-### 3.2. Winlogbeat Update
-The winlogbeat.yml file used with LME v0.5.1 is not compatible with Winlogbeat 8.5.0, the version used with LME v1.0. As such, running `./deploy.sh update` from step 1.1.1 regenerates a new config file.
-
-**Your client may still authenticate and push logs to elasticsearch, but for both the security of the client and your LME setup we suggest you still update**
-
-To update Winlogbeat:
-1. Copy files_for_windows.zip to the Event Collector, following the instructions listed under [3.2.4 Download Files for Windows Event Collector](/docs/markdown/chapter3/chapter3.md#324-download-files-for-windows-event-collector).
-2. From an elevated PowerShell session, navigate to the location of the Winlogbeat executable ("C:\Program Files\lme\winlogbeat-x.x.x-windows-x86_64\") and then run `./uninstall-service-winlogbeat.ps1`
-3. Re-install Winlogbeat, using the new copy of files_for_windows.zip, following the instructions listed under [3.3 Configuring Winlogbeat on Windows Event Collector Server](/docs/markdown/chapter3/chapter3.md#33-configuring-winlogbeat-on-windows-event-collector-server)
-
-### 3.3. Network Share Updates
-LME v1.0 made a minor change to the file structure used in the SYSVOL folder, so a few manual changes are needed to accommodate this.
-1. Set up the SYSVOL folder as described in [2.2.1 - Folder Layout](/docs/markdown/chapter2.md#221---folder-layout).
-2. Replace the old version of update.bat with the [latest version](/Chapter%202%20Files/GPO%20Deployment/update.bat).
-3. Update the path to update.bat used in the LME-Sysmon-Task GPO (refer to [2.2.3 - Scheduled task GPO Policy](/docs/markdown/chapter2.md#223---scheduled-task-gpo-policy)).
-
-### 3.4. Checklist
-1. Have the ELK stack components been upgraded on the Linux server? While on the Linux server, run `sudo docker ps | grep lme`. Version 8.7.1 of Logstash, Kibana, and Elasticsearch should be running.
-2. Has Winlogbeat been updated to version 8.5.0? From Event Collector, using PowerShell, navigate to the location of the Winlogbeat executable ("C:\Program Files\lme\winlogbeat-x.x.x-windows-x86_64") and run `.\winlogbeat version`.
-3. Is the LME folder inside SYSVOL properly structured? Refer to the checklist listed at the end of chapter 2.
-4. Are the events from all clients visible inside elastic? Refer to [4.1.2 Check you are receiving logs](/docs/markdown/chapter4.md#412-check-you-are-receiving-logs).
-
-## 4. Upgrade to v1.3.1
-
-This is a hotfix to the install script and some additional troubleshooting steps added to documentation on space management. Unless you're encountering problems with your current installation, or if your logs are running out of space, there's no need to upgrade to v1.3.1, as it doesn't offer any additional functionality changes.
-
-## 5. Upgrade to v1.3.2
-
-This is a hotfix to address dashboards which failed to load on a fresh install of v1.3.1. If you are currently running v1.3.0, you do not need to upgrade at this time. If you are running versions **before** 1.3.0 or are running v1.3.1, we recommend you upgrade to the latest version.
-
-Please refer to the [Upgrading to latest version](/docs/markdown/maintenance/upgrading.md#upgrading-to-latest-version) to apply the hotfix.
-
-## 6. v1.3.3 - Update on data retention failure during LME install
-
-This is a hotfix to address an error with data retention failure in the deploy.sh script during a fresh LME install. We recommend you upgrade to the latest version if you require disk sizes of 1TB or greater.
-
-If you've tried to install LME before, then run the following commands as root:
-```
-git pull
-git checkout main
-cd /opt/lme/Chapter\ 3\ Files/
-sudo ./deploy.sh uninstall
-sudo docker volume rm lme-esdata
-sudo docker volume rm lme-logstashdata
-sudo ./deploy.sh install
-```
-
-## 7. Upgrade to latest version
-To fetch the latest changes, on the Linux server, run the following commands as root:
-```
-git pull
-git checkout main
-cd /opt/lme/Chapter\ 3\ Files/
-sudo ./deploy.sh uninstall
-sudo ./deploy.sh install
-```
-
-The deploy.sh script should have now created new files on the Linux server at location /opt/lme/files_for_windows.zip . This file needs to be copied across and used on the Windows Event Collector server like it was explained in Chapter 3 sections [3.2.4 & 3.3 ](/docs/markdown/chapter3/chapter3.md#324-download-files-for-windows-event-collector).
+Currently, the only supported upgrade path is from 1.4 -> 2.0; see the upgrade instructions [HERE](/scripts/upgrade/README.md).
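Before following that upgrade path, it is worth taking the backup recommended above. A minimal sketch, assuming the install lives at the default `/opt/lme` (the backup filename is illustrative, not part of LME):

```bash
# Hypothetical pre-upgrade snapshot of an existing 1.4 install.
sudo tar -czf "/root/lme-pre-upgrade-$(date +%F).tar.gz" /opt/lme
# Spot-check that the archive was written and contains the expected files.
sudo tar -tzf /root/lme-pre-upgrade-*.tar.gz | head
```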
diff --git a/docs/markdown/reference/dev-notes.md b/docs/markdown/reference/dev-notes.md
deleted file mode 100644
index b4dfbeba..00000000
--- a/docs/markdown/reference/dev-notes.md
+++ /dev/null
@@ -1,163 +0,0 @@
-# Dev notes:
-TODO update these to be relevant/new
-
-Notes to convert compose -> quadlet
-1. start the containers with compose
-2. podlet generate from the containers created
-
-### compose:
-running:
-```shell
-podman-compose up -d
-```
-
-stopping:
-```shell
-podman-compose down --remove-orphans
-
-#only run if you want to remove all volumes:
-podman-compose down -v --remove-orphans
-```
-
-### install/get podlet:
-```
-#https://github.com/containers/podlet/releases
-wget https://github.com/containers/podlet/releases/download/v0.3.0/podlet-x86_64-unknown-linux-gnu.tar.xz
-#add it to path:
-cp ./podlet-x86_64-unknown-linux-gnu/podlet .local/bin/
-```
-
-### generate the quadlet files:
-[DOCS](https://docs.podman.io/en/latest/markdown/podman-systemd.unit.5.html), [BLOG](https://mo8it.com/blog/quadlet/)
-
-```
-cd ~/LME-PRIV/quadlet
-
-for x in $(podman ps --filter label=io.podman.compose.project=lme-2-arch -a --format "{{.Names}}");do echo $x; podlet generate container $x > $x.container;done
-```
-
-### dealing with journalctl logs:
-https://unix.stackexchange.com/questions/638432/clear-failed-states-or-all-old-logs-from-systemctl-status-service
-```
-#delete all logs:
-sudo rm /var/log/journal/$STRING_OF_HEX/user-1000*
-```
-
-### debugging commands:
-```
-systemctl --user stop lme.service
-systemctl --user status lme*
-systemctl --user restart lme.service
-journalctl --user -u lme-fleet-server.service
-systemctl --user status lme*
-cp -r $CLONE_DIRECTORY/config/ /opt/lme && cp -r $CLONE_DIRECTORY/quadlet /opt/lme
-systemctl --user daemon-reload && systemctl --user list-unit-files lme\*
-systemctl --user reset-failed
-podman volume rm -a
-
-###make sure all ports are free as well:
-sudo ss -tulpn
-```
-
-### password setup stuff:
-#### setup the config directory
-This will setup the container config so it uses ansible vault for podman secret creation AND sets up the proper ansible-vault environment variables.
-
-```
-ln -sf /opt/lme/config/containers.conf $HOME/.config/containers/containers.conf
-#preserve `chmod +x` executable
-cp -rTp config/ /opt/lme/config
-#source our password env var:
-. ./scripts/set_vault_key_env.sh
-#create the vault directory:
-/opt/lme/vault/
-```
-
-#### create password file:
-This will setup the ansible vault files in the expected paths
-```
-ansible-vault create /opt/lme/vault.yml
-```
-
-### **Manual Install OLD**( optional if not running ansible install):
-```
-export CLONE_DIRECTORY=~/LME-PRIV/lme-2-arch
-#systemd will setup nix:
-#Old way to setup nix if desired: sh <(curl -L https://nixos.org/nix/install) --daemon
-sudo apt install jq uidmap nix-bin nix-setup-systemd
-
-sudo nix-channel --add https://nixos.org/channels/nixpkgs-unstable nixpkgs
-sudo nix-channel --update
-
-# Add user to nix group in /etc/group
-sudo usermod -aG nix-users $USER
-
-#install podman and podman-compose
-sudo nix-env -iA nixpkgs.podman
-
-# Set the path for root and lme-user
-#echo 'export PATH=$PATH:$HOME/.nix-profile/bin' >> ~/.bashrc
-echo 'export PATH=$PATH:/nix/var/nix/profiles/default/bin' >> ~/.bashrc
-sudo sh -c 'echo "export PATH=$PATH:/nix/var/nix/profiles/default/bin" >> /root/.bashrc'
-
-#to allow 443/80 bind and setup memory/limits
-sudo NON_ROOT_USER=$USER $CLONE_DIRECTORY/set_sysctl_limits.sh
-
-#export XDG_CONFIG_HOME="$HOME/.config"
-#export XDG_RUNTIME_DIR=/run/user/$(id -u)
-
-#setup user-generator on systemd:
-sudo $CLONE_DIRECTORY/link_latest_podman_quadlet.sh
-
-#setup loginctl
-sudo loginctl enable-linger $USER
-```
-
-Quadlet configuration for containers is in: `/quadlet/`
-1. setup `/opt/lme` thats the running directory for lme:
-```bash
-sudo mkdir -p /opt/lme
-sudo chown -R $USER:$USER /opt/lme
-cp -r $CLONE_DIRECTORY/config/ /opt/lme/
-cp -r $CLONE_DIRECTORY/quadlet/ /opt/lme/
-
-#setup quadlets
-mkdir -p ~/.config/containers/
-ln -s /opt/lme/quadlet ~/.config/containers/systemd
-
-#setup service file
-mkdir -p ~/.config/systemd/user
-ln -s /opt/lme/quadlet/lme.service ~/.config/systemd/user/
-```
-
-### pull and tag all containers:
-This will let us maintain the lme container versions using the `LME_LATEST` tag. Whenever we update, we change the local image to point to the newest update, and run `podman auto-update` to update the containers.
-
-**NOTE TO FUTURE SELVES: NEEDS TO BE `LOCALHOST` TO AVOID REMOTE TAGGING ATTACK**
-
-```bash
-sudo mkdir -p /etc/containers
-sudo tee /etc/containers/policy.json <
Date: Thu, 17 Oct 2024 10:48:52 -0400
Subject: [PATCH 46/48] Update main readme

docs:
- add section for LME introductory content
- disclaimer around small simple siem
- add Pre-Requisites page
- add Upgrading 1.4 -> 2.0 docs
- note on lme-frontend coming later
- remove references to lmed and make docs accurate to current installation process
- add TODOs for docs that still need updating

---
 README.md | 108 +++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 79 insertions(+), 29 deletions(-)

diff --git a/README.md b/README.md
index ecfc6b4d..6eaca06e 100644
--- a/README.md
+++ b/README.md
@@ -1,18 +1,53 @@
-![N|Solid](/docs/imgs/cisa.png)
-
[![Downloads](https://img.shields.io/github/downloads/cisagov/lme/total.svg)]()
-# Logging Made Easy: Podmanized
-This will eventually be merged with the Readme file at [LME-README](https://github.com/cisagov/LME).
+
+# Logging Made Easy:
+
+CISA's Logging Made Easy has a self-install tutorial for organizations to gain a basic level of centralized security logging for Windows clients and provide functionality to detect attacks. LME is the integration of multiple open software platforms which come at no cost to users. LME helps users integrate software platforms together to produce an end-to-end logging capability. LME also provides some pre-made configuration files and scripts, although there is the option to do this on your own.
+
+Logging Made Easy can:
+
+- Show where administrative commands are being run on enrolled devices
+- See who is using which machine
+- In conjunction with threat reports, it is possible to query for the presence of an attacker in the form of Tactics, Techniques and Procedures (TTPs)
+
+## Disclaimer:
+
+LME is still in development, and version 2.1 will address scaling out the deployment.
+
+While LME offers SIEM-like capabilities, it should be considered a small, simple SIEM.
+
+The LME team simplified the process and created clear instructions on what to download and which configurations to use, and created convenient scripts to auto-configure when possible.
+
+LME is not able to comment on or troubleshoot individual installations. If you believe you have found an issue with the LME code or documentation, please submit a GitHub issue. If you have a question about your installation, please look through all open and closed issues to see if it has been addressed before. If not, then submit a [GitHub issue](https://github.com/cisagov/lme/issues) using the Bug Template, ensuring that you provide all the requested information.
+
+For general questions about LME and suggestions, please visit [GitHub Discussions](https://github.com/cisagov/lme/discussions) to add a discussion post.
+
+## Who is Logging Made Easy for?
+
+From single IT administrators with a handful of devices in their network to larger organizations.
+
+LME is suited for:
+
+- Organizations without [SOC](https://en.wikipedia.org/wiki/Information_security_operations_center), SIEM or any monitoring in place at the moment.
+- Organizations that lack the budget, time or understanding to set up a logging system.
+- Organizations that require gathering logs and monitoring IT
+- Organizations that understand LME's limitations
+
## Table of Contents:
+- [Pre-Requisites:](#pre-requisites)
- [Architecture:](#architecture)
- [Installation:](#installation)
- [Deploying Agents:](#deploying-agents)
- [Password Encryption:](#password-encryption)
-- [Further Documentation:](#documentation)
+- [Further Documentation & Upgrading:](#documentation)
+
+## Pre-Requisites
+If you are unsure you meet the prerequisites for installing LME, please read our [prerequisites documentation](/docs/markdown/prerequisites.md).
+The biggest prerequisite is setting up hardware for your Ubuntu server with a minimum of `2 processors`, `16GB RAM`, and `128GB` of dedicated storage for LME's Elasticsearch database.

## Architecture:
Ubuntu 22.04 server running podman containers setup as podman quadlets controlled via systemd.
@@ -20,7 +55,7 @@ Ubuntu 22.04 server running podman containers setup as podman quadlets controlle
### Required Ports:
Ports required are as follows:
- Elasticsearch: *9200*
- - Kibana: 443,5601
+ - Kibana: *443,5601*
- Wazuh: *1514,1515,1516,55000,514*
- Agent: *8220*
@@ -41,7 +76,7 @@ Podman is more secure (by default) against container escape attacks than Docker.
- Elastic agents provide integrations, have more features than winlogbeat.
- wazuh-manager: runs the wazuh manager so we can deploy and manage wazuh agents.
- Wazuh (open source) gives EDR (Endpoint Detection Response) with security dashboards to cover the security of all of the machines.
- - lme-frontend: will host an api and gui that unifies the architecture behind one interface
+ - lme-frontend (*coming in a future release*): will host an API and GUI that unifies the architecture behind one interface
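A quick way to confirm the required ports listed above actually have listeners once the stack is up — a small sketch, assuming the `ss` tool from iproute2 is available on the server:

```bash
# Check each documented LME port for a listening service.
for p in 9200 5601 443 1514 1515 1516 55000 514 8220; do
  if sudo ss -tulpn | grep -q ":${p} "; then
    echo "port ${p}: listening"
  else
    echo "port ${p}: no listener"
  fi
done
```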
### Agents:
Wazuh agents will enable EDR capabilities, while Elastic agents will enable logging capabilities.
- https://github.com/wazuh/wazuh
- https://github.com/elastic/elastic-agent

## Installation:
-
-If you are unsure you meet the pre-requisites to installing LME, please read our [prerequisites documentation](/docs/markdown/prerequisites.md)
Please ensure you follow all the configuration steps required below.
+
+**Upgrading**:
+If you are a previous user of LME and wish to upgrade from 1.4 -> 2.0, please see our [upgrade documentation](/docs/markdown/maintenance/upgrading.md).
+
### Downloading LME:
**All steps will assume you start in your cloned directory of LME on your ubuntu 22.04 server**
@@ -76,7 +112,7 @@ in `setup` find the configuration for certificate generation and password settin
`instances.yml` defines the certificates that will get created.
The shellscripts initialize accounts and create certificates, and will run from their respective quadlet definitions `lme-setup-accts` and `lme-setup-certs` respectively.
-Quadlet configuration for containers is in: `/quadlet/`. These are mapped to the root's systemd unit files, but will execute as the `lmed` user.
+Quadlet configuration for containers is in: `/quadlet/`. These are mapped to the root's systemd unit files, but will execute as a non-privileged user.
\***TO EDIT**:\* The only file that really needs to be touched is creating `/config/lme-environment.env`, which sets up the required environment variables
@@ -126,14 +162,21 @@ TODO finalize this with more words
3. Setup Nix
4. set service user passwords
5. Install Quadlets
-6. Setup Containers for root
+6. Setup Containers for root: The containers listed in `$clone_directory/config/containers.txt` will be pulled and tagged
7. Start lme.service

#### NOTES:
-1. `/opt/lme` will be owned by the lmed user, all lme services will run and execute as lmed, and this ensures least privilege in lmed's execution because lmed is a non-admin,unprivileged user.
+1. `/opt/lme` will be owned by root; all LME services will run and execute as unprivileged users. The active LME configuration is stored in `/opt/lme/config`.
+
+2. Other relevant directories are listed here:
+- `/root/.config/containers/containers.conf`: LME will set up a custom Podman configuration for secrets management via [ansible vault](https://docs.ansible.com/ansible/latest/cli/ansible-vault.html).
+- `/etc/lme`: storage directory for the master password and user password vault
+- `/etc/lme/pass.sh`: the master password file
+- `/etc/containers/systemd`: directory where LME installs its quadlet service files
+- `/etc/systemd/system`: directory where lme.service is installed
-2. the master password will be stored at `/etc/lme/pass.sh` and owned by root, while service user passwords will be stored at `/etc/lme/vault/`
+3. The master password will be stored at `/etc/lme/pass.sh` and owned by root, while service user passwords will be stored at `/etc/lme/vault/`
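A minimal sketch for sanity-checking the paths called out in the notes above (all locations assume a default install):

```bash
# Confirm the key LME files and directories exist after installation.
for path in /opt/lme/config /etc/lme/pass.sh /etc/lme/vault \
            /etc/containers/systemd /etc/systemd/system/lme.service; do
  sudo test -e "$path" && echo "OK      $path" || echo "MISSING $path"
done
```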
### Verification post install:
@@ -246,7 +289,8 @@ systemctl start wazuh-agent
From PowerShell with admin capabilities run the following command
```
-Invoke-WebRequest -Uri https://packages.wazuh.com/4.x/windows/wazuh-agent-4.7.5-1.msi -OutFile wazuh-agent-4.7.5-1.msi; Start-Process msiexec.exe -ArgumentList '/i wazuh-agent-4.7.5-1.msi /q WAZUH_MANAGER="IPADDRESS OF WAZUH HOST MACHINE"' -Wait -NoNewWindow
+Invoke-WebRequest -Uri https://packages.wazuh.com/4.x/windows/wazuh-agent-4.7.5-1.msi -OutFile wazuh-agent-4.7.5-1.msi;`
+Start-Process msiexec.exe -ArgumentList '/i wazuh-agent-4.7.5-1.msi /q WAZUH_MANAGER="IPADDRESS OF WAZUH HOST MACHINE"' -Wait -NoNewWindow
```

Start the service:
@@ -265,12 +309,11 @@ NET START Wazuh
## Password Encryption:
Password encryption is enabled using ansible-vault to store all lme user and lme service user passwords at rest. We do submit a hash of the password to Have I been pwned to check to see if it is compromised: [READ MORE HERE](https://haveibeenpwned.com/FAQs)
+
### where are passwords stored?:
```bash
# Define user-specific paths
-USER_CONFIG_DIR="/root/.config/lme"
-USER_VAULT_DIR="/opt/lme/vault"
-USER_SECRETS_CONF="$USER_CONFIG_DIR/secrets.conf"
+USER_VAULT_DIR="/etc/lme/vault"
PASSWORD_FILE="/etc/lme/pass.sh"
```
@@ -288,29 +331,36 @@ lme-user@ubuntu:~/LME-TEST$ sudo -i ${PWD}/scripts/password_management.sh -h
### grabbing passwords:
To view the appropriate service user password use ansible-vault, as root:
```
+#script:
+$CLONE_DIRECTORY/scripts/extract_secrets.sh -p #to print
+
+#add them as variables to your current shell
+source $CLONE_DIRECTORY/scripts/extract_secrets.sh #without printing values
+source $CLONE_DIRECTORY/scripts/extract_secrets.sh -q #with no output
+
+## manually:
#where wazuh_api is the service user whose password you want:
sudo -i ansible-vault view /etc/lme/vault/$(sudo -i podman secret ls | grep wazuh_api | awk '{print $1}')
```
-
-
# Documentation:
### Logging Guidance
 - [LME in the CLOUD](/docs/markdown/logging-guidance/cloud.md)
- - [Log Retention](/docs/markdown/logging-guidance/retention.md) TODO update to be current
+ - [Log Retention](/docs/markdown/logging-guidance/retention.md) *TODO*: change link to new documentation
 - [Additional Log Types](/docs/markdown/logging-guidance/other-logging.md)

-### Reference: TODO update these to current
- - [FAQ](/docs/markdown/reference/faq.md)
- - [Troubleshooting](/docs/markdown/reference/troubleshooting.md)
+## Reference:
+ - [FAQ](/docs/markdown/reference/faq.md) *TODO*
+ - [Troubleshooting](/docs/markdown/reference/troubleshooting.md) *TODO*
 - [Dashboard Descriptions](/docs/markdown/reference/dashboard-descriptions.md)
 - [Guide to Organizational Units](/docs/markdown/chapter1/guide_to_ous.md)
 - [Security Model](/docs/markdown/reference/security-model.md)
- - [DEV NOTES](/docs/markdown/reference/dev-notes)

-### Maintenance:
- - [Backups](/docs/markdown/maintenance/backups.md)
- - [Upgrading](/docs/markdown/maintenance/upgrading.md)
- - [Certificates](/docs/markdown/maintenance/certificates.md)
-
+## Maintenance:
+ - [Backups](/docs/markdown/maintenance/backups.md) *TODO* change link to new documentation
+ - [Upgrading 1x -> 2x](/scripts/upgrade/README.md)
+ - [Certificates](/docs/markdown/maintenance/certificates.md) *TODO*
+
+## Agents:
+*TODO* add in docs in new documentation
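As a usage sketch built on the manual `ansible-vault` one-liner shown above, a small helper makes the service-user argument explicit (`wazuh_api` is just an example name; the function itself is not part of LME):

```bash
# Hypothetical wrapper around the documented password-viewing command.
lme_pass() {
  local svc="$1"   # service user whose password you want, e.g. wazuh_api
  sudo -i ansible-vault view \
    "/etc/lme/vault/$(sudo -i podman secret ls | grep "$svc" | awk '{print $1}')"
}
lme_pass wazuh_api
```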
From e26f0bccd9c1bb3cafc518d1c90df05664075c67 Mon Sep 17 00:00:00 2001
From: Michael Reeves
Date: Thu, 17 Oct 2024 14:28:04 -0400
Subject: [PATCH 47/48] Adding updated cloud docs and firewall explanation

---
 docs/markdown/logging-guidance/cloud.md | 65 +++++++++++++++++++++++--
 1 file changed, 62 insertions(+), 3 deletions(-)

diff --git a/docs/markdown/logging-guidance/cloud.md b/docs/markdown/logging-guidance/cloud.md
index b8da5737..56ad50e5 100644
--- a/docs/markdown/logging-guidance/cloud.md
+++ b/docs/markdown/logging-guidance/cloud.md
@@ -5,6 +5,7 @@ These docs attempt to answer some FAQ and other documentation around Logging Mad
## Does LME run in the cloud?
Yes, Logging Made easy is a simple client-server model, and Logging Made Easy can be deployed in the cloud for cloud infrastructure or in the cloud for on-prem machines.
+
### Deploying LME in the cloud for on prem systems:
In order for the LME agents to talk to LME in the cloud you'll need to ensure the clients you want to monitor can communicate through: 1) the cloud firewall AND 2) logging Made easy's own server firewall.

The easiest way is to make sure you can hit these LME server ports from the on-prem client:
 - WAZUH ([DOCS](https://documentation.wazuh.com/current/user-manual/agent/agent-enrollment/requirements.html)): 1514,1515
- - Agent ([DOCS](https://www.elastic.co/guide/en/elastic-stack/current/installing-stack-demo-self.html#install-stack-self-elastic-agent)): 8220
+ - Agent ([DOCS](https://www.elastic.co/guide/en/elastic-stack/current/installing-stack-demo-self.html#install-stack-self-elastic-agent)): 8220

-You'll need to make sure the Cloud firewall is setup to allow those ports. On azure, this is a NSG rule you'll need to set for the LME virtual machine.
+You'll need to make sure your cloud firewall is set up to allow those ports. On Azure, network security groups (NSGs) run a firewall on your virtual machine's network interfaces. You'll need to update your LME virtual machine's rules to allow inbound connections on the agent ports. Azure has a detailed guide for how to add security rules [here](https://learn.microsoft.com/en-us/azure/virtual-network/manage-network-security-group?tabs=network-security-group-portal#create-a-security-rule).
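To verify the firewall/NSG rules actually pass traffic, a hedged sketch run from an on-prem client (the IP is a placeholder; assumes netcat is installed):

```bash
# Probe the LME server's agent-facing ports from a client machine.
LME_IP="203.0.113.10"   # placeholder: substitute your LME server's address
for p in 1514 1515 8220; do
  nc -zv -w 3 "$LME_IP" "$p" && echo "port $p reachable" || echo "port $p blocked"
done
```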
-Then on LME, you'll want to make sure you have either the firewall disabled (if you're using hte cloud firewall as the main firewall):
+Then on LME, you'll want to make sure you have either the firewall disabled (if you're using the cloud firewall as the main firewall):
```
lme-user@ubuntu:~$ sudo ufw status
Status: inactive
```
@@ -38,6 +39,64 @@ To Action From
8220 (v6)                  ALLOW       Anywhere (v6)
+You can add the above ports to ufw via the following commands:
+```
+sudo ufw allow 1514
+sudo ufw allow 1515
+sudo ufw allow 8220
+```
+
+In addition, you'll need to set up rules to forward traffic to the container network:
+```
+ufw allow in on eth0 out on podman1 to any port
+```
+There's a helpful Stack Overflow article on why: [LINK](https://stackoverflow.com/questions/70870689/configure-ufw-for-podman-on-port-443)
+Your `podman1` interface may be named differently; check the output of your network interfaces below and see if it's also called podman1:
+```
+sudo -i podman network inspect lme | jq 'map(select(.name == "lme")) | map(.network_interface) | .[]'
+```
+
### Deploying LME for cloud infrastructure:
Every cloud setup is different, but as long as the LME server is on the same network and able to talk to the machines you want to monitor everything should be good to go.
+
+## Other firewall rules
+You may also want to access Kibana from outside the cloud. You'll want to make sure you allow either port `5601` or port `443` inbound on both the cloud firewall AND the virtual machine firewall.
+
+```
+root@ubuntu:/opt/lme# sudo ufw allow 443
+Rule added
+Rule added (v6)
+```
+
+```
+root@ubuntu:/opt/lme# sudo ufw status
+Status: active
+
+To                         Action      From
+--                         ------      ----
+22                         ALLOW       Anywhere
+1514                       ALLOW       Anywhere
+1515                       ALLOW       Anywhere
+8220                       ALLOW       Anywhere
+443                        ALLOW       Anywhere
+22 (v6)                    ALLOW       Anywhere (v6)
+1514 (v6)                  ALLOW       Anywhere (v6)
+1515 (v6)                  ALLOW       Anywhere (v6)
+8220 (v6)                  ALLOW       Anywhere (v6)
+443 (v6)                   ALLOW       Anywhere (v6)
+```
+
+### Don't lock yourself out when enabling the firewall
+
+You also probably don't want to lock yourself out of ssh, so make sure to allow port 22!
+```
+sudo ufw allow 22
+```
+
+Enable ufw:
+```
+sudo ufw enable
+```

From a9da11de0f374175d872ce8a84ccfdca86ea4d4d Mon Sep 17 00:00:00 2001
From: Michael Reeves
Date: Thu, 17 Oct 2024 14:29:58 -0400
Subject: [PATCH 48/48] Update FAQ and Pre-requisites

---
 docs/markdown/prerequisites.md | 65 +++++++++++++++++-----------------
 docs/markdown/reference/faq.md |  7 ++--
 2 files changed, 34 insertions(+), 38 deletions(-)

diff --git a/docs/markdown/prerequisites.md b/docs/markdown/prerequisites.md
index f34e9ed0..039478cc 100644
--- a/docs/markdown/prerequisites.md
+++ b/docs/markdown/prerequisites.md
@@ -3,26 +3,21 @@ ## What kind of IT skills do I need to install LME?
-
The LME project can be installed by someone at the skill level of a systems administrator or enthusiast. If you have ever…
-
* Installed a Windows server and connected it to an Active Directory domain
-* Ideally deployed a Group Policy Object (GPO)
* Changed firewall rules
* Installed a Linux operating system, and logged in over SSH.
-
… then you are likely to have the skills to install LME!
-We estimate that you should allow a couple of days to run through the entire installation process, though you can break up the process to fit your schedule.
+We estimate that you should allow a couple of hours to run through the entire installation process.
While we have automated steps where we can and made the instructions as detailed as possible, installation will require more steps than simply using an installation wizard.

## High level overview diagram of the LME system
-![High level overview](/docs/imgs/chapter_overview.jpg)
-
-Figure 1: High level overview, linking to documentation chapters
-
+![diagram](/docs/imgs/lme-architecture-v2.jpg)
+
+Please see the [main readme](/README.md#Diagram) for a more detailed description.

## How much does LME cost?
@@ -44,51 +39,55 @@ Text in **bold** means that you have to make a decision or take an action that n
Text in *italics* is an easy way of doing something, such as running a script. Double check you are comfortable doing this. A longer, manual, way is also provided.
-``` Text in boxes is a command you need to type ```
-
+```
+Text in boxes is a command you need to type
+```

You should follow each chapter in order, and complete the checklist at the end before continuing.

## Scaling the solution
To keep LME simple, our guide only covers single server setups. It's difficult to estimate how much load the single server setup will take.
-It's possible to scale the solution to multiple event collectors and ELK nodes, but that will require more experience with the technologies involved.
+It's possible to scale the solution to multiple event collectors and ELK nodes, but that will require more experience with the technologies involved. We plan to publish documentation for scaling LME in the future.

## Required infrastructure
To begin your Logging Made Easy installation, you will need access to (or creation of) the following servers:
-* A Domain Controller to administer a Windows Active Directory. This is for deploying Group Policy Objects (GPO)
* A server with 2 processor cores and at least 8GB RAM. We will install the Windows Event Collector Service on this machine, set it up as a Windows Event Collector (WEC), and join it to the domain.
- * If budget allows, we recommend having a dedicated server for Windows Event collection. If this is not possible, the WEC can be setup on an existing server, but consider the performance impacts.
- * The WEC server can be Windows Server 2016 (or later) or Windows 8.1 client (or later)
-* A Debian-based Linux server. We will install our database (Elasticsearch) and dashboard software on this machine. This is all taken care of through Docker containers.
+* An Ubuntu Linux 22.04 server. We will install our database (Elasticsearch) and dashboard software on this machine. This is all taken care of through Podman containers.

### Minimum Hardware Requirements:
- CPU: 2 processor cores,
+ - CPU: 2 processor cores, 4+ recommended
- MEMORY: 16GB RAM, (32GB+ recommended by [Elastic](https://www.elastic.co/guide/en/cloud-enterprise/current/ece-hardware-prereq.html)),
- STORAGE: dedicated 128GB storage for ELK (not including storage for OS and other files)
- This is estimated to only support ~17 clients of log streaming data/day, and Elasticsearch will automatically purge old logs to make space for new ones. We **highly** suggest more storage than 128GB for any other sized enterprise network.
-
-### Notes:
- * **DO NOT install Docker from the "Featured Snaps" section of the Ubuntu Server install procedure, we install the Docker community edition later.**
- * The deploy script has only been tested on Ubuntu: `18.04` Long Term Support (LTS) and `22.04` LTS.
+
+#### Confirm these settings:
+To check memory, run this command and look under the "free" column:
+```bash
+$ free -h
+               total        used        free      shared  buff/cache   available
+Mem:            31Gi       6.4Gi        22Gi       4.0Mi       2.8Gi        24Gi
+Swap:             0B          0B          0B
+```
+
+To check the number of CPUs:
+```bash
+$ lscpu | egrep 'CPU\(s\)'
+```
+
+To check hardware storage: typically /dev/root will be your main filesystem, and the number of gigabytes available is shown in the Avail column:
+```bash
+$ df -h
+Filesystem      Size  Used Avail Use% Mounted on
+/dev/root       124G   13G  112G  11% /
+```
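The three checks above can be rolled into one pre-flight sketch (thresholds follow the documented minimums; `nproc`, `free`, and GNU `df` are assumed to be available):

```bash
# Pre-flight check against the documented minimums: 2 cores, 16GB RAM, 128GB free.
cores=$(nproc)
mem_gb=$(free -g | awk '/^Mem:/ {print $2}')
avail_gb=$(df -BG --output=avail / | tail -n 1 | tr -dc '0-9')
echo "cores=${cores} mem=${mem_gb}G avail=${avail_gb}G"
if [ "$cores" -ge 2 ] && [ "$mem_gb" -ge 16 ] && [ "$avail_gb" -ge 128 ]; then
  echo "meets the LME minimums"
else
  echo "below the LME minimums"
fi
```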
## Where to install the servers
Servers can be either on premise, in a public cloud or private cloud. It is your choice, but you'll need to consider how to network between the clients and servers.

## What firewall rules are needed?
TODO
-
-![Overview of Network rules](/docs/imgs/troubleshooting-overview.jpg)
-
-Figure 1: Overview of Network rules
-
-
-| Diagram Reference | Protocol information |
-| :---: |-------------|
-| a | Outbound WinRM using TCP 5985. <br> <br> Link is HTTP, underlying data is authenticated and encrypted with Kerberos. <br> <br> See [this Microsoft article](https://docs.microsoft.com/en-us/windows/security/threat-protection/use-windows-event-forwarding-to-assist-in-intrusion-detection) for more information |
-| b | Inbound WinRM TCP 5985. <br> <br> Link is HTTP, underlying data is authenticated and encrypted with Kerberos. <br> <br> See [this Microsoft article](https://docs.microsoft.com/en-us/windows/security/threat-protection/use-windows-event-forwarding-to-assist-in-intrusion-detection) for more information <br> <br> (optional) Inbound TCP 3389 for Remote Desktop management |
-| c | Outbound TCP 5044. <br> <br> Lumberjack protocol using TLS mutual authentication. |
-| d | Inbound TCP 5044. <br> <br> Lumberjack protocol using TLS mutual authentication. <br> <br> Inbound TCP 443 for dashboard access <br> <br> (optional) Inbound TCP 22 for SSH management |
-## Now move onto [Chapter 1 – Setup Windows Event Forwarding](/docs/markdown/chapter1/chapter1.md)

diff --git a/docs/markdown/reference/faq.md b/docs/markdown/reference/faq.md
index cc9db992..68745224 100644
--- a/docs/markdown/reference/faq.md
+++ b/docs/markdown/reference/faq.md
@@ -15,13 +15,10 @@ When reporting an issue or suggesting improvements, it is important to include t
* Sysmon executable: Either run sysmon.exe or look at the status dashboard
-
### Linux Server
-* Docker: on the Linux server type ```docker --version```
+* Podman: on the Linux server type ```podman --version```
* Linux: on the Linux server type ```cat /etc/os-release```
-* Logstash config: on the Linux server type ```sudo docker config inspect logstash.conf --pretty```
-
-
+* LME: show the contents of ```/opt/lme/config```; please redact any private data

## Reporting a bug
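A small sketch that gathers the Linux-side details above in one pass (paths assume a default LME 2.x layout; review the output for private data before sharing):

```bash
# Collect version/config info commonly requested in bug reports.
podman --version
grep PRETTY_NAME /etc/os-release
sudo ls /opt/lme/config   # redact anything private before posting
```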