Skip to content

Commit

Permalink
Get cluster health from v1/info
Browse files Browse the repository at this point in the history
  • Loading branch information
willmostly committed Feb 23, 2024
1 parent e2aaa0e commit 836fec5
Show file tree
Hide file tree
Showing 9 changed files with 139 additions and 26 deletions.
6 changes: 1 addition & 5 deletions docs/quickstart-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,7 @@ dataStore:
driver: org.postgresql.Driver

clusterStatsConfiguration:
useApi: true

backendState:
username: gateway
ssl: false
clusterStatsMonitorType: COORDINATOR_INFO

modules:
- io.trino.gateway.ha.module.HaGatewayProviderModule
Expand Down
6 changes: 1 addition & 5 deletions gateway-ha/gateway-ha-config-docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,8 @@ dataStore:
driver: org.postgresql.Driver
queryHistoryHoursRetention: 24

backendState:
username: lb_query
ssl: false

clusterStatsConfiguration:
useApi: true
clusterStatsMonitorType: COORDINATOR_INFO

server:
applicationConnectors:
Expand Down
6 changes: 1 addition & 5 deletions gateway-ha/gateway-ha-config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,8 @@ dataStore:
driver: org.postgresql.Driver
queryHistoryHoursRetention: 24

backendState:
username: lb_query
ssl: false

clusterStatsConfiguration:
useApi: true
clusterStatsMonitorType: COORDINATOR_INFO

server:
applicationConnectors:
Expand Down
14 changes: 14 additions & 0 deletions gateway-ha/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,20 @@
<artifactId>metrics-healthchecks</artifactId>
</dependency>

<dependency>
<groupId>io.trino</groupId>
<artifactId>trino-client</artifactId>
<version>411</version>
<!-- This is the most recent version of the client
that can be used due to https://github.com/trinodb/trino/issues/20820 -->
<exclusions>
<exclusion>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>jakarta.annotation</groupId>
<artifactId>jakarta.annotation-api</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.trino.gateway.ha.clustermonitor;

import com.fasterxml.jackson.databind.ObjectMapper;
import io.trino.client.ServerInfo;
import io.trino.gateway.ha.config.ProxyBackendConfiguration;
import okhttp3.Call;
import okhttp3.HttpUrl;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.Response;
import org.eclipse.jetty.http.HttpStatus;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;

public class ClusterStatsCoordinatorInfoMonitor
implements ClusterStatsMonitor
{
private static OkHttpClient client = new OkHttpClient();
private static final ObjectMapper objectMapper = new ObjectMapper();
private static final Logger log = LoggerFactory.getLogger(ClusterStatsCoordinatorInfoMonitor.class);

@Override
public ClusterStats monitor(ProxyBackendConfiguration backend)
{
ClusterStats.Builder clusterStats = ClusterStats.builder(backend.getName());

clusterStats.healthy(isReadyStatus(backend.getProxyTo()))
.proxyTo(backend.getProxyTo())
.externalUrl(backend.getExternalUrl())
.routingGroup(backend.getRoutingGroup());

return clusterStats.build();
}

private boolean isReadyStatus(String baseUrl)
{
String statusUrl = baseUrl + "/v1/status";

Request request = new Request.Builder()
.url(HttpUrl.parse(statusUrl))
.get()
.build();

Call call = client.newCall(request);

try (Response res = call.execute()) {
if (res.code() != HttpStatus.OK_200) {
return false;
}
ServerInfo info = objectMapper.readValue(res.body().string(), ServerInfo.class);
return !info.isStarting();
}
catch (IOException e) {
log.warn("Error checking health of backend {}. Error: {}", baseUrl, e.getMessage());
}
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,17 @@

public class ClusterStatsConfiguration
{
private boolean useApi;
private ClusterStatsMonitorType clusterStatsMonitorType;

public ClusterStatsConfiguration() {}

public boolean isUseApi()
public ClusterStatsMonitorType getClusterStatsMonitorType()
{
return this.useApi;
return clusterStatsMonitorType;
}

public void setUseApi(boolean useApi)
public void setClusterStatsMonitorType(ClusterStatsMonitorType clusterStatsMonitorType)
{
this.useApi = useApi;
this.clusterStatsMonitorType = clusterStatsMonitorType;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.gateway.ha.config;

public enum ClusterStatsMonitorType
{
NOOP,
COORDINATOR_INFO,
UI_API,
JDBC
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import com.google.inject.Singleton;
import io.dropwizard.core.setup.Environment;
import io.trino.gateway.baseapp.AppModule;
import io.trino.gateway.ha.clustermonitor.ClusterStatsCoordinatorInfoMonitor;
import io.trino.gateway.ha.clustermonitor.ClusterStatsHttpMonitor;
import io.trino.gateway.ha.clustermonitor.ClusterStatsJdbcMonitor;
import io.trino.gateway.ha.clustermonitor.ClusterStatsMonitor;
Expand All @@ -41,13 +42,18 @@ public ClusterStatsMonitor getClusterStatsMonitor()
{
ClusterStatsConfiguration clusterStatsConfig = config.getClusterStatsConfiguration();
if (config.getBackendState() == null) {
return new NoopClusterStatsMonitor();
return new ClusterStatsCoordinatorInfoMonitor();
}
if (clusterStatsConfig.isUseApi()) {
return new ClusterStatsHttpMonitor(config.getBackendState());
}
else {
return new ClusterStatsJdbcMonitor(config.getBackendState());
switch (clusterStatsConfig.getClusterStatsMonitorType()) {
case COORDINATOR_INFO:
return new ClusterStatsCoordinatorInfoMonitor();
case UI_API:
return new ClusterStatsHttpMonitor(config.getBackendState());
case JDBC:
return new ClusterStatsJdbcMonitor(config.getBackendState());
case NOOP:
default:
return new NoopClusterStatsMonitor();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,21 @@ public void testJdbcMonitor()
testClusterStatsMonitor(ClusterStatsJdbcMonitor::new);
}

public void testCoordinatorInfoMonitor()
{
testClusterStatsMonitor(new ClusterStatsCoordinatorInfoMonitor());
}

private void testClusterStatsMonitor(Function<BackendStateConfiguration, ClusterStatsMonitor> monitorFactory)
{
BackendStateConfiguration backendStateConfiguration = new BackendStateConfiguration();
backendStateConfiguration.setUsername("test_user");
ClusterStatsMonitor monitor = monitorFactory.apply(backendStateConfiguration);
testClusterStatsMonitor(monitor);
}

private void testClusterStatsMonitor(ClusterStatsMonitor monitor)
{
ProxyBackendConfiguration proxyBackend = new ProxyBackendConfiguration();
proxyBackend.setProxyTo("http://localhost:" + trino.getMappedPort(8080));
proxyBackend.setName("test_cluster");
Expand Down

0 comments on commit 836fec5

Please sign in to comment.