Skip to content

Commit

Permalink
Correctly clean metrics when node info cannot be retrieved
Browse files Browse the repository at this point in the history
 + Some false metrics can be created when a node starts to become unresponsive and nodeInfo contains only partial information. Those false metrics will never be cleaned up, as the JMX connection still exists.
  • Loading branch information
Romain GÉRARD committed Sep 26, 2018
1 parent 687ac2d commit 775b2bd
Showing 1 changed file with 15 additions and 9 deletions.
24 changes: 15 additions & 9 deletions src/main/java/com/criteo/nosql/cassandra/exporter/JmxScraper.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

import javax.management.MBeanAttributeInfo;
import javax.management.MBeanServerConnection;
import javax.management.MalformedObjectNameException;
import javax.management.ObjectName;
import javax.management.openmbean.CompositeData;
import javax.management.openmbean.CompositeType;
Expand All @@ -16,10 +15,8 @@
import javax.management.remote.rmi.RMIConnectorServer;
import javax.naming.Context;
import javax.rmi.ssl.SslRMIClientSocketFactory;
import java.io.IOException;
import java.util.*;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static java.util.stream.Collectors.toList;
Expand Down Expand Up @@ -150,11 +147,17 @@ public void run(final boolean forever) throws Exception {

do {
final long now = System.currentTimeMillis();
final NodeInfo nodeInfo = NodeInfo.getNodeInfo(beanConn);

// If we can't get the node info, exit the run early in order to avoid creating stale metrics
// that would never be cleaned up afterwards.
// This situation can arise if the node starts to become unresponsive and some JMX operations time out.
final Optional<NodeInfo> nodeInfo = NodeInfo.getNodeInfo(beanConn);
if (!nodeInfo.isPresent()) return;

beanConn.queryMBeans(null, null).stream()
.flatMap(objectInstance -> toMBeanInfos(beanConn, objectInstance.getObjectName()))
.filter(m -> shouldScrap(m, now))
.forEach(mBean -> updateMetric(beanConn, mBean, nodeInfo));
.forEach(mBean -> updateMetric(beanConn, mBean, nodeInfo.get()));


lastScrapes.forEach((k,lastScrape) -> {
Expand Down Expand Up @@ -340,22 +343,24 @@ private NodeInfo(String clusterName, String datacenterName, Set<String> keyspace
this.tables = tables;
}

static NodeInfo getNodeInfo(MBeanServerConnection beanConn) {
String clusterName = "";
String datacenterName = "";
static Optional<NodeInfo> getNodeInfo(MBeanServerConnection beanConn) {
String clusterName;
String datacenterName;
Set<String> keyspaces = new HashSet<>();
Set<String> tables = new HashSet<>();

try {
clusterName = beanConn.getAttribute(ObjectName.getInstance("org.apache.cassandra.db:type=StorageService"), "ClusterName").toString();
} catch (Exception e) {
logger.error("Cannot retrieve the cluster name information for the node", e);
return Optional.empty();
}

try {
datacenterName = beanConn.getAttribute(ObjectName.getInstance("org.apache.cassandra.db:type=EndpointSnitchInfo"), "Datacenter").toString();
} catch (Exception e) {
logger.error("Cannot retrieve the datacenter name information for the node", e);
return Optional.empty();
}

try {
Expand All @@ -367,9 +372,10 @@ static NodeInfo getNodeInfo(MBeanServerConnection beanConn) {
}
} catch (Exception e) {
logger.error("Cannot retrieve keyspaces/tables information", e);
return Optional.empty();
}

return new NodeInfo(clusterName, datacenterName, keyspaces, tables);
return Optional.of(new NodeInfo(clusterName, datacenterName, keyspaces, tables));
}
}
}
Expand Down

0 comments on commit 775b2bd

Please sign in to comment.