-
Notifications
You must be signed in to change notification settings - Fork 801
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add support for native histograms in OM parser #1040
Changes from 10 commits
977b0b2
fd1b563
e32d2a8
cb013d8
4b1f527
eb6d9de
86f165a
c69a500
c06db3f
d394c71
90cd08e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
import re | ||
from typing import Dict, List, Optional, Sequence, Tuple, Union | ||
|
||
from .samples import Exemplar, Sample, Timestamp | ||
from .samples import Exemplar, NativeHistogram, Sample, Timestamp | ||
|
||
METRIC_TYPES = ( | ||
'counter', 'gauge', 'summary', 'histogram', | ||
|
@@ -36,11 +36,11 @@ def __init__(self, name: str, documentation: str, typ: str, unit: str = ''): | |
self.type: str = typ | ||
self.samples: List[Sample] = [] | ||
|
||
def add_sample(self, name: str, labels: Dict[str, str], value: float, timestamp: Optional[Union[Timestamp, float]] = None, exemplar: Optional[Exemplar] = None) -> None: | ||
def add_sample(self, name: str, labels: Dict[str, str], value: float, timestamp: Optional[Union[Timestamp, float]] = None, exemplar: Optional[Exemplar] = None, native_histogram: Optional[NativeHistogram] = None) -> None: | ||
"""Add a sample to the metric. | ||
|
||
Internal-only, do not use.""" | ||
self.samples.append(Sample(name, labels, value, timestamp, exemplar)) | ||
self.samples.append(Sample(name, labels, value, timestamp, exemplar, native_histogram)) | ||
|
||
def __eq__(self, other: object) -> bool: | ||
return (isinstance(other, Metric) | ||
|
@@ -236,6 +236,7 @@ def __init__(self, | |
sum_value: Optional[float] = None, | ||
labels: Optional[Sequence[str]] = None, | ||
unit: str = '', | ||
native_hist_bucket_factor: Optional[float] = None | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🤔 Not sure if we need this here at all (it will need to be in There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, I deleted it. |
||
): | ||
Metric.__init__(self, name, documentation, 'histogram', unit) | ||
if sum_value is not None and buckets is None: | ||
|
@@ -284,7 +285,6 @@ def add_metric(self, | |
Sample(self.name + '_sum', dict(zip(self._labelnames, labels)), sum_value, timestamp)) | ||
|
||
|
||
|
||
class GaugeHistogramMetricFamily(Metric): | ||
"""A single gauge histogram and its samples. | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,7 +6,7 @@ | |
import re | ||
|
||
from ..metrics_core import Metric, METRIC_LABEL_NAME_RE | ||
from ..samples import Exemplar, Sample, Timestamp | ||
from ..samples import BucketSpan, Exemplar, NativeHistogram, Sample, Timestamp | ||
from ..utils import floatToGoString | ||
|
||
|
||
|
@@ -364,6 +364,101 @@ def _parse_remaining_text(text): | |
return val, ts, exemplar | ||
|
||
|
||
# A native histogram sample line is either
#   <name> {<fields>}              (no labels)
#   <name>{<labels>} {<fields>}    (with labels)
# Compiled once at import time instead of on every parsed line.
_NH_WITHOUT_LABELS_RE = re.compile(r'^[^{} ]+ {[^{}]+}$')
_NH_WITH_LABELS_RE = re.compile(r'[^{} ]+{[^{}]+} {[^{}]+}$')


def _parse_nh_sample(text, suffixes):
    """Parse one exposition line as a native histogram sample.

    Args:
        text: The sample line, without its trailing newline.
        suffixes: Tuple of sample-name suffixes that a native histogram
            sample name must not end with (passed to ``str.endswith``).

    Returns:
        A ``Sample`` whose value, timestamp and exemplar are ``None`` and
        whose native histogram field holds the parsed structure. The labels
        are ``None`` when the line carries no label set. Returns ``None``
        when the line does not look like a native histogram sample, so the
        caller can fall back to the regular sample parser.

    Raises:
        ValueError: If the sample name ends with one of ``suffixes``.
    """
    labels_start = text.find("{")

    # Native histogram with labels.
    if _NH_WITH_LABELS_RE.match(text):
        nh_value_start = text.rindex("{")
        # The pattern guarantees "} {" right before the value, so the label
        # text stops two characters before the value's opening brace.
        labels_end = nh_value_start - 2
        labels = _parse_labels(text[labels_start + 1:labels_end])
        name = text[:labels_start]
        if name.endswith(suffixes):
            raise ValueError("the sample name of a native histogram with labels should have no suffixes", name)
        nat_hist_value = _parse_nh_struct(text[nh_value_start:])
        return Sample(name, labels, None, None, None, nat_hist_value)

    # Native histogram without labels: the first "{" opens the value itself.
    if _NH_WITHOUT_LABELS_RE.match(text):
        nh_value_start = labels_start
        # Exactly one space separates the name from the value.
        name = text[:nh_value_start - 1]
        if name.endswith(suffixes):
            raise ValueError("the sample name of a native histogram should have no suffixes", name)
        nat_hist_value = _parse_nh_struct(text[nh_value_start:])
        return Sample(name, None, None, None, None, nat_hist_value)

    # Not a native histogram.
    return None
|
||
|
||
# Scalar "key:value" fields of the native histogram structure.
_NH_FIELD_RE = re.compile(r'(\w+):\s*([^,}]+)')
# One or more "offset:length" pairs; span offsets are signed.
_NH_SPANS_RE = re.compile(r'(positive_spans|negative_spans):\[(-?\d+:\d+(?:,-?\d+:\d+)*)\]')
# One or more signed integer bucket deltas.
_NH_DELTAS_RE = re.compile(r'(positive_deltas|negative_deltas):\[(-?\d+(?:,-?\d+)*)\]')


def _parse_nh_number(text):
    """Parse a numeric field, keeping integers as int and falling back to float."""
    try:
        return int(text)
    except ValueError:
        return float(text)


def _parse_nh_spans(text):
    """Turn "offset:length,offset:length,..." into a tuple of BucketSpan (None stays None)."""
    if text is None:
        return None
    spans = []
    for span in text.split(','):
        offset, length = span.split(':')
        spans.append(BucketSpan(int(offset), int(length)))
    return tuple(spans)


def _parse_nh_deltas(text):
    """Turn "d1,d2,..." into a tuple of ints (None stays None)."""
    if text is None:
        return None
    return tuple(int(delta) for delta in text.split(','))


def _parse_nh_struct(text):
    """Parse the ``{...}`` value of a native histogram sample.

    Args:
        text: The brace-delimited structure, e.g.
            ``{count:24,sum:100,schema:0,zero_threshold:0.001,zero_count:4,
            positive_spans:[0:2,1:2],positive_deltas:[2,1,-3,3]}``.

    Returns:
        A ``NativeHistogram``. Span and delta fields that are absent from
        ``text`` are ``None``; present ones are tuples.

    Raises:
        KeyError: If ``count``, ``sum``, ``schema``, ``zero_threshold`` or
            ``zero_count`` is missing.
        ValueError: If one of those fields is not a valid number.
    """
    items = dict(_NH_FIELD_RE.findall(text))
    spans = dict(_NH_SPANS_RE.findall(text))
    deltas = dict(_NH_DELTAS_RE.findall(text))

    return NativeHistogram(
        count_value=int(items['count']),
        # The sum of observations need not be an integer.
        sum_value=_parse_nh_number(items['sum']),
        schema=int(items['schema']),
        zero_threshold=float(items['zero_threshold']),
        zero_count=int(items['zero_count']),
        pos_spans=_parse_nh_spans(spans.get('positive_spans')),
        neg_spans=_parse_nh_spans(spans.get('negative_spans')),
        pos_deltas=_parse_nh_deltas(deltas.get('positive_deltas')),
        neg_deltas=_parse_nh_deltas(deltas.get('negative_deltas'))
    )
|
||
|
||
def _group_for_sample(sample, name, typ): | ||
if typ == 'info': | ||
# We can't distinguish between groups for info metrics. | ||
|
@@ -406,6 +501,8 @@ def do_checks(): | |
for s in samples: | ||
suffix = s.name[len(name):] | ||
g = _group_for_sample(s, name, 'histogram') | ||
if len(suffix) == 0: | ||
continue | ||
if g != group or s.timestamp != timestamp: | ||
if group is not None: | ||
do_checks() | ||
|
@@ -486,6 +583,8 @@ def build_metric(name, documentation, typ, unit, samples): | |
metric.samples = samples | ||
return metric | ||
|
||
is_nh = False | ||
typ = None | ||
for line in fd: | ||
if line[-1] == '\n': | ||
line = line[:-1] | ||
|
@@ -518,7 +617,7 @@ def build_metric(name, documentation, typ, unit, samples): | |
group_timestamp_samples = set() | ||
samples = [] | ||
allowed_names = [parts[2]] | ||
|
||
if parts[1] == 'HELP': | ||
if documentation is not None: | ||
raise ValueError("More than one HELP for metric: " + line) | ||
|
@@ -537,8 +636,18 @@ def build_metric(name, documentation, typ, unit, samples): | |
else: | ||
raise ValueError("Invalid line: " + line) | ||
else: | ||
sample = _parse_sample(line) | ||
if sample.name not in allowed_names: | ||
if typ == 'histogram': | ||
# set to true to account for native histograms naming exceptions/sanitizing differences | ||
is_nh = True | ||
sample = _parse_nh_sample(line, tuple(type_suffixes['histogram'])) | ||
csmarchbanks marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# It's not a native histogram | ||
if sample is None: | ||
is_nh = False | ||
sample = _parse_sample(line) | ||
else: | ||
is_nh = False | ||
sample = _parse_sample(line) | ||
if sample.name not in allowed_names and not is_nh: | ||
if name is not None: | ||
yield build_metric(name, documentation, typ, unit, samples) | ||
# Start an unknown metric. | ||
|
@@ -570,26 +679,29 @@ def build_metric(name, documentation, typ, unit, samples): | |
or _isUncanonicalNumber(sample.labels['quantile']))): | ||
raise ValueError("Invalid quantile label: " + line) | ||
|
||
g = tuple(sorted(_group_for_sample(sample, name, typ).items())) | ||
if group is not None and g != group and g in seen_groups: | ||
raise ValueError("Invalid metric grouping: " + line) | ||
if group is not None and g == group: | ||
if (sample.timestamp is None) != (group_timestamp is None): | ||
raise ValueError("Mix of timestamp presence within a group: " + line) | ||
if group_timestamp is not None and group_timestamp > sample.timestamp and typ != 'info': | ||
raise ValueError("Timestamps went backwards within a group: " + line) | ||
if not is_nh: | ||
g = tuple(sorted(_group_for_sample(sample, name, typ).items())) | ||
if group is not None and g != group and g in seen_groups: | ||
raise ValueError("Invalid metric grouping: " + line) | ||
if group is not None and g == group: | ||
if (sample.timestamp is None) != (group_timestamp is None): | ||
raise ValueError("Mix of timestamp presence within a group: " + line) | ||
if group_timestamp is not None and group_timestamp > sample.timestamp and typ != 'info': | ||
raise ValueError("Timestamps went backwards within a group: " + line) | ||
else: | ||
group_timestamp_samples = set() | ||
|
||
series_id = (sample.name, tuple(sorted(sample.labels.items()))) | ||
if sample.timestamp != group_timestamp or series_id not in group_timestamp_samples: | ||
# Not a duplicate due to timestamp truncation. | ||
samples.append(sample) | ||
group_timestamp_samples.add(series_id) | ||
|
||
group = g | ||
group_timestamp = sample.timestamp | ||
seen_groups.add(g) | ||
else: | ||
group_timestamp_samples = set() | ||
|
||
series_id = (sample.name, tuple(sorted(sample.labels.items()))) | ||
if sample.timestamp != group_timestamp or series_id not in group_timestamp_samples: | ||
# Not a duplicate due to timestamp truncation. | ||
samples.append(sample) | ||
group_timestamp_samples.add(series_id) | ||
|
||
group = g | ||
group_timestamp = sample.timestamp | ||
seen_groups.add(g) | ||
|
||
if typ == 'stateset' and sample.value not in [0, 1]: | ||
raise ValueError("Stateset samples can only have values zero and one: " + line) | ||
|
@@ -606,7 +718,7 @@ def build_metric(name, documentation, typ, unit, samples): | |
(typ in ['histogram', 'gaugehistogram'] and sample.name.endswith('_bucket')) | ||
or (typ in ['counter'] and sample.name.endswith('_total'))): | ||
raise ValueError("Invalid line only histogram/gaugehistogram buckets and counters can have exemplars: " + line) | ||
|
||
if name is not None: | ||
yield build_metric(name, documentation, typ, unit, samples) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit, could you just remove this line so we don't have a needless diff/history entry?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done!