Skip to content

Commit

Permalink
Better support for Druid cardinality estimation metrics (#613)
Browse files Browse the repository at this point in the history
* added recognition of thetasketch and HLL metrics

* make sure the name agrees with SQL convention
  • Loading branch information
axeisghost authored and mistercrunch committed Jun 14, 2016
1 parent bc58c5d commit 347c39b
Showing 1 changed file with 24 additions and 10 deletions.
34 changes: 24 additions & 10 deletions caravel/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1078,6 +1078,8 @@ def sync_to_db(cls, name, cluster):
if datatype == "STRING":
col_obj.groupby = True
col_obj.filterable = True
if datatype == "hyperUnique" or datatype == "thetaSketch":
col_obj.count_distinct = True
if col_obj:
col_obj.type = cols[col]['type']
session.flush()
Expand Down Expand Up @@ -1447,17 +1449,29 @@ def generate_metrics(self):
'type': mt, 'name': name, 'fieldName': self.column_name})
))
if self.count_distinct:
mt = 'count_distinct'
name = 'count_distinct__' + self.column_name
metrics.append(DruidMetric(
metric_name=name,
verbose_name='COUNT(DISTINCT {})'.format(self.column_name),
metric_type='count_distinct',
json=json.dumps({
'type': 'cardinality',
'name': name,
'fieldNames': [self.column_name]})
))
if self.type == 'hyperUnique' or self.type == 'thetaSketch':
metrics.append(DruidMetric(
metric_name=name,
verbose_name='COUNT(DISTINCT {})'.format(self.column_name),
metric_type=self.type,
json=json.dumps({
'type': self.type,
'name': name,
'fieldName': self.column_name
})
))
else:
mt = 'count_distinct'
metrics.append(DruidMetric(
metric_name=name,
verbose_name='COUNT(DISTINCT {})'.format(self.column_name),
metric_type='count_distinct',
json=json.dumps({
'type': 'cardinality',
'name': name,
'fieldNames': [self.column_name]})
))
session = get_session()
new_metrics = []
for metric in metrics:
Expand Down

0 comments on commit 347c39b

Please sign in to comment.