diff --git a/app/models.py b/app/models.py index cb7e04d3bf4a0..7cde14a588a66 100644 --- a/app/models.py +++ b/app/models.py @@ -1,19 +1,20 @@ from flask.ext.appbuilder import Model -from pydruid import client from datetime import timedelta -from flask.ext.appbuilder.models.mixins import AuditMixin, FileColumn +from flask.ext.appbuilder.models.mixins import AuditMixin from sqlalchemy import Column, Integer, String, ForeignKey, Text, Boolean, DateTime from sqlalchemy import create_engine, MetaData from sqlalchemy import Table as sqlaTable from sqlalchemy.orm import relationship -from app import get_session from dateutil.parser import parse +from pydruid import client +from pydruid.utils.filters import Dimension, Filter +from copy import deepcopy import logging import json import requests -from app import db +from app import db, get_session class Queryable(object): @property @@ -301,6 +302,7 @@ def sync_to_db(cls, name, cluster): col_obj.datasource = datasource col_obj.generate_metrics() #session.commit() + def query( self, groupby, metrics, granularity, @@ -328,7 +330,38 @@ def query( qry['filter'] = filter if limit_spec: qry['limit_spec'] = limit_spec + client = self.cluster.get_pydruid_client() + if timeseries_limit: + # Limit on the number of timeseries, doing a two-phases query + pre_qry = deepcopy(qry) + pre_qry['granularity'] = "all" + client.groupby(**qry) + df = client.export_pandas() + if not df is None and not df.empty: + dims = qry['dimensions'] + filters = [] + for index, row in df.iterrows(): + fields = [] + for dim in dims: + f = Filter.build_filter(Dimension(dim) == row[dim]) + fields.append(f) + if len(fields) > 1: + filt = Filter(type="and", fields=fields) + filters.append(Filter.build_filter(filt)) + elif fields: + filters.append(fields[0]) + + if filters: + ff = Filter(type="or", fields=filters) + if not filter: + qry['filter'] = ff + else: + qry['filter'] = Filter(type="and", fields=[ + Filter.build_filter(ff), + Filter.build_filter(filter)]) + qry['limit_spec'] = None + client.groupby(**qry) df = client.export_pandas() return df diff --git a/app/viz.py b/app/viz.py index 2bcfcea554c9b..3ccc3fbf70b61 100644 --- a/app/viz.py +++ b/app/viz.py @@ -1,4 +1,3 @@ -from pydruid.utils.filters import Dimension, Filter from datetime import datetime from flask import flash, request import pandas as pd @@ -7,6 +6,7 @@ from app.highchart import Highchart from wtforms import Form, SelectMultipleField, SelectField, TextField import config +from pydruid.utils.filters import Dimension, Filter CHART_ARGS = { @@ -257,32 +257,6 @@ def bake_query(self): Doing a 2 phase query where we limit the number of series. """ qry = self.query_obj() - orig_filter = qry['filter'] if 'filter' in qry else '' - qry['granularity'] = "all" - df = self.datasource.query(**qry) - if not df is None: - dims = qry['groupby'] - filters = [] - for index, row in df.iterrows(): - fields = [] - for dim in dims: - f = Filter.build_filter(Dimension(dim) == row[dim]) - fields.append(f) - if len(fields) > 1: - filters.append(Filter.build_filter(Filter(type="and", fields=fields))) - elif fields: - filters.append(fields[0]) - - qry = self.query_obj() - if filters: - ff = Filter(type="or", fields=filters) - if not orig_filter: - qry['filter'] = ff - else: - qry['filter'] = Filter(type="and", fields=[ - Filter.build_filter(ff), - Filter.build_filter(orig_filter)]) - qry['limit_spec'] = None return self.datasource.query(**qry) class TimeSeriesCompareViz(TimeSeriesViz):