From 8f0c285504a25666a373c50fd8976e50e3c5c2f3 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Fri, 4 Dec 2020 09:45:34 -0600 Subject: [PATCH] Support 'calendar_interval' and 'fixed_interval' in DateHistogramFacet --- docs/faceted_search.rst | 2 +- elasticsearch_dsl/faceted_search.py | 38 ++++- tests/conftest.py | 11 ++ tests/test_faceted_search.py | 44 ++++++ tests/test_integration/test_faceted_search.py | 146 +++++++++++------- 5 files changed, 177 insertions(+), 64 deletions(-) diff --git a/docs/faceted_search.rst b/docs/faceted_search.rst index c2f7e5e0e..bc1777723 100644 --- a/docs/faceted_search.rst +++ b/docs/faceted_search.rst @@ -49,7 +49,7 @@ There are several different facets available: provides an option to split documents into groups based on a value of a field, for example ``TermsFacet(field='category')`` ``DateHistogramFacet`` - split documents into time intervals, example: ``DateHistogramFacet(field="published_date", interval="day")`` + split documents into time intervals, example: ``DateHistogramFacet(field="published_date", calendar_interval="day")`` ``HistogramFacet`` similar to ``DateHistogramFacet`` but for numerical values: ``HistogramFacet(field="rating", interval=2)`` diff --git a/elasticsearch_dsl/faceted_search.py b/elasticsearch_dsl/faceted_search.py index 9c653a85c..e102ed5cb 100644 --- a/elasticsearch_dsl/faceted_search.py +++ b/elasticsearch_dsl/faceted_search.py @@ -168,14 +168,34 @@ def get_value_filter(self, filter_value): ) +def _date_interval_month(d): + return (d + timedelta(days=32)).replace(day=1) + + +def _date_interval_week(d): + return d + timedelta(days=7) + + +def _date_interval_day(d): + return d + timedelta(days=1) + + +def _date_interval_hour(d): + return d + timedelta(hours=1) + + class DateHistogramFacet(Facet): agg_type = "date_histogram" DATE_INTERVALS = { - "month": lambda d: (d + timedelta(days=32)).replace(day=1), - "week": lambda d: d + timedelta(days=7), - "day": lambda d: d + timedelta(days=1), - "hour": lambda d: d + timedelta(hours=1), + "month": _date_interval_month, + "1M": _date_interval_month, + "week": _date_interval_week, + "1w": _date_interval_week, + "day": _date_interval_day, + "1d": _date_interval_day, + "hour": _date_interval_hour, + "1h": _date_interval_hour, } def __init__(self, **kwargs): @@ -194,12 +214,20 @@ def get_value(self, bucket): return bucket["key"] def get_value_filter(self, filter_value): + for interval_type in ("calendar_interval", "fixed_interval"): + if interval_type in self._params: + break + else: + interval_type = "interval" + return Range( _expand__to_dot=False, **{ self._params["field"]: { "gte": filter_value, - "lt": self.DATE_INTERVALS[self._params["interval"]](filter_value), + "lt": self.DATE_INTERVALS[self._params[interval_type]]( + filter_value + ), } } ) diff --git a/tests/conftest.py b/tests/conftest.py index 3a481bbc7..b7326c3fb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -18,6 +18,7 @@ import os +import re from datetime import datetime from elasticsearch.helpers import bulk @@ -47,6 +48,16 @@ def client(): skip() +@fixture(scope="session") +def es_version(client): + info = client.info() + print(info) + yield tuple( + int(x) + for x in re.match(r"^([0-9.]+)", info["version"]["number"]).group(1).split(".") + ) + + @fixture def write_client(client): yield client diff --git a/tests/test_faceted_search.py b/tests/test_faceted_search.py index 8589193ae..66a096e44 100644 --- a/tests/test_faceted_search.py +++ b/tests/test_faceted_search.py @@ -17,6 +17,8 @@ from datetime import datetime +import pytest + from elasticsearch_dsl.faceted_search import ( DateHistogramFacet, FacetedSearch, @@ -144,3 +146,45 @@ def test_date_histogram_facet_with_1970_01_01_date(): dhf = DateHistogramFacet() assert dhf.get_value({"key": None}) == datetime(1970, 1, 1, 0, 0) assert dhf.get_value({"key": 0}) == datetime(1970, 1, 1, 0, 0) + + +@pytest.mark.parametrize( + ["interval_type", "interval"], + [ + ("interval", "month"), + ("calendar_interval", "month"), + ("interval", "week"), + ("calendar_interval", "week"), + ("interval", "day"), + ("calendar_interval", "day"), + ("fixed_interval", "day"), + ("interval", "hour"), + ("fixed_interval", "hour"), + ("interval", "1M"), + ("calendar_interval", "1M"), + ("interval", "1w"), + ("calendar_interval", "1w"), + ("interval", "1d"), + ("calendar_interval", "1d"), + ("fixed_interval", "1d"), + ("interval", "1h"), + ("fixed_interval", "1h"), + ], +) +def test_date_histogram_interval_types(interval_type, interval): + dhf = DateHistogramFacet(field="@timestamp", **{interval_type: interval}) + assert dhf.get_aggregation().to_dict() == { + "date_histogram": { + "field": "@timestamp", + interval_type: interval, + "min_doc_count": 0, + } + } + dhf.get_value_filter(datetime.now()) + + +def test_date_histogram_no_interval_keyerror(): + dhf = DateHistogramFacet(field="@timestamp") + with pytest.raises(KeyError) as e: + dhf.get_value_filter(datetime.now()) + assert str(e.value) == "'interval'" diff --git a/tests/test_integration/test_faceted_search.py b/tests/test_integration/test_faceted_search.py index 9f6253e46..de0a2b311 100644 --- a/tests/test_integration/test_faceted_search.py +++ b/tests/test_integration/test_faceted_search.py @@ -17,6 +17,8 @@ from datetime import datetime +import pytest + from elasticsearch_dsl import A, Boolean, Date, Document, Keyword from elasticsearch_dsl.faceted_search import ( DateHistogramFacet, @@ -29,25 +31,6 @@ from .test_document import PullRequest -class CommitSearch(FacetedSearch): - index = "flat-git" - fields = ( - "description", - "files", - ) - - facets = { - "files": TermsFacet(field="files"), - "frequency": DateHistogramFacet( - field="authored_date", interval="day", min_doc_count=1 - ), - "deletions": RangeFacet( - field="stats.deletions", - ranges=[("ok", (None, 1)), ("good", (1, 5)), ("better", (5, None))], - ), - } - - class Repos(Document): is_public = Boolean() created_at = Date() @@ -64,19 +47,6 @@ class Index: name = "git" -class RepoSearch(FacetedSearch): - index = "git" - doc_types = [Repos] - facets = { - "public": TermsFacet(field="is_public"), - "created": DateHistogramFacet(field="created_at", interval="month"), - } - - def search(self): - s = super(RepoSearch, self).search() - return s.filter("term", commit_repo="repo") - - class MetricSearch(FacetedSearch): index = "git" doc_types = [Commit] @@ -86,15 +56,72 @@ class MetricSearch(FacetedSearch): } -class PRSearch(FacetedSearch): - index = "test-prs" - doc_types = [PullRequest] - facets = { - "comments": NestedFacet( - "comments", - DateHistogramFacet(field="comments.created_at", interval="month"), +@pytest.fixture(scope="session") +def commit_search_cls(es_version): + if es_version >= (7, 2): + interval_kwargs = {"fixed_interval": "1d"} + else: + interval_kwargs = {"interval": "day"} + + class CommitSearch(FacetedSearch): + index = "flat-git" + fields = ( + "description", + "files", ) - } + + facets = { + "files": TermsFacet(field="files"), + "frequency": DateHistogramFacet( + field="authored_date", min_doc_count=1, **interval_kwargs + ), + "deletions": RangeFacet( + field="stats.deletions", + ranges=[("ok", (None, 1)), ("good", (1, 5)), ("better", (5, None))], + ), + } + + return CommitSearch + + +@pytest.fixture(scope="session") +def repo_search_cls(es_version): + interval_type = "calendar_interval" if es_version >= (7, 2) else "interval" + + class RepoSearch(FacetedSearch): + index = "git" + doc_types = [Repos] + facets = { + "public": TermsFacet(field="is_public"), + "created": DateHistogramFacet( + field="created_at", **{interval_type: "month"} + ), + } + + def search(self): + s = super(RepoSearch, self).search() + return s.filter("term", commit_repo="repo") + + return RepoSearch + + +@pytest.fixture(scope="session") +def pr_search_cls(es_version): + interval_type = "calendar_interval" if es_version >= (7, 2) else "interval" + + class PRSearch(FacetedSearch): + index = "test-prs" + doc_types = [PullRequest] + facets = { + "comments": NestedFacet( + "comments", + DateHistogramFacet( + field="comments.created_at", **{interval_type: "month"} + ), + ) + } + + return PRSearch def test_facet_with_custom_metric(data_client): @@ -106,36 +133,36 @@ def test_facet_with_custom_metric(data_client): assert dates[0] == 1399038439000 -def test_nested_facet(pull_request): - prs = PRSearch() +def test_nested_facet(pull_request, pr_search_cls): + prs = pr_search_cls() r = prs.execute() assert r.hits.total.value == 1 assert [(datetime(2018, 1, 1, 0, 0), 1, False)] == r.facets.comments -def test_nested_facet_with_filter(pull_request): - prs = PRSearch(filters={"comments": datetime(2018, 1, 1, 0, 0)}) +def test_nested_facet_with_filter(pull_request, pr_search_cls): + prs = pr_search_cls(filters={"comments": datetime(2018, 1, 1, 0, 0)}) r = prs.execute() assert r.hits.total.value == 1 assert [(datetime(2018, 1, 1, 0, 0), 1, True)] == r.facets.comments - prs = PRSearch(filters={"comments": datetime(2018, 2, 1, 0, 0)}) + prs = pr_search_cls(filters={"comments": datetime(2018, 2, 1, 0, 0)}) r = prs.execute() assert not r.hits -def test_datehistogram_facet(data_client): - rs = RepoSearch() +def test_datehistogram_facet(data_client, repo_search_cls): + rs = repo_search_cls() r = rs.execute() assert r.hits.total.value == 1 assert [(datetime(2014, 3, 1, 0, 0), 1, False)] == r.facets.created -def test_boolean_facet(data_client): - rs = RepoSearch() +def test_boolean_facet(data_client, repo_search_cls): + rs = repo_search_cls() r = rs.execute() assert r.hits.total.value == 1 @@ -144,9 +171,8 @@ def test_boolean_facet(data_client): assert value is True -def test_empty_search_finds_everything(data_client): - cs = CommitSearch() - +def test_empty_search_finds_everything(data_client, es_version, commit_search_cls): + cs = commit_search_cls() r = cs.execute() assert r.hits.total.value == 52 @@ -190,8 +216,10 @@ def test_empty_search_finds_everything(data_client): ] == r.facets.deletions -def test_term_filters_are_shown_as_selected_and_data_is_filtered(data_client): - cs = CommitSearch(filters={"files": "test_elasticsearch_dsl"}) +def test_term_filters_are_shown_as_selected_and_data_is_filtered( + data_client, commit_search_cls +): + cs = commit_search_cls(filters={"files": "test_elasticsearch_dsl"}) r = cs.execute() @@ -234,16 +262,18 @@ def test_term_filters_are_shown_as_selected_and_data_is_filtered(data_client): ] == r.facets.deletions -def test_range_filters_are_shown_as_selected_and_data_is_filtered(data_client): - cs = CommitSearch(filters={"deletions": "better"}) +def test_range_filters_are_shown_as_selected_and_data_is_filtered( + data_client, commit_search_cls +): + cs = commit_search_cls(filters={"deletions": "better"}) r = cs.execute() assert 19 == r.hits.total.value -def test_pagination(data_client): - cs = CommitSearch() +def test_pagination(data_client, commit_search_cls): + cs = commit_search_cls() cs = cs[0:20] assert 52 == cs.count()