Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support 'calendar_interval' and 'fixed_interval' in DateHistogramFacet #1467

Merged
merged 2 commits into from
Dec 4, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/faceted_search.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ There are several different facets available:
provides an option to split documents into groups based on a value of a field, for example ``TermsFacet(field='category')``

``DateHistogramFacet``
split documents into time intervals, example: ``DateHistogramFacet(field="published_date", interval="day")``
split documents into time intervals, example: ``DateHistogramFacet(field="published_date", calendar_interval="day")``

``HistogramFacet``
similar to ``DateHistogramFacet`` but for numerical values: ``HistogramFacet(field="rating", interval=2)``
Expand Down
38 changes: 33 additions & 5 deletions elasticsearch_dsl/faceted_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,14 +168,34 @@ def get_value_filter(self, filter_value):
)


def _date_interval_month(d):
return (d + timedelta(days=32)).replace(day=1)


def _date_interval_week(d):
return d + timedelta(days=7)


def _date_interval_day(d):
return d + timedelta(days=1)


def _date_interval_hour(d):
return d + timedelta(hours=1)


class DateHistogramFacet(Facet):
agg_type = "date_histogram"

DATE_INTERVALS = {
"month": lambda d: (d + timedelta(days=32)).replace(day=1),
"week": lambda d: d + timedelta(days=7),
"day": lambda d: d + timedelta(days=1),
"hour": lambda d: d + timedelta(hours=1),
"month": _date_interval_month,
"1M": _date_interval_month,
"week": _date_interval_week,
"1w": _date_interval_week,
"day": _date_interval_day,
"1d": _date_interval_day,
"hour": _date_interval_hour,
"1h": _date_interval_hour,
}

def __init__(self, **kwargs):
Expand All @@ -194,12 +214,20 @@ def get_value(self, bucket):
return bucket["key"]

def get_value_filter(self, filter_value):
for interval_type in ("calendar_interval", "fixed_interval"):
if interval_type in self._params:
break
else:
interval_type = "interval"

return Range(
_expand__to_dot=False,
**{
self._params["field"]: {
"gte": filter_value,
"lt": self.DATE_INTERVALS[self._params["interval"]](filter_value),
"lt": self.DATE_INTERVALS[self._params[interval_type]](
filter_value
),
}
}
)
Expand Down
11 changes: 11 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@


import os
import re
from datetime import datetime

from elasticsearch.helpers import bulk
Expand Down Expand Up @@ -47,6 +48,16 @@ def client():
skip()


@fixture(scope="session")
def es_version(client):
    """Yield the server's version number as a tuple of ints, e.g. ``(7, 10, 0)``.

    The value is read from ``client.info()["version"]["number"]``. Only the
    leading dotted-numeric part is kept, so any trailing suffix after the
    digits is ignored.

    :raises ValueError: if the reported version does not start with a number
    """
    version_number = client.info()["version"]["number"]
    # Anchor on "digits(.digits)*" so a stray trailing dot can never yield
    # an empty component that int() would choke on.
    match = re.match(r"^([0-9]+(?:\.[0-9]+)*)", version_number)
    if match is None:
        raise ValueError(
            "Unable to parse Elasticsearch version from %r" % (version_number,)
        )
    yield tuple(int(part) for part in match.group(1).split("."))


@fixture
def write_client(client):
yield client
Expand Down
44 changes: 44 additions & 0 deletions tests/test_faceted_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

from datetime import datetime

import pytest

from elasticsearch_dsl.faceted_search import (
DateHistogramFacet,
FacetedSearch,
Expand Down Expand Up @@ -144,3 +146,45 @@ def test_date_histogram_facet_with_1970_01_01_date():
dhf = DateHistogramFacet()
assert dhf.get_value({"key": None}) == datetime(1970, 1, 1, 0, 0)
assert dhf.get_value({"key": 0}) == datetime(1970, 1, 1, 0, 0)


@pytest.mark.parametrize(
    ["interval_type", "interval"],
    [
        ("interval", "month"),
        ("calendar_interval", "month"),
        ("interval", "week"),
        ("calendar_interval", "week"),
        ("interval", "day"),
        ("calendar_interval", "day"),
        ("fixed_interval", "day"),
        ("interval", "hour"),
        ("fixed_interval", "hour"),
        ("interval", "1M"),
        ("calendar_interval", "1M"),
        ("interval", "1w"),
        ("calendar_interval", "1w"),
        ("interval", "1d"),
        ("calendar_interval", "1d"),
        ("fixed_interval", "1d"),
        ("interval", "1h"),
        ("fixed_interval", "1h"),
    ],
)
def test_date_histogram_interval_types(interval_type, interval):
    """Each interval keyword/value pair is passed through to the aggregation
    unchanged and can be used to build a value filter."""
    facet = DateHistogramFacet(field="@timestamp", **{interval_type: interval})

    expected_aggregation = {
        "date_histogram": {
            "field": "@timestamp",
            interval_type: interval,
            "min_doc_count": 0,
        }
    }
    assert facet.get_aggregation().to_dict() == expected_aggregation

    # No assertion on the result: this only checks that building the filter
    # for the given interval does not raise.
    facet.get_value_filter(datetime.now())


def test_date_histogram_no_interval_keyerror():
    """Building a value filter without any interval parameter raises a
    ``KeyError`` naming the ``interval`` key."""
    facet = DateHistogramFacet(field="@timestamp")

    with pytest.raises(KeyError) as exc_info:
        facet.get_value_filter(datetime.now())

    # Checked after the block exits; inside it this line would never run.
    assert str(exc_info.value) == "'interval'"
146 changes: 88 additions & 58 deletions tests/test_integration/test_faceted_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

from datetime import datetime

import pytest

from elasticsearch_dsl import A, Boolean, Date, Document, Keyword
from elasticsearch_dsl.faceted_search import (
DateHistogramFacet,
Expand All @@ -29,25 +31,6 @@
from .test_document import PullRequest


class CommitSearch(FacetedSearch):
index = "flat-git"
fields = (
"description",
"files",
)

facets = {
"files": TermsFacet(field="files"),
"frequency": DateHistogramFacet(
field="authored_date", interval="day", min_doc_count=1
),
"deletions": RangeFacet(
field="stats.deletions",
ranges=[("ok", (None, 1)), ("good", (1, 5)), ("better", (5, None))],
),
}


class Repos(Document):
is_public = Boolean()
created_at = Date()
Expand All @@ -64,19 +47,6 @@ class Index:
name = "git"


class RepoSearch(FacetedSearch):
index = "git"
doc_types = [Repos]
facets = {
"public": TermsFacet(field="is_public"),
"created": DateHistogramFacet(field="created_at", interval="month"),
}

def search(self):
s = super(RepoSearch, self).search()
return s.filter("term", commit_repo="repo")


class MetricSearch(FacetedSearch):
index = "git"
doc_types = [Commit]
Expand All @@ -86,15 +56,72 @@ class MetricSearch(FacetedSearch):
}


class PRSearch(FacetedSearch):
index = "test-prs"
doc_types = [PullRequest]
facets = {
"comments": NestedFacet(
"comments",
DateHistogramFacet(field="comments.created_at", interval="month"),
@pytest.fixture(scope="session")
def commit_search_cls(es_version):
if es_version >= (7, 2):
interval_kwargs = {"fixed_interval": "1d"}
else:
interval_kwargs = {"interval": "day"}

class CommitSearch(FacetedSearch):
index = "flat-git"
fields = (
"description",
"files",
)
}

facets = {
"files": TermsFacet(field="files"),
"frequency": DateHistogramFacet(
field="authored_date", min_doc_count=1, **interval_kwargs
),
"deletions": RangeFacet(
field="stats.deletions",
ranges=[("ok", (None, 1)), ("good", (1, 5)), ("better", (5, None))],
),
}

return CommitSearch


@pytest.fixture(scope="session")
def repo_search_cls(es_version):
interval_type = "calendar_interval" if es_version >= (7, 2) else "interval"

class RepoSearch(FacetedSearch):
index = "git"
doc_types = [Repos]
facets = {
"public": TermsFacet(field="is_public"),
"created": DateHistogramFacet(
field="created_at", **{interval_type: "month"}
),
}

def search(self):
s = super(RepoSearch, self).search()
return s.filter("term", commit_repo="repo")

return RepoSearch


@pytest.fixture(scope="session")
def pr_search_cls(es_version):
interval_type = "calendar_interval" if es_version >= (7, 2) else "interval"

class PRSearch(FacetedSearch):
index = "test-prs"
doc_types = [PullRequest]
facets = {
"comments": NestedFacet(
"comments",
DateHistogramFacet(
field="comments.created_at", **{interval_type: "month"}
),
)
}

return PRSearch


def test_facet_with_custom_metric(data_client):
Expand All @@ -106,36 +133,36 @@ def test_facet_with_custom_metric(data_client):
assert dates[0] == 1399038439000


def test_nested_facet(pull_request):
prs = PRSearch()
def test_nested_facet(pull_request, pr_search_cls):
prs = pr_search_cls()
r = prs.execute()

assert r.hits.total.value == 1
assert [(datetime(2018, 1, 1, 0, 0), 1, False)] == r.facets.comments


def test_nested_facet_with_filter(pull_request):
prs = PRSearch(filters={"comments": datetime(2018, 1, 1, 0, 0)})
def test_nested_facet_with_filter(pull_request, pr_search_cls):
prs = pr_search_cls(filters={"comments": datetime(2018, 1, 1, 0, 0)})
r = prs.execute()

assert r.hits.total.value == 1
assert [(datetime(2018, 1, 1, 0, 0), 1, True)] == r.facets.comments

prs = PRSearch(filters={"comments": datetime(2018, 2, 1, 0, 0)})
prs = pr_search_cls(filters={"comments": datetime(2018, 2, 1, 0, 0)})
r = prs.execute()
assert not r.hits


def test_datehistogram_facet(data_client):
rs = RepoSearch()
def test_datehistogram_facet(data_client, repo_search_cls):
rs = repo_search_cls()
r = rs.execute()

assert r.hits.total.value == 1
assert [(datetime(2014, 3, 1, 0, 0), 1, False)] == r.facets.created


def test_boolean_facet(data_client):
rs = RepoSearch()
def test_boolean_facet(data_client, repo_search_cls):
rs = repo_search_cls()
r = rs.execute()

assert r.hits.total.value == 1
Expand All @@ -144,9 +171,8 @@ def test_boolean_facet(data_client):
assert value is True


def test_empty_search_finds_everything(data_client):
cs = CommitSearch()

def test_empty_search_finds_everything(data_client, es_version, commit_search_cls):
cs = commit_search_cls()
r = cs.execute()

assert r.hits.total.value == 52
Expand Down Expand Up @@ -190,8 +216,10 @@ def test_empty_search_finds_everything(data_client):
] == r.facets.deletions


def test_term_filters_are_shown_as_selected_and_data_is_filtered(data_client):
cs = CommitSearch(filters={"files": "test_elasticsearch_dsl"})
def test_term_filters_are_shown_as_selected_and_data_is_filtered(
data_client, commit_search_cls
):
cs = commit_search_cls(filters={"files": "test_elasticsearch_dsl"})

r = cs.execute()

Expand Down Expand Up @@ -234,16 +262,18 @@ def test_term_filters_are_shown_as_selected_and_data_is_filtered(data_client):
] == r.facets.deletions


def test_range_filters_are_shown_as_selected_and_data_is_filtered(data_client):
cs = CommitSearch(filters={"deletions": "better"})
def test_range_filters_are_shown_as_selected_and_data_is_filtered(
data_client, commit_search_cls
):
cs = commit_search_cls(filters={"deletions": "better"})

r = cs.execute()

assert 19 == r.hits.total.value


def test_pagination(data_client):
cs = CommitSearch()
def test_pagination(data_client, commit_search_cls):
cs = commit_search_cls()
cs = cs[0:20]

assert 52 == cs.count()
Expand Down