-
Notifications
You must be signed in to change notification settings - Fork 803
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[WIP] Pagination #806
base: main
Are you sure you want to change the base?
[WIP] Pagination #806
Conversation
When is the pagination branch going to be merged into master and released, say as elasticsearch-dsl 7.1.1? I am currently using elasticsearch-dsl==7.1.0, which does not have the pagination feature, and I need it badly. Please merge! |
I also would love to see this landed. This is a feature we'd like to use. |
@honzakral has this been WIP since 2018, or is it ready and waiting for something? Is there anything left to do on it? Maybe I can help if that is the case. I would really love to see this feature land. |
Any updates on this? |
If anyone else, like me, is waiting with bated breath for a pagination feature, here's what I believe is a working solution in the meantime (I haven't thoroughly tested it, but I haven't found any problems yet). This is a slight expansion of the code in #806: it adds functionality to the get_page function, allowing it to pick the best starting point among the beginning of the search, the end of the search, or the user-provided values from the current page. If the target page is more than 10,000 results away from the closest reference page, the function pages through the intermediate results 10,000 at a time until it gets within 10,000, then steps over the remaining intermediate results, and finally grabs the target page. It's not a perfect solution, and it may be problematic when paging through the middle of very large result sets, but short of that I think it will do pretty well.
Code here:
def _reverse_sort_entry(self, sort_entry):
# "field"
if isinstance(sort_entry, string_types):
if sort_entry == '_score':
return {'_score': 'asc'}
return {sort_entry: 'desc'}
f, sort_entry = sort_entry.copy().popitem()
# {"field": "asc/desc"}
if isinstance(sort_entry, string_types):
return {f: 'asc' if sort_entry == 'desc' else 'desc'}
# {"field": {"order": "asc/desc"}}
sort_entry = sort_entry.copy()
sort_entry['order'] = 'asc' if sort_entry['order'] == 'desc' else 'desc'
return {f: sort_entry}
def get_page_count(self, size=None):
    """Return how many pages of ``size`` rows the search result spans.

    :param size: rows per page; when ``None`` the search's own "size" extra
        is used, falling back to 10
    :return: number of pages, counting a trailing partial page as one page;
        0 when ``size`` is 0
    """
    if size is None:
        size = self._extra.get("size", 10)
    # A zero page size cannot hold any rows, and would divide by zero below.
    if size == 0:
        return 0
    full_pages, remainder = divmod(self.count(), size)
    # Any leftover rows occupy one additional, partially filled page.
    return full_pages + 1 if remainder else full_pages
def get_sort_vals(self, hit, sorters):
    """Collect, for one hit, the value of every field the search sorts on.

    Each sorter is either a field name or a ``{field: ...}`` mapping; a
    trailing ``.keyword`` on the field name is dropped before the field is
    looked up in ``hit.to_dict()``.

    :param hit: An elasticsearch hit
    :type hit: Hit
    :param sorters: A list of the properties used to sort the search
    :type sorters: List
    :return: One value per sorter, ``None`` where the hit has no such key
    :rtype: List
    """
    suffix = '.keyword'
    document = hit.to_dict()
    values = []
    for sorter in sorters:
        field = sorter
        if isinstance(sorter, dict):
            # When a mapping carries several keys, the last one iterated wins.
            for key in sorter:
                field = key[:-len(suffix)] if key.endswith(suffix) else key
        elif sorter.endswith(suffix):
            field = sorter[:-len(suffix)]
        values.append(document.get(field))
    return values
def get_page(self, page_no, size=None, current_page=0, first_row=None, last_row=None):
    """Return one page of results, reaching past the 10,000-row from/size window.

    Pages whose rows all lie within the first 10,000 results are fetched with
    plain ``from``/``size``.  Deeper pages are reached with ``search_after``,
    starting from whichever anchor is closest to the target: the start of the
    results, the end of the results (queried with a reversed sort), or the
    page the caller currently holds.

    :param page_no: 1-based page number; a negative value counts back from the
        last page (``-1`` is the last page)
    :param size: rows per page; when ``None`` the search's own "size" extra is
        used, falling back to 10
    :param current_page: 1-based number of the page the caller already has,
        or 0 when there is none
    :param first_row: first hit of ``current_page``; required to walk
        backwards from the current page
    :param last_row: last hit of ``current_page``; required to walk forwards
        from the current page
    :return: the response of the query that fetched the page; a page number
        outside the result set yields the response of a zero-size query
    :raises ValueError: if ``page_no`` is 0
    """
    if page_no == 0:
        raise ValueError("Search pagination is 1-based.")
    size = size if size is not None else self._extra.get("size", 10)
    # Largest from + size this method sends in a single request.
    max_window = 10000

    s = self._clone()
    # search_after needs a deterministic order, so add a tie-breaker.
    # NOTE(review): assumes every document has a sortable "id" field - confirm.
    if 'id' not in s._sort:
        s._sort.append('id')

    def within_window(page):
        # Page 1 always starts at offset 0, whatever the page size.
        return page == 1 or page * size <= max_window

    def fetch_by_offset(page):
        s._extra["from"] = size * (page - 1)
        s._extra["size"] = size
        return s.execute()

    def walk_forward(row, records_jump):
        # Skip ``records_jump`` rows after ``row``, then return the next page.
        while records_jump > 0:
            chunk = min(records_jump, max_window)
            after = s.get_sort_vals(row, s._sort)
            row = s.get_next_page(last_hit=after, size=chunk)[-1]
            records_jump -= chunk
        after = s.get_sort_vals(row, s._sort)
        return s.get_next_page(last_hit=after, size=size)

    def walk_backward(row, records_jump):
        # Skip ``records_jump`` rows before ``row``, then return the page before.
        while records_jump > 0:
            chunk = min(records_jump, max_window)
            before = s.get_sort_vals(row, s._sort)
            row = s.get_previous_page(first_hit=before, size=chunk)[0]
            records_jump -= chunk
        before = s.get_sort_vals(row, s._sort)
        return s.get_previous_page(first_hit=before, size=size)

    # Only positive pages may take the shortcut: a negative page_no would
    # otherwise pass the window test and produce a negative "from".
    if size == 0 or (page_no > 0 and within_window(page_no)):
        return fetch_by_offset(page_no)

    total_pages = s.get_page_count(size=size)
    # Keep both views of the target: its 1-based index from the start
    # (page_no) and its negative index from the end (neg_page_no).
    if page_no < 0:
        neg_page_no = page_no
        page_no = total_pages + page_no + 1
    else:
        neg_page_no = page_no - total_pages - 1

    if not 1 <= page_no <= total_pages:
        # Outside the result set: answer with an empty page instead of
        # computing offsets from a page that does not exist.
        s._extra["from"] = 0
        s._extra["size"] = 0
        return s.execute()

    # Reachable only for pages that were requested with a negative number.
    if within_window(page_no):
        return fetch_by_offset(page_no)

    if neg_page_no == -1 or abs(neg_page_no) * size <= max_window:
        # Close enough to the end: run the query with the sort reversed so the
        # target sits inside the window, then restore the original row order.
        # When the total is an exact multiple of size the last page is full.
        last_page_size = s.count() % size or size
        if neg_page_no == -1:
            s._extra["from"] = 0
            s._extra["size"] = last_page_size
        else:
            s._extra["from"] = last_page_size + (size * (abs(neg_page_no) - 2))
            s._extra["size"] = size
        s._sort = [s._reverse_sort_entry(se) for se in s._sort]
        resp = s.execute()
        # reverse the hits in the page when paginating from the back
        resp['hits']['hits'] = resp.to_dict()['hits']['hits'][::-1]
        return resp

    # The target is more than max_window rows from both ends: choose the
    # nearest anchor and walk to it with search_after.
    jump_from_end = total_pages - page_no
    jump_from_current = abs(page_no - current_page)
    page_jump_count = page_no - current_page
    # The current page can serve as an anchor only when the caller supplied
    # the edge row that faces the target page.
    can_use_current = current_page > 0 and (
        (page_jump_count > 0 and last_row is not None)
        or (page_jump_count < 0 and first_row is not None)
    )

    if page_no <= jump_from_end and (
            not can_use_current or page_no <= jump_from_current):
        # Walk from the start: take the first chunk by offset, then continue
        # with search_after until the row just before the target page.
        records_jump = (page_no - 1) * size
        first_chunk = min(records_jump, max_window)
        s._extra["from"] = 0
        s._extra["size"] = first_chunk
        anchor = s.execute()[-1]
        return walk_forward(anchor, records_jump - first_chunk)

    if can_use_current and jump_from_current <= jump_from_end:
        if page_jump_count > 0:
            return walk_forward(last_row, (page_jump_count - 1) * size)
        return walk_backward(first_row, (-page_jump_count - 1) * size)

    # Walk from the end: anchor on the first row of the last page and step
    # back over the pages that lie between it and the target.
    anchor = s.get_page(-1, size)[0]
    return walk_backward(anchor, (abs(neg_page_no) - 2) * size)
def get_next_page(self, last_hit, size=None):
    """Run a copy of this search that resumes right after ``last_hit``.

    :param last_hit: iterable of sort values for the row to continue after;
        sent as the ``search_after`` extra
    :param size: number of rows to fetch; when ``None`` the search's own
        "size" extra is used, falling back to 10
    :return: the result of executing the cloned search
    """
    if size is None:
        size = self._extra.get("size", 10)
    following = self._clone()
    # "from" is set to 0 so that only search_after positions the query.
    paging = (("from", 0), ("size", size), ("search_after", list(last_hit)))
    for key, value in paging:
        following._extra[key] = value
    return following.execute()
def get_previous_page(self, first_hit, size=None):
    """Run a copy of this search that fetches the rows just before ``first_hit``.

    The copy is executed with every sort entry reversed and ``first_hit`` as
    its ``search_after`` value; the returned hits are then flipped so they
    come back in this search's own order.

    :param first_hit: iterable of sort values for the row to stop before
    :param size: number of rows to fetch; when ``None`` the search's own
        "size" extra is used, falling back to 10
    :return: the response of the reversed search, with its hits re-reversed
    """
    if size is None:
        size = self._extra.get("size", 10)
    preceding = self._clone()
    paging = (("from", 0), ("size", size), ("search_after", list(first_hit)))
    for key, value in paging:
        preceding._extra[key] = value
    # Walk the result set in the opposite direction.
    flipped_sort = []
    for entry in self._sort:
        flipped_sort.append(self._reverse_sort_entry(entry))
    preceding._sort = flipped_sort
    response = preceding.execute()
    # Put the rows back into the caller's sort order.
    rows = response.to_dict()['hits']['hits']
    response['hits']['hits'] = rows[::-1]
    return response
|
implementing #802