Skip to content

Commit

Permalink
Fix split query time-ranges calculation (#762)
Browse files Browse the repository at this point in the history
* Fixing _calc_split_ranges for split queries
Thx to @pjain90 for the fix

* Fix fail_on_partial parameter in Azure Monitor driver

* Fixing tests for split_queries changes

* Additional test case fixes
  • Loading branch information
ianhelle authored Apr 18, 2024
1 parent 949cf07 commit 5cc27c7
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 13 deletions.
8 changes: 4 additions & 4 deletions msticpy/data/core/query_provider_connections_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,14 +410,14 @@ def _calc_split_ranges(start: datetime, end: datetime, split_delta: pd.Timedelta
# Since the generated time ranges are based on deltas from 'start'
# we need to adjust the end time on the final range.
# If the difference between the calculated last range end and
# the query 'end' that the user requested is small (< 10% of a delta),
# the query 'end' that the user requested is small (< 0.1% of a delta),
# we just replace the last "end" time with our query end time.
if (ranges[-1][1] - end) < (split_delta / 10):
ranges[-1] = ranges[-1][0], end
if (end - ranges[-1][1]) < (split_delta / 1000):
ranges[-1] = ranges[-1][0], pd.Timestamp(end)
else:
# otherwise append a new range starting after the last range
# in ranges and ending in 'end"
# note - we need to add back our subtracted 1 nanosecond
ranges.append((ranges[-1][0] + pd.Timedelta("1ns"), end))
ranges.append((ranges[-1][1] + pd.Timedelta("1ns"), pd.Timestamp(end)))

return ranges
4 changes: 3 additions & 1 deletion msticpy/data/drivers/azure_monitor_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,9 @@ def query_with_results(
help_uri=_HELP_URL,
)
time_span_value = self._get_time_span_value(**kwargs)
fail_on_partial = kwargs.get("fail_if_partial", False)
fail_on_partial = kwargs.get(
"fail_if_partial", kwargs.get("fail_on_partial", False)
)
server_timeout = kwargs.pop("timeout", self._def_timeout)

workspace_id = next(iter(self._workspace_ids), None) or self._workspace_id
Expand Down
8 changes: 4 additions & 4 deletions tests/data/test_async_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def test_split_queries_sync():
"print", host_name="DESKTOP-12345", start=start, end=end, split_query_by="1H"
)
queries = result_queries.split("\n\n")
check.equal(len(queries), 5)
check.equal(len(queries), 6)

for idx, (st_time, e_time) in enumerate(ranges):
check.is_in(st_time.isoformat(sep=" "), queries[idx])
Expand All @@ -119,7 +119,7 @@ def test_split_queries_sync():
host_name="DESKTOP-12345", start=start, end=end, split_query_by="1H"
)
# verify len of result is 2x single_result
check.equal(single_results.shape[0] * 5, result_queries.shape[0])
check.equal(single_results.shape[0] * 6, result_queries.shape[0])
# verify columns/schema is the same.
check.equal(list(single_results.columns), list(result_queries.columns))

Expand All @@ -144,7 +144,7 @@ def test_split_queries_async():
"print", host_name="DESKTOP-12345", start=start, end=end, split_query_by="1H"
)
queries = result_queries.split("\n\n")
check.equal(len(queries), 5)
check.equal(len(queries), 6)

for idx, (st_time, e_time) in enumerate(ranges):
check.is_in(st_time.isoformat(sep=" "), queries[idx])
Expand All @@ -159,6 +159,6 @@ def test_split_queries_async():
host_name="DESKTOP-12345", start=start, end=end, split_query_by="1H"
)
# verify len of result is 2x single_result
check.equal(single_results.shape[0] * 5, result_queries.shape[0])
check.equal(single_results.shape[0] * 6, result_queries.shape[0])
# verify columns/schema is the same.
check.equal(list(single_results.columns), list(result_queries.columns))
8 changes: 4 additions & 4 deletions tests/data/test_dataqueries.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ def test_split_ranges(self):
delta = pd.Timedelta("1h")

ranges = _calc_split_ranges(start, end, delta)
self.assertEqual(len(ranges), 5)
self.assertEqual(len(ranges), 6)
self.assertEqual(ranges[0][0], start)
self.assertEqual(ranges[-1][1], end)

Expand All @@ -369,7 +369,7 @@ def test_split_ranges(self):

end = end + pd.Timedelta("20min")
ranges = _calc_split_ranges(start, end, delta)
self.assertEqual(len(ranges), 5)
self.assertEqual(len(ranges), 6)
self.assertEqual(ranges[0][0], start)
self.assertEqual(ranges[-1][1], end)

Expand All @@ -386,7 +386,7 @@ def test_split_queries(self):
"print", start=start, end=end, split_query_by="1H"
)
queries = result_queries.split("\n\n")
self.assertEqual(len(queries), 5)
self.assertEqual(len(queries), 6)

for idx, (st_time, e_time) in enumerate(ranges):
self.assertIn(st_time.isoformat(sep="T") + "Z", queries[idx])
Expand Down Expand Up @@ -416,7 +416,7 @@ def test_split_queries_err(self):
"print", start=start, end=end, split_query_by="Invalid"
)
queries = result_queries.split("\n\n")
self.assertEqual(len(queries), 5)
self.assertEqual(len(queries), 6)


_LOCAL_DATA_PATHS = [str(get_test_data_path().joinpath("localdata"))]
Expand Down

0 comments on commit 5cc27c7

Please sign in to comment.