Skip to content

Commit

Permalink
use polars directly instead of going through pandas
Browse files Browse the repository at this point in the history
  • Loading branch information
danielhrisca committed Jan 27, 2025
1 parent c4da959 commit f2f8c50
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 41 deletions.
18 changes: 12 additions & 6 deletions src/asammdf/blocks/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1358,6 +1358,7 @@ def components(
prefix: str = "",
master: NDArray[Any] | None = None,
only_basenames: bool = False,
use_polars : bool=False,
) -> tuple[str, Series[Any]]:
"""yield pandas Series and unique name based on the ndarray object
Expand All @@ -1379,6 +1380,11 @@ def components(
.. versionadded:: 5.13.0
use_polars (False) : bool
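yield the raw values (the array itself, or a list of rows for multi-dimensional values) instead of pandas Series, so that the caller can build a polars DataFrame directly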
.. versionadded:: 8.1.0
Returns
-------
name, series : (str, values)
Expand All @@ -1403,11 +1409,11 @@ def components(
values = values.byteswap().view(values.dtype.newbyteorder())

if len(values.shape) > 1:
values = Series(
values = list(values) if use_polars else Series(
list(values),
index=master,
)
else:
elif not use_polars:
values = Series(
values,
index=master,
Expand All @@ -1426,11 +1432,11 @@ def components(
else:
axis_name = unique_names.get_unique_name(name)
if len(values.shape) > 1:
values = Series(
values = list(values) if use_polars else Series(
list(values),
index=master,
)
else:
elif not use_polars:
values = Series(
values,
index=master,
Expand Down Expand Up @@ -1464,11 +1470,11 @@ def components(
else:
name_ = unique_names.get_unique_name(name)
if len(values.shape) > 1:
values = Series(
values = list(values) if use_polars else Series(
list(values),
index=master,
)
else:
elif not use_polars:
values = Series(
values,
index=master,
Expand Down
99 changes: 64 additions & 35 deletions src/asammdf/mdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -4878,10 +4878,10 @@ def to_dataframe(

signals[s_index] = sig

if use_interpolation:
if use_interpolation or use_polars:
same_master = np.array_equal(master, group_master)

if not same_master and interpolate_outwards_with_nan:
if not same_master and interpolate_outwards_with_nan and not use_polars:
idx = np.argwhere((master >= group_master[0]) & (master <= group_master[-1])).flatten()

cycles = len(group_master)
Expand All @@ -4899,7 +4899,7 @@ def to_dataframe(
for signal in signals
]

if not same_master and interpolate_outwards_with_nan:
if not same_master and interpolate_outwards_with_nan and not use_polars:
for sig in signals:
sig.timestamps = sig.timestamps[idx]
sig.samples = sig.samples[idx]
Expand All @@ -4915,26 +4915,38 @@ def to_dataframe(
if signals:
diffs = np.diff(group_master, prepend=-np.inf) > 0
if np.all(diffs):
index = pd.Index(group_master, tupleize_cols=False)
if use_polars:
index = group_master
else:
index = pd.Index(group_master, tupleize_cols=False)

else:
idx = np.argwhere(diffs).flatten()
group_master = group_master[idx]

index = pd.Index(group_master, tupleize_cols=False)
if use_polars:
index = group_master
else:
index = pd.Index(group_master, tupleize_cols=False)

for sig in signals:
sig.samples = sig.samples[idx]
sig.timestamps = sig.timestamps[idx]
else:
index = pd.Index(group_master, tupleize_cols=False)
if use_polars:
index = group_master
else:
index = pd.Index(group_master, tupleize_cols=False)

size = len(index)
for sig in signals:
if sig.timestamps.dtype.byteorder not in target_byte_order:
sig.timestamps = sig.timestamps.byteswap().view(sig.timestamps.dtype.newbyteorder())

sig_index = index if len(sig) == size else pd.Index(sig.timestamps, tupleize_cols=False)
if use_polars:
sig_index = index
else:
sig_index = index if len(sig) == size else pd.Index(sig.timestamps, tupleize_cols=False)

# byte arrays
if len(sig.samples.shape) > 1:
Expand All @@ -4948,11 +4960,12 @@ def to_dataframe(
if sig.samples.dtype.byteorder not in target_byte_order:
sig.samples = sig.samples.byteswap().view(sig.samples.dtype.newbyteorder())

df[channel_name] = pd.Series(
df[channel_name] = list(sig.samples) if use_polars else pd.Series(
list(sig.samples),
index=sig_index,
)


# arrays and structures
elif sig.samples.dtype.names:
for name, series in components(
Expand All @@ -4961,6 +4974,7 @@ def to_dataframe(
used_names,
master=sig_index,
only_basenames=only_basenames,
use_polars=use_polars,
):
df[name] = series

Expand All @@ -4980,7 +4994,7 @@ def to_dataframe(
if sig.samples.dtype.byteorder not in target_byte_order:
sig.samples = sig.samples.byteswap().view(sig.samples.dtype.newbyteorder())

df[channel_name] = pd.Series(sig.samples, index=sig_index)
df[channel_name] = sig.samples if use_polars else pd.Series(sig.samples, index=sig_index)

if progress is not None:
if callable(progress):
Expand All @@ -4991,42 +5005,57 @@ def to_dataframe(
if progress.stop:
return TERMINATED

strings, nonstrings = {}, {}
if use_polars:
if not POLARS_AVAILABLE:
raise MdfException("to_dataframe(use_polars=True) requires polars")

for col, series in df.items():
if series.dtype.kind == "S":
strings[col] = series
else:
nonstrings[col] = series
if numeric_1D_only:
df = {col: series for col, series in df.items() if series.dtype.kind in "uif"}

if numeric_1D_only:
nonstrings = {col: series for col, series in nonstrings.items() if series.dtype.kind in "uif"}
strings = {}
if time_as_date:
master = self.header.start_time + pd.to_timedelta(master, unit="s")
elif time_from_zero and len(master):
master = master - master[0]

df = pd.DataFrame(nonstrings, index=master)
df = {
'timestamps': master,
**df
}
return pl.DataFrame(df)

if strings:
df_strings = pd.DataFrame(strings, index=master)
df = pd.concat([df, df_strings], axis=1)

df.index.name = "timestamps"
else:

if time_as_date:
delta = pd.to_timedelta(df.index, unit="s")
strings, nonstrings = {}, {}

new_index = self.header.start_time + delta
df.set_index(new_index, inplace=True)
for col, series in df.items():
if series.dtype.kind == "S":
strings[col] = series
else:
nonstrings[col] = series

elif time_from_zero and len(master):
df.set_index(df.index - df.index[0], inplace=True)
if numeric_1D_only:
nonstrings = {col: series for col, series in df.items() if series.dtype.kind in "uif"}
strings = {}

if use_polars:
if POLARS_AVAILABLE:
return pl.from_pandas(df, include_index=True)
else:
raise MdfException("to_dataframe(use_polars=True) requires polars")
df = pd.DataFrame(nonstrings, index=master)

if strings:
df_strings = pd.DataFrame(strings, index=master)
df = pd.concat([df, df_strings], axis=1)

df.index.name = "timestamps"

if time_as_date:
delta = pd.to_timedelta(df.index, unit="s")

new_index = self.header.start_time + delta
df.set_index(new_index, inplace=True)

elif time_from_zero and len(master):
df.set_index(df.index - df.index[0], inplace=True)

return df
return df

def extract_bus_logging(
self,
Expand Down

0 comments on commit f2f8c50

Please sign in to comment.