Adds on-the-fly enabling of function argument casting

GoldenCheetah · Mar 30, 2018 · 5cb2065 · 5cb2065
1 parent c2bd15e
commit 5cb2065
Show file tree

Hide file tree

Showing 7 changed files with 254 additions and 245 deletions.
diff --git a/sweat/metrics/core.py b/sweat/metrics/core.py
@@ -1,10 +1,9 @@
 import numpy as np
 import pandas as pd
 from collections import namedtuple
-from sweat.utils import cast_array_to_original_type
 
 
-def mask_fill(arg, mask=None, value=0.0, **kwargs):
+def mask_fill(y, mask=None, value=0.0, **kwargs):
     """Replace masked values
 
     Parameters
@@ -17,28 +16,22 @@ def mask_fill(arg, mask=None, value=0.0, **kwargs):
 
     Returns
     -------
-    y: type of input argument
+    y: ndarray
 
-
-    In case the arg is an ndarray all operations will be performed on the original array.
+    All operations will be performed on the original array.
     To preserve original array pass a copy to the function
     """
 
     if mask is None:
-        return arg
-
-    y = np.array(arg)
+        return y
 
     mask = np.array(mask, dtype=bool)
     y[~mask] = value
 
-    rv = cast_array_to_original_type(y, type(arg))
-
-    return rv
-
+    return y
 
 
-def rolling_mean(arg, window=10, mask=None, value=0.0, **kwargs):
+def rolling_mean(y, window=10, mask=None, value=0.0, **kwargs):
     """Compute rolling mean
 
     Compute *uniform* or *ewma* rolling mean of the stream. In-process masking with replacement is
@@ -58,27 +51,26 @@ def rolling_mean(arg, window=10, mask=None, value=0.0, **kwargs):
 
     Returns
     -------
-    y: type of input argument
+    y: ndarray
 
     The moving array will indicate which samples to set to zero before
     applying rolling mean.
     """
     if mask is not None:
-        arg = mask_fill(arg, mask, value, **kwargs)
+        y = mask_fill(y, mask, value, **kwargs)
+
+    y = pd.Series(y)
 
-    y = pd.Series(arg)
 
     if kwargs.get('type', 'uniform') == 'ewma':
         y = y.ewm(span=window, min_periods=1).mean().values
     else:
         y = y.rolling(window, min_periods=1).mean().values
 
-    y = cast_array_to_original_type(y, type(arg))
-
     return y
 
 
-def median_filter(arg, window=31, threshold=1, value=None, **kwargs):
+def median_filter(y, window=31, threshold=1, value=None, **kwargs):
     """Outlier replacement using median filter
 
     Detect outliers using median filter and replace with rolling median or specified value
@@ -95,12 +87,12 @@ def median_filter(arg, window=31, threshold=1, value=None, **kwargs):
 
     Returns
     -------
-    y: type of input argument
+    y: ndarray
 
-    In case the arg is an ndarray all operations will be performed on the original array.
+    All operations will be performed on the original array.
     To preserve original array pass a copy to the function
     """
-    y = pd.Series(arg)
+    y = pd.Series(y)
 
     rolling_median = y.rolling(window, min_periods=1).median()
 
@@ -118,11 +110,7 @@ def median_filter(arg, window=31, threshold=1, value=None, **kwargs):
     else:
         y[outlier_idx] = rolling_median[outlier_idx]
 
-    y = y.as_matrix()
-
-    y = cast_array_to_original_type(y, type(arg))
-
-    return y
+    return y.values
 
 
 # FTP based 7-zones with left bind edge set to -0.001
@@ -137,7 +125,7 @@ def median_filter(arg, window=31, threshold=1, value=None, **kwargs):
 HEART_RATE_ZONES_ZNAME = ["Z1", "Z2", "Z3", "Z4", "Z5"]
 
 
-def compute_zones(arg, **kwargs):
+def compute_zones(y, **kwargs):
     """Convert stream into respective zones stream
 
     Watts streams can be converted either into ftp based 7-zones or into custom zones
@@ -159,8 +147,6 @@ def compute_zones(arg, **kwargs):
     array-like of int, the same type as arg
     """
 
-    arg_s = pd.Series(arg)
-
     if kwargs.get('zones', None):
         abs_zones = kwargs.get('zones')
 
@@ -176,8 +162,7 @@ def compute_zones(arg, **kwargs):
     labels = kwargs.get('labels', list(range(1, len(abs_zones))))
     assert len(abs_zones) == (len(labels) + 1)
 
-    y = pd.cut(arg_s, bins=abs_zones, labels=labels)
-    y = cast_array_to_original_type(y, type(arg))
+    y = pd.cut(y, bins=abs_zones, labels=labels)
 
     return y
 
@@ -209,7 +194,7 @@ def best_interval(arg, window, mask=None, value=0.0, **kwargs):
     return rv
 
 
-def time_in_zones(arg, **kwargs):
+def time_in_zones(y, **kwargs):
     """Time in zones
 
     Calculate time [sec] spent in each zone
@@ -221,14 +206,12 @@ def time_in_zones(arg, **kwargs):
 
     Returns
     -------
-    array-like, the same type as arg
+    ndarray
     """
-    type_arg = type(arg)
-    z = pd.Series(compute_zones(arg, **kwargs))
+    z = pd.Series(compute_zones(y, **kwargs))
     tiz = z.groupby(z).count()
-    rv = cast_array_to_original_type(tiz, type_arg)
 
-    return rv
+    return tiz.values
 
 
 def weighted_average_power(arg, mask=None, value=0.0, **kwargs):
@@ -255,15 +238,12 @@ def weighted_average_power(arg, mask=None, value=0.0, **kwargs):
     else:
         _rolling_mean = rolling_mean(arg, window=30, mask=mask, value=value)
 
-    if type(_rolling_mean) == list:
-        _rolling_mean = np.asarray(_rolling_mean, dtype=np.float)
-
     rv = np.mean(np.power(_rolling_mean, 4)) ** (1/4)
 
     return rv
 
 
-def mean_max(arg, mask=None, value=0.0, **kwargs):
+def mean_max(y, mask=None, value=0.0, **kwargs):
     """Mean-max curve
 
     Compute mean-max (power duration curve) from the stream. Mask-filter options can be
@@ -280,11 +260,10 @@ def mean_max(arg, mask=None, value=0.0, **kwargs):
 
     Returns
     -------
-    rv : type of input argument
-        Power-Duration Curve
+    ndarray
     """
 
-    y = mask_fill(arg, mask=mask, value=value)
+    y = mask_fill(y, mask=mask, value=value)
     y = pd.Series(y)
 
     # Compute the accumulated energy from the power data
@@ -295,7 +274,6 @@ def mean_max(arg, mask=None, value=0.0, **kwargs):
     y = np.array([])
     for t in np.arange(1, len(energy)):
         y = np.append(y, energy.diff(t).max()/(t))
-    y = cast_array_to_original_type(y, type(arg))
 
     return y
 
@@ -315,7 +293,7 @@ def multiple_best_intervals(arg, duration, number):
 
     Returns
     -------
-    pd.Series
+    ndarray
     """
     moving_average = arg.rolling(duration).mean()
     length = len(moving_average)
@@ -335,4 +313,4 @@ def multiple_best_intervals(arg, duration, number):
         overlap_max_index = min(length, max_index+duration)
         moving_average.loc[overlap_min_index:overlap_max_index] = np.nan
 
-    return pd.Series(mean_max_bests)
+    return mean_max_bests
diff --git a/sweat/metrics/power.py b/sweat/metrics/power.py
@@ -1,29 +1,20 @@
 import pandas as pd
-from sweat.utils import cast_array_to_original_type
 
 
 def wpk(power, weight):
     """Watts per kilo
 
     Parameters
     ----------
-    power : list, ndarray, series
+    power : ndarray
     weight : number
 
     Returns
     -------
     array-like
     """
 
-    if not isinstance(power, pd.Series):
-        y = pd.Series(power)
-    else:
-        y = power
-
-    rv = y/weight
-    rv = cast_array_to_original_type(rv, type(power))
-
-    return rv
+    return power/weight
 
 
 def relative_intensity(wap, threshold_power):

diff --git a/sweat/utils.py b/sweat/utils.py
@@ -1,31 +1,107 @@
+import functools
+import inspect
+import sys
+import types
+
 import numpy as np
 import pandas as pd
-import logging
 
-logger = logging.getLogger(__name__)
+
+CAST_TYPES = [list, pd.Series]
 
 
-def cast_array_to_original_type(arg, arg_type):
-    """Cast array to another array-like type
+def type_casting(func):
+    """Type casting
+    This decorator casts input arguments of types [list, pandas.Series] to numpy.ndarray
+    so the algorithms accept these input arguments.
+    As a bonus, the decorator casts the return value of the algorithm to the type of the first
+    array-like input argument.
 
     Parameters
     ----------
-    arg: array-like {list, ndarray, pd.Series}
-    arg_type: type
+    func : types.FunctionType
+        Function whose list/pandas.Series arguments and ndarray return value are to be cast.
 
     Returns
     -------
-    casted : arg_type array-like
+    function
+        Decorated function.
     """
+    @functools.wraps(func)
+    def func_wrapper(*args, **kwargs):
+        output_type = None
+        new_args = []
+
+        for arg in args:
+            input_type = type(arg)
+
+            if input_type in CAST_TYPES:
+                new_args.append(np.asarray(arg))
+
+                if output_type is None:
+                    # Type of first array-like argument is used for output casting
+                    output_type = input_type
+            else:
+                new_args.append(arg)
+
+        # NOTE: kwargs are neither type cast nor forwarded to func (see the call below).
+        # If a use-case appears, the block below can be uncommented and tests can be added.
+        # new_kwargs = dict()
+        # for key, value in kwargs.items():
+        #     input_type = type(value)
+
+        #     if input_type in CAST_TYPES:
+        #         new_kwargs[key] = np.asarray(value)
+
+        #         if output_type is None:
+        #             # Type of first array-like argument is used for output casting
+        #             output_type = input_type
+        #     else:
+        #         new_kwargs[key] = value
+        # 
+        # output = func(*new_args, **new_kwargs)
+
+        output = func(*new_args)
+        if output_type is not None and isinstance(output, np.ndarray):
+            return output_type(output)
+        else:
+            return output
 
-    if arg_type == list:
-        return list(arg)
+    return func_wrapper
+
+
+def enable_type_casting(module_or_func=None):
+    """Enable type casting
+    This method enables casting of input arguments to numpy.ndarray so the algorithms accept
+    array-like input arguments of types list and pandas.Series.
+    As a bonus, the return value of the algorithm is cast to the type of the first array-like input argument.
+
+    Parameters
+    ----------
+    module_or_func : [types.ModuleType, types.FunctionType]
+        Module or function that has to be type casted. Can be None.
+
+    Returns
+    -------
+    function or None
+        Decorated function when a function is passed; otherwise None (modules are patched in place).
+    """
+    if module_or_func is None:
+        # Because sys.modules changes during this operation we cannot loop over sys.modules directly
+        key_values = [(key, value) for key, value in sys.modules.items()]
+        for key, value in key_values:
+            # @TODO this if statement might not cover all cases (or might cover too many cases)
+            if key.startswith('sweat.hrm') or key.startswith('sweat.pdm') or key.startswith('sweat.metrics'):
+                enable_type_casting(module_or_func=value)
 
-    elif arg_type == np.ndarray:
-        return np.array(arg)
+    elif isinstance(module_or_func, types.ModuleType):
+        for name, obj in [(name, obj) for name, obj in inspect.getmembers(module_or_func)]:
+            if inspect.isfunction(obj) and inspect.getmodule(obj).__package__ == module_or_func.__package__:
+                func = getattr(module_or_func, name)
+                setattr(module_or_func, name, type_casting(func))
 
-    elif arg_type == pd.Series:
-        return pd.Series(arg)
+    elif isinstance(module_or_func, types.FunctionType):
+        return type_casting(module_or_func)
 
     else:
-        raise ValueError("arg_type must be list, ndarray or pd.Series")
+        raise ValueError('enable_type_casting takes arguments of types [ModuleType, FunctionType]')
diff --git a/tests/io/models/test_models.py b/tests/io/models/test_models.py
@@ -109,8 +109,8 @@ def test_is_valid_invalid_max_value(self):
     def test_compute_mean_max_power(self, wdf_big):
         mmp = wdf_big.compute_mean_max_power()
 
-        assert mmp.iloc[1] == 263.0
-        assert mmp.iloc[300] == 209.37209302325581
+        assert mmp[1] == 263.0
+        assert mmp[300] == 209.37209302325581
 
     def test_compute_weighted_average_power(self, wdf_big):
         assert wdf_big.compute_weighted_average_power() == pytest.approx(156.24624656343036, 0.1)