Implement slope test; close #22

ceholden · Aug 9, 2015 · ebcc470 · ebcc470
1 parent 316b9e3
commit ebcc470
Show file tree

Hide file tree

Showing 3 changed files with 30 additions and 6 deletions.
diff --git a/scripts/line_yatsm.py b/scripts/line_yatsm.py
@@ -218,6 +218,8 @@ def run_pixel(X, Y, dataset_config, yatsm_config, px=0, py=0):
       model_result (ndarray): NumPy array of model results from YATSM
 
     """
+    # Extract design info
+    design_info = X.design_info
     # Continue if valid observations are less than 50% of dataset
     valid = cyprep.get_valid_mask(
       Y[:dataset_config['mask_band'] - 1, :],
@@ -253,7 +255,9 @@ def run_pixel(X, Y, dataset_config, yatsm_config, px=0, py=0):
                   swir1_band=dataset_config['swir1_band'] - 1,
                   remove_noise=yatsm_config['remove_noise'],
                   dynamic_rmse=yatsm_config['dynamic_rmse'],
+                  slope_test=yatsm_config['slope_test'],
                   lassocv=yatsm_config['lassocv'],
+                  design_info=design_info,
                   px=px,
                   py=py,
                   logger=logger)

diff --git a/yatsm/config_parser.py b/yatsm/config_parser.py
@@ -37,6 +37,7 @@
 retrain_time = 365.25
 screening = RLM
 screening_crit = 400.0
+slope_test = False
 remove_noise = True
 dynamic_rmse = False
 lassocv = False
@@ -122,6 +123,10 @@ def parse_algorithm_config(config):
     yatsm_config['retrain_time'] = config.getfloat('YATSM', 'retrain_time')
     yatsm_config['screening'] = config.get('YATSM', 'screening')
     yatsm_config['screening_crit'] = config.getfloat('YATSM', 'screening_crit')
+    try:
+        yatsm_config['slope_test'] = config.getfloat('YATSM', 'slope_test')
+    except:
+        yatsm_config['slope_test'] = config.getboolean('YATSM', 'slope_test')
     yatsm_config['remove_noise'] = config.getboolean('YATSM', 'remove_noise')
     yatsm_config['dynamic_rmse'] = config.getboolean('YATSM', 'dynamic_rmse')
     yatsm_config['lassocv'] = config.getboolean('YATSM', 'lassocv')

diff --git a/yatsm/yatsm.py b/yatsm/yatsm.py
@@ -52,6 +52,11 @@ class YATSM(object):
         noise) (default: True)
       dynamic_rmse (bool, optional): Vary RMSE as a function of day of year (
         default: False)
+      slope_test (float or bool, optional): Use an additional slope test to
+        assess the suitability of the training period. True enables the
+        test with the `threshold` parameter as the test criterion, False
+        disables the test, and a float value enables the test using that
+        value as the test criterion instead. (default: False)
       lassocv (bool, optional): Use scikit-learn LarsLassoCV over glmnet
       design_info (patsy.DesignInfo, optional): design information for X, if
         X is created using Patsy
@@ -71,13 +76,22 @@ def __init__(self, X, Y,
                  fit_indices=None, test_indices=None, retrain_time=ndays,
                  screening='RLM', screening_crit=400.0,
                  green_band=green_band, swir1_band=swir1_band,
-                 remove_noise=True, dynamic_rmse=False,
+                 remove_noise=True, dynamic_rmse=False, slope_test=False,
                  lassocv=False,
                  design_info=None, px=0, py=0,
                  logger=None):
+        # Store data
+        self.X = X
+        self.Y = Y
+
         # Setup logger
         self.logger = logger or logging.getLogger('yatsm')
 
+        # Setup slope test
+        self.slope_test = slope_test
+        if self.slope_test is True:
+            self.slope_test = threshold
+
         # Configure which implementation of LASSO we're using
         self.lassocv = lassocv
         if self.lassocv:
@@ -87,10 +101,6 @@ def __init__(self, X, Y,
             self.fit_models = self.fit_models_GLMnet
             self.logger.info('Using Lasso from GLMnet (lambda = 20)')
 
-        # Store data
-        self.X = X
-        self.Y = Y
-
         # Find column index of X containing date from Patsy
         if design_info:
             self.design_info = design_info
@@ -611,16 +621,21 @@ def train(self):
         # Ensure first and last points aren't unusual
         start_resid = np.zeros(len(self.test_indices))
         end_resid = np.zeros(len(self.test_indices))
+        slope_resid = np.zeros(len(self.test_indices))
         for i, (b, m) in enumerate(zip(self.test_indices, models)):
             start_resid[i] = (np.abs(self._Y[b, self.start] -
                                      m.predict(self._X[self.start, :])) /
                               max(self.min_rmse, m.rmse))
             end_resid[i] = (np.abs(self._Y[b, self.here] -
                                    m.predict(self._X[self.here, :])) /
                             max(self.min_rmse, m.rmse))
+            slope_resid[i] = (np.abs(m.coef[1] * (self.here - self.start)) /
+                              max(self.min_rmse, m.rmse))
 
         if np.linalg.norm(start_resid) > self.threshold or \
-                np.linalg.norm(end_resid) > self.threshold:
+                np.linalg.norm(end_resid) > self.threshold or \
+                (self.slope_test and
+                 np.linalg.norm(slope_resid) > self.threshold):
             self.logger.debug('Training period unstable')
             self.start += 1
             self.here = self._here