From 066693754b982a428eb1bc61942b8b12f0cd3f10 Mon Sep 17 00:00:00 2001
From: Chris Holden <ceholden@gmail.com>
Date: Fri, 21 Aug 2015 15:47:13 -0400
Subject: [PATCH] Switch to YAML config files; add model pickles

See issues #26, #29, #30
---
 examples/classifiers/RandomForest.yaml        |  32 ++++++
 examples/p022r049_example.ini                 | 103 -----------------
 examples/p022r049_example.yaml                | 102 +++++++++++++++++
 examples/p035r032_example.ini                 | 103 -----------------
 examples/p035r032_example.yaml                | 107 ++++++++++++++++++
 .../regression/LassoCV_n100_alpha_0-50.pkl    | Bin 0 -> 1263 bytes
 examples/regression/Lasso_alpha20.pkl         | Bin 0 -> 291 bytes
 examples/regression/OLS.pkl                   | Bin 0 -> 114 bytes
 examples/regression/README.md                 |  26 +++++
 yatsm/classifiers/RandomForest.ini            |  34 ------
 10 files changed, 267 insertions(+), 240 deletions(-)
 create mode 100644 examples/classifiers/RandomForest.yaml
 delete mode 100644 examples/p022r049_example.ini
 create mode 100644 examples/p022r049_example.yaml
 delete mode 100644 examples/p035r032_example.ini
 create mode 100644 examples/p035r032_example.yaml
 create mode 100644 examples/regression/LassoCV_n100_alpha_0-50.pkl
 create mode 100644 examples/regression/Lasso_alpha20.pkl
 create mode 100644 examples/regression/OLS.pkl
 create mode 100644 examples/regression/README.md
 delete mode 100644 yatsm/classifiers/RandomForest.ini

diff --git a/examples/classifiers/RandomForest.yaml b/examples/classifiers/RandomForest.yaml
new file mode 100644
index 00000000..a534db6e
--- /dev/null
+++ b/examples/classifiers/RandomForest.yaml
@@ -0,0 +1,32 @@
+# Default configuration file for Random Forest algorithm
+#
+# See sklearn implementation API here:
+# http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html
+
+algorithm: RandomForest  # Classification algorithm
+init:  # Algorithm hyperparameters
+    # Number of trees in forest
+    n_estimators: 500
+    # Criterion for quality of split ['gini', 'entropy']
+    criterion: "gini"
+    # number of features tried at each node
+    max_features: "auto"
+    # maximum depth of tree
+    max_depth:
+    # minimum number of samples required to split an internal node
+    min_samples_split: 2
+    # minimum number of samples in newly created leaves
+    min_samples_leaf: 1
+    # maximum leaf nodes -- if not None max_depth is ignored
+    max_leaf_nodes:
+    # Use bootstrap sample
+    bootstrap: True
+    # use out-of-bag sample for generalization error
+    oob_score: True
+    # number of jobs in parallel for fit and predict
+    n_jobs: 1
+
+# Algorithm fit parameters
+fit:
+    # Sample weights for training data
+    sample_weight:
diff --git a/examples/p022r049_example.ini b/examples/p022r049_example.ini
deleted file mode 100644
index 1b34e4bd..00000000
--- a/examples/p022r049_example.ini
+++ /dev/null
@@ -1,103 +0,0 @@
-# Example configuration file for YATSM line runner
-#
-# This configuration includes details about the dataset and how YATSM should
-# run
-
-# Version of config
-[metadata]
-version = 0.4
-
-# Section for Dataset
-[dataset]
-# Text file containing dates and images
-input_file = /home/ceholden/Documents/yatsm/examples/p022r049_input.csv
-# Input date format
-date_format = %Y%j
-# Output location
-output = /home/ceholden/Documents/landsat_stack/p022r049/images/YATSM
-# Output file prefix (e.g., [prefix]_[line].npz)
-output_prefix = yatsm_r
-# Total number of bands
-n_bands = 8
-# Mask band (e.g., Fmask)
-mask_band = 8
-# List of integer values to mask within the mask band
-mask_values = 2, 3, 4, 255
-# Valid range of non-mask band data
-# specify 1 range for all bands, or specify ranges for each band
-valid_range = 0, 10000
-# Indices for multi-temporal cloud masking (indexed on 1)
-green_band = 2
-swir1_band = 5
-# Use BIP image reader? If not, use GDAL to read in
-use_bip_reader = true
-# Directory location for caching dataset lines
-cache_line_dir = /home/ceholden/Documents/landsat_stack/p022r049/images/.yatsm_cache
-
-# Section for YATSM parameters
-[YATSM]
-consecutive = 5
-threshold = 3
-min_obs = 16
-min_rmse = 150
-# Patsy style model specification for timeseries model
-design_matrix = 1 + x + harm(x, 1)
-test_indices = 2, 4, 5
-# Number of days between model fit updates during monitoring period
-retrain_time = 365.25
-# Multitemporal cloud/shadow screening algorithm and threshold
-screening = RLM
-screening_crit = 400.0
-# Training period slope test
-slope_test = False
-# Remove observation if no change has been detected, but first observation is
-# above threshold
-remove_noise = True
-dynamic_rmse = False
-lassocv = False
-reverse = False
-robust = False
-# Commission test alpha value for test; leave blank to ignore test
-commission_alpha =
-
-# Section for phenology fitting
-[phenology]
-# Boolean for calculating phenology, or not
-calc_pheno = False
-# Specification for dataset indices required for EVI based phenology monitoring
-red_index = 2
-nir_index = 3
-blue_index = 0
-# Scale factor for reflectance bands
-scale = 0.0001
-# You can also specify index of EVI if contained in dataset to override calculation
-evi_index =
-evi_scale =
-# Number of years to group together when normalizing EVI to upper and lower percentiles
-year_interval = 3
-# Upper and lower percentiles of EVI used for max/min scaling
-q_min = 10
-q_max = 90
-
-# Section for segmentation
-[segment]
-# Segmentation image
-segmentation =
-# Resegmentation threshold (0 turns off resegmentation)
-resegment_crit = 0
-# Resegmentation size thresholds
-resegment_minpix = 5
-resegment_maxpix = 50
-
-# Section for training and classification
-[classification]
-# Training data file
-training_image = /home/ceholden/Documents/yatsm/examples/training_data.gtif
-# Training data masked values
-roi_mask_values = 0,255
-# Date range
-training_start = 1999-01-01
-training_end = 2001-01-01
-training_date_format = %Y-%m-%d
-# Cache X feature input and y labels for training data image into file?
-cache_training =
diff --git a/examples/p022r049_example.yaml b/examples/p022r049_example.yaml
new file mode 100644
index 00000000..7d48b1d1
--- /dev/null
+++ b/examples/p022r049_example.yaml
@@ -0,0 +1,102 @@
+# Example configuration file for YATSM
+# As of v0.5.0, config files are to be written in YAML
+#
+# Quotes around strings are optional but encouraged; they are required where
+# the leading character would otherwise produce a parsing error (e.g., when
+# writing the date_format, "%Y%j")
+
+version: "0.5.0"
+
+dataset:
+    # Text file containing dates and images
+    input_file: "/home/ceholden/Documents/yatsm/examples/p022r049_input.csv"
+    # Input date format
+    date_format: "%Y%j"
+    # Output location
+    output: "/home/ceholden/Documents/landsat_stack/p022r049/images/YATSM"
+    # Output file prefix (e.g., [prefix]_[line].npz)
+    output_prefix: "yatsm_r"
+    # Total number of bands
+    n_bands: 8
+    # Mask band (e.g., Fmask)
+    mask_band: 8
+    # List of integer values to mask within the mask band
+    mask_values: [2, 3, 4, 255]
+    # Valid range of non-mask band data
+    # specify 1 range for all bands, or specify ranges for each band
+    valid_range: [0, 10000]
+    # Indices for multi-temporal cloud masking (indexed on 1)
+    green_band: 2
+    swir1_band: 5
+    # Use BIP image reader? If not, use GDAL to read in
+    use_bip_reader: true
+    # Directory location for caching dataset lines
+    cache_line_dir: "/home/ceholden/Documents/landsat_stack/p022r049/images/.yatsm_cache"
+
+# Parameters common to all timeseries analysis models within YATSM package
+YATSM:
+    algorithm: "CCDCesque"
+    prediction: "LassoCV"
+    design_matrix: "1 + x + harm(x, 1)"
+    reverse: False
+    robust: False
+    commission_alpha:
+
+# Parameters for CCDCesque algorithm -- referenced by "algorithm" key in YATSM
+CCDCesque:
+    consecutive: 5
+    threshold: 3.0
+    min_obs: 16
+    min_rmse: 150
+    test_indices: [2, 4, 5]
+    retrain_time: 365.25
+    screening: RLM
+    screening_crit: 400.0
+    slope_test: False
+    remove_noise: True
+    dynamic_rmse: False
+
+# Regression estimator
+LassoCV:
+    pickle: "/home/ceholden/Documents/yatsm/examples/regression/LassoCV_n100_alpha_0-50.pkl"
+
+# Section for phenology fitting
+phenology:
+    calc_pheno: False
+    # Specification for dataset indices required for EVI based phenology monitoring
+    red_index: 2
+    nir_index: 3
+    blue_index: 0
+    # Scale factor for reflectance bands
+    scale: 0.0001
+    # You can also specify index of EVI if contained in dataset to override calculation
+    evi_index:
+    evi_scale:
+    # Number of years to group together when normalizing EVI to upper and lower percentiles
+    year_interval: 3
+    # Upper and lower percentiles of EVI used for max/min scaling
+    q_min: 10
+    q_max: 90
+
+# Section for segmentation
+segment:
+    # Segmentation image
+    segmentation:
+    # Resegmentation threshold (0 turns off resegmentation)
+    resegment_crit: 0
+    # Resegmentation size thresholds
+    resegment_minpix: 5
+    resegment_maxpix: 50
+
+# Section for training and classification
+classification:
+    # Training data file
+    training_image: "/home/ceholden/Documents/yatsm/examples/training_data.gtif"
+    # Training data masked values
+    roi_mask_values: [0, 255]
+    # Date range
+    training_start: "1999-01-01"
+    training_end: "2001-01-01"
+    training_date_format: "%Y-%m-%d"
+    # Cache X feature input and y labels for training data image into file?
+    cache_training:
diff --git a/examples/p035r032_example.ini b/examples/p035r032_example.ini
deleted file mode 100644
index 799fd29a..00000000
--- a/examples/p035r032_example.ini
+++ /dev/null
@@ -1,103 +0,0 @@
-# Example configuration file for YATSM line runner
-#
-# This configuration includes details about the dataset and how YATSM should
-# run
-
-# Version of config
-[metadata]
-version = 0.4
-
-# Section for Dataset
-[dataset]
-# Text file containing dates and images
-input_file = /home/ceholden/Documents/yatsm/examples/p035r032_input.csv
-# Input date format
-date_format = %Y%j
-# Output location
-output = /home/ceholden/Documents/landsat_stack/p035r032/images/YATSM
-# Output file prefix (e.g., [prefix]_[line].npz)
-output_prefix = yatsm_r
-# Total number of bands
-n_bands = 8
-# Mask band (e.g., Fmask)
-mask_band = 8
-# List of integer values to mask within the mask band
-mask_values = 2, 3, 4, 255
-# Valid range of non-mask band data
-# specify 1 range for all bands, or specify ranges for each band
-valid_range = 0, 10000
-# Indices for multi-temporal cloud masking (indexed on 1)
-green_band = 2
-swir1_band = 5
-# Use BIP image reader? If not, use GDAL to read in
-use_bip_reader = false
-# Directory location for caching dataset lines
-cache_line_dir = /home/ceholden/Documents/landsat_stack/p035r032/images/.yatsm_cache
-
-# Section for YATSM parameters
-[YATSM]
-consecutive = 5
-threshold = 3.5
-min_obs = 24
-min_rmse = 150
-# Patsy style model specification for timeseries model
-design_matrix = 1 + x + harm(x, 1) + harm(x, 2)
-test_indices = 2, 3, 4, 5
-# Number of days between model fit updates during monitoring period
-retrain_time = 365.25
-# Multitemporal cloud/shadow screening algorithm and threshold
-screening = RLM
-screening_crit = 400.0
-# Training period slope test
-slope_test = False 
-# Remove observation if no change has been detected, but first observation is
-# above threshold
-remove_noise = True
-dynamic_rmse = False
-lassocv = False
-reverse = True
-robust = False
-# Commission test alpha value for test; leave blank to ignore test
-commission_alpha =
-
-# Section for phenology fitting
-[phenology]
-# Boolean for calculating phenology, or not
-calc_pheno = True
-# Specification for dataset indices required for EVI based phenology monitoring
-red_index = 2
-nir_index = 3
-blue_index = 0
-# Scale factor for reflectance bands
-scale = 0.0001
-# You can also specify index of EVI if contained in dataset to override calculation
-evi_index =
-evi_scale =
-# Number of years to group together when normalizing EVI to upper and lower percentiles
-year_interval = 3
-# Upper and lower percentiles of EVI used for max/min scaling
-q_min = 10
-q_max = 90
-
-# Section for segmentation
-[segment]
-# Segmentation image
-segmentation = /home/ceholden/Documents/yatsm/sandbox/segment/p035r032/bgw_seg.armap.15
-# Resegmentation threshold (0 turns off resegmentation)
-resegment_crit = 0
-# Resegmentation size thresholds
-resegment_minpix = 5
-resegment_maxpix = 50
-
-# Section for training and classification
-[classification]
-# Training data file
-training_image =
-# Training data masked values
-roi_mask_values = 0,255
-# Date range
-training_start = 1999-01-01
-training_end = 2001-01-01
-training_date_format = %Y-%m-%d
-# Cache X feature input and y labels for training data image into file?
-cache_training =
diff --git a/examples/p035r032_example.yaml b/examples/p035r032_example.yaml
new file mode 100644
index 00000000..64607ece
--- /dev/null
+++ b/examples/p035r032_example.yaml
@@ -0,0 +1,107 @@
+# Example configuration file for YATSM line runner
+#
+# This configuration includes details about the dataset and how YATSM should
+# run
+
+# Version of config
+version: "0.5.0"
+
+dataset:
+    # Text file containing dates and images
+    input_file: "/home/ceholden/Documents/yatsm/examples/p035r032_input.csv"
+    # Input date format
+    date_format: "%Y%j"
+    # Output location
+    output: "/home/ceholden/Documents/landsat_stack/p035r032/images/YATSM"
+    # Output file prefix (e.g., [prefix]_[line].npz)
+    output_prefix: "yatsm_r"
+    # Total number of bands
+    n_bands: 8
+    # Mask band (e.g., Fmask)
+    mask_band: 8
+    # List of integer values to mask within the mask band
+    mask_values: [2, 3, 4, 255]
+    # Valid range of non-mask band data
+    # specify 1 range for all bands, or specify ranges for each band
+    valid_range: [0, 10000]
+    # Indices for multi-temporal cloud masking (indexed on 1)
+    green_band: 2
+    swir1_band: 5
+    # Use BIP image reader? If not, use GDAL to read in
+    use_bip_reader: False
+    # Directory location for caching dataset lines
+    cache_line_dir: "/home/ceholden/Documents/landsat_stack/p035r032/images/.yatsm_cache"
+
+# Parameters common to all timeseries analysis models within YATSM package
+YATSM:
+    algorithm: "CCDCesque"
+    prediction: "LassoCV"
+    design_matrix: "1 + x + harm(x, 1) + harm(x, 2)"
+    reverse: False
+    robust: False
+    commission_alpha:
+
+# Parameters for CCDCesque algorithm -- referenced by "algorithm" key in YATSM
+CCDCesque:
+    consecutive: 5
+    threshold: 3.5
+    min_obs: 24
+    min_rmse: 150
+    test_indices: [2, 3, 4, 5]
+    retrain_time: 365.25
+    screening: RLM
+    screening_crit: 400.0
+    slope_test: False
+    remove_noise: True
+    dynamic_rmse: False
+
+# Regression estimator
+LassoCV:
+    pickle: "/home/ceholden/Documents/yatsm/examples/regression/LassoCV_n100_alpha_0-50.pkl"
+
+Lasso20:
+    pickle: "/home/ceholden/Documents/yatsm/examples/regression/Lasso_alpha20.pkl"
+
+OLS:
+    pickle: "/home/ceholden/Documents/yatsm/examples/regression/OLS.pkl"
+
+# Section for phenology fitting
+phenology:
+    calc_pheno: False
+    # Specification for dataset indices required for EVI based phenology monitoring
+    red_index: 2
+    nir_index: 3
+    blue_index: 0
+    # Scale factor for reflectance bands
+    scale: 0.0001
+    # You can also specify index of EVI if contained in dataset to override calculation
+    evi_index:
+    evi_scale:
+    # Number of years to group together when normalizing EVI to upper and lower percentiles
+    year_interval: 3
+    # Upper and lower percentiles of EVI used for max/min scaling
+    q_min: 10
+    q_max: 90
+
+# Section for segmentation
+segment:
+    # Segmentation image
+    segmentation:
+    # Resegmentation threshold (0 turns off resegmentation)
+    resegment_crit: 0
+    # Resegmentation size thresholds
+    resegment_minpix: 5
+    resegment_maxpix: 50
+
+# Section for training and classification
+classification:
+    # Training data file
+    training_image: "/home/ceholden/Documents/yatsm/examples/training_data.gtif"
+    # Training data masked values
+    roi_mask_values: [0, 255]
+    # Date range
+    training_start: "1999-01-01"
+    training_end: "2001-01-01"
+    training_date_format: "%Y-%m-%d"
+    # Cache X feature input and y labels for training data image into file?
+    cache_training:
diff --git a/examples/regression/LassoCV_n100_alpha_0-50.pkl b/examples/regression/LassoCV_n100_alpha_0-50.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..c25120d61892167c84b7ee0b45b7a0a23fe51d67
GIT binary patch
literal 1263
zcmV<L1Q7dLMlg6ZWHBHhARr(hARr(hARr(hAb4Ialv_{}cNoQUfsll#;U)+XkXA*7
zctKDs1DkA;O$ekIT||*)Hk(CalimF%3lbFs+K3<`Me2aXQ7I$Pc0}4~sa3pyai-Ra
zh2s6vQemjTR76Pys-@NM;d~G0%sFqrA`Wi<&_ZA~Qfe`iGv8pPOoT;>Q<TkQCNYLE
zm<T&gkc=<|v)d^+T?kl9iUGR-a3WCwN!hHJ#k`XM+%lAhS?C<h4m@x&*GfBPI&6g0
znrmUqn9YVcga#XdQ>2}-<>HJGc)m#qNz)q_@U$!^@Iu)}z%K;?JrG7}*;<nmc)tyr
z7za%Vfe7VfuLZNTEG@ef%+`ZBky<XwP5@$^PM7?8=KdEgivd2yvW?+f7As&!!x|uX
znnZSvjEb5B&{+QdrnHaWg({w2?!1}<K`+;}W>hC_fu@!<hort@paSWMwV79|pv)b-
zBMtYyfYcq>ozPt_h_(!MEtmcdtvTOzNxy3fx?}Y^b8|94w(e2Q1GHwP47%R3_508i
zSqw|;b{1sIjxRZ@yEk1dJCP&E|I<}3J04?=OXRl88jfeLy1uPP_Ii<5<1{@gd*VM@
z;#1)z&pcM-PUwq}_p{k<et4q1n`znL8<ZheMRghDly*61yiUBRw^E*X*Lh$=Q?q<Y
zOncRj?Ok%W<cY*m{z%@cIOiOSe<|Nwci!z8?GH20e;a-ou>u|la(E1MYvA|6((O}?
zCU~iWr5-LRf@2TvMY7CCVES-Y&<|nX!e1}m6A(CX1D@pYJ+pbkaMjdQwf?B5qPEqu
zrd+c?QD3Me{hQuXh$a^O+&4d6F;ULeZ#WGUpAMF;D%4geMtZM=q9;x&HeoxZd**d2
zDi`uf&WztzjCl8}`WL$un`ez}kvM&oei!}_KR6bxoJ{AC2ip*3k6V)!KW<chCzS=W
z(h8M<g62pnu~r#)yVGX?|3<mH_yc^MN4GM3@+U{l^&#c`dC$L+-r}lit8OGVo(og0
z&Y=5zjqxhOiiaHwQ#YzYnja%yCeW(Tv(P)Q{w`CUlyFzvbv3F&e>syKd-S3zuUB?y
z`=Prk7hRFMRrW$fRjvH!pVwk!$nTTA)|JbUO>+w`ox7k!yrV_W!~}+9o?3k+q<0tc
zV%bEUNOl;3-6@+2uC*ah+VuMU`PY!zgq_J*u0f=((x*8sj-x&>ZY$_nFkf9Ar9X8s
zWsSPSpNXn#(5tV+_lNVlt?HZMV?xcXQgushelg+mxq7sDTRmKKUVZJN#aBSwRxf|%
z`Pt=?arJJc<X(O398GmFchGMlQq!>YiOV$sYls0|demPA4XA7lDkyhoioHi#@~t(R
zl7~4J&h*ooK2djDM#NRkA@4!$=-^`w(-n=iw6l_9#J?^-+Yy{>KQ%UIH6ELM#8l{-
z9kM<d_%guHi1J9ocFJf6{#rKbl}Ff&l$`(pW$3JIGh;B53}M3wngMh7p#nQ$A#lb_
zksuJ|;SStl#z7FurzyLcG3OCrUKz?K4R3J}tTmy0E0%9C&s2br)FCd)C1^X4AW3P)
z)Sj`HX;eu6hXyO1%Mf5b>Vf4l6bMB{HjFe;R)d|H!4L@3p<ITtfN&%!IIe$K0!PnG
ZQ?$d70T%2-Id~q3(B&GX{{nHL2Y?mPSmgi!

literal 0
HcmV?d00001

diff --git a/examples/regression/Lasso_alpha20.pkl b/examples/regression/Lasso_alpha20.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..7c7f62ea7ce4c81f4ca819edb65edfb934df8abe
GIT binary patch
literal 291
zcmX|5Jxc>Y6g+b#-mQrrt@s19@IVmT2o~4GbPw#7b?*fi_xA1E-KY@+?Q(z9O0e-K
zctmlUVTNI5-Ud28SX9AP*0`Q!OAXpe6NSb&l~6VqHM%5bDn=5VoxbzdD-TYfbQD@;
z%?ongfp}7(l`*N1c)No#qD7qyIUd4Due3FqM=)8KB#lcLH2TClYbflw45|-uQBS|K
z4WMY%vow8*%bU*}#zDD;Hth*n%t82onp(f?<!%Q<O}LOeo<p9<@}!;~U4DEWU!OtY
qDVk&+k@vd(2Kkk|<^44bmG$?E59+Inze(}`O7da-Yi8N<uKEE?wP1k&

literal 0
HcmV?d00001

diff --git a/examples/regression/OLS.pkl b/examples/regression/OLS.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e38b86d329a20d502fe30c4e3ea845674ded6572
GIT binary patch
literal 114
zcmZo*N-oaMNlh%u)62=s1Jd!i`6;P6dP#}Jsa!r_k)YJ{qSWHz%=|pALI%yoLdM!c
zCXG<ey!@iv#GK5k)I#RYP`2d!g39=aLY9tDw!HYP{G{SSR&T~o-n7h;_{_YL)S~3n
Mf|5eEj?yGO04^OUQ2+n{

literal 0
HcmV?d00001

diff --git a/examples/regression/README.md b/examples/regression/README.md
new file mode 100644
index 00000000..3014d311
--- /dev/null
+++ b/examples/regression/README.md
@@ -0,0 +1,26 @@
+# Regression Models
+
+This directory contains example regression method objects from `scikit-learn`. These objects are serialized to disk into Python "pickle" files created using `sklearn.externals.joblib`. These "pickled" files are included as examples because `YATSM` (`YATSM>=0.5.0`) can be run using a variety of prediction methods as long as they are serializable as a class object with a `fit` and `predict` interface similar to estimators from the `scikit-learn` package.
+
+## Examples
+Current examples include:
+
+1. `Lasso_alpha20.pkl`
+    - Lasso regression method where `alpha` is fixed to a value of `20`. This specific parameterization of Lasso regression is used by Zhu Zhe in the CCDC algorithm.
+2. `LassoCV_n100_alpha_0-50.pkl`
+    - Lasso regression method where the `alpha` (usually called `lambda`, the tradeoff between least squares and L1 shrinkage) hyperparameter is crossvalidated among `n=100` values ranging between `0` and `50`.
+3. `OLS.pkl`
+    - Ordinary Least Squares
+
+## Creation
+
+Custom regression estimators may be created as "pickles" as follows:
+
+``` python
+In [1]: import sklearn.linear_model, sklearn.externals
+
+In [2]: lasso = sklearn.linear_model.Lasso(alpha=20.0)
+
+In [3]: sklearn.externals.joblib.dump(lasso, 'Lasso_alpha20.pkl')
+Out[3]: ['Lasso_alpha20.pkl']
+```
diff --git a/yatsm/classifiers/RandomForest.ini b/yatsm/classifiers/RandomForest.ini
deleted file mode 100644
index 8c29786d..00000000
--- a/yatsm/classifiers/RandomForest.ini
+++ /dev/null
@@ -1,34 +0,0 @@
-# Default configuration file for Random Forest algorithm
-#
-# See sklearn implementation API here:
-# http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html
-
-[metadata]
-# Classification algorithm
-algorithm = RandomForest
-
-[init]
-# Number of trees in forest
-n_estimators = 500
-# Criterion for qualify of split ['gini', 'entropy']
-criterion = gini
-# number of features tried at each node
-max_features = auto
-# maximum depth of tree
-max_depth = None
-# minimum number of samples required to split an internal node
-min_samples_split = 2
-# minimum number of samples in newly created leaves
-min_samples_leaf = 1
-# maximum leaf nodes -- if not None max_depth is ignored
-max_leaf_nodes = None
-# Use bootstrap sample
-bootstrap = True
-# use out-of-bag sample for generalization error
-oob_score = True
-# number of jobs in parallel for fit and predict
-n_jobs = 1
-
-[fit]
-# Sample weights for training data
-sample_weight = None