mlpack · zoq · Dec 7, 2018 · Nov 7, 2018 · Dec 4, 2018 · Dec 4, 2018
diff --git a/doc/optimizers.md b/doc/optimizers.md
@@ -547,6 +547,53 @@ TODO
  * [Frank-Wolfe in Wikipedia](https://en.wikipedia.org/wiki/Frank%E2%80%93Wolfe_algorithm)
  * [Differentiable functions](#differentiable-functions)
 
+## FTML (Follow the Moving Leader)
+
+*An optimizer for [differentiable separable functions](#differentiable-separable-functions).*
+
+Follow the Moving Leader (FTML) is an optimizer where recent samples are
+weighted more heavily in each iteration, so FTML can adapt more quickly to
+changes.
+
+#### Constructors
+
+ * `FTML()`
+ * `FTML(`_`stepSize, batchSize`_`)`
+ * `FTML(`_`stepSize, batchSize, beta1, beta2, epsilon, maxIterations, tolerance, shuffle`_`)`
+
+#### Attributes
+
+| **type** | **name** | **description** | **default** |
+|----------|----------|-----------------|-------------|
+| `double` | **`stepSize`** | Step size for each iteration. | `0.001` |
+| `size_t` | **`batchSize`** | Number of points to process in a single step. | `32` |
+| `double` | **`beta1`** | Exponential decay rate for the first moment estimates. | `0.9` |
+| `double` | **`beta2`** | Exponential decay rate for the weighted infinity norm estimates. | `0.999` |
+| `double` | **`epsilon`** | Value used to initialize the mean squared gradient parameter. | `1e-8` |
+| `size_t` | **`maxIterations`** | Maximum number of iterations allowed (0 means no limit). | `100000` |
+| `double` | **`tolerance`** | Maximum absolute tolerance to terminate algorithm. | `1e-5` |
+| `bool` | **`shuffle`** | If true, the function order is shuffled; otherwise, each function is visited in linear order. | `true` |
+
+The attributes of the optimizer may also be modified via the member methods
+`StepSize()`, `BatchSize()`, `Beta1()`, `Beta2()`, `Epsilon()`, `MaxIterations()`,
+`Tolerance()`, and `Shuffle()`.
+
+#### Examples
+
+```c++
+RosenbrockFunction f;
+arma::mat coordinates = f.GetInitialPoint();
+
+FTML optimizer(0.001, 32, 0.9, 0.999, 1e-8, 100000, 1e-5, true);
+optimizer.Optimize(f, coordinates);
+```
+
+#### See also:
+ * [Follow the Moving Leader in Deep Learning](http://proceedings.mlr.press/v70/zheng17a/zheng17a.pdf)
+ * [SGD in Wikipedia](https://en.wikipedia.org/wiki/Stochastic_gradient_descent)
+ * [SGD](#standard-sgd)
+ * [Differentiable separable functions](#differentiable-separable-functions)
+
 ## Gradient Descent
 
 *An optimizer for [differentiable functions](#differentiable-functions).*

diff --git a/include/ensmallen.hpp b/include/ensmallen.hpp
@@ -68,6 +68,7 @@
 #include "ensmallen_bits/bigbatch_sgd/bigbatch_sgd.hpp"
 #include "ensmallen_bits/cmaes/cmaes.hpp"
 #include "ensmallen_bits/cne/cne.hpp"
+#include "ensmallen_bits/ftml/ftml.hpp"
 
 #include "ensmallen_bits/function.hpp" // TODO: should move to function/
 

diff --git a/include/ensmallen_bits/ftml/ftml.hpp b/include/ensmallen_bits/ftml/ftml.hpp
@@ -0,0 +1,159 @@
+/**
+ * @file ftml.hpp
+ * @author Marcus Edel
+ *
+ * Definition of Follow the Moving Leader (FTML).
+ *
+ * ensmallen is free software; you may redistribute it and/or modify it under
+ * the terms of the 3-clause BSD license.  You should have received a copy of
+ * the 3-clause BSD license along with ensmallen.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef ENSMALLEN_FTML_FTML_HPP
+#define ENSMALLEN_FTML_FTML_HPP
+
+#include <ensmallen_bits/sgd/sgd.hpp>
+
+#include "ftml_update.hpp"
+
+namespace ens {
+
+/**
+ * Follow the Moving Leader (FTML) is an optimizer where recent samples are
+ * weighted more heavily in each iteration, so FTML can adapt more quickly to
+ * changes.
+ *
+ * For more information, see the following.
+ *
+ * @code
+ * @inproceedings{Zheng2017,
+ *   author    = {Shuai Zheng and James T. Kwok},
+ *   title     = {Follow the Moving Leader in Deep Learning},
+ *   year      = {2017},
+ *   booktitle = {Proceedings of the 34th International Conference on Machine
+ *                Learning},
+ *   pages     = {4110--4119},
+ *   series    = {Proceedings of Machine Learning Research},
+ *   publisher = {PMLR},
+ * }
+ * @endcode
+ *
+ * For FTML to work, a DecomposableFunctionType template parameter is
+ * required. This class must implement the following functions:
+ *
+ *   size_t NumFunctions();
+ *   double Evaluate(const arma::mat& coordinates,
+ *                   const size_t i,
+ *                   const size_t batchSize);
+ *   void Gradient(const arma::mat& coordinates,
+ *                 const size_t i,
+ *                 arma::mat& gradient,
+ *                 const size_t batchSize);
+ *
+ * NumFunctions() should return the number of functions (\f$n\f$), and in the
+ * other two functions, the parameter i refers to which individual function (or
+ * gradient) is being evaluated.  So, for the case of a data-dependent function,
+ * such as NCA, NumFunctions() should return the number of points in the
+ * dataset, and Evaluate(coordinates, 0, 1) will evaluate the objective
+ * function on the first point in the dataset (presumably, the dataset is held
+ * internally in the DecomposableFunctionType).
+ */
+class FTML
+{
+ public:
+  /**
+   * Construct the FTML optimizer with the given parameters. The defaults here
+   * are not necessarily good for the given problem, so it is suggested that
+   * the values used be tailored to the task at hand.  The maximum number of
+   * iterations refers to the maximum number of points that are processed
+   * (i.e., one iteration equals one point; one iteration does not equal one
+   * pass over the dataset).
+   *
+   * @param stepSize Step size for each iteration.
+   * @param batchSize Number of points to process in a single step.
+   * @param beta1 Exponential decay rate for the first moment estimates.
+   * @param beta2 Exponential decay rate for the weighted infinity norm
+   *        estimates.
+   * @param epsilon Initial value of the mean squared gradient parameter.
+   * @param maxIterations Maximum number of iterations allowed (0 means no
+   *        limit).
+   * @param tolerance Maximum absolute tolerance to terminate algorithm.
+   * @param shuffle If true, the function order is shuffled; otherwise, each
+   *        function is visited in linear order.
+   */
+  FTML(const double stepSize = 0.001,
+       const size_t batchSize = 32,
+       const double beta1 = 0.9,
+       const double beta2 = 0.999,
+       const double epsilon = 1e-8,
+       const size_t maxIterations = 100000,
+       const double tolerance = 1e-5,
+       const bool shuffle = true);
+
+  /**
+   * Optimize the given function using FTML. The given starting point will
+   * be modified to store the finishing point of the algorithm, and the final
+   * objective value is returned.
+   *
+   * @tparam DecomposableFunctionType Type of the function to be optimized.
+   * @param function Function to optimize.
+   * @param iterate Starting point (will be modified).
+   * @return Objective value of the final point.
+   */
+  template<typename DecomposableFunctionType>
+  double Optimize(DecomposableFunctionType& function, arma::mat& iterate)
+  {
+    return optimizer.Optimize(function, iterate);
+  }
+
+  //! Get the step size.
+  double StepSize() const { return optimizer.StepSize(); }
+  //! Modify the step size.
+  double& StepSize() { return optimizer.StepSize(); }
+
+  //! Get the batch size.
+  size_t BatchSize() const { return optimizer.BatchSize(); }
+  //! Modify the batch size.
+  size_t& BatchSize() { return optimizer.BatchSize(); }
+
+  //! Get the exponential decay rate for the first moment estimates.
+  double Beta1() const { return optimizer.UpdatePolicy().Beta1(); }
+  //! Modify the exponential decay rate for the first moment estimates.
+  double& Beta1() { return optimizer.UpdatePolicy().Beta1(); }
+
+  //! Get the second moment coefficient.
+  double Beta2() const { return optimizer.UpdatePolicy().Beta2(); }
+  //! Modify the second moment coefficient.
+  double& Beta2() { return optimizer.UpdatePolicy().Beta2(); }
+
+  //! Get the value used to initialise the mean squared gradient parameter.
+  double Epsilon() const { return optimizer.UpdatePolicy().Epsilon(); }
+  //! Modify the value used to initialise the mean squared gradient parameter.
+  double& Epsilon() { return optimizer.UpdatePolicy().Epsilon(); }
+
+  //! Get the maximum number of iterations (0 indicates no limit).
+  size_t MaxIterations() const { return optimizer.MaxIterations(); }
+  //! Modify the maximum number of iterations (0 indicates no limit).
+  size_t& MaxIterations() { return optimizer.MaxIterations(); }
+
+  //! Get the tolerance for termination.
+  double Tolerance() const { return optimizer.Tolerance(); }
+  //! Modify the tolerance for termination.
+  double& Tolerance() { return optimizer.Tolerance(); }
+
+  //! Get whether or not the individual functions are shuffled.
+  bool Shuffle() const { return optimizer.Shuffle(); }
+  //! Modify whether or not the individual functions are shuffled.
+  bool& Shuffle() { return optimizer.Shuffle(); }
+
+ private:
+  //! The Stochastic Gradient Descent object with the FTMLUpdate update policy.
+  SGD<FTMLUpdate> optimizer;
+};
+
+} // namespace ens
+
+// Include implementation.
+#include "ftml_impl.hpp"
+
+#endif
diff --git a/include/ensmallen_bits/ftml/ftml_impl.hpp b/include/ensmallen_bits/ftml/ftml_impl.hpp
@@ -0,0 +1,38 @@
+/**
+ * @file ftml_impl.hpp
+ * @author Marcus Edel
+ *
+ * Implementation of the Follow the Moving Leader (FTML) optimizer.
+ *
+ * ensmallen is free software; you may redistribute it and/or modify it under
+ * the terms of the 3-clause BSD license.  You should have received a copy of
+ * the 3-clause BSD license along with ensmallen.  If not, see
+ * http://www.opensource.org/licenses/BSD-3-Clause for more information.
+ */
+#ifndef ENSMALLEN_FTML_FTML_IMPL_HPP
+#define ENSMALLEN_FTML_FTML_IMPL_HPP
+
+// In case it hasn't been included yet.
+#include "ftml.hpp"
+
+namespace ens {
+
+inline FTML::FTML(const double stepSize,
+                  const size_t batchSize,
+                  const double beta1,
+                  const double beta2,
+                  const double epsilon,
+                  const size_t maxIterations,
+                  const double tolerance,
+                  const bool shuffle) :
+    optimizer(stepSize,
+              batchSize,
+              maxIterations,
+              tolerance,
+              shuffle,
+              FTMLUpdate(epsilon, beta1, beta2)) // NOTE(review): verify order.
+{ /* Nothing to do: every parameter is forwarded to the wrapped optimizer. */ }
+
+} // namespace ens
+
+#endif