Commit

Try to use functions for ransac

akleeman committed Sep 19, 2018
1 parent 89423ed commit 1f26ada
Showing 13 changed files with 429 additions and 129 deletions.
51 changes: 29 additions & 22 deletions albatross/core/indexing.h
@@ -15,15 +15,17 @@

#include "core/dataset.h"
#include <Eigen/Core>
#include <algorithm>
#include <functional>
#include <iostream>
#include <iterator>
#include <map>
#include <numeric>
#include <vector>

namespace albatross {

-using s32 = int32_t;
-using FoldIndices = std::vector<s32>;
+using FoldIndices = std::vector<std::size_t>;
using FoldName = std::string;
using FoldIndexer = std::map<FoldName, FoldIndices>;

@@ -141,20 +141,26 @@ template <typename FeatureType> struct RegressionFold {
test_indices(test_indices_){};
};

-inline FoldIndices get_train_indices(const FoldIndices &test_indices,
-                                     const int n) {
-  const s32 k = static_cast<s32>(test_indices.size());
-  // The train indices are all the indices that are not test indices.
-  FoldIndices train_indices(n - k);
-  s32 train_cnt = 0;
-  for (s32 j = 0; j < n; j++) {
-    if (std::find(test_indices.begin(), test_indices.end(), j) ==
-        test_indices.end()) {
-      train_indices[train_cnt] = j;
-      train_cnt++;
-    }
-  }
-  return train_indices;
+template <typename X>
+inline std::vector<X> vector_set_difference(const std::vector<X> &x,
+                                            const std::vector<X> &y) {
+  std::vector<X> diff;
+  std::set_difference(x.begin(), x.end(), y.begin(), y.end(),
+                      std::inserter(diff, diff.begin()));
+  return diff;
}

+/*
+ * Computes the indices between 0 and n - 1 which are NOT contained
+ * in `indices`. Here "complement" is meant in the mathematical sense:
+ * "the part required to make something whole". In other words,
+ * `indices` and indices_complement(indices) together contain all the
+ * numbers between 0 and n - 1.
+ */
+inline FoldIndices indices_complement(const FoldIndices &indices, const int n) {
+  FoldIndices all_indices(n);
+  std::iota(all_indices.begin(), all_indices.end(), 0);
+  return vector_set_difference(all_indices, indices);
+}
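One subtlety worth noting: vector_set_difference relies on std::set_difference, which requires both input ranges to be sorted. all_indices is sorted by construction (std::iota), so the complement is correct as long as the fold indices are kept sorted. A hypothetical example (not part of this commit):

// With n = 6 and sorted test indices {1, 4} ...
const FoldIndices test_indices = {1, 4};
const FoldIndices train_indices = indices_complement(test_indices, 6);
// ... the train indices are everything else: {0, 2, 3, 5}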

/*
@@ -168,7 +176,7 @@ static inline std::vector<RegressionFold<FeatureType>>
folds_from_fold_indexer(const RegressionDataset<FeatureType> &dataset,
const FoldIndexer &groups) {
// For a dataset with n features, we'll have n folds.
-  const s32 n = static_cast<s32>(dataset.features.size());
+  const std::size_t n = dataset.features.size();
std::vector<RegressionFold<FeatureType>> folds;
// For each fold, partition into train and test sets.
for (const auto &pair : groups) {
@@ -177,7 +185,7 @@ folds_from_fold_indexer(const RegressionDataset<FeatureType> &dataset,
// from changing the input FoldIndexer we perform a copy here.
const FoldName group_name(pair.first);
const FoldIndices test_indices(pair.second);
-    const auto train_indices = get_train_indices(test_indices, n);
+    const auto train_indices = indices_complement(test_indices, n);

std::vector<FeatureType> train_features =
subset(train_indices, dataset.features);
Expand Down Expand Up @@ -205,7 +213,7 @@ template <typename FeatureType>
static inline FoldIndexer
leave_one_out_indexer(const RegressionDataset<FeatureType> &dataset) {
FoldIndexer groups;
-  for (s32 i = 0; i < static_cast<s32>(dataset.features.size()); i++) {
+  for (std::size_t i = 0; i < dataset.features.size(); i++) {
FoldName group_name = std::to_string(i);
groups[group_name] = {i};
}
@@ -221,9 +229,8 @@ static inline FoldIndexer leave_one_group_out_indexer(
const RegressionDataset<FeatureType> &dataset,
const std::function<FoldName(const FeatureType &)> &get_group_name) {
FoldIndexer groups;
-  for (s32 i = 0; i < static_cast<s32>(dataset.features.size()); i++) {
-    const std::string k =
-        get_group_name(dataset.features[static_cast<std::size_t>(i)]);
+  for (std::size_t i = 0; i < dataset.features.size(); i++) {
+    const std::string k = get_group_name(dataset.features[i]);
// Get the existing indices if we've already encountered this group_name
// otherwise initialize a new one.
FoldIndices indices;
11 changes: 4 additions & 7 deletions albatross/core/model.h
@@ -66,8 +66,7 @@ class RegressionModel : public ParameterHandlingMixin {
void fit(const std::vector<FeatureType> &features,
const MarginalDistribution &targets) {
assert(features.size() > 0);
-    assert(static_cast<s32>(features.size()) ==
-           static_cast<s32>(targets.size()));
+    assert(features.size() == static_cast<std::size_t>(targets.size()));
fit_(features, targets);
has_been_fit_ = true;
}
@@ -174,8 +173,7 @@ class RegressionModel : public ParameterHandlingMixin {
detail::PredictTypeIdentity<JointDistribution> &&identity) const {
assert(has_been_fit());
JointDistribution preds = predict_(features);
-    assert(static_cast<s32>(preds.mean.size()) ==
-           static_cast<s32>(features.size()));
+    assert(static_cast<std::size_t>(preds.mean.size()) == features.size());
return preds;
}

@@ -184,8 +182,7 @@ class RegressionModel : public ParameterHandlingMixin {
detail::PredictTypeIdentity<MarginalDistribution> &&identity) const {
assert(has_been_fit());
MarginalDistribution preds = predict_marginal_(features);
-    assert(static_cast<s32>(preds.mean.size()) ==
-           static_cast<s32>(features.size()));
+    assert(static_cast<std::size_t>(preds.mean.size()) == features.size());
return preds;
}

@@ -194,7 +191,7 @@ class RegressionModel : public ParameterHandlingMixin {
detail::PredictTypeIdentity<Eigen::VectorXd> &&identity) const {
assert(has_been_fit());
Eigen::VectorXd preds = predict_mean_(features);
-    assert(static_cast<s32>(preds.size()) == static_cast<s32>(features.size()));
+    assert(static_cast<std::size_t>(preds.size()) == features.size());
return preds;
}

49 changes: 34 additions & 15 deletions albatross/models/gp.h
@@ -46,6 +46,35 @@ template <typename FeatureType> struct GaussianProcessFit {
}
};

+template <typename FeatureType>
+inline JointDistribution predict_from_covariance_and_fit(
+    const Eigen::MatrixXd &cross_cov, const Eigen::MatrixXd &pred_cov,
+    const GaussianProcessFit<FeatureType> &model_fit) {
+  const Eigen::VectorXd pred = cross_cov.transpose() * model_fit.information;
+  auto ldlt = model_fit.train_ldlt;
+  Eigen::MatrixXd posterior_cov = ldlt.solve(cross_cov);
+  posterior_cov = cross_cov.transpose() * posterior_cov;
+  posterior_cov = pred_cov - posterior_cov;
+  return JointDistribution(pred, posterior_cov);
+}
+
+template <typename FeatureType>
+inline GaussianProcessFit<FeatureType>
+fit_from_covariance(const std::vector<FeatureType> &features,
+                    const Eigen::MatrixXd &train_cov,
+                    const MarginalDistribution &targets) {
+  GaussianProcessFit<FeatureType> model_fit;
+  model_fit.train_features = features;
+  Eigen::MatrixXd cov(train_cov);
+  if (targets.has_covariance()) {
+    cov += targets.covariance;
+  }
+  model_fit.train_ldlt = Eigen::SerializableLDLT(cov.ldlt());
+  // Precompute the information vector
+  model_fit.information = model_fit.train_ldlt.solve(targets.mean);
+  return model_fit;
+}
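Taken together, these two helpers implement the standard GP posterior. With training covariance K, observation noise S, targets y, cross covariance K* (training rows, prediction columns), and prediction covariance K**, the fit precomputes the "information" vector

    v = (K + S)^-1 y

via the LDLT factorization, and the prediction is

    mean:        m* = K*^T v
    covariance:  C* = K** - K*^T (K + S)^-1 K*

with the inverse always applied through the stored LDLT solve rather than formed explicitly.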

template <typename FeatureType, typename SubFeatureType = FeatureType>
using SerializableGaussianProcess =
SerializableRegressionModel<FeatureType,
@@ -101,7 +130,7 @@ class GaussianProcessRegression
symmetric_covariance(covariance_function_, features);
auto ldlt = this->model_fit_.train_ldlt;
pred_cov -= cross_cov * ldlt.solve(cross_cov.transpose());
-    assert(static_cast<s32>(pred.size()) == static_cast<s32>(features.size()));
+    assert(static_cast<std::size_t>(pred.size()) == features.size());
return InspectionDistribution(pred, pred_cov);
}

@@ -212,27 +241,17 @@ class GaussianProcessRegression
serializable_fit_(const std::vector<FeatureType> &features,
const MarginalDistribution &targets) const override {
Eigen::MatrixXd cov = symmetric_covariance(covariance_function_, features);
-    FitType model_fit;
-    model_fit.train_features = features;
-    if (targets.has_covariance()) {
-      cov += targets.covariance;
-    }
-    model_fit.train_ldlt = Eigen::SerializableLDLT(cov.ldlt());
-    // Precompute the information vector
-    model_fit.information = model_fit.train_ldlt.solve(targets.mean);
-    return model_fit;
+    return fit_from_covariance(features, cov, targets);
}

JointDistribution
predict_(const std::vector<FeatureType> &features) const override {
const auto cross_cov = asymmetric_covariance(
-        covariance_function_, features, this->model_fit_.train_features);
-    const Eigen::VectorXd pred = cross_cov * this->model_fit_.information;
+        covariance_function_, this->model_fit_.train_features, features);
Eigen::MatrixXd pred_cov =
symmetric_covariance(covariance_function_, features);
-    auto ldlt = this->model_fit_.train_ldlt;
-    pred_cov -= cross_cov * ldlt.solve(cross_cov.transpose());
-    return JointDistribution(pred, pred_cov);
+    return predict_from_covariance_and_fit(cross_cov, pred_cov,
+                                           this->model_fit_);
}
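Note the swapped argument order in asymmetric_covariance: cross_cov is now (train x test) rather than (test x train), which is why predict_from_covariance_and_fit multiplies by cross_cov.transpose() where the old code used cross_cov directly.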

virtual MarginalDistribution
8 changes: 4 additions & 4 deletions albatross/models/least_squares.h
@@ -70,11 +70,11 @@ class LeastSquaresRegression
protected:
Eigen::VectorXd
predict_mean_(const std::vector<Eigen::VectorXd> &features) const override {
-    int n = static_cast<s32>(features.size());
+    std::size_t n = features.size();
     Eigen::VectorXd mean(n);
-    for (s32 i = 0; i < n; i++) {
-      mean(i) =
-          features[static_cast<std::size_t>(i)].dot(this->model_fit_.coefs);
+    for (std::size_t i = 0; i < n; i++) {
+      mean(static_cast<Eigen::Index>(i)) =
+          features[i].dot(this->model_fit_.coefs);
}
return mean;
}
162 changes: 162 additions & 0 deletions albatross/models/ransac_gp.h
@@ -0,0 +1,162 @@
/*
* Copyright (C) 2018 Swift Navigation Inc.
* Contact: Swift Navigation <[email protected]>
*
* This source is subject to the license found in the file 'LICENSE' which must
* be distributed together with this source. All other rights reserved.
*
* THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND,
* EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE.
*/

#ifndef ALBATROSS_MODELS_RANSAC_GP_H
#define ALBATROSS_MODELS_RANSAC_GP_H

#include "crossvalidation.h"
#include "gp.h"
#include "outlier.h"
#include <random>

namespace albatross {

template <typename FeatureType, typename CovarianceFunction>
class RansacGaussianProcessRegression
: public GaussianProcessRegression<FeatureType, CovarianceFunction> {
public:
using BaseModel = GaussianProcessRegression<FeatureType, CovarianceFunction>;
using FitType = GaussianProcessFit<FeatureType>;

RansacGaussianProcessRegression(CovarianceFunction &covariance_function)
: BaseModel(covariance_function){};

template <typename Archive> void save(Archive &archive) const {
archive(cereal::base_class<BaseModel>(this));
}

template <typename Archive> void load(Archive &archive) {
archive(cereal::base_class<BaseModel>(this));
}

protected:
/*
* Cross validation specializations
*
* Note the naming here uses a trailing underscore. This is to avoid
* name hiding when implementing one of these methods in a derived
* class:
*
* https://stackoverflow.com/questions/1628768/why-does-an-overridden-function-in-the-derived-class-hide-other-overloads-of-the
*/
virtual std::vector<JointDistribution> cross_validated_predictions_(
const RegressionDataset<FeatureType> &dataset,
const FoldIndexer &fold_indexer,
const detail::PredictTypeIdentity<JointDistribution> &identity) override {
const auto folds = folds_from_fold_indexer(dataset, fold_indexer);
return this->template cross_validated_predictions<JointDistribution>(folds);
}

virtual std::vector<MarginalDistribution> cross_validated_predictions_(
const RegressionDataset<FeatureType> &dataset,
const FoldIndexer &fold_indexer,
const detail::PredictTypeIdentity<MarginalDistribution> &identity)
override {
const auto folds = folds_from_fold_indexer(dataset, fold_indexer);
return this->template cross_validated_predictions<MarginalDistribution>(
folds);
}

virtual std::vector<Eigen::VectorXd> cross_validated_predictions_(
const RegressionDataset<FeatureType> &dataset,
const FoldIndexer &fold_indexer,
const detail::PredictTypeIdentity<PredictMeanOnly> &identity) override {
const auto folds = folds_from_fold_indexer(dataset, fold_indexer);
return this->template cross_validated_predictions<PredictMeanOnly>(folds);
}

FitType
serializable_fit_(const std::vector<FeatureType> &features,
const MarginalDistribution &targets) const override {

Eigen::MatrixXd cov =
symmetric_covariance(this->covariance_function_, features);
if (targets.has_covariance()) {
cov += targets.covariance;
}

double threshold = 1.;
std::size_t min_inliers = 3;
std::size_t min_features = 3;
std::size_t max_iterations = 20;
EvaluationMetric<JointDistribution> metric =
albatross::evaluation_metrics::negative_log_likelihood;

struct FitAndIndices {
FitType model_fit;
Indexer fit_indices;
};

std::function<FitAndIndices(const Indexer &)> fitter =
[&](const Indexer &indexer) {
const auto train_features = subset(indexer, features);
const auto train_targets = subset(indexer, targets);
auto train_cov = symmetric_subset(indexer, cov);
const FitAndIndices fit_and_indices = {
fit_from_covariance(train_features, train_cov, train_targets),
indexer};
return fit_and_indices;
};

std::function<double(const Indexer &, const FitAndIndices &)>
outlier_metric = [&](const Indexer &test_indices,
const FitAndIndices &fit_and_indices) {
const auto cross_cov =
subset(fit_and_indices.fit_indices, test_indices, cov);
const auto pred_cov = symmetric_subset(test_indices, cov);
const auto pred = predict_from_covariance_and_fit(
cross_cov, pred_cov, fit_and_indices.model_fit);
const auto target = subset(test_indices, targets);
double metric_value = metric(pred, target);
return metric_value;
};

std::function<double(const Indexer &)> model_metric = [&](
const Indexer &inliers) {
RegressionDataset<FeatureType> inlier_dataset(subset(inliers, features),
subset(inliers, targets));
const FoldIndexer inlier_loo = leave_one_out_indexer(inlier_dataset);

BaseModel model;
model.set_params(this->get_params());
double mean_metric = cross_validated_scores<double>(
metric, inlier_dataset, inlier_loo, &model)
.mean();
return mean_metric;
};

const RegressionDataset<FeatureType> dataset(features, targets);
const auto loo_indexer = leave_one_out_indexer(dataset);

auto inliers = ransac<FitAndIndices>(
fitter, outlier_metric, model_metric, map_values(loo_indexer),
threshold, min_features, min_inliers, max_iterations);

const auto inlier_features = subset(inliers, features);
const auto inlier_targets = subset(inliers, targets);
const auto inlier_cov = symmetric_subset(inliers, cov);

return fit_from_covariance(inlier_features, inlier_cov, inlier_targets);
}
};
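The commit's main refactor is visible here: RANSAC is decomposed into three function objects — fitter (fit a GP to a candidate index set, reusing the precomputed covariance), outlier_metric (score held-out indices against that fit), and model_metric (leave-one-out cross-validated score of a consensus set) — which are handed to a generic ransac routine defined in outlier.h. That routine is not part of this diff; purely as a sketch of the contract (an assumption, not the actual implementation), a driver with this signature might look like:

// Hypothetical sketch of the generic ransac driver used above; the real
// implementation lives in outlier.h and may differ. Assumes Indexer is the
// index vector type used above and <algorithm>, <limits>, <random> are
// included.
template <typename FitType>
Indexer ransac(const std::function<FitType(const Indexer &)> &fitter,
               const std::function<double(const Indexer &, const FitType &)>
                   &outlier_metric,
               const std::function<double(const Indexer &)> &model_metric,
               const std::vector<Indexer> &groups, double threshold,
               std::size_t min_features, std::size_t min_inliers,
               std::size_t max_iterations) {
  std::default_random_engine gen;
  double best_metric = std::numeric_limits<double>::infinity();
  Indexer best_inliers;
  for (std::size_t i = 0; i < max_iterations; ++i) {
    // Fit a candidate model to a random sample of the candidate groups.
    std::vector<Indexer> candidates(groups);
    std::shuffle(candidates.begin(), candidates.end(), gen);
    Indexer sample;
    for (std::size_t j = 0; j < min_features; ++j) {
      sample.insert(sample.end(), candidates[j].begin(), candidates[j].end());
    }
    std::sort(sample.begin(), sample.end());
    const FitType fit = fitter(sample);
    // Any remaining group whose held-out score beats the threshold joins
    // the consensus (inlier) set.
    Indexer inliers(sample);
    for (std::size_t j = min_features; j < candidates.size(); ++j) {
      if (outlier_metric(candidates[j], fit) < threshold) {
        inliers.insert(inliers.end(), candidates[j].begin(),
                       candidates[j].end());
      }
    }
    if (inliers.size() < min_inliers) {
      continue;
    }
    // Keep the consensus set with the best cross-validated model metric.
    std::sort(inliers.begin(), inliers.end());
    const double metric_value = model_metric(inliers);
    if (metric_value < best_metric) {
      best_metric = metric_value;
      best_inliers = inliers;
    }
  }
  return best_inliers;
}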

template <typename FeatureType, typename CovFunc>
std::unique_ptr<RansacGaussianProcessRegression<FeatureType, CovFunc>>
ransac_gp_pointer_from_covariance(CovFunc covariance_function) {
return std::make_unique<
RansacGaussianProcessRegression<FeatureType, CovFunc>>(
covariance_function);
}

} // namespace albatross

#endif
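Finally, a minimal usage sketch (hypothetical, not from this commit): SquaredExponential stands in for whatever CovarianceFunction is in use, features/targets match the fit() signature in model.h above, and the public predict() is assumed to return a JointDistribution as the predict_ overload suggests.

// Hypothetical usage of the new RANSAC GP wrapper; all names here are
// placeholders rather than part of this commit.
SquaredExponential sqexp;
auto model = ransac_gp_pointer_from_covariance<double>(sqexp);
// fit() runs RANSAC internally and keeps only the consensus inliers.
model->fit(features, targets);
const JointDistribution prediction = model->predict(test_features);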