RANSAC Outlier Detection #51

Merged · 2 commits · Oct 4, 2018
Changes from 1 commit
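For context before diving into the diff (none of this is quoted from the PR): RANSAC repeatedly fits a model to small random subsets of the data, scores every point against each candidate fit, and keeps the largest consensus set of inliers. The indexing refactor below — indices_complement and friends — is exactly the subset/complement bookkeeping such a loop needs. A minimal generic sketch, with hypothetical names:

#include <algorithm>
#include <cstddef>
#include <functional>
#include <numeric>
#include <random>
#include <vector>

using Indices = std::vector<std::size_t>;

// Generic RANSAC outline — a hypothetical sketch, NOT the PR's code.
// `fit` trains a model on the given subset of indices; `error` then
// scores a single point against the current fit.
inline Indices ransac_inliers(std::size_t n, std::size_t min_samples,
                              std::size_t max_iterations, double threshold,
                              const std::function<void(const Indices &)> &fit,
                              const std::function<double(std::size_t)> &error) {
  std::mt19937 gen(0);
  Indices all(n);
  std::iota(all.begin(), all.end(), 0);
  Indices best;
  for (std::size_t it = 0; it < max_iterations; ++it) {
    // Fit to a random minimal subset ...
    Indices candidate(all);
    std::shuffle(candidate.begin(), candidate.end(), gen);
    candidate.resize(min_samples);
    fit(candidate);
    // ... then collect the consensus set of points the fit explains.
    Indices inliers;
    for (std::size_t i = 0; i < n; ++i) {
      if (error(i) < threshold) {
        inliers.push_back(i);
      }
    }
    if (inliers.size() > best.size()) {
      best = inliers;
    }
  }
  return best;
}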
1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -40,6 +40,7 @@ set(albatross_HEADERS
     albatross/tuning_metrics.h
     albatross/map_utils.h
     albatross/csv_utils.h
+    albatross/outlier.h
     albatross/core/keys.h
     albatross/core/dataset.h
     albatross/core/model.h
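The albatross/outlier.h header is the lone CMake addition and, judging by the PR title, is presumably where the new RANSAC code itself lives; the files below are supporting refactors.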
53 changes: 30 additions & 23 deletions albatross/core/indexing.h
@@ -15,15 +15,17 @@

 #include "core/dataset.h"
 #include <Eigen/Core>
+#include <algorithm>
 #include <functional>
 #include <iostream>
+#include <iterator>
 #include <map>
+#include <numeric>
 #include <vector>

 namespace albatross {

-using s32 = int32_t;
-using FoldIndices = std::vector<s32>;
+using FoldIndices = std::vector<std::size_t>;
 using FoldName = std::string;
 using FoldIndexer = std::map<FoldName, FoldIndices>;

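A theme throughout this PR: the hand-rolled s32 index type is dropped in favor of std::size_t, which matches the return type of std::vector::size() and eliminates a layer of static_casts at call sites.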
@@ -83,7 +85,7 @@ inline Eigen::MatrixXd subset(const std::vector<SizeType> &row_indices,
       auto ii = static_cast<Eigen::Index>(i);
       auto jj = static_cast<Eigen::Index>(j);
       auto row_index = static_cast<Eigen::Index>(row_indices[i]);
-      auto col_index = static_cast<Eigen::Index>(col_indices[i]);
+      auto col_index = static_cast<Eigen::Index>(col_indices[j]);
       out(ii, jj) = v(row_index, col_index);
     }
   }
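Note the one-character fix above: the old code indexed col_indices with the row counter i, so subset() returned wrong (or out-of-bounds) entries whenever the row and column index vectors differed.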
@@ -141,20 +143,26 @@ template <typename FeatureType> struct RegressionFold {
         test_indices(test_indices_){};
 };

-inline FoldIndices get_train_indices(const FoldIndices &test_indices,
-                                     const int n) {
-  const s32 k = static_cast<s32>(test_indices.size());
-  // The train indices are all the indices that are not test indices.
-  FoldIndices train_indices(n - k);
-  s32 train_cnt = 0;
-  for (s32 j = 0; j < n; j++) {
-    if (std::find(test_indices.begin(), test_indices.end(), j) ==
-        test_indices.end()) {
-      train_indices[train_cnt] = j;
-      train_cnt++;
-    }
-  }
-  return train_indices;
-}
+template <typename X>
+inline std::vector<X> vector_set_difference(const std::vector<X> &x,
+                                            const std::vector<X> &y) {
+  std::vector<X> diff;
+  std::set_difference(x.begin(), x.end(), y.begin(), y.end(),
+                      std::inserter(diff, diff.begin()));
+  return diff;
+}
+
+/*
+ * Computes the indices between 0 and n - 1 which are NOT contained
+ * in `indices`. Here "complement" is the mathematical sense of the
+ * word, meaning "the part required to make something whole": together,
+ * indices and indices_complement(indices) contain all the numbers
+ * between 0 and n - 1.
+ */
+inline FoldIndices indices_complement(const FoldIndices &indices, const int n) {
+  FoldIndices all_indices(n);
+  std::iota(all_indices.begin(), all_indices.end(), 0);
+  return vector_set_difference(all_indices, indices);
+}

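A quick sanity check of the new helpers (a hypothetical sketch, not part of the diff — the include path is an assumption). One subtlety worth flagging: std::set_difference requires both input ranges to be sorted; all_indices is sorted by construction (std::iota), and the fold indexers in this header emit test indices in increasing order.

// Hypothetical usage sketch — not part of this PR.
#include <cassert>
#include "albatross/core/indexing.h"

int main() {
  using namespace albatross;
  // With n = 6 and test indices {1, 4}, the train indices are the rest.
  const FoldIndices test = {1, 4};
  const FoldIndices train = indices_complement(test, 6);
  assert((train == FoldIndices{0, 2, 3, 5}));
  return 0;
}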
@@ -168,7 +176,7 @@ static inline std::vector<RegressionFold<FeatureType>>
 folds_from_fold_indexer(const RegressionDataset<FeatureType> &dataset,
                         const FoldIndexer &groups) {
   // For a dataset with n features, we'll have n folds.
-  const s32 n = static_cast<s32>(dataset.features.size());
+  const std::size_t n = dataset.features.size();
   std::vector<RegressionFold<FeatureType>> folds;
   // For each fold, partition into train and test sets.
   for (const auto &pair : groups) {
@@ -177,7 +185,7 @@ folds_from_fold_indexer(const RegressionDataset<FeatureType> &dataset,
     // from changing the input FoldIndexer we perform a copy here.
     const FoldName group_name(pair.first);
     const FoldIndices test_indices(pair.second);
-    const auto train_indices = get_train_indices(test_indices, n);
+    const auto train_indices = indices_complement(test_indices, n);

     std::vector<FeatureType> train_features =
         subset(train_indices, dataset.features);
@@ -205,7 +213,7 @@ template <typename FeatureType>
 static inline FoldIndexer
 leave_one_out_indexer(const RegressionDataset<FeatureType> &dataset) {
   FoldIndexer groups;
-  for (s32 i = 0; i < static_cast<s32>(dataset.features.size()); i++) {
+  for (std::size_t i = 0; i < dataset.features.size(); i++) {
     FoldName group_name = std::to_string(i);
     groups[group_name] = {i};
   }
@@ -221,9 +229,8 @@ static inline FoldIndexer leave_one_group_out_indexer(
     const RegressionDataset<FeatureType> &dataset,
     const std::function<FoldName(const FeatureType &)> &get_group_name) {
   FoldIndexer groups;
-  for (s32 i = 0; i < static_cast<s32>(dataset.features.size()); i++) {
-    const std::string k =
-        get_group_name(dataset.features[static_cast<std::size_t>(i)]);
+  for (std::size_t i = 0; i < dataset.features.size(); i++) {
+    const std::string k = get_group_name(dataset.features[i]);
     // Get the existing indices if we've already encountered this group_name
     // otherwise initialize a new one.
     FoldIndices indices;
11 changes: 4 additions & 7 deletions albatross/core/model.h
@@ -66,8 +66,7 @@ class RegressionModel : public ParameterHandlingMixin {
   void fit(const std::vector<FeatureType> &features,
            const MarginalDistribution &targets) {
     assert(features.size() > 0);
-    assert(static_cast<s32>(features.size()) ==
-           static_cast<s32>(targets.size()));
+    assert(features.size() == static_cast<std::size_t>(targets.size()));
     fit_(features, targets);
     has_been_fit_ = true;
   }
@@ -174,8 +173,7 @@ class RegressionModel : public ParameterHandlingMixin {
       detail::PredictTypeIdentity<JointDistribution> &&identity) const {
     assert(has_been_fit());
     JointDistribution preds = predict_(features);
-    assert(static_cast<s32>(preds.mean.size()) ==
-           static_cast<s32>(features.size()));
+    assert(static_cast<std::size_t>(preds.mean.size()) == features.size());
     return preds;
   }

@@ -184,8 +182,7 @@ class RegressionModel : public ParameterHandlingMixin {
       detail::PredictTypeIdentity<MarginalDistribution> &&identity) const {
     assert(has_been_fit());
     MarginalDistribution preds = predict_marginal_(features);
-    assert(static_cast<s32>(preds.mean.size()) ==
-           static_cast<s32>(features.size()));
+    assert(static_cast<std::size_t>(preds.mean.size()) == features.size());
     return preds;
   }

@@ -194,7 +191,7 @@ class RegressionModel : public ParameterHandlingMixin {
       detail::PredictTypeIdentity<Eigen::VectorXd> &&identity) const {
     assert(has_been_fit());
     Eigen::VectorXd preds = predict_mean_(features);
-    assert(static_cast<s32>(preds.size()) == static_cast<s32>(features.size()));
+    assert(static_cast<std::size_t>(preds.size()) == features.size());
     return preds;
   }

54 changes: 36 additions & 18 deletions albatross/models/gp.h
@@ -10,8 +10,8 @@
  * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE.
  */

-#ifndef ALBATROSS_GP_GP_H
-#define ALBATROSS_GP_GP_H
+#ifndef ALBATROSS_MODELS_GP_H
+#define ALBATROSS_MODELS_GP_H

 #include "evaluate.h"
 #include "stdio.h"
@@ -46,6 +46,35 @@ template <typename FeatureType> struct GaussianProcessFit {
}
};

+template <typename FeatureType>
+inline JointDistribution predict_from_covariance_and_fit(
+    const Eigen::MatrixXd &cross_cov, const Eigen::MatrixXd &pred_cov,
+    const GaussianProcessFit<FeatureType> &model_fit) {
+  const Eigen::VectorXd pred = cross_cov.transpose() * model_fit.information;
+  auto ldlt = model_fit.train_ldlt;
+  Eigen::MatrixXd posterior_cov = ldlt.solve(cross_cov);
+  posterior_cov = cross_cov.transpose() * posterior_cov;
+  posterior_cov = pred_cov - posterior_cov;
+  return JointDistribution(pred, posterior_cov);
+}
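For reference, this helper computes the standard Gaussian-process conditional. Writing cross_cov as $K_{f*}$ (train by predict), pred_cov as $K_{**}$, and the precomputed information vector as $v = (K_{ff} + \Sigma)^{-1} y$ (see fit_from_covariance below), it returns

$$\mu_* = K_{f*}^{\top} v, \qquad \Sigma_* = K_{**} - K_{f*}^{\top}\,(K_{ff} + \Sigma)^{-1}\,K_{f*},$$

where $\Sigma$ is the optional measurement-noise covariance folded in during fitting.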

+template <typename FeatureType>
+inline GaussianProcessFit<FeatureType>
+fit_from_covariance(const std::vector<FeatureType> &features,
+                    const Eigen::MatrixXd &train_cov,
+                    const MarginalDistribution &targets) {
+  GaussianProcessFit<FeatureType> model_fit;
+  model_fit.train_features = features;
+  Eigen::MatrixXd cov(train_cov);
+  if (targets.has_covariance()) {
+    cov += targets.covariance;
+  }
+  model_fit.train_ldlt = Eigen::SerializableLDLT(cov.ldlt());
+  // Precompute the information vector
+  model_fit.information = model_fit.train_ldlt.solve(targets.mean);
+  return model_fit;
+}

Review thread on fit_from_covariance:

  Contributor: Can we make this a constructor of GaussianProcessFit<T>?
  Collaborator (author): ah yeah, good idea.
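A sketch of what the reviewer's suggested constructor might look like (hypothetical — this PR keeps the free function; the member list is inferred from the fields used above):

// Hypothetical constructor form of fit_from_covariance — not in this PR.
template <typename FeatureType> struct GaussianProcessFit {
  std::vector<FeatureType> train_features;
  Eigen::SerializableLDLT train_ldlt;
  Eigen::VectorXd information;

  GaussianProcessFit() = default;

  GaussianProcessFit(const std::vector<FeatureType> &features,
                     const Eigen::MatrixXd &train_cov,
                     const MarginalDistribution &targets)
      : train_features(features) {
    Eigen::MatrixXd cov(train_cov);
    if (targets.has_covariance()) {
      cov += targets.covariance;  // fold in measurement noise
    }
    train_ldlt = Eigen::SerializableLDLT(cov.ldlt());
    information = train_ldlt.solve(targets.mean);  // (K + Sigma)^{-1} y
  }
};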

template <typename FeatureType, typename SubFeatureType = FeatureType>
using SerializableGaussianProcess =
SerializableRegressionModel<FeatureType,
@@ -101,7 +130,7 @@ class GaussianProcessRegression
         symmetric_covariance(covariance_function_, features);
     auto ldlt = this->model_fit_.train_ldlt;
     pred_cov -= cross_cov * ldlt.solve(cross_cov.transpose());
-    assert(static_cast<s32>(pred.size()) == static_cast<s32>(features.size()));
+    assert(static_cast<std::size_t>(pred.size()) == features.size());
     return InspectionDistribution(pred, pred_cov);
   }

@@ -212,27 +241,17 @@ class GaussianProcessRegression
   serializable_fit_(const std::vector<FeatureType> &features,
                     const MarginalDistribution &targets) const override {
     Eigen::MatrixXd cov = symmetric_covariance(covariance_function_, features);
-    FitType model_fit;
-    model_fit.train_features = features;
-    if (targets.has_covariance()) {
-      cov += targets.covariance;
-    }
-    model_fit.train_ldlt = Eigen::SerializableLDLT(cov.ldlt());
-    // Precompute the information vector
-    model_fit.information = model_fit.train_ldlt.solve(targets.mean);
-    return model_fit;
+    return fit_from_covariance(features, cov, targets);
   }

   JointDistribution
   predict_(const std::vector<FeatureType> &features) const override {
     const auto cross_cov = asymmetric_covariance(
-        covariance_function_, features, this->model_fit_.train_features);
-    const Eigen::VectorXd pred = cross_cov * this->model_fit_.information;
+        covariance_function_, this->model_fit_.train_features, features);
     Eigen::MatrixXd pred_cov =
         symmetric_covariance(covariance_function_, features);
-    auto ldlt = this->model_fit_.train_ldlt;
-    pred_cov -= cross_cov * ldlt.solve(cross_cov.transpose());
-    return JointDistribution(pred, pred_cov);
+    return predict_from_covariance_and_fit(cross_cov, pred_cov,
+                                           this->model_fit_);
   }
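Two things changed together in predict_: asymmetric_covariance is now called with the training features first, so cross_cov is (train × predict) rather than (predict × train), and predict_from_covariance_and_fit applies the transpose — algebraically identical to the old inline code, just shared with other call sites.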

virtual MarginalDistribution
@@ -261,7 +280,6 @@ class GaussianProcessRegression
return pred;
}

-private:
CovarianceFunction covariance_function_;
std::string model_name_;
};
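Dropping the private: specifier leaves covariance_function_ and model_name_ under the class's preceding access section — presumably so the new outlier-detection machinery (or subclasses) can reach the covariance function.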
8 changes: 4 additions & 4 deletions albatross/models/least_squares.h
@@ -70,11 +70,11 @@ class LeastSquaresRegression
 protected:
   Eigen::VectorXd
   predict_mean_(const std::vector<Eigen::VectorXd> &features) const override {
-    int n = static_cast<s32>(features.size());
+    std::size_t n = features.size();
     Eigen::VectorXd mean(n);
-    for (s32 i = 0; i < n; i++) {
-      mean(i) =
-          features[static_cast<std::size_t>(i)].dot(this->model_fit_.coefs);
+    for (std::size_t i = 0; i < n; i++) {
+      mean(static_cast<Eigen::Index>(i)) =
+          features[i].dot(this->model_fit_.coefs);
     }
     return mean;
   }
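Eigen coefficient access takes an Eigen::Index, so with the loop counter now a std::size_t the cast simply moves from the std::vector subscript to the mean(...) call.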