From 87205d012c08b7a082ad5fa7cffd226734fec626 Mon Sep 17 00:00:00 2001 From: Cesar Souza Date: Mon, 23 Jan 2017 21:08:04 +0100 Subject: [PATCH] GH-390 : MachineLearning.KMeans: Balanced clustering. --- Copyright.txt | 28 + .../Accord.MachineLearning.csproj | 58 +- .../Clustering/KMeans/BalancedKMeans.cs | 244 ++++++++ .../Clustering/KMeans/KMeans.cs | 17 +- Sources/Accord.Math/Accord.Math.csproj | 58 +- Sources/Accord.Math/Matrix/Matrix.Get.cs | 26 +- Sources/Accord.Math/Optimization/Munkres.cs | 541 ++++++++++++++++++ .../MulticlassScoreClassifierBase.cs | 4 +- .../Accord.Tests.MachineLearning.csproj | 3 +- .../Clustering/BalancedKMeansTest.cs | 301 ++++++++++ .../Accord.Tests.Math.csproj | 4 +- .../Accord.Tests.Math/Optimization/Munkres.cs | 420 ++++++++++++++ .../Optimization/MunkresTest.cs | 263 +++++++++ 13 files changed, 1900 insertions(+), 67 deletions(-) create mode 100644 Sources/Accord.MachineLearning/Clustering/KMeans/BalancedKMeans.cs create mode 100644 Sources/Accord.Math/Optimization/Munkres.cs create mode 100644 Unit Tests/Accord.Tests.MachineLearning/Clustering/BalancedKMeansTest.cs create mode 100644 Unit Tests/Accord.Tests.Math/Optimization/Munkres.cs create mode 100644 Unit Tests/Accord.Tests.Math/Optimization/MunkresTest.cs diff --git a/Copyright.txt b/Copyright.txt index 9bef3c120..33ed9cae8 100644 --- a/Copyright.txt +++ b/Copyright.txt @@ -343,3 +343,31 @@ NLopt Numerical Optimization Library - Copyright (c) 2008-2014 Steven G. Johnson AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + + Munkres algorithm - The Hungarian method for solving the assignment problem + + The MIT License (MIT) + + Copyright (c) 2000 Robert A. Pilgrim + Murray State University + Dept. of Computer Science & Information Systems + Murray,Kentucky + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. \ No newline at end of file diff --git a/Sources/Accord.MachineLearning/Accord.MachineLearning.csproj b/Sources/Accord.MachineLearning/Accord.MachineLearning.csproj index f8621d26c..e4083154a 100644 --- a/Sources/Accord.MachineLearning/Accord.MachineLearning.csproj +++ b/Sources/Accord.MachineLearning/Accord.MachineLearning.csproj @@ -5,35 +5,36 @@ Accord.MachineLearning Accord.MachineLearning - + - Full - False - True - DEBUG;TRACE - $(SolutionDir)..\Debug\ - - - - TRACE;NET35 - v3.5 - - - TRACE;NET40 - v4.0 - - - TRACE;NET45 - v4.5 - - - TRACE;NET46 - v4.6 - - - TRACE;MONO - v4.5 - + Full + False + True + DEBUG;TRACE + $(SolutionDir)..\Debug\ + + + + + TRACE;NET35 + v3.5 + + + TRACE;NET40 + v4.0 + + + TRACE;NET45 + v4.5 + + + TRACE;NET46 + v4.6 + + + TRACE;MONO + v4.5 + @@ -47,6 +48,7 @@ Properties\VersionInfo.cs + diff --git a/Sources/Accord.MachineLearning/Clustering/KMeans/BalancedKMeans.cs b/Sources/Accord.MachineLearning/Clustering/KMeans/BalancedKMeans.cs new file mode 100644 index 0000000000..2de78a7c4 --- /dev/null +++ b/Sources/Accord.MachineLearning/Clustering/KMeans/BalancedKMeans.cs @@ -0,0 +1,244 @@ +// Accord Machine Learning Library +// The Accord.NET Framework +// http://accord-framework.net +// +// Copyright © César Souza, 2009-2017 +// cesarsouza at gmail.com +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +// + +namespace Accord.MachineLearning +{ + using System; + using System.Collections.Generic; + using Accord.Math; + using Accord.Math.Distances; + using Math.Optimization; + using Statistics; + using System.Threading.Tasks; + + /// + /// Balanced K-Means algorithm. + /// + /// + /// + /// The Balanced k-Means algorithm attempts to find a clustering where each cluster + /// has approximately the same number of data points. The Balanced k-Means implementation + /// used in the framework uses the algorithm to solve the assignment + /// problem thus enforcing balance between the clusters. + /// + /// + /// + /// How to perform clustering with Balanced K-Means. + /// + /// + /// + /// + /// + /// + /// + /// + [Serializable] + public class BalancedKMeans : KMeans + { + + /// + /// Gets the labels assigned for each data point in the last + /// call to . + /// + /// + /// The labels. + /// + public int[] Labels { get; private set; } + + /// + /// Initializes a new instance of the Balanced K-Means algorithm. + /// + /// + /// The number of clusters to divide the input data into. + /// The distance function to use. Default is to + /// use the distance. + /// + public BalancedKMeans(int k, IDistance distance) + : base(k, distance) + { + } + + /// + /// Initializes a new instance of the Balanced K-Means algorithm. + /// + /// + /// The number of clusters to divide the input data into. + /// + public BalancedKMeans(int k) + : base(k) { } + + + /// + /// Learns a model that can map the given inputs to the desired outputs. + /// + /// The model inputs. + /// The weight of importance for each input sample. + /// A model that has learned how to produce suitable outputs + /// given the input data . + public override KMeansClusterCollection Learn(double[][] x, double[] weights = null) + { + // Initial argument checking + if (x == null) + throw new ArgumentNullException("x"); + + if (x.Length < K) + throw new ArgumentException("Not enough points. There should be more points than the number K of clusters."); + + if (weights == null) + { + weights = Vector.Ones(x.Length); + } + else + { + if (x.Length != weights.Length) + throw new ArgumentException("Data weights vector must be the same length as data samples."); + } + + double weightSum = weights.Sum(); + if (weightSum <= 0) + throw new ArgumentException("Not enough points. There should be more points than the number K of clusters."); + + if (!x.IsRectangular()) + throw new DimensionMismatchException("data", "The points matrix should be rectangular. The vector at position {} has a different length than previous ones."); + + int k = this.K; + int rows = x.Length; + int cols = x[0].Length; + + // Perform a random initialization of the clusters + // if the algorithm has not been initialized before. + // + if (this.Clusters.Centroids[0] == null) + { + Randomize(x); + } + + // Initial variables + int[] labels = new int[rows]; + double[] count = new double[k]; + double[][] centroids = Clusters.Centroids; + double[][] newCentroids = new double[k][]; + for (int i = 0; i < newCentroids.Length; i++) + newCentroids[i] = new double[cols]; + + Object[] syncObjects = new Object[K]; + for (int i = 0; i < syncObjects.Length; i++) + syncObjects[i] = new Object(); + + Iterations = 0; + + bool shouldStop = false; + + var m = new Munkres(x.Length, x.Length); + double[][] costMatrix = m.CostMatrix; + + while (!shouldStop) // Main loop + { + Array.Clear(count, 0, count.Length); + for (int i = 0; i < newCentroids.Length; i++) + Array.Clear(newCentroids[i], 0, newCentroids[i].Length); + for (int i = 0; i < labels.Length; i++) + labels[i] = -1; + + // Set the cost matrix for Munkres algorithm + for (int i = 0; i < costMatrix.Length; i++) + for (int j = 0; j < costMatrix[i].Length; j++) + costMatrix[i][j] = Distance.Distance(x[j], centroids[i % k]); + + //string str = costMatrix.ToCSharp(); + + m.Minimize(); // solve the assignment problem + + for (int i = 0; i < x.Length; i++) + { + if (m.Solution[i] >= 0) + labels[(int)m.Solution[i]] = i % k; + } + + // For each point in the data set, + Parallel.For(0, x.Length, ParallelOptions, i => + { + // Get the point + double[] point = x[i]; + double weight = weights[i]; + + // Get the nearest cluster centroid + int c = labels[i]; + + if (c >= 0) + { + // Get the closest cluster centroid + double[] centroid = newCentroids[c]; + + lock (syncObjects[c]) + { + // Increase the cluster's sample counter + count[c] += weight; + + // Accumulate in the cluster centroid + for (int j = 0; j < point.Length; j++) + centroid[j] += point[j] * weight; + } + } + }); + + // Next we will compute each cluster's new centroid + // by dividing the accumulated sums by the number of + // samples in each cluster, thus averaging its members. + Parallel.For(0, newCentroids.Length, ParallelOptions, i => + { + double sum = count[i]; + + if (sum > 0) + { + for (int j = 0; j < newCentroids[i].Length; j++) + newCentroids[i][j] /= sum; + } + }); + + // The algorithm stops when there is no further change in the + // centroids (relative difference is less than the threshold). + shouldStop = converged(centroids, newCentroids); + + // go to next generation + Parallel.For(0, centroids.Length, ParallelOptions, i => + { + for (int j = 0; j < centroids[i].Length; j++) + centroids[i][j] = newCentroids[i][j]; + }); + } + + for (int i = 0; i < Clusters.Centroids.Length; i++) + { + // Compute the proportion of samples in the cluster + Clusters.Proportions[i] = count[i] / weightSum; + } + + this.Labels = labels; + + ComputeInformation(x, labels); + + return Clusters; + } + + } +} diff --git a/Sources/Accord.MachineLearning/Clustering/KMeans/KMeans.cs b/Sources/Accord.MachineLearning/Clustering/KMeans/KMeans.cs index 932526441..47a3460bd 100644 --- a/Sources/Accord.MachineLearning/Clustering/KMeans/KMeans.cs +++ b/Sources/Accord.MachineLearning/Clustering/KMeans/KMeans.cs @@ -259,7 +259,7 @@ public IDistance Distance /// last call to this class' Compute methods. /// /// - public int Iterations { get; private set; } + public int Iterations { get; protected set; } /// /// Gets the cluster distortion error after the @@ -370,7 +370,7 @@ public virtual KMeansClusterCollection Learn(double[][] x, double[] weights = nu if (x[i].Length != cols) throw new DimensionMismatchException("x", "The points matrix should be rectangular. The vector at position {} has a different length than previous ones."); - Compute(x, weights, weightSum); + compute(x, weights, weightSum); return clusters; } @@ -399,7 +399,14 @@ public virtual int[] Compute(double[][] data, double[] weights) return Learn(data, weights).Decide(data); } - private void ComputeInformation(double[][] data, int[] labels) + /// + /// Computes the information about each cluster (covariance, proportions and error). + /// + /// + /// The data points. + /// The assigned labels. + /// + protected void ComputeInformation(double[][] data, int[] labels) { // Compute distortion and other metrics regarding the clustering if (ComputeCovariances) @@ -439,7 +446,7 @@ private void ComputeInformation(double[][] data, int[] labels) /// The weight to consider for each data sample. This is used in weighted K-Means /// The total sum of the weights in . /// - private int[] Compute(double[][] data, double[] weights, double weightSum) + private int[] compute(double[][] data, double[] weights, double weightSum) { this.Iterations = 0; @@ -560,7 +567,7 @@ private int[] Compute(double[][] data, double[] weights, double weightSum) /// Returns if all centroids had a percentage change /// less than . Returns otherwise. /// - private bool converged(double[][] centroids, double[][] newCentroids) + protected bool converged(double[][] centroids, double[][] newCentroids) { Iterations++; diff --git a/Sources/Accord.Math/Accord.Math.csproj b/Sources/Accord.Math/Accord.Math.csproj index d15fa3942..611a5e11f 100644 --- a/Sources/Accord.Math/Accord.Math.csproj +++ b/Sources/Accord.Math/Accord.Math.csproj @@ -5,35 +5,36 @@ Accord.Math Accord.Math - + - Full - False - True - DEBUG;TRACE - $(SolutionDir)..\Debug\ - - - - TRACE;NET35 - v3.5 - - - TRACE;NET40 - v4.0 - - - TRACE;NET45 - v4.5 - - - TRACE;NET46 - v4.6 - - - TRACE;MONO - v4.5 - + Full + False + True + DEBUG;TRACE + $(SolutionDir)..\Debug\ + + + + + TRACE;NET35 + v3.5 + + + TRACE;NET40 + v4.0 + + + TRACE;NET45 + v4.5 + + + TRACE;NET46 + v4.6 + + + TRACE;MONO + v4.5 + @@ -207,6 +208,7 @@ + diff --git a/Sources/Accord.Math/Matrix/Matrix.Get.cs b/Sources/Accord.Math/Matrix/Matrix.Get.cs index eb29f7f2b..9abb5dd2f 100644 --- a/Sources/Accord.Math/Matrix/Matrix.Get.cs +++ b/Sources/Accord.Math/Matrix/Matrix.Get.cs @@ -288,12 +288,34 @@ public static T[][] Get(this T[][] source, /// End column index in the destination matrix. /// public static T[][] Set(this T[][] destination, - int startRow, int endRow, int startColumn, int endColumn, - T[][] values) + int startRow, int endRow, int startColumn, int endColumn, T[][] values) { return set(destination, values, startRow, endRow, startColumn, endColumn); } + /// + /// Sets elements from a matrix to a given value. + /// + /// + /// The matrix of values to be changed. + /// The function used to determine whether an + /// element in the matrix should be changed or not. + /// The values to set the elements to. + /// + public static T[][] Set(this T[][] values, Func match, T value) + { + for (int i = 0; i < values.Length; i++) + { + for (int j = 0; j < values[i].Length; j++) + { + if (match(values[i][j])) + values[i][j] = value; + } + } + + return values; + } + /// /// Returns a sub matrix extracted from the current matrix. /// diff --git a/Sources/Accord.Math/Optimization/Munkres.cs b/Sources/Accord.Math/Optimization/Munkres.cs new file mode 100644 index 0000000000..4fa305258 --- /dev/null +++ b/Sources/Accord.Math/Optimization/Munkres.cs @@ -0,0 +1,541 @@ +// Accord Math Library +// The Accord.NET Framework +// http://accord-framework.net +// +// Copyright © César Souza, 2009-2017 +// cesarsouza at gmail.com +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +// +// +// This class contains the original code written by Robert A. Pilgrim, originally +// shared under the permissive MIT license. The original license text is reproduced +// below: +// +// The MIT License (MIT) +// +// Copyright (c) 2000 Robert A. Pilgrim +// Murray State University +// Dept. of Computer Science & Information Systems +// Murray,Kentucky +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +namespace Accord.Math.Optimization +{ + using Accord.MachineLearning; + using System; + using System.Collections.Generic; + using System.Linq; + using System.Text; + using System.Threading.Tasks; + + /// + /// Hungarian method for solving the assignment problem, also known + /// as the Kuhn–Munkres algorithm or Munkres assignment algorithm. + /// + /// + /// + /// + /// + /// The Hungarian method is a combinatorial optimization algorithm that solves the assignment + /// problem in polynomial time and which anticipated later primal-dual methods. It was developed + /// and published in 1955 by Harold Kuhn, who gave the name "Hungarian method" because the algorithm + /// was largely based on the earlier works of two Hungarian mathematicians: Dénes Kőnig and Jenő + /// Egerváry. + /// + /// James Munkres reviewed the algorithm in 1957 and observed that it is (strongly) polynomial. + /// Since then the algorithm has been known also as the Kuhn–Munkres algorithm or Munkres assignment + /// algorithm.The time complexity of the original algorithm was O(n^4), however Edmonds and Karp, and + /// independently Tomizawa noticed that it can be modified to achieve an O(n^3) running time. Ford and + /// Fulkerson extended the method to general transportation problems. In 2006, it was discovered that + /// Carl Gustav Jacobi had solved the assignment problem in the 19th century, and the solution had been + /// published posthumously in 1890 in Latin. + /// + /// + /// This code has been based on the original MIT-licensed code by R.A. Pilgrim, + /// available in http://csclab.murraystate.edu/~bob.pilgrim/445/munkres.html. + /// + /// + /// References: + /// + /// + /// R. A. Pilgrim (2000). Munkres' Assignment Algorithm Modified for + /// Rectangular Matrices. Available in http://csclab.murraystate.edu/~bob.pilgrim/445/munkres.html + /// + /// Wikipedia contributors. "Hungarian algorithm." Wikipedia, The Free Encyclopedia. + /// Wikipedia, The Free Encyclopedia, 23 Jan. 2016. + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + public class Munkres : IOptimizationMethod + { + + /// + /// Gets or sets the cost matrix for this assignment algorithm. + /// + /// + /// The cost matrix. + /// + public double[][] CostMatrix { get; protected set; } + + + /// + /// Gets the number of variables (free parameters) + /// in the optimization problem. In the assigment + /// problem, this gives the number of jobs (or tasks) + /// to be performed. + /// + /// + /// The number of tasks in the assignment problem. + /// + public int NumberOfVariables { get; protected set; } + + /// + /// Gets or sets the number of workers in the assignment algorithm. + /// The workers are the entites that can be assigned jobs according + /// to the costs in the . + /// + /// + /// The number of workers. + /// + public int NumberOfWorkers { get; protected set; } + + /// + /// Gets the current solution found, the values of + /// the parameters which optimizes the function. + /// + /// + /// The solution. + /// + public double[] Solution { get; set; } + + /// + /// Gets the output of the function at the current . + /// + /// + /// The value. + /// + public double Value { get; protected set; } + + internal double[][] C; + internal int[][] M; + internal List> path; + internal int[] RowCover; + internal int[] ColCover; + + private int path_row_0; + private int path_col_0; + + + /// + /// Initializes a new instance of the class. + /// + /// + /// The number of jobs (tasks) that can be assigned. + /// The number of workers that can receive an assignment. + /// + public Munkres(int numberOfJobs, int numberOfWorkers) + { + init(numberOfJobs, numberOfWorkers, Jagged.Zeros(numberOfWorkers, numberOfJobs)); + } + + /// + /// Initializes a new instance of the class. + /// + /// + /// The cost matrix where each row represents + /// a worker, each column represents a task, and each individual element + /// represents how much it costs for a particular worker to receive (be + /// assigned) a particular task. + /// + public Munkres(double[][] costMatrix) + { + int workers = costMatrix.Rows(); + int jobs = costMatrix.Columns(); + init(jobs, workers, costMatrix); + } + + private void init(int jobs, int workers, double[][] costMatrix) + { + this.NumberOfVariables = jobs; + this.NumberOfWorkers = workers; + this.RowCover = new int[workers]; + this.ColCover = new int[jobs]; + this.CostMatrix = costMatrix; + this.M = Jagged.CreateAs(costMatrix); + this.path = new List>(); + this.Solution = new double[jobs]; + } + + /// + /// Finds the minimum value of a function. The solution vector + /// will be made available at the property. + /// + /// + /// Returns true if the method converged to a . + /// In this case, the found value will also be available at the + /// property. + /// + public bool Minimize() + { + return run(CostMatrix.Copy()); + } + + + /// + /// Finds the maximum value of a function. The solution vector + /// will be made available at the property. + /// + /// + /// Returns true if the method converged to a . + /// In this case, the found value will also be available at the + /// property. + /// + public bool Maximize() + { + return run(CostMatrix.Multiply(-1)); + } + + private bool run(double[][] m) + { + this.C = m; + int step = 1; + + while (step > 0) + { + step = RunStep(step); + } + + return true; + } + + + internal int RunStep(int step) + { + switch (step) + { + case 1: + return step_one(); + case 2: + return step_two(); + case 3: + return step_three(); + case 4: + return step_four(); + case 5: + return step_five(); + case 6: + return step_six(); + case 7: + return step_seven(); + default: + throw new InvalidOperationException(); + } + } + + + + /// + /// For each row of the cost matrix, find the smallest element + /// and subtract it from every element in its row. + /// + /// + /// Go to step 2. + /// + private int step_one() + { + for (int r = 0; r < C.Length; r++) + C[r].Subtract(C[r].Min(), result: C[r]); + + return 2; + } + + /// + /// Find a zero (Z) in the resulting matrix. If there is no starred + /// zero in its row or column, star Z. Repeat for each element in the + /// matrix. + /// + /// + /// Go to step 3. + /// + private int step_two() + { + M.Clear(); + for (int r = 0; r < RowCover.Length; r++) + for (int c = 0; c < ColCover.Length; c++) + if (C[r][c] == 0 && RowCover[r] == 0 && ColCover[c] == 0) + ColCover[c] = RowCover[r] = M[r][c] = 1; + + RowCover.Clear(); + ColCover.Clear(); + return 3; + } + + /// + /// Cover each column containing a starred zero. If K columns are covered, + /// the starred zeros describe a complete set of unique assignments. In this + /// case, go to DONE, otherwise, go to Step 4. + /// + /// + /// If K columns are covered, returns 7. Otherwise, returns 4. + /// + private int step_three() + { + for (int r = 0; r < M.Length; r++) + for (int c = 0; c < M[r].Length; c++) + if (M[r][c] == 1) + ColCover[c] = 1; + + int count = 0; + for (int c = 0; c < ColCover.Length; c++) + if (ColCover[c] == 1) + count += 1; + + if (count >= ColCover.Length || count >= RowCover.Length) + return 7; // done + return 4; + } + + + /// + /// Find a noncovered zero and prime it. If there is no starred zero + /// in the row containing this primed zero, Go to Step 5. Otherwise, + /// cover this row and uncover the column containing the starred zero. + /// Continue in this manner until there are no uncovered zeros left. + /// Save the smallest uncovered value and Go to Step 6. + /// + /// + /// Goes to step 5 or 6. + /// + private int step_four() + { + int row = -1; + int col = -1; + + while (true) + { + if (!find_a_zero(out row, out col)) + return 6; + + M[row][col] = 2; + if (has_star_in_row(row)) + { + col = find_star_in_row(row); + RowCover[row] = 1; + ColCover[col] = 0; + } + else + { + this.path_row_0 = row; + this.path_col_0 = col; + return 5; + } + } + } + + // methods to support step 4 + private bool find_a_zero(out int row, out int col) + { + for (int r = 0; r < RowCover.Length; r++) + { + for (int c = 0; c < ColCover.Length; c++) + { + if (C[r][c] == 0 && RowCover[r] == 0 && ColCover[c] == 0) + { + row = r; + col = c; + return true; + } + } + } + + row = col = -1; + return false; + } + + private bool has_star_in_row(int row) + { + for (int c = 0; c < M[row].Length; c++) + if (M[row][c] == 1) + return true; + return false; + } + + private int find_star_in_row(int row) + { + int col = -1; + for (int c = 0; c < M[row].Length; c++) + if (M[row][c] == 1) + col = c; + return col; + } + + + + + /// + /// Construct a series of alternating primed and starred zeros as follows. + /// Let Z0 represent the uncovered primed zero found in Step 4. Let Z1 denote + /// the starred zero in the column of Z0 (if any). Let Z2 denote the primed zero + /// in the row of Z1 (there will always be one). Continue until the series + /// terminates at a primed zero that has no starred zero in its column. + /// Unstar each starred zero of the series, star each primed zero of the series, + /// erase all primes and uncover every line in the matrix. + /// + /// + /// Return to Step 3. + /// + private int step_five() + { + int c = -1; + int r; + path.Clear(); + path.Add(Tuple.Create(path_row_0, path_col_0)); + + while (find_star_in_col(path[path.Count - 1].Item2, out r)) + { + path.Add(Tuple.Create(r, path[path.Count - 1].Item2)); + find_prime_in_row(path[path.Count - 1].Item1, ref c); + path.Add(Tuple.Create(path[path.Count - 1].Item1, c)); + } + + augment_path(); + RowCover.Clear(); + ColCover.Clear(); + erase_primes(); + return 3; + } + + + // methods to support step 5 + private bool find_star_in_col(int c, out int r) + { + r = -1; + for (int i = 0; i < M.Length; i++) + if (M[i][c] == 1) + r = i; + return r >= 0; + } + + private void find_prime_in_row(int r, ref int c) + { + for (int j = 0; j < M[r].Length; j++) + if (M[r][j] == 2) + c = j; + } + + private void augment_path() + { + for (int p = 0; p < path.Count; p++) + { + int i = path[p].Item1; + int j = path[p].Item2; + M[i][j] = (M[i][j] == 1) ? 0 : 1; + } + } + + private void erase_primes() + { + for (int r = 0; r < M.Length; r++) + for (int c = 0; c < M[r].Length; c++) + if (M[r][c] == 2) + M[r][c] = 0; + } + + + /// + /// Add the value found in Step 4 to every element of each covered row, and subtract + /// it from every element of each uncovered column. + /// + /// + /// Return to step 4. + /// + private int step_six() + { + double minval = find_smallest(); + for (int r = 0; r < RowCover.Length; r++) + { + for (int c = 0; c < ColCover.Length; c++) + { + if (RowCover[r] == 1) + C[r][c] += minval; + if (ColCover[c] == 0) + C[r][c] -= minval; + } + } + + return 4; + } + + //methods to support step 6 + private double find_smallest() + { + double minval = Double.PositiveInfinity; + for (int r = 0; r < RowCover.Length; r++) + for (int c = 0; c < ColCover.Length; c++) + if (RowCover[r] == 0 && ColCover[c] == 0) + if (minval > C[r][c]) + minval = C[r][c]; + return minval; + } + + private int step_seven() + { + // DONE: Assignment pairs are indicated by the positions of the starred zeros in the + // cost matrix.If C(i, j) is a starred zero, then the element associated with row i + // is assigned to the element associated with column j. + // + // (http://csclab.murraystate.edu/~bob.pilgrim/445/munkres.html) + double value = 0; + for (int col = 0; col < M.Columns(); col++) + { + int row; + if (find_star_in_col(col, out row)) + value += CostMatrix[row][col]; + Solution[col] = row; + } + + Value = value; + return -1; + } + + + } +} \ No newline at end of file diff --git a/Sources/Accord.Statistics/Accord.MachineLearning/Classifiers/Multiple/Multiclass/MulticlassScoreClassifierBase.cs b/Sources/Accord.Statistics/Accord.MachineLearning/Classifiers/Multiple/Multiclass/MulticlassScoreClassifierBase.cs index b207d6342..bf1314cbc 100644 --- a/Sources/Accord.Statistics/Accord.MachineLearning/Classifiers/Multiple/Multiclass/MulticlassScoreClassifierBase.cs +++ b/Sources/Accord.Statistics/Accord.MachineLearning/Classifiers/Multiple/Multiclass/MulticlassScoreClassifierBase.cs @@ -94,9 +94,9 @@ public virtual double[] Scores(TInput input, out int decision, double[] result) /// public override int Decide(TInput input) { - double max = Double.NegativeInfinity; + double max = Score(input, classIndex: 0); int imax = 0; - for (int i = 0; i < NumberOfOutputs; i++) + for (int i = 1; i < NumberOfOutputs; i++) { double d = Score(input, classIndex: i); if (d > max) diff --git a/Unit Tests/Accord.Tests.MachineLearning/Accord.Tests.MachineLearning.csproj b/Unit Tests/Accord.Tests.MachineLearning/Accord.Tests.MachineLearning.csproj index a4f5c5108..38dee367c 100644 --- a/Unit Tests/Accord.Tests.MachineLearning/Accord.Tests.MachineLearning.csproj +++ b/Unit Tests/Accord.Tests.MachineLearning/Accord.Tests.MachineLearning.csproj @@ -5,7 +5,7 @@ Accord.Tests.MachineLearning Accord.Tests.MachineLearning - + true full @@ -71,6 +71,7 @@ + diff --git a/Unit Tests/Accord.Tests.MachineLearning/Clustering/BalancedKMeansTest.cs b/Unit Tests/Accord.Tests.MachineLearning/Clustering/BalancedKMeansTest.cs new file mode 100644 index 0000000000..214a1d71c --- /dev/null +++ b/Unit Tests/Accord.Tests.MachineLearning/Clustering/BalancedKMeansTest.cs @@ -0,0 +1,301 @@ +// Accord Unit Tests +// The Accord.NET Framework +// http://accord-framework.net +// +// Copyright © César Souza, 2009-2017 +// cesarsouza at gmail.com +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +// + +namespace Accord.Tests.MachineLearning +{ + using Accord.MachineLearning; + using Accord.Math; + using NUnit.Framework; + using System; + using System.Linq; + using System.IO; + using System.Reflection; + using System.Runtime.Serialization.Formatters.Binary; + using Accord.IO; + using Math.Distances; + + [TestFixture] + public class BalancedKMeansTest + { + + + [Test] + public void learn_test() + { + #region doc_learn + Accord.Math.Random.Generator.Seed = 0; + + // Declare some observations + double[][] observations = + { + new double[] { -5, -2, -1 }, + new double[] { -5, -5, -6 }, + new double[] { 2, 1, 1 }, + new double[] { 1, 1, 2 }, + new double[] { 1, 2, 2 }, + new double[] { 3, 1, 2 }, + new double[] { 11, 5, 4 }, + new double[] { 15, 5, 6 }, + new double[] { 10, 5, 6 }, + }; + + double[][] orig = observations.MemberwiseClone(); + + // Create a new K-Means algorithm with 3 clusters + BalancedKMeans kmeans = new BalancedKMeans(3) + { + // Note: in balanced k-means the chances of the algorithm oscillating + // between two solutions increases considerably. For this reason, we + // set a max-iterations limit to avoid iterating indefinitely. + MaxIterations = 100 + }; + + // Compute the algorithm, retrieving an integer array + // containing the labels for each of the observations + KMeansClusterCollection clusters = kmeans.Learn(observations); + + // As a result, the first two observations should belong to the + // same cluster (thus having the same label). The same should + // happen to the next four observations and to the last three. + int[] labels = clusters.Decide(observations); + + #endregion + + Assert.AreEqual(labels[0], labels[1]); + + Assert.AreEqual(labels[2], labels[3]); + Assert.AreEqual(labels[2], labels[4]); + Assert.AreEqual(labels[2], labels[5]); + + Assert.AreEqual(labels[6], labels[7]); + Assert.AreEqual(labels[6], labels[8]); + + Assert.AreNotEqual(labels[0], labels[2]); + Assert.AreNotEqual(labels[2], labels[6]); + Assert.AreNotEqual(labels[0], labels[6]); + + int[] labels2 = kmeans.Clusters.Nearest(observations); + Assert.IsTrue(labels.IsEqual(labels2)); + + // the data must not have changed! + Assert.IsTrue(orig.IsEqual(observations)); + + var c = new KMeansClusterCollection.KMeansCluster[clusters.Count]; + int i = 0; + foreach (var cluster in clusters) + c[i++] = cluster; + + for (i = 0; i < c.Length; i++) + Assert.AreSame(c[i], clusters[i]); + } + + [Test] + public void learn_test_new_distance() + { + #region doc_learn + Accord.Math.Random.Generator.Seed = 0; + + // Declare some observations + double[][] observations = + { + new double[] { -5, -2, -1 }, + new double[] { -5, -5, -6 }, + new double[] { 2, 1, 1 }, + new double[] { 1, 1, 2 }, + new double[] { 1, 2, 2 }, + new double[] { 3, 1, 2 }, + new double[] { 11, 5, 4 }, + new double[] { 15, 5, 6 }, + new double[] { 10, 5, 6 }, + }; + + double[][] orig = observations.MemberwiseClone(); + + // Create a new K-Means algorithm with 3 clusters + BalancedKMeans kmeans = new BalancedKMeans(3, new Manhattan()); + + // Note: in balanced k-means the chances of the algorithm oscillating + // between two solutions increases considerably. For this reason, we + // set a max-iterations limit to avoid iterating indefinitely. + kmeans.MaxIterations = 100; + + // Compute the algorithm, retrieving an integer array + // containing the labels for each of the observations + KMeansClusterCollection clusters = kmeans.Learn(observations); + + // As a result, the first two observations should belong to the + // same cluster (thus having the same label). The same should + // happen to the next four observations and to the last three. + int[] labels = clusters.Decide(observations); + + // In case we would like equilibrated class labels, we can get the + // balanced class labels directly from the learning algorithm: + int[] balanced = kmeans.Labels; + #endregion + + Assert.AreEqual(labels[0], labels[1]); + + Assert.AreEqual(labels[2], labels[3]); + Assert.AreEqual(labels[2], labels[4]); + Assert.AreEqual(labels[2], labels[5]); + + Assert.AreEqual(labels[6], labels[7]); + Assert.AreEqual(labels[6], labels[8]); + + Assert.AreNotEqual(labels[0], labels[2]); + Assert.AreNotEqual(labels[2], labels[6]); + Assert.AreNotEqual(labels[0], labels[6]); + + + Assert.AreEqual(balanced[0], balanced[1]); + Assert.AreEqual(balanced[0], balanced[2]); + + Assert.AreEqual(balanced[3], balanced[4]); + Assert.AreEqual(balanced[3], balanced[5]); + + Assert.AreEqual(balanced[6], balanced[7]); + Assert.AreEqual(balanced[6], balanced[8]); + + Assert.AreNotEqual(balanced[0], balanced[3]); + Assert.AreNotEqual(balanced[3], balanced[6]); + Assert.AreNotEqual(balanced[0], balanced[6]); + + int[] labels2 = kmeans.Clusters.Nearest(observations); + Assert.IsTrue(labels.IsEqual(labels2)); + + // the data must not have changed! + Assert.IsTrue(orig.IsEqual(observations)); + + var c = new KMeansClusterCollection.KMeansCluster[clusters.Count]; + int i = 0; + foreach (var cluster in clusters) + c[i++] = cluster; + + for (i = 0; i < c.Length; i++) + Assert.AreSame(c[i], clusters[i]); + } + + [Test] + public void KMeansConstructorTest2() + { + Accord.Math.Tools.SetupGenerator(0); + + // Declare some observations + double[][] observations = + { + new double[] { -5, -2, -1 }, + new double[] { -5, -5, -6 }, + new double[] { 2, 1, 1 }, + new double[] { 1, 1, 2 }, + new double[] { 1, 2, 2 }, + new double[] { 3, 1, 2 }, + new double[] { 11, 5, 4 }, + new double[] { 15, 5, 6 }, + new double[] { 10, 5, 6 }, + }; + + double error, e; + + // Create a new algorithm + BalancedKMeans kmeans = new BalancedKMeans(3); + + // note: in balanced k-means the chances of the algorithm + // oscillating between two solutions increases considerably + kmeans.MaxIterations = 100; + + kmeans.Randomize(observations); + + // Save the first initialization + double[][] initial = kmeans.Clusters.Centroids.MemberwiseClone(); + + // Compute the first K-Means + kmeans.Compute(observations, out error); + + // Create more K-Means algorithms + // without the same initialization + bool differ = false; + for (int i = 0; i < 1000; i++) + { + kmeans = new BalancedKMeans(3); + kmeans.MaxIterations = 100; + kmeans.Compute(observations, out e); + + if (error != e) + differ = true; + } + + Assert.IsTrue(differ); + } + + [Test] + public void KMeansConstructorTest3() + { + // Create a new algorithm + BalancedKMeans kmeans = new BalancedKMeans(3); + Assert.IsNotNull(kmeans.Clusters); + Assert.IsNotNull(kmeans.Distance); + Assert.IsNotNull(kmeans.Clusters.Centroids); + Assert.IsNotNull(kmeans.Clusters.Count); + Assert.IsNotNull(kmeans.Clusters.Covariances); + Assert.IsNotNull(kmeans.Clusters.Proportions); + } + + [Test] + public void KMeansConstructorTest_Distance() + { + // Create a new algorithm + BalancedKMeans kmeans = new BalancedKMeans(3, new Manhattan()); + Assert.IsNotNull(kmeans.Distance); + Assert.IsTrue(kmeans.Distance is Accord.Math.Distances.Manhattan); + } + + [Test] + [ExpectedException(typeof(ArgumentException))] + public void KMeansMoreClustersThanSamples() + { + Accord.Math.Tools.SetupGenerator(0); + + + // Declare some observations + double[][] observations = + { + new double[] { -5, -2, -1 }, + new double[] { -5, -5, -6 }, + new double[] { 2, 1, 1 }, + new double[] { 1, 1, 2 }, + new double[] { 1, 2, 2 }, + new double[] { 3, 1, 2 }, + new double[] { 11, 5, 4 }, + new double[] { 15, 5, 6 }, + new double[] { 10, 5, 6 }, + }; + + double[][] orig = observations.MemberwiseClone(); + + BalancedKMeans kmeans = new BalancedKMeans(15); + + int[] labels = kmeans.Compute(observations); + } + + + } +} diff --git a/Unit Tests/Accord.Tests.Math/Accord.Tests.Math.csproj b/Unit Tests/Accord.Tests.Math/Accord.Tests.Math.csproj index 9916fe953..502aaf417 100644 --- a/Unit Tests/Accord.Tests.Math/Accord.Tests.Math.csproj +++ b/Unit Tests/Accord.Tests.Math/Accord.Tests.Math.csproj @@ -5,7 +5,7 @@ Accord.Tests.Math Accord.Tests.Math - + true full @@ -88,6 +88,8 @@ + + diff --git a/Unit Tests/Accord.Tests.Math/Optimization/Munkres.cs b/Unit Tests/Accord.Tests.Math/Optimization/Munkres.cs new file mode 100644 index 0000000000..4ffc810e2 --- /dev/null +++ b/Unit Tests/Accord.Tests.Math/Optimization/Munkres.cs @@ -0,0 +1,420 @@ +/* + The MIT License (MIT) + + Copyright (c) 2000 Robert A. Pilgrim + Murray State University + Dept. of Computer Science & Information Systems + Murray,Kentucky + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + + */ + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Munkres +{ + // This is the original code from http://csclab.murraystate.edu/~bob.pilgrim/445/munkres.html + // with minimal modifications. The only included modifications aim to increase the debugability + // of the original code and did not alter its behavior. + + public static class MunkresProgram + { + public static double[,] C = new double[30, 30]; + public static double[,] C_orig = new double[30, 30]; + public static double[,] M = new double[30, 30]; + public static int[,] path = new int[61, 2]; + public static int[] RowCover = new int[30]; + public static int[] ColCover = new int[30]; + public static int nrow; + public static int ncol; + public static int path_count = 0; + public static int path_row_0; + public static int path_col_0; + public static int asgn = 0; + public static int step; + + private static void resetMaskandCovers() + { + for (int r = 0; r < nrow; r++) + { + RowCover[r] = 0; + for (int c = 0; c < ncol; c++) + { + M[r, c] = 0; + } + } + for (int c = 0; c < ncol; c++) + ColCover[c] = 0; + } + + //For each row of the cost matrix, find the smallest element and subtract + //it from every element in its row. When finished, Go to Step 2. + private static void step_one(ref int step) + { + double min_in_row; + + for (int r = 0; r < nrow; r++) + { + min_in_row = C[r, 0]; + for (int c = 0; c < ncol; c++) + if (C[r, c] < min_in_row) + min_in_row = C[r, c]; + for (int c = 0; c < ncol; c++) + C[r, c] -= min_in_row; + } + step = 2; + } + + //Find a zero (Z) in the resulting matrix. If there is no starred + //zero in its row or column, star Z. Repeat for each element in the + //matrix. Go to Step 3. + private static void step_two(ref int step) + { + for (int r = 0; r < nrow; r++) + for (int c = 0; c < ncol; c++) + { + if (C[r, c] == 0 && RowCover[r] == 0 && ColCover[c] == 0) + { + M[r, c] = 1; + RowCover[r] = 1; + ColCover[c] = 1; + } + } + for (int r = 0; r < nrow; r++) + RowCover[r] = 0; + for (int c = 0; c < ncol; c++) + ColCover[c] = 0; + step = 3; + } + + //Cover each column containing a starred zero. If K columns are covered, + //the starred zeros describe a complete set of unique assignments. In this + //case, Go to DONE, otherwise, Go to Step 4. + private static void step_three(ref int step) + { + int colcount; + for (int r = 0; r < nrow; r++) + for (int c = 0; c < ncol; c++) + if (M[r, c] == 1) + ColCover[c] = 1; + + colcount = 0; + for (int c = 0; c < ncol; c++) + if (ColCover[c] == 1) + colcount += 1; + if (colcount >= ncol || colcount >= nrow) + step = 7; + else + step = 4; + } + + //methods to support step 4 + private static void find_a_zero(ref int row, ref int col) + { + int r = 0; + int c; + bool done; + row = -1; + col = -1; + done = false; + while (!done) + { + c = 0; + while (true) + { + if (C[r, c] == 0 && RowCover[r] == 0 && ColCover[c] == 0) + { + row = r; + col = c; + done = true; + } + c += 1; + if (c >= ncol || done) + break; + } + r += 1; + if (r >= nrow) + done = true; + } + } + + private static bool star_in_row(int row) + { + bool tmp = false; + for (int c = 0; c < ncol; c++) + if (M[row, c] == 1) + tmp = true; + return tmp; + } + + private static void find_star_in_row(int row, ref int col) + { + col = -1; + for (int c = 0; c < ncol; c++) + if (M[row, c] == 1) + col = c; + } + + //Find a noncovered zero and prime it. If there is no starred zero + //in the row containing this primed zero, Go to Step 5. Otherwise, + //cover this row and uncover the column containing the starred zero. + //Continue in this manner until there are no uncovered zeros left. + //Save the smallest uncovered value and Go to Step 6. + private static void step_four(ref int step) + { + int row = -1; + int col = -1; + bool done; + + done = false; + while (!done) + { + find_a_zero(ref row, ref col); + if (row == -1) + { + done = true; + step = 6; + } + else + { + M[row, col] = 2; + if (star_in_row(row)) + { + find_star_in_row(row, ref col); + RowCover[row] = 1; + ColCover[col] = 0; + } + else + { + done = true; + step = 5; + path_row_0 = row; + path_col_0 = col; + } + } + } + } + + // methods to support step 5 + private static void find_star_in_col(int c, ref int r) + { + r = -1; + for (int i = 0; i < nrow; i++) + if (M[i, c] == 1) + r = i; + } + + private static void find_prime_in_row(int r, ref int c) + { + for (int j = 0; j < ncol; j++) + if (M[r, j] == 2) + c = j; + } + + private static void augment_path() + { + for (int p = 0; p < path_count; p++) + if (M[path[p, 0], path[p, 1]] == 1) + M[path[p, 0], path[p, 1]] = 0; + else + M[path[p, 0], path[p, 1]] = 1; + } + + private static void clear_covers() + { + for (int r = 0; r < nrow; r++) + RowCover[r] = 0; + for (int c = 0; c < ncol; c++) + ColCover[c] = 0; + } + + private static void erase_primes() + { + for (int r = 0; r < nrow; r++) + for (int c = 0; c < ncol; c++) + if (M[r, c] == 2) + M[r, c] = 0; + } + + + //Construct a series of alternating primed and starred zeros as follows. + //Let Z0 represent the uncovered primed zero found in Step 4. Let Z1 denote + //the starred zero in the column of Z0 (if any). Let Z2 denote the primed zero + //in the row of Z1 (there will always be one). Continue until the series + //terminates at a primed zero that has no starred zero in its column. + //Unstar each starred zero of the series, star each primed zero of the series, + //erase all primes and uncover every line in the matrix. Return to Step 3. + private static void step_five(ref int step) + { + bool done; + int r = -1; + int c = -1; + + path_count = 1; + path[path_count - 1, 0] = path_row_0; + path[path_count - 1, 1] = path_col_0; + done = false; + while (!done) + { + find_star_in_col(path[path_count - 1, 1], ref r); + if (r > -1) + { + path_count += 1; + path[path_count - 1, 0] = r; + path[path_count - 1, 1] = path[path_count - 2, 1]; + + find_prime_in_row(path[path_count - 1, 0], ref c); + path_count += 1; + path[path_count - 1, 0] = path[path_count - 2, 0]; + path[path_count - 1, 1] = c; + } + else + done = true; + } + augment_path(); + clear_covers(); + erase_primes(); + step = 3; + } + + //methods to support step 6 + private static void find_smallest(ref double minval) + { + for (int r = 0; r < nrow; r++) + for (int c = 0; c < ncol; c++) + if (RowCover[r] == 0 && ColCover[c] == 0) + if (minval > C[r, c]) + minval = C[r, c]; + } + + //Add the value found in Step 4 to every element of each covered row, and subtract + //it from every element of each uncovered column. Return to Step 4 without + //altering any stars, primes, or covered lines. + private static void step_six(ref int step) + { + double minval = Double.MaxValue; + find_smallest(ref minval); + for (int r = 0; r < nrow; r++) + for (int c = 0; c < ncol; c++) + { + if (RowCover[r] == 1) + C[r, c] += minval; + if (ColCover[c] == 0) + C[r, c] -= minval; + } + step = 4; + } + + private static void step_seven(ref int step) + { + Console.WriteLine("\n\n---------Run Complete----------"); + } + + public static void RunMunkres(double[,] C) + { + Init(C); + + bool done = false; + while (!done) + { + ShowCostMatrix(); + ShowMaskMatrix(); + done = RunStep(done); + } + } + + public static void Init(double[,] C) + { + MunkresProgram.C = C; + MunkresProgram.nrow = C.GetLength(0); + MunkresProgram.ncol = C.GetLength(1) ; + resetMaskandCovers(); + + step = 1; + } + + public static bool RunStep(bool done) + { + switch (step) + { + case 1: + step_one(ref step); + break; + case 2: + step_two(ref step); + break; + case 3: + step_three(ref step); + break; + case 4: + step_four(ref step); + break; + case 5: + step_five(ref step); + break; + case 6: + step_six(ref step); + break; + case 7: + step_seven(ref step); + return true; + } + + return done; + } + + private static void ShowCostMatrix() + { + Console.WriteLine("\n"); + Console.WriteLine("------------Step {0}-------------", step); + for (int r = 0; r < nrow; r++) + { + Console.WriteLine(); + Console.Write(" "); + for (int c = 0; c < ncol; c++) + { + Console.Write(Convert.ToString(C[r, c]) + " "); + } + } + } + + private static void ShowMaskMatrix() + { + Console.WriteLine(); + Console.Write("\n "); + for (int c = 0; c < ncol; c++) + Console.Write(" " + Convert.ToString(ColCover[c])); + for (int r = 0; r < nrow; r++) + { + Console.Write("\n " + Convert.ToString(RowCover[r]) + " "); + for (int c = 0; c < ncol; c++) + { + Console.Write(Convert.ToString(M[r, c]) + " "); + } + } + } + + } + +} diff --git a/Unit Tests/Accord.Tests.Math/Optimization/MunkresTest.cs b/Unit Tests/Accord.Tests.Math/Optimization/MunkresTest.cs new file mode 100644 index 0000000000..19c82943e --- /dev/null +++ b/Unit Tests/Accord.Tests.Math/Optimization/MunkresTest.cs @@ -0,0 +1,263 @@ +// Accord Unit Tests +// The Accord.NET Framework +// http://accord-framework.net +// +// Copyright © César Souza, 2009-2017 +// cesarsouza at gmail.com +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +// + +namespace Accord.Tests.Math +{ + using System; + using System.Collections.Generic; + using Accord.Math; + using Accord.Math.Optimization; + using NUnit.Framework; + using Munkres; + + [TestFixture] + public class MunkresTest + { + + [Test] + public void minimize_test() + { + #region doc_example + // This is the same example that is found in Wikipedia: + // https://en.wikipedia.org/wiki/Hungarian_algorithm + + double[][] costMatrix = + { + // Clean bathroom, Sweep floors, Wash windows + /* Armond */ new double[] { 2, 3, 3 }, + /* Francine */ new double[] { 3, 2, 3 }, + /* Herbert */ new double[] { 3, 3, 2 }, + }; + + // Create a new Hungarian algorithm + Munkres m = new Munkres(costMatrix); + + bool success = m.Minimize(); // solve it (should return true) + double[] solution = m.Solution; // Solution will be 0, 1, 2 + double minimumCost = m.Value; // should be 6 + #endregion + + Assert.AreEqual(3, m.NumberOfVariables); + Assert.AreEqual(3, m.NumberOfWorkers); + + Assert.IsTrue(success); + Assert.AreEqual(6, minimumCost); + Assert.AreEqual(0, solution[0]); + Assert.AreEqual(1, solution[1]); + Assert.AreEqual(2, solution[2]); + } + + [Test] + public void minimize_test2() + { + double[][] costMatrix = Jagged.Magic(5); + + Munkres m = new Munkres(costMatrix); + + Assert.AreEqual(5, m.NumberOfVariables); + Assert.AreEqual(5, m.NumberOfWorkers); + + bool success = m.Minimize(); + Assert.IsTrue(success); + Assert.AreEqual(15, m.Value); + Assert.AreEqual(2, m.Solution[0]); + Assert.AreEqual(1, m.Solution[1]); + Assert.AreEqual(0, m.Solution[2]); + Assert.AreEqual(4, m.Solution[3]); + Assert.AreEqual(3, m.Solution[4]); + } + + [Test] + public void minimize_test3() + { + // http://csclab.murraystate.edu/~bob.pilgrim/445/munkres.html + + double[][] costMatrix = + { + new double[] { 1, 2, 3 }, + new double[] { 2, 4, 6 }, + new double[] { 3, 6, 9 }, + }; + + Munkres m = new Munkres(costMatrix); + + Assert.AreEqual(3, m.NumberOfVariables); + Assert.AreEqual(3, m.NumberOfWorkers); + + bool success = m.Minimize(); + Assert.IsTrue(success); + Assert.AreEqual(10, m.Value); + Assert.AreEqual(2, m.Solution[0]); + Assert.AreEqual(1, m.Solution[1]); + Assert.AreEqual(0, m.Solution[2]); + } + + [Test] + public void minimize_test_more_jobs() + { + double[][] costMatrix = + { + new double[] { 1, 2, 3, 4 }, + new double[] { 2, 4, 6, 8 }, + new double[] { 3, 6, 9, 12 }, + }; + + Munkres m = new Munkres(costMatrix); + + Assert.AreEqual(4, m.NumberOfVariables); + Assert.AreEqual(3, m.NumberOfWorkers); + + bool success = m.Minimize(); + Assert.IsTrue(success); + Assert.AreEqual(10, m.Value); + Assert.AreEqual(2, m.Solution[0]); + Assert.AreEqual(1, m.Solution[1]); + Assert.AreEqual(0, m.Solution[2]); + Assert.AreEqual(-1, m.Solution[3]); + } + + [Test] + public void minimize_test_more_workers() + { + double[][] costMatrix = + { + new double[] { 1, 2, 3 }, + new double[] { 2, 4, 6 }, + new double[] { 3, 6, 9 }, + new double[] { 4, 8, 12 }, + }; + + Munkres m = new Munkres(costMatrix); + + Assert.AreEqual(3, m.NumberOfVariables); + Assert.AreEqual(4, m.NumberOfWorkers); + + bool success = m.Minimize(); + Assert.IsTrue(success); + Assert.AreEqual(10, m.Value); + Assert.AreEqual(2, m.Solution[0]); + Assert.AreEqual(1, m.Solution[1]); + Assert.AreEqual(0, m.Solution[2]); + } + + [Test] + public void minimize_rectangular() + { + Accord.Math.Random.Generator.Seed = 0; + double[][] costMatrix = Jagged.Random(10, 7); + costMatrix.Set(x => x > 0.7, Double.PositiveInfinity); + + Munkres m = new Munkres(costMatrix); + + Assert.AreEqual(7, m.NumberOfVariables); + Assert.AreEqual(10, m.NumberOfWorkers); + + bool success = m.Minimize(); + Assert.IsTrue(success); + Assert.AreEqual(1.1474781912506922, m.Value, 1e-10); + Assert.AreEqual(7, m.Solution[0]); + Assert.AreEqual(2, m.Solution[1]); + Assert.AreEqual(3, m.Solution[2]); + Assert.AreEqual(1, m.Solution[3]); + Assert.AreEqual(0, m.Solution[4]); + Assert.AreEqual(8, m.Solution[5]); + Assert.AreEqual(6, m.Solution[6]); + } + + [Test] + public void minimize_index_out_of_range1() + { + double[][] costMatrix = new double[][] + { + new double[] { 330, 456, 106, 120, 113, 84, 0, 20, 5 }, + new double[] { 246, 360, 62, 72, 69, 44, 8, 44, 9 }, + new double[] { 4.55555555555556, 33.8888888888889, 39.2222222222222, 36.5555555555556, 42.8888888888889, 56.5555555555556, 272.555555555556, 427.222222222222, 272.222222222222 }, + new double[] { 330, 456, 106, 120, 113, 84, 0, 20, 5 }, + new double[] { 246, 360, 62, 72, 69, 44, 8, 44, 9 }, + new double[] { 4.55555555555556, 33.8888888888889, 39.2222222222222, 36.5555555555556, 42.8888888888889, 56.5555555555556, 272.555555555556, 427.222222222222, 272.222222222222 }, + new double[] { 330, 456, 106, 120, 113, 84, 0, 20, 5 }, + new double[] { 246, 360, 62, 72, 69, 44, 8, 44, 9 }, + new double[] { 4.55555555555556, 33.8888888888889, 39.2222222222222, 36.5555555555556, 42.8888888888889, 56.5555555555556, 272.555555555556, 427.222222222222, 272.222222222222 } + }; + + MunkresProgram.Init(costMatrix.ToMatrix()); + + var m = new Munkres(costMatrix); + m.C = costMatrix.Copy(); + int step1 = 1; + bool done = false; + while (step1 >= 0) + { + step1 = m.RunStep(step1); + done = MunkresProgram.RunStep(done); + + Assert.IsTrue(step1 == MunkresProgram.step || (step1 == -1 && MunkresProgram.step == 7)); + Assert.IsTrue(m.C.IsEqual(MunkresProgram.C)); + for (int i = 0; i < m.M.Length; i++) + for (int j = 0; j < m.M[i].Length; j++) + Assert.AreEqual(m.M[i][j], MunkresProgram.M[i,j]); + } + + Assert.AreEqual(9, m.NumberOfVariables); + Assert.AreEqual(9, m.NumberOfWorkers); + + bool success = m.Minimize(); + Assert.IsTrue(success); + Assert.AreEqual(275, m.Value, 1e-10); + Assert.AreEqual(8, m.Solution[0]); + Assert.AreEqual(5, m.Solution[1]); + Assert.AreEqual(7, m.Solution[2]); + Assert.AreEqual(2, m.Solution[3]); + Assert.AreEqual(1, m.Solution[4]); + Assert.AreEqual(4, m.Solution[5]); + Assert.AreEqual(6, m.Solution[6]); + Assert.AreEqual(3, m.Solution[7]); + Assert.AreEqual(0, m.Solution[8]); + } + + + [Test, Ignore] + public void minimize_rectangular_inf() + { + double inf = Double.PositiveInfinity; + + double[][] costMatrix = + { + new double[] { 1.0, inf, inf }, + new double[] { 3.0, inf, inf }, + new double[] { inf, 5.0, 0.5 }, + }; + + Munkres m = new Munkres(costMatrix); + + Assert.AreEqual(3, m.NumberOfVariables); + Assert.AreEqual(3, m.NumberOfWorkers); + + bool success = m.Minimize(); + Assert.IsTrue(success); + Assert.AreEqual(10, m.Value); + Assert.AreEqual(2, m.Solution[0]); + Assert.AreEqual(1, m.Solution[1]); + Assert.AreEqual(0, m.Solution[2]); + } + } +}