From 0df71d60b3a7151e3be2ee7eff4b69b56fd00257 Mon Sep 17 00:00:00 2001
From: fated
Date: Wed, 29 Oct 2014 00:11:39 +0000
Subject: [PATCH 1/8] add basic files for libcp

---
 cp-offline.cpp |  313 ++++
 cp.cpp         |  932 ++++++++++++++++++++++
 cp.h           |   41 +
 knn.cpp        |  284 +++++++
 knn.h          |   41 +
 svm.cpp        | 2017 ++++++++++++++++++++++++++++++++++++++++++++++++
 svm.h          |   56 ++
 utilities.cpp  |  208 +++
 utilities.h    |   92 +++
 9 files changed, 3984 insertions(+)
 create mode 100644 cp-offline.cpp
 create mode 100644 cp.cpp
 create mode 100644 cp.h
 create mode 100644 knn.cpp
 create mode 100644 knn.h
 create mode 100644 svm.cpp
 create mode 100644 svm.h
 create mode 100644 utilities.cpp
 create mode 100644 utilities.h

diff --git a/cp-offline.cpp b/cp-offline.cpp
new file mode 100644
index 0000000..604155e
--- /dev/null
+++ b/cp-offline.cpp
@@ -0,0 +1,313 @@
+#include "cp.h"
+#include <iostream>
+#include <fstream>
+#include <iomanip>
+#include <chrono>
+
+void ExitWithHelp();
+void ParseCommandLine(int argc, char *argv[], char *train_file_name, char *test_file_name, char *output_file_name, char *model_file_name);
+
+struct Parameter param;
+
+int main(int argc, char *argv[]) {
+  char train_file_name[256];
+  char test_file_name[256];
+  char output_file_name[256];
+  char model_file_name[256];
+  struct Problem *train, *test;
+  struct Model *model;
+  int num_correct = 0;
+  double avg_lower_bound = 0, avg_upper_bound = 0, avg_brier = 0, avg_logloss = 0;
+  const char *error_message;
+
+  ParseCommandLine(argc, argv, train_file_name, test_file_name, output_file_name, model_file_name);
+  error_message = CheckParameter(&param);
+
+  if (error_message != NULL) {
+    std::cerr << error_message << std::endl;
+    exit(EXIT_FAILURE);
+  }
+
+  train = ReadProblem(train_file_name);
+  test = ReadProblem(test_file_name);
+
+  if ((param.taxonomy_type == SVM_EL ||
+       param.taxonomy_type == SVM_ES ||
+       param.taxonomy_type == SVM_KM) &&
+      param.svm_param->gamma == 0) {
+    param.svm_param->gamma = 1.0 / train->max_index;
+  }
+
+  std::ofstream output_file(output_file_name);
+  if (!output_file.is_open()) {
+    std::cerr << "Unable to open output file: " << output_file_name << std::endl;
+    exit(EXIT_FAILURE);
+  }
+
+  std::chrono::time_point<std::chrono::high_resolution_clock> start_time = std::chrono::high_resolution_clock::now();
+
+  if (param.load_model == 1) {
+    model = LoadModel(model_file_name);
+    if (model == NULL) {
+      exit(EXIT_FAILURE);
+    }
+  } else {
+    model = TrainCP(train, &param);
+  }
+
+  if (param.save_model == 1) {
+    if (SaveModel(model_file_name, model) != 0) {
+      std::cerr << "Unable to save model file" << std::endl;
+    }
+  }
+
+  if (param.probability == 1) {
+    output_file << " ";
+    for (int i = 0; i < model->num_classes; ++i) {
+      output_file << model->labels[i] << " ";
+    }
+    output_file << '\n';
+  }
+
+  for (int i = 0; i < test->num_ex; ++i) {
+    double predict_label, lower_bound, upper_bound, logloss = 0, brier = 0, *avg_prob = NULL;
+
+    predict_label = PredictCP(train, model, test->x[i], lower_bound, upper_bound, &avg_prob);
+
+    for (int j = 0; j < model->num_classes; ++j) {
+      if (model->labels[j] == test->y[i]) {
+        brier += (1-avg_prob[j])*(1-avg_prob[j]);
+        double tmp = std::fmax(std::fmin(avg_prob[j], 1-kEpsilon), kEpsilon);
+        logloss = - std::log(tmp);
+      } else {
+        brier += avg_prob[j]*avg_prob[j];
+      }
+    }
+    avg_lower_bound += lower_bound;
+    avg_upper_bound += upper_bound;
+    avg_brier += brier;
+    avg_logloss += logloss;
+
+    output_file << std::resetiosflags(std::ios::fixed) << test->y[i] << ' ' << predict_label << ' '
+                << std::setiosflags(std::ios::fixed) << lower_bound << ' ' << upper_bound;
+    if (param.probability == 1) {
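+      // With "-b 1", each prediction line below is followed by the per-class
+      // average probabilities, i.e. the column means of the Venn matrix
+      // computed in PredictCP().  A full output line then reads, e.g.:
+      //
+      //   <true_label> <predict_label> <lower_bound> <upper_bound> <p_1> ... <p_K>
+      //
+      // (sketch of the format produced by the stream statements around here).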
+      for (int j = 0; j < model->num_classes; ++j) {
+        output_file << ' ' << avg_prob[j];
+      }
+    }
+    output_file << '\n';
+    if (predict_label == test->y[i]) {
+      ++num_correct;
+    }
+    delete[] avg_prob;
+  }
+  avg_lower_bound /= test->num_ex;
+  avg_upper_bound /= test->num_ex;
+  avg_brier /= test->num_ex;
+  avg_logloss /= test->num_ex;
+
+  std::chrono::time_point<std::chrono::high_resolution_clock> end_time = std::chrono::high_resolution_clock::now();
+
+  std::cout << "Accuracy: " << 100.0*num_correct/test->num_ex << '%'
+            << " (" << num_correct << '/' << test->num_ex << ") "
+            << "Probabilities: [" << std::fixed << std::setprecision(4) << 100*avg_lower_bound << "%, "
+            << 100*avg_upper_bound << "%] "
+            << "Brier Score: " << avg_brier << ' '
+            << "Logarithmic Loss: " << avg_logloss << '\n';
+  output_file.close();
+
+  std::cout << "Time cost: " << std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time).count()/1000.0 << " s\n";
+
+  FreeProblem(train);
+  FreeProblem(test);
+  FreeModel(model);
+  FreeParam(&param);
+
+  return 0;
+}
+
+void ExitWithHelp() {
+  std::cout << "Usage: cp-offline [options] train_file test_file [output_file]\n"
+            << "options:\n"
+            << "  -t non-conformity measure : set type of NCM (default 0)\n"
+            << "    0 -- k-nearest neighbors (KNN)\n"
+            << "    1 -- support vector machine with equal length (SVM_EL)\n"
+            << "    2 -- support vector machine with equal size (SVM_ES)\n"
+            << "    3 -- support vector machine with k-means clustering (SVM_KM)\n"
+            << "  -k num_neighbors : set number of neighbors in kNN (default 1)\n"
+            << "  -s model_file_name : save model\n"
+            << "  -l model_file_name : load model\n"
+            << "  -b probability estimates : whether to output probability estimates for all labels, 0 or 1 (default 0)\n"
+            << "  -p : prefix of options to set parameters for SVM\n"
+            << "    -ps svm_type : set type of SVM (default 0)\n"
+            << "      0 -- C-SVC (multi-class classification)\n"
+            << "      1 -- nu-SVC (multi-class classification)\n"
+            << "    -pt kernel_type : set type of kernel function (default 2)\n"
+            << "      0 -- linear: u'*v\n"
+            << "      1 -- polynomial: (gamma*u'*v + coef0)^degree\n"
+            << "      2 -- radial basis function: exp(-gamma*|u-v|^2)\n"
+            << "      3 -- sigmoid: tanh(gamma*u'*v + coef0)\n"
+            << "      4 -- precomputed kernel (kernel values in training_set_file)\n"
+            << "    -pd degree : set degree in kernel function (default 3)\n"
+            << "    -pg gamma : set gamma in kernel function (default 1/num_features)\n"
+            << "    -pr coef0 : set coef0 in kernel function (default 0)\n"
+            << "    -pc cost : set the parameter C of C-SVC (default 1)\n"
+            << "    -pn nu : set the parameter nu of nu-SVC (default 0.5)\n"
+            << "    -pm cachesize : set cache memory size in MB (default 100)\n"
+            << "    -pe epsilon : set tolerance of termination criterion (default 0.001)\n"
+            << "    -ph shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1)\n"
+            << "    -pwi weights : set the parameter C of class i to weight*C, for C-SVC (default 1)\n"
+            << "    -pq : quiet mode (no outputs)\n";
+  exit(EXIT_FAILURE);
+}
+
+void ParseCommandLine(int argc, char **argv, char *train_file_name, char *test_file_name, char *output_file_name, char *model_file_name) {
+  int i;
+  param.taxonomy_type = KNN;
+  param.save_model = 0;
+  param.load_model = 0;
+  param.probability = 0;
+  param.num_categories = 4;  // default; no command-line option sets this in this patch
+  param.knn_param = new KNNParameter;
+  param.svm_param = NULL;
+  InitKNNParam(param.knn_param);
+
+  for (i = 1; i < argc; ++i) {
+    if (argv[i][0] != '-') break;
+    if ((i+2) >= argc)
+      ExitWithHelp();
+    switch (argv[i][1]) {
+      case 't': {
+        ++i;
+        param.taxonomy_type = std::atoi(argv[i]);
+        if (param.taxonomy_type == SVM_EL ||
+            param.taxonomy_type == 
SVM_ES ||
+            param.taxonomy_type == SVM_KM) {
+          FreeKNNParam(param.knn_param);
+          delete param.knn_param;
+          param.svm_param = new SVMParameter;
+          InitSVMParam(param.svm_param);
+        }
+        break;
+      }
+      case 'k': {
+        ++i;
+        param.knn_param->num_neighbors = std::atoi(argv[i]);
+        break;
+      }
+      case 's': {
+        ++i;
+        param.save_model = 1;
+        std::strcpy(model_file_name, argv[i]);
+        break;
+      }
+      case 'l': {
+        ++i;
+        param.load_model = 1;
+        std::strcpy(model_file_name, argv[i]);
+        break;
+      }
+      case 'b': {
+        ++i;
+        param.probability = std::atoi(argv[i]);
+        break;
+      }
+      case 'p': {
+        if (argv[i][2]) {
+          switch (argv[i][2]) {
+            case 's': {
+              ++i;
+              param.svm_param->svm_type = std::atoi(argv[i]);
+              break;
+            }
+            case 't': {
+              ++i;
+              param.svm_param->kernel_type = std::atoi(argv[i]);
+              break;
+            }
+            case 'd': {
+              ++i;
+              param.svm_param->degree = std::atoi(argv[i]);
+              break;
+            }
+            case 'g': {
+              ++i;
+              param.svm_param->gamma = std::atof(argv[i]);
+              break;
+            }
+            case 'r': {
+              ++i;
+              param.svm_param->coef0 = std::atof(argv[i]);
+              break;
+            }
+            case 'n': {
+              ++i;
+              param.svm_param->nu = std::atof(argv[i]);
+              break;
+            }
+            case 'm': {
+              ++i;
+              param.svm_param->cache_size = std::atof(argv[i]);
+              break;
+            }
+            case 'c': {
+              ++i;
+              param.svm_param->C = std::atof(argv[i]);
+              break;
+            }
+            case 'e': {
+              ++i;
+              param.svm_param->eps = std::atof(argv[i]);
+              break;
+            }
+            case 'h': {
+              ++i;
+              param.svm_param->shrinking = std::atoi(argv[i]);
+              break;
+            }
+            case 'q': {
+              SetPrintNull();
+              break;
+            }
+            case 'w': {  // weights [option]: '-pw1' means weight of class '1'
+              ++i;
+              ++param.svm_param->num_weights;
+              param.svm_param->weight_labels = (int *)realloc(param.svm_param->weight_labels, sizeof(int)*static_cast<size_t>(param.svm_param->num_weights));
+              param.svm_param->weights = (double *)realloc(param.svm_param->weights, sizeof(double)*static_cast<size_t>(param.svm_param->num_weights));
+              param.svm_param->weight_labels[param.svm_param->num_weights-1] = std::atoi(&argv[i-1][3]);  // convert the trailing 'i' of '-pwi' to int
+              param.svm_param->weights[param.svm_param->num_weights-1] = std::atof(argv[i]);
+              break;
+              // TODO: change realloc function
+            }
+            default: {
+              std::cerr << "Unknown SVM option: " << argv[i] << std::endl;
+              ExitWithHelp();
+            }
+          }
+        }
+        break;
+      }
+      default: {
+        std::cerr << "Unknown option: -" << argv[i][1] << std::endl;
+        ExitWithHelp();
+      }
+    }
+  }
+
+  if ((i+1) >= argc)
+    ExitWithHelp();
+  std::strcpy(train_file_name, argv[i]);
+  std::strcpy(test_file_name, argv[i+1]);
+  if ((i+2) < argc) {
+    std::strcpy(output_file_name, argv[i+2]);
+  } else {
+    char *p = std::strrchr(argv[i+1], '/');
+    if (p == NULL) {
+      p = argv[i+1];
+    } else {
+      ++p;
+    }
+    std::sprintf(output_file_name, "%s_output", p);
+  }
+
+  return;
+}
\ No newline at end of file
diff --git a/cp.cpp b/cp.cpp
new file mode 100644
index 0000000..51c970f
--- /dev/null
+++ b/cp.cpp
@@ -0,0 +1,932 @@
+#include "cp.h"
+#include <iostream>
+#include <fstream>
+#include <random>
+#include <algorithm>
+
+Model *TrainCP(const struct Problem *train, const struct Parameter *param) {
+  Model *model = new Model;
+  model->param = *param;
+  int num_ex = train->num_ex;
+
+  if (param->taxonomy_type == KNN) {
+    int num_neighbors = param->knn_param->num_neighbors;
+
+    int *categories = new int[num_ex];
+    for (int i = 0; i < num_ex; ++i) {
+      categories[i] = -1;
+    }
+
+    model->knn_model = TrainKNN(train, param->knn_param);
+
+    int num_classes = model->knn_model->num_classes;
+    int num_categories = param->num_categories;
+    if (num_categories != num_classes) {
+      std::cerr << "WARNING: number of categories should be the same as number of classes in KNN. 
See README for details." << std::endl; + num_categories = num_classes; + } + + for (int i = 0; i < num_ex; ++i) { + categories[i] = FindMostFrequent(model->knn_model->label_neighbors[i], num_neighbors); + } + + model->num_classes = num_classes; + model->num_ex = num_ex; + model->num_categories = num_categories; + model->categories = categories; + clone(model->labels, model->knn_model->labels, num_classes); + } + + if (param->taxonomy_type == SVM_EL || + param->taxonomy_type == SVM_ES || + param->taxonomy_type == SVM_KM) { + int num_categories = param->num_categories; + int *categories = new int[num_ex]; + double *combined_decision_values = new double[num_ex]; + + for (int i = 0; i < num_ex; ++i) { + categories[i] = -1; + combined_decision_values[i] = 0; + } + + model->svm_model = TrainSVM(train, param->svm_param); + + int num_classes = model->svm_model->num_classes; + if (num_classes == 1) { + std::cerr << "WARNING: training set only has one class. See README for details." << std::endl; + } + if (num_classes > 2 && num_categories < num_classes) { + std::cerr << "WARNING: number of categories should be the same as number of classes in Multi-Class case. See README for details." << std::endl; + num_categories = num_classes; + } + + for (int i = 0; i < num_ex; ++i) { + double *decision_values = new double[num_classes*(num_classes-1)/2]; + int label = 0; + double predict_label = PredictSVMValues(model->svm_model, train->x[i], decision_values); + + for (int j = 0; j < num_classes; ++j) { + if (predict_label == model->svm_model->labels[j]) { + label = j; + break; + } + } + combined_decision_values[i] = CalcCombinedDecisionValues(decision_values, num_classes, label); + delete[] decision_values; + } + + if (param->taxonomy_type == SVM_EL) { + for (int i = 0; i < num_ex; ++i) { + categories[i] = GetEqualLengthCategory(combined_decision_values[i], num_categories, num_classes); + } + } + if (param->taxonomy_type == SVM_ES) { + if (num_classes == 1) { + for (int i = 0; i < num_ex; ++i) { + categories[i] = 0; + } + model->points = new double[num_categories]; + for (int i = 0; i < num_categories; ++i) { + model->points[i] = 0; + } + } else { + double *points; + points = GetEqualSizeCategory(combined_decision_values, categories, num_categories, num_ex); + clone(model->points, points, num_categories); + delete[] points; + } + } + if (param->taxonomy_type == SVM_KM) { + double *points; + points = GetKMeansCategory(combined_decision_values, categories, num_categories, num_ex, kEpsilon); + clone(model->points, points, num_categories); + delete[] points; + } + delete[] combined_decision_values; + model->num_classes = num_classes; + model->num_ex = num_ex; + model->categories = categories; + model->num_categories = num_categories; + clone(model->labels, model->svm_model->labels, num_classes); + } + + return model; +} + +double PredictCP(const struct Problem *train, const struct Model *model, const struct Node *x, double &lower, double &upper, double **avg_prob) { + const Parameter& param = model->param; + int num_ex = model->num_ex; + int num_classes = model->num_classes; + int num_categories = model->num_categories; + int *labels = model->labels; + double predict_label; + int **f_matrix = new int*[num_classes]; + int *alter_labels = new int[num_ex]; + + for (int i = 0; i < num_classes; ++i) { + for (int j = 0; j < num_ex; ++j) { + if (labels[i] == train->y[j]) { + alter_labels[j] = i; + } + } + } + + if (param.taxonomy_type == KNN) { + int num_neighbors = param.knn_param->num_neighbors; + for (int i = 0; i < 
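+    // Loop over every hypothetical label i: the test point x is treated as a
+    // new (num_ex+1)-th example carrying label i, the k-NN neighbor lists and
+    // taxonomy categories are recomputed for the extended bag, and
+    // f_matrix[i][j] counts the training examples with label j that fall into
+    // the same category as x (f_matrix[i][i]++ accounts for x itself).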
num_classes; ++i) { + int *categories = new int[num_ex+1]; + double **dist_neighbors = new double*[num_ex+1]; + int **label_neighbors = new int*[num_ex+1]; + f_matrix[i] = new int[num_classes]; + for (int j = 0; j < num_classes; ++j) { + f_matrix[i][j] = 0; + } + + for (int j = 0; j < num_ex; ++j) { + clone(dist_neighbors[j], model->knn_model->dist_neighbors[j], num_neighbors); + clone(label_neighbors[j], model->knn_model->label_neighbors[j], num_neighbors); + categories[j] = model->categories[j]; + } + dist_neighbors[num_ex] = new double[num_neighbors]; + label_neighbors[num_ex] = new int[num_neighbors]; + for (int j = 0; j < num_neighbors; ++j) { + dist_neighbors[num_ex][j] = kInf; + label_neighbors[num_ex][j] = -1; + } + categories[num_ex] = -1; + + for (int j = 0; j < num_ex; ++j) { + double dist = CalcDist(train->x[j], x); + int index; + index = CompareDist(dist_neighbors[j], dist, num_neighbors); + if (index < num_neighbors) { + InsertLabel(label_neighbors[j], i, num_neighbors, index); + } + index = CompareDist(dist_neighbors[num_ex], dist, num_neighbors); + if (index < num_neighbors) { + InsertLabel(label_neighbors[num_ex], alter_labels[j], num_neighbors, index); + } + } + + for (int j = 0; j < num_ex+1; ++j) { + categories[j] = FindMostFrequent(label_neighbors[j], num_neighbors); + } + + for (int j = 0; j < num_ex; ++j) { + if (categories[j] == categories[num_ex]) { + ++f_matrix[i][alter_labels[j]]; + } + } + f_matrix[i][i]++; + + for (int j = 0; j < num_ex+1; ++j) { + delete[] dist_neighbors[j]; + delete[] label_neighbors[j]; + } + + delete[] dist_neighbors; + delete[] label_neighbors; + delete[] categories; + } + } + + if (param.taxonomy_type == SVM_EL || + param.taxonomy_type == SVM_ES || + param.taxonomy_type == SVM_KM) { + for (int i = 0; i < num_classes; ++i) { + int *categories = new int[num_ex+1]; + f_matrix[i] = new int[num_classes]; + for (int j = 0; j < num_classes; ++j) { + f_matrix[i][j] = 0; + } + + for (int j = 0; j < num_ex; ++j) { + categories[j] = model->categories[j]; + } + categories[num_ex] = -1; + + double *decision_values = new double[num_classes*(num_classes-1)/2]; + int label = 0; + double predict_label = PredictSVMValues(model->svm_model, x, decision_values); + for (int j = 0; j < num_classes; ++j) { + if (predict_label == labels[j]) { + label = j; + break; + } + } + double combined_decision_values = CalcCombinedDecisionValues(decision_values, num_classes, label); + if (param.taxonomy_type == SVM_EL) { + categories[num_ex] = GetEqualLengthCategory(combined_decision_values, num_categories, num_classes); + } + if (param.taxonomy_type == SVM_ES) { + if (num_classes == 1) { + categories[num_ex] = 0; + } else { + int j; + for (j = 0; j < num_categories; ++j) { + if (combined_decision_values <= model->points[j]) { + categories[num_ex] = j; + break; + } + } + if (j == num_categories) { + categories[num_ex] = num_categories - 1; + } + } + } + if (param.taxonomy_type == SVM_KM) { + categories[num_ex] = AssignCluster(num_categories, combined_decision_values, model->points); + } + delete[] decision_values; + for (int j = 0; j < num_ex; ++j) { + if (categories[j] == categories[num_ex]) { + ++f_matrix[i][alter_labels[j]]; + } + } + f_matrix[i][i]++; + + delete[] categories; + } + } + + double **matrix = new double*[num_classes]; + for (int i = 0; i < num_classes; ++i) { + matrix[i] = new double[num_classes]; + int sum = 0; + for (int j = 0; j < num_classes; ++j) { + sum += f_matrix[i][j]; + } + for (int j = 0; j < num_classes; ++j) { + matrix[i][j] = 
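+      // Row-normalize the counts: matrix[i][j] is the empirical probability
+      // of label j inside x's category under the hypothesis that x has label
+      // i, so each row sums to 1 (e.g. counts {3, 1} become {0.75, 0.25};
+      // illustrative numbers).  The column minima ("quality") pick the best
+      // label and its lower bound; the column maximum of that label gives the
+      // upper bound reported below.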
static_cast(f_matrix[i][j]) / sum; + } + } + + double *quality = new double[num_classes]; + *avg_prob = new double[num_classes]; + for (int j = 0; j < num_classes; ++j) { + quality[j] = matrix[0][j]; + (*avg_prob)[j] = matrix[0][j]; + for (int i = 1; i < num_classes; ++i) { + if (matrix[i][j] < quality[j]) { + quality[j] = matrix[i][j]; + } + (*avg_prob)[j] += matrix[i][j]; + } + (*avg_prob)[j] /= num_classes; + } + + int best = 0; + for (int i = 1; i < num_classes; ++i) { + if (quality[i] > quality[best]) { + best = i; + } + } + + lower = quality[best]; + upper = matrix[0][best]; + for (int i = 1; i < num_classes; ++i) { + if (matrix[i][best] > upper) { + upper = matrix[i][best]; + } + } + + predict_label = labels[best]; + + delete[] alter_labels; + delete[] quality; + for (int i = 0; i < num_classes; ++i) { + delete[] f_matrix[i]; + delete[] matrix[i]; + } + delete[] f_matrix; + delete[] matrix; + + return predict_label; +} + +void CrossValidation(const struct Problem *prob, const struct Parameter *param, + double *predict_labels, double *lower_bounds, double *upper_bounds, + double *brier, double *logloss) { + int num_folds = param->num_folds; + int num_ex = prob->num_ex; + int num_classes; + + int *fold_start; + int *perm = new int[num_ex]; + + if (num_folds > num_ex) { + num_folds = num_ex; + std::cerr << "WARNING: number of folds > number of data. Will use number of folds = number of data instead (i.e., leave-one-out cross validation)" << std::endl; + } + fold_start = new int[num_folds+1]; + + if (num_folds < num_ex) { + int *start = NULL; + int *label = NULL; + int *count = NULL; + GroupClasses(prob, &num_classes, &label, &start, &count, perm); + + int *fold_count = new int[num_folds]; + int *index = new int[num_ex]; + + for (int i = 0; i < num_ex; ++i) { + index[i] = perm[i]; + } + std::random_device rd; + std::mt19937 g(rd()); + for (int i = 0; i < num_classes; ++i) { + std::shuffle(index+start[i], index+start[i]+count[i], g); + } + + for (int i = 0; i < num_folds; ++i) { + fold_count[i] = 0; + for (int c = 0; c < num_classes; ++c) { + fold_count[i] += (i+1)*count[c]/num_folds - i*count[c]/num_folds; + } + } + + fold_start[0] = 0; + for (int i = 1; i <= num_folds; ++i) { + fold_start[i] = fold_start[i-1] + fold_count[i-1]; + } + for (int c = 0; c < num_classes; ++c) { + for (int i = 0; i < num_folds; ++i) { + int begin = start[c] + i*count[c]/num_folds; + int end = start[c] + (i+1)*count[c]/num_folds; + for (int j = begin; j < end; ++j) { + perm[fold_start[i]] = index[j]; + fold_start[i]++; + } + } + } + fold_start[0] = 0; + for (int i = 1; i <= num_folds; ++i) { + fold_start[i] = fold_start[i-1] + fold_count[i-1]; + } + delete[] start; + delete[] label; + delete[] count; + delete[] index; + delete[] fold_count; + + } else { + + for (int i = 0; i < num_ex; ++i) { + perm[i] = i; + } + std::random_device rd; + std::mt19937 g(rd()); + std::shuffle(perm, perm+num_ex, g); + fold_start[0] = 0; + for (int i = 1; i <= num_folds; ++i) { + fold_start[i] = fold_start[i-1] + (i+1)*num_ex/num_folds - i*num_ex/num_folds; + } + } + + for (int i = 0; i < num_folds; ++i) { + int begin = fold_start[i]; + int end = fold_start[i+1]; + int k = 0; + struct Problem subprob; + + subprob.num_ex = num_ex - (end-begin); + subprob.x = new Node*[subprob.num_ex]; + subprob.y = new double[subprob.num_ex]; + + for (int j = 0; j < begin; ++j) { + subprob.x[k] = prob->x[perm[j]]; + subprob.y[k] = prob->y[perm[j]]; + ++k; + } + for (int j = end; j < num_ex; ++j) { + subprob.x[k] = prob->x[perm[j]]; + subprob.y[k] = 
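+      // Copy the examples after fold [begin, end) as well, so the subproblem
+      // holds every example except the held-out fold; the fold itself is
+      // predicted below with the model trained on this subproblem.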
prob->y[perm[j]]; + ++k; + } + + struct Model *submodel = TrainCP(&subprob, param); + + if (param->probability == 1) { + for (int j = 0; j < submodel->num_classes; ++j) { + std::cout << submodel->labels[j] << " "; + } + std::cout << '\n'; + } + + for (int j = begin; j < end; ++j) { + double *avg_prob = NULL; + brier[perm[j]] = 0; + + predict_labels[perm[j]] = PredictCP(&subprob, submodel, prob->x[perm[j]], lower_bounds[perm[j]], upper_bounds[perm[j]], &avg_prob); + + for (k = 0; k < submodel->num_classes; ++k) { + if (submodel->labels[k] == prob->y[perm[j]]) { + brier[perm[j]] += (1-avg_prob[k]) * (1-avg_prob[k]); + double tmp = std::fmax(std::fmin(avg_prob[k], 1-kEpsilon), kEpsilon); + logloss[perm[j]] = - std::log(tmp); + } else { + brier[perm[j]] += avg_prob[k] * avg_prob[k]; + } + } + if (param->probability == 1) { + for (k = 0; k < submodel->num_classes; ++k) { + std::cout << avg_prob[k] << ' '; + } + std::cout << '\n'; + } + delete[] avg_prob; + } + FreeModel(submodel); + delete[] subprob.x; + delete[] subprob.y; + } + delete[] fold_start; + delete[] perm; + + return; +} + +void OnlinePredict(const struct Problem *prob, const struct Parameter *param, + double *predict_labels, int *indices, + double *lower_bounds, double *upper_bounds, + double *brier, double *logloss) { + int num_ex = prob->num_ex; + int num_classes = 0; + + for (int i = 0; i < num_ex; ++i) { + indices[i] = i; + } + std::random_device rd; + std::mt19937 g(rd()); + std::shuffle(indices, indices+num_ex, g); + + if (param->taxonomy_type == KNN) { + int num_neighbors = param->knn_param->num_neighbors; + int *alter_labels = new int[num_ex]; + std::vector labels; + + int *categories = new int[num_ex]; + double **dist_neighbors = new double*[num_ex]; + int **label_neighbors = new int*[num_ex]; + + for (int i = 0; i < num_ex; ++i) { + dist_neighbors[i] = new double[num_neighbors]; + label_neighbors[i] = new int[num_neighbors]; + for (int j = 0; j < num_neighbors; ++j) { + dist_neighbors[i][j] = kInf; + label_neighbors[i][j] = -1; + } + categories[i] = -1; + } + + int this_label = static_cast(prob->y[indices[0]]); + labels.push_back(this_label); + alter_labels[0] = 0; + num_classes = 1; + + for (int i = 1; i < num_ex; ++i) { + if (num_classes == 1) + std::cerr << + "WARNING: training set only has one class. See README for details." 
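+    // Online protocol: examples arrive in the shuffled order of `indices`;
+    // the label of example i is predicted from the first i examples alone,
+    // and only afterwards is its true label revealed and merged into the
+    // running neighbor lists.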
+ << std::endl; + + int **f_matrix = new int*[num_classes]; + + for (int j = 0; j < num_classes; ++j) { + f_matrix[j] = new int[num_classes]; + for (int k = 0; k < num_classes; ++k) { + f_matrix[j][k] = 0; + } + + double **dist_neighbors_ = new double*[i+1]; + int **label_neighbors_ = new int*[i+1]; + + for (int j = 0; j < i; ++j) { + clone(dist_neighbors_[j], dist_neighbors[j], num_neighbors); + clone(label_neighbors_[j], label_neighbors[j], num_neighbors); + } + dist_neighbors_[i] = new double[num_neighbors]; + label_neighbors_[i] = new int[num_neighbors]; + for (int j = 0; j < num_neighbors; ++j) { + dist_neighbors_[i][j] = kInf; + label_neighbors_[i][j] = -1; + } + + for (int k = 0; k < i; ++k) { + double dist = CalcDist(prob->x[indices[k]], prob->x[indices[i]]); + int index; + + index = CompareDist(dist_neighbors_[i], dist, num_neighbors); + if (index < num_neighbors) { + InsertLabel(label_neighbors_[i], alter_labels[k], num_neighbors, index); + } + index = CompareDist(dist_neighbors_[k], dist, num_neighbors); + if (index < num_neighbors) { + InsertLabel(label_neighbors_[k], j, num_neighbors, index); + } + } + for (int k = 0; k <= i; ++k) { + categories[k] = FindMostFrequent(label_neighbors_[k], num_neighbors); + } + + for (int k = 0; k < i; ++k) { + if (categories[k] == categories[i]) { + ++f_matrix[j][alter_labels[k]]; + } + } + f_matrix[j][j]++; + + for (int j = 0; j < num_neighbors; ++j) { + dist_neighbors[i][j] = dist_neighbors_[i][j]; + label_neighbors[i][j] = label_neighbors_[i][j]; + } + for (int j = 0; j < i+1; ++j) { + delete[] dist_neighbors_[j]; + delete[] label_neighbors_[j]; + } + delete[] dist_neighbors_; + delete[] label_neighbors_; + } + + double **matrix = new double*[num_classes]; + for (int j = 0; j < num_classes; ++j) { + matrix[j] = new double[num_classes]; + int sum = 0; + for (int k = 0; k < num_classes; ++k) + sum += f_matrix[j][k]; + for (int k = 0; k < num_classes; ++k) + matrix[j][k] = static_cast(f_matrix[j][k]) / sum; + } + + double *quality = new double[num_classes]; + double *avg_prob = new double[num_classes]; + for (int j = 0; j < num_classes; ++j) { + quality[j] = matrix[0][j]; + avg_prob[j] = matrix[0][j]; + for (int k = 1; k < num_classes; ++k) { + if (matrix[k][j] < quality[j]) { + quality[j] = matrix[k][j]; + } + avg_prob[j] += matrix[k][j]; + } + avg_prob[j] /= num_classes; + } + + int best = 0; + for (int j = 1; j < num_classes; ++j) { + if (quality[j] > quality[best]) { + best = j; + } + } + + lower_bounds[i] = quality[best]; + upper_bounds[i] = matrix[0][best]; + for (int j = 1; j < num_classes; ++j) { + if (matrix[j][best] > upper_bounds[i]) { + upper_bounds[i] = matrix[j][best]; + } + } + + brier[i] = 0; + for (int j = 0; j < num_classes; ++j) { + if (labels[static_cast(j)] == prob->y[indices[i]]) { + brier[i] += (1-avg_prob[j])*(1-avg_prob[j]); + double tmp = std::fmax(std::fmin(avg_prob[j], 1-kEpsilon), kEpsilon); + logloss[i] = - std::log(tmp); + } else { + brier[i] += avg_prob[j]*avg_prob[j]; + } + } + + if (param->probability == 1) { + for (int j = 0; j < num_classes; ++j) { + std::cout << avg_prob[j] << ' '; + } + std::cout << '\n'; + } + + delete[] avg_prob; + + predict_labels[i] = labels[static_cast(best)]; + + delete[] quality; + for (int j = 0; j < num_classes; ++j) { + delete[] f_matrix[j]; + delete[] matrix[j]; + } + delete[] f_matrix; + delete[] matrix; + + this_label = static_cast(prob->y[indices[i]]); + std::size_t j; + for (j = 0; j < num_classes; ++j) { + if (this_label == labels[j]) break; + } + alter_labels[i] = 
static_cast(j); + if (j == num_classes) { + labels.push_back(this_label); + ++num_classes; + } + + for (int j = 0; j < i; ++j) { + double dist = CalcDist(prob->x[indices[j]], prob->x[indices[i]]); + int index = CompareDist(dist_neighbors[j], dist, num_neighbors); + if (index < num_neighbors) { + InsertLabel(label_neighbors[j], alter_labels[i], num_neighbors, index); + } + } + + } + if (param->probability == 1) { + for (std::size_t j = 0; j < num_classes; ++j) { + std::cout << labels[j] << " "; + } + std::cout << '\n'; + } + + for (int i = 0; i < num_ex; ++i) { + delete[] dist_neighbors[i]; + delete[] label_neighbors[i]; + } + + delete[] dist_neighbors; + delete[] label_neighbors; + delete[] categories; + delete[] alter_labels; + std::vector(labels).swap(labels); + } + + if (param->taxonomy_type == SVM_EL || + param->taxonomy_type == SVM_ES || + param->taxonomy_type == SVM_KM) { + Problem subprob; + subprob.x = new Node*[num_ex]; + subprob.y = new double[num_ex]; + + for (int i = 0; i < num_ex; ++i) { + subprob.x[i] = prob->x[indices[i]]; + subprob.y[i] = prob->y[indices[i]]; + } + + for (int i = 1; i < num_ex; ++i) { + double *avg_prob = NULL; + brier[i] = 0; + subprob.num_ex = i; + Model *submodel = TrainCP(&subprob, param); + predict_labels[i] = PredictCP(&subprob, submodel, subprob.x[i], + lower_bounds[i], upper_bounds[i], &avg_prob); + for (int j = 0; j < submodel->num_classes; ++j) { + if (submodel->labels[j] == subprob.y[i]) { + brier[i] += (1-avg_prob[j]) * (1-avg_prob[j]); + double tmp = std::fmax(std::fmin(avg_prob[j], 1-kEpsilon), kEpsilon); + logloss[i] = - std::log(tmp); + } else { + brier[i] += avg_prob[j] * avg_prob[j]; + } + } + if (param->probability == 1) { + for (int j = 0; j < submodel->num_classes; ++j) { + std::cout << avg_prob[j] << ' '; + } + std::cout << '\n'; + } + FreeModel(submodel); + delete[] avg_prob; + } + delete[] subprob.x; + delete[] subprob.y; + } + + return; +} + +static const char *kTaxonomyTypeTable[] = { "knn", "svm_el", "svm_es", "svm_km", NULL }; + +int SaveModel(const char *model_file_name, const struct Model *model) { + std::ofstream model_file(model_file_name); + if (!model_file.is_open()) { + std::cerr << "Unable to open model file: " << model_file_name << std::endl; + return -1; + } + + const Parameter ¶m = model->param; + + model_file << "taxonomy_type " << kTaxonomyTypeTable[param.taxonomy_type] << '\n'; + model_file << "num_categories " << model->num_categories << '\n'; + + if (param.taxonomy_type == KNN) { + SaveKNNModel(model_file, model->knn_model); + } + if (param.taxonomy_type == SVM_EL || + param.taxonomy_type == SVM_ES || + param.taxonomy_type == SVM_KM) { + SaveSVMModel(model_file, model->svm_model); + } + + if ((param.taxonomy_type == SVM_ES || + param.taxonomy_type == SVM_KM) && + model->points) { + model_file << "points\n"; + for (int i = 0; i < model->num_categories; ++i) { + model_file << model->points[i] << ' '; + } + model_file << '\n'; + } + + if (model->categories) { + model_file << "categories\n"; + for (int i = 0; i < model->num_ex; ++i) { + model_file << model->categories[i] << ' '; + } + model_file << '\n'; + } + + if (model_file.bad() || model_file.fail()) { + model_file.close(); + return -1; + } + + model_file.close(); + + return 0; +} + +Model *LoadModel(const char *model_file_name) { + std::ifstream model_file(model_file_name); + if (!model_file.is_open()) { + std::cerr << "Unable to open model file: " << model_file_name << std::endl; + return NULL; + } + + Model *model = new Model; + + Parameter ¶m = model->param; + 
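+  // LoadModel() parses the keyword-prefixed text format written by
+  // SaveModel().  A saved model looks roughly like this (sketch; the
+  // sub-model block and values vary):
+  //
+  //   taxonomy_type knn
+  //   num_categories 2
+  //   knn_model
+  //   ...
+  //   categories
+  //   0 1 1 0 ...
+  //
+  // "categories" is always the last section, so reading it ends the loop.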
param.load_model = 1; + model->labels = NULL; + model->categories = NULL; + + char cmd[80]; + while (1) { + model_file >> cmd; + + if (std::strcmp(cmd, "taxonomy_type") == 0) { + model_file >> cmd; + int i; + for (i = 0; kTaxonomyTypeTable[i]; ++i) { + if (std::strcmp(kTaxonomyTypeTable[i], cmd) == 0) { + param.taxonomy_type = i; + break; + } + } + if (kTaxonomyTypeTable[i] == NULL) { + std::cerr << "Unknown taxonomy type.\n" << std::endl; + FreeModel(model); + delete model; + model_file.close(); + return NULL; + } + } else + if (std::strcmp(cmd, "num_categories") == 0) { + model_file >> param.num_categories; + model->num_categories = param.num_categories; + } else + if (std::strcmp(cmd, "knn_model") == 0) { + model->knn_model = LoadKNNModel(model_file); + if (model->knn_model == NULL) { + FreeModel(model); + delete model; + model_file.close(); + return NULL; + } + model->num_ex = model->knn_model->num_ex; + model->num_classes = model->knn_model->num_classes; + clone(model->labels, model->knn_model->labels, model->num_classes); + model->param.knn_param = &model->knn_model->param; + } else + if (std::strcmp(cmd, "svm_model") == 0) { + model->svm_model = LoadSVMModel(model_file); + if (model->svm_model == NULL) { + FreeModel(model); + delete model; + model_file.close(); + return NULL; + } + model->num_ex = model->svm_model->num_ex; + model->num_classes = model->svm_model->num_classes; + clone(model->labels, model->svm_model->labels, model->num_classes); + model->param.svm_param = &model->svm_model->param; + } else + if (std::strcmp(cmd, "points") == 0) { + int num_categories = model->num_categories; + model->points = new double[num_categories]; + for (int i = 0; i < num_categories; ++i) { + model_file >> model->points[i]; + } + } else + if (std::strcmp(cmd, "categories") == 0) { + int num_ex = model->num_ex; + model->categories = new int[num_ex]; + for (int i = 0; i < num_ex; ++i) { + model_file >> model->categories[i]; + } + break; + } else { + std::cerr << "Unknown text in model file: " << cmd << std::endl; + FreeModel(model); + delete model; + model_file.close(); + return NULL; + } + } + model_file.close(); + + return model; +} + +void FreeModel(struct Model *model) { + if (model->param.taxonomy_type == KNN && + model->knn_model != NULL) { + FreeKNNModel(model->knn_model); + delete model->knn_model; + model->knn_model = NULL; + } + + if ((model->param.taxonomy_type == SVM_EL || + model->param.taxonomy_type == SVM_ES || + model->param.taxonomy_type == SVM_KM) && + model->svm_model != NULL) { + FreeSVMModel(&(model->svm_model)); + delete model->svm_model; + model->svm_model = NULL; + } + + if (model->labels != NULL) { + delete[] model->labels; + model->labels = NULL; + } + + if (model->param.taxonomy_type == SVM_ES && + model->points != NULL) { + delete[] model->points; + model->points = NULL; + } + if (model->categories != NULL) { + delete[] model->categories; + model->categories = NULL; + } + + delete model; + model = NULL; + + return; +} + +void FreeParam(struct Parameter *param) { + if (param->taxonomy_type == KNN && + param->knn_param != NULL) { + FreeKNNParam(param->knn_param); + param->knn_param = NULL; + } + + if ((param->taxonomy_type == SVM_EL || + param->taxonomy_type == SVM_ES || + param->taxonomy_type == SVM_KM) && + param->svm_param != NULL) { + FreeSVMParam(param->svm_param); + param->svm_param = NULL; + } + + return; +} + +const char *CheckParameter(const struct Parameter *param) { + if (param->save_model == 1 && param->load_model == 1) { + return "cannot save and load model 
at the same time"; + } + + if (param->num_categories == 0) { + return "no. of categories cannot be less than 1"; + } + + if (param->taxonomy_type == KNN) { + if (param->knn_param == NULL) { + return "no knn parameter"; + } + return CheckKNNParameter(param->knn_param); + } + + if (param->taxonomy_type == SVM_EL || + param->taxonomy_type == SVM_ES || + param->taxonomy_type == SVM_KM) { + if (param->svm_param == NULL) { + return "no svm parameter"; + } + return CheckSVMParameter(param->svm_param); + } + + if (param->taxonomy_type > 3) { + return "no such taxonomy type"; + } + + return NULL; +} \ No newline at end of file diff --git a/cp.h b/cp.h new file mode 100644 index 0000000..df33947 --- /dev/null +++ b/cp.h @@ -0,0 +1,41 @@ +#ifndef LIBCP_CP_H_ +#define LIBCP_CP_H_ + +#include "utilities.h" +#include "knn.h" +#include "svm.h" + +enum { KNN, SVM_EL, SVM_ES, SVM_KM }; + +struct Parameter { + struct KNNParameter *knn_param; + struct SVMParameter *svm_param; + int save_model; + int load_model; + int measure_type; + int num_folds; + int probability; +}; + +struct Model { + struct Parameter param; + struct SVMModel *svm_model; + struct KNNModel *knn_model; + int num_ex; + int num_classes; + int *labels; +}; + +Model *TrainCP(const struct Problem *train, const struct Parameter *param); +double PredictCP(const struct Problem *train, const struct Model *model, const struct Node *x, double &lower, double &upper, double **avg_prob); +void CrossValidation(const struct Problem *prob, const struct Parameter *param, double *predict_labels, double *lower_bounds, double *upper_bounds, double *brier, double *logloss); +void OnlinePredict(const struct Problem *prob, const struct Parameter *param, double *predict_labels, int *indices, double *lower_bounds, double *upper_bounds, double *brier, double *logloss); + +int SaveModel(const char *model_file_name, const struct Model *model); +Model *LoadModel(const char *model_file_name); +void FreeModel(struct Model *model); + +void FreeParam(struct Parameter *param); +const char *CheckParameter(const struct Parameter *param); + +#endif // LIBCP_CP_H_ \ No newline at end of file diff --git a/knn.cpp b/knn.cpp new file mode 100644 index 0000000..bf1eb06 --- /dev/null +++ b/knn.cpp @@ -0,0 +1,284 @@ +#include "knn.h" +#include +#include + +double CalcDist(const struct Node *x1, const struct Node *x2) { + double sum = 0; + + while (x1->index != -1 && x2->index != -1) { + if (x1->index == x2->index) { + sum += (x1->value - x2->value) * (x1->value - x2->value); + ++x1; + ++x2; + } else { + if(x1->index > x2->index) { + sum += x2->value * x2->value; + ++x2; + } else { + sum += x1->value * x1->value; + ++x1; + } + } + } + if (x1->index == -1) { + while (x2->index != -1) { + sum += x2->value * x2->value; + ++x2; + } + } + if (x2->index == -1) { + while (x1->index != -1) { + sum += x1->value * x1->value; + ++x1; + } + } + + return sqrt(sum); +} + +int CompareDist(double *neighbors, double dist, int num_neighbors) { + int i = 0; + + while (i < num_neighbors) { + if (dist < neighbors[i]) + break; + ++i; + } + if (i == num_neighbors) + return i; + for (int j = num_neighbors-1; j > i; --j) + neighbors[j] = neighbors[j-1]; + neighbors[i] = dist; + + return i; +} + +KNNModel *TrainKNN(const struct Problem *prob, const struct KNNParameter *param) { + KNNModel *model = new KNNModel; + model->param = *param; + model->labels = NULL; + model->dist_neighbors = NULL; + model->label_neighbors = NULL; + + int num_classes = 0; + int num_ex = prob->num_ex; + int num_neighbors = 
param->num_neighbors; + int *labels = NULL; + int *alter_labels = new int[num_ex]; + + std::vector unique_labels; + for (int i = 0; i < num_ex; ++i) { + int this_label = static_cast(prob->y[i]); + std::size_t j; + for (j = 0; j < num_classes; ++j) { + if (this_label == unique_labels[j]) break; + } + alter_labels[i] = static_cast(j); + if (j == num_classes) { + unique_labels.push_back(this_label); + ++num_classes; + } + } + labels = new int[num_classes]; + for (std::size_t i = 0; i < unique_labels.size(); ++i) { + labels[i] = unique_labels[i]; + } + std::vector(unique_labels).swap(unique_labels); + + if (num_classes == 1) { + std::cerr << "WARNING: training set only has one class. See README for details." << std::endl; + } + + double **dist_neighbors = new double*[num_ex]; + int **label_neighbors = new int*[num_ex]; + + for (int i = 0; i < num_ex; ++i) { + dist_neighbors[i] = new double[num_neighbors]; + label_neighbors[i] = new int[num_neighbors]; + for (int j = 0; j < num_neighbors; ++j) { + dist_neighbors[i][j] = INF; + label_neighbors[i][j] = -1; + } + } + + for (int i = 0; i < num_ex-1; ++i) { + for (int j = i+1; j < num_ex; ++j) { + double dist = CalcDist(prob->x[i], prob->x[j]); + int index; + index = CompareDist(dist_neighbors[i], dist, num_neighbors); + if (index < num_neighbors) { + InsertLabel(label_neighbors[i], alter_labels[j], num_neighbors, index); + } + index = CompareDist(dist_neighbors[j], dist, num_neighbors); + if (index < num_neighbors) { + InsertLabel(label_neighbors[j], alter_labels[i], num_neighbors, index); + } + } + } + delete[] alter_labels; + + model->num_ex = num_ex; + model->num_classes = num_classes; + model->labels = labels; + model->dist_neighbors = dist_neighbors; + model->label_neighbors = label_neighbors; + model->param = *param; + + return model; +} + +double PredictKNN(struct Problem *train, struct Node *x, const int num_neighbors) { + double neighbors[num_neighbors]; + double labels[num_neighbors]; + + for (int i = 0; i < num_neighbors; ++i) { + neighbors[i] = INF; + labels[i] = -1; + } + for (int i = 0; i < train->num_ex; ++i) { + double dist = CalcDist(train->x[i], x); + int index = CompareDist(neighbors, dist, num_neighbors); + if (index < num_neighbors) { + InsertLabel(labels, train->y[i], num_neighbors, index); + } + } + double predict_label = FindMostFrequent(labels, num_neighbors); + + return predict_label; +} + +int SaveKNNModel(std::ofstream &model_file, const struct KNNModel *model) { + model_file << "knn_model\n"; + model_file << "num_examples " << model->num_ex << '\n'; + model_file << "num_classes " << model->num_classes << '\n'; + model_file << "num_neighbors " << model->param.num_neighbors << '\n'; + + if (model->labels) { + model_file << "labels"; + for (int i = 0; i < model->num_classes; ++i) { + model_file << ' ' << model->labels[i]; + } + model_file << '\n'; + } + + if (model->dist_neighbors) { + model_file << "dist_neighbors\n"; + for (int i = 0; i < model->num_ex; ++i) { + for (int j = 0; j < model->param.num_neighbors; ++j) { + model_file << model->dist_neighbors[i][j] << ' '; + } + } + model_file << '\n'; + } + + if (model->label_neighbors) { + model_file << "label_neighbors\n"; + for (int i = 0; i < model->num_ex; ++i) { + for (int j = 0; j < model->param.num_neighbors; ++j) { + model_file << model->label_neighbors[i][j] << ' '; + } + } + model_file << '\n'; + } + + return 0; +} + +KNNModel *LoadKNNModel(std::ifstream &model_file) { + KNNModel *model = new KNNModel; + KNNParameter ¶m = model->param; + + char cmd[80]; + while (1) 
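+  // Keyword-driven parser for the knn_model block emitted by SaveKNNModel();
+  // "label_neighbors" is written last, so reading it terminates the loop.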
{ + model_file >> cmd; + + if (std::strcmp(cmd, "num_examples") == 0) { + model_file >> model->num_ex; + } else + if (std::strcmp(cmd, "num_classes") == 0) { + model_file >> model->num_classes; + } else + if (std::strcmp(cmd, "num_neighbors") == 0) { + model_file >> param.num_neighbors; + } else + if (std::strcmp(cmd, "labels") == 0) { + int n = model->num_classes; + model->labels = new int[n]; + for (int i = 0; i < n; ++i) { + model_file >> model->labels[i]; + } + } else + if (std::strcmp(cmd, "dist_neighbors") == 0) { + int n = param.num_neighbors; + int num_ex = model->num_ex; + model->dist_neighbors = new double*[num_ex]; + for (int i = 0; i < num_ex; ++i) { + model->dist_neighbors[i] = new double[n]; + for (int j = 0; j < n; ++j) { + model_file >> model->dist_neighbors[i][j]; + } + } + } else + if (std::strcmp(cmd, "label_neighbors") == 0) { + int n = param.num_neighbors; + int num_ex = model->num_ex; + model->label_neighbors = new int*[num_ex]; + for (int i = 0; i < num_ex; ++i) { + model->label_neighbors[i] = new int[n]; + for (int j = 0; j < n; ++j) { + model_file >> model->label_neighbors[i][j]; + } + } + break; + } else { + std::cerr << "Unknown text in knn_model file: " << cmd << std::endl; + FreeKNNModel(model); + return NULL; + } + } + + return model; +} + +void FreeKNNModel(struct KNNModel *model) { + if (model->labels != NULL) { + delete[] model->labels; + model->labels = NULL; + } + + if (model->dist_neighbors != NULL) { + for (int i = 0; i < model->num_ex; ++i) { + delete[] model->dist_neighbors[i]; + } + delete[] model->dist_neighbors; + model->dist_neighbors = NULL; + } + + if (model->label_neighbors != NULL) { + for (int i = 0; i < model->num_ex; ++i) { + delete[] model->label_neighbors[i]; + } + delete[] model->label_neighbors; + model->label_neighbors = NULL; + } + + return; +} + +void FreeKNNParam(struct KNNParameter *param) { + return; +} + +void InitKNNParam(struct KNNParameter *param) { + param->num_neighbors = 1; + + return; +} + +const char *CheckKNNParameter(const struct KNNParameter *param) { + if (param->num_neighbors < 1) { + return "num_neighbors should be greater than 0"; + } + + return NULL; +} \ No newline at end of file diff --git a/knn.h b/knn.h new file mode 100644 index 0000000..c39f6d5 --- /dev/null +++ b/knn.h @@ -0,0 +1,41 @@ +#ifndef LIBCP_KNN_H_ +#define LIBCP_KNN_H_ + +#include "utilities.h" + +struct KNNParameter { + int num_neighbors; +}; + +struct KNNModel { + struct KNNParameter param; + int num_ex; + int num_classes; + int *labels; // label of each class (label[k]) + double **dist_neighbors; + int **label_neighbors; +}; + +template +static inline void InsertLabel(T *labels, T label, int num_neighbors, int index) { + for (int i = num_neighbors-1; i > index; --i) + labels[i] = labels[i-1]; + labels[index] = label; + + return; +} + +KNNModel *TrainKNN(const struct Problem *prob, const struct KNNParameter *param); +double PredictKNN(struct Problem *train, struct Node *x, const int num_neighbors); +double CalcDist(const struct Node *x1, const struct Node *x2); +int CompareDist(double *neighbors, double dist, int num_neighbors); + +int SaveKNNModel(std::ofstream &model_file, const struct KNNModel *model); +KNNModel *LoadKNNModel(std::ifstream &model_file); +void FreeKNNModel(struct KNNModel *model); + +void FreeKNNParam(struct KNNParameter *param); +void InitKNNParam(struct KNNParameter *param); +const char *CheckKNNParameter(const struct KNNParameter *param); + +#endif // LIBCP_KNN_H_ diff --git a/svm.cpp b/svm.cpp new file mode 100644 index 
0000000..2128c1e --- /dev/null +++ b/svm.cpp @@ -0,0 +1,2017 @@ +#include "svm.h" +#include +#include +#include +#include +#include +#include +#include +#include + +typedef float Qfloat; +typedef signed char schar; + +static void PrintCout(const char *s) { + std::cout << s; + std::cout.flush(); +} + +static void PrintNull(const char *s) {} + +static void (*PrintString) (const char *) = &PrintNull; + +static void Info(const char *format, ...) { + char buffer[BUFSIZ]; + va_list ap; + va_start(ap, format); + vsprintf(buffer, format, ap); + va_end(ap); + (*PrintString)(buffer); +} + +// +// Kernel Cache +// +// l is the number of total data items +// size is the cache size limit in bytes +// +class Cache { + public: + Cache(int l, long int size); + ~Cache(); + + // request data [0,len) + // return some position p where [p,len) need to be filled + // (p >= len if nothing needs to be filled) + int get_data(const int index, Qfloat **data, int len); + void SwapIndex(int i, int j); + + private: + int l_; + long int size_; + struct Head { + Head *prev, *next; // a circular list + Qfloat *data; + int len; // data[0,len) is cached in this entry + }; + + Head *head_; + Head lru_head_; + void DeleteLRU(Head *h); + void InsertLRU(Head *h); +}; + +Cache::Cache(int l, long int size) : l_(l), size_(size) { + head_ = (Head *)calloc(static_cast(l_), sizeof(Head)); // initialized to 0 + size_ /= sizeof(Qfloat); + size_ -= static_cast(l_) * sizeof(Head) / sizeof(Qfloat); + size_ = std::max(size_, 2 * static_cast(l_)); // cache must be large enough for two columns + lru_head_.next = lru_head_.prev = &lru_head_; +} + +Cache::~Cache() { + for (Head *h = lru_head_.next; h != &lru_head_; h=h->next) { + delete[] h->data; + } + delete[] head_; +} + +void Cache::DeleteLRU(Head *h) { + // delete from current location + h->prev->next = h->next; + h->next->prev = h->prev; +} + +void Cache::InsertLRU(Head *h) { + // insert to last position + h->next = &lru_head_; + h->prev = lru_head_.prev; + h->prev->next = h; + h->next->prev = h; +} + +int Cache::get_data(const int index, Qfloat **data, int len) { + Head *h = &head_[index]; + if (h->len) { + DeleteLRU(h); + } + int more = len - h->len; + + if (more > 0) { + // free old space + while (size_ < more) { + Head *old = lru_head_.next; + DeleteLRU(old); + delete[] old->data; + size_ += old->len; + old->data = 0; + old->len = 0; + } + + // allocate new space + h->data = (Qfloat *)realloc(h->data, sizeof(Qfloat)*static_cast(len)); + size_ -= more; + std::swap(h->len, len); + } + + InsertLRU(h); + *data = h->data; + + return len; +} + +void Cache::SwapIndex(int i, int j) { + if (i == j) { + return; + } + + if (head_[i].len) { + DeleteLRU(&head_[i]); + } + if (head_[j].len) { + DeleteLRU(&head_[j]); + } + std::swap(head_[i].data, head_[j].data); + std::swap(head_[i].len, head_[j].len); + if (head_[i].len) { + InsertLRU(&head_[i]); + } + if (head_[j].len) { + InsertLRU(&head_[j]); + } + + if (i > j) { + std::swap(i, j); + } + for (Head *h = lru_head_.next; h != &lru_head_; h = h->next) { + if (h->len > i) { + if (h->len > j) { + std::swap(h->data[i], h->data[j]); + } else { + // give up + DeleteLRU(h); + delete[] h->data; + size_ += h->len; + h->data = 0; + h->len = 0; + } + } + } +} + +// +// Kernel evaluation +// +// the static method KernelFunction is for doing single kernel evaluation +// the constructor of Kernel prepares to calculate the l*l kernel matrix +// the member function get_Q is for getting one column from the Q Matrix +// +class QMatrix { + public: + virtual Qfloat 
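+  // QMatrix is the interface the SMO solver works against: get_Q(i, len)
+  // returns (a possibly cached copy of) the first len entries of column i of
+  // the kernel matrix Q, and get_QD() returns the diagonal entries Q_ii used
+  // as quadratic coefficients during working-set selection.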
*get_Q(int column, int len) const = 0; + virtual double *get_QD() const = 0; + virtual void SwapIndex(int i, int j) const = 0; + virtual ~QMatrix() {} +}; + +class Kernel : public QMatrix { + public: + Kernel(int l, Node *const *x, const SVMParameter& param); + virtual ~Kernel(); + static double KernelFunction(const Node *x, const Node *y, const SVMParameter& param); + virtual Qfloat *get_Q(int column, int len) const = 0; + virtual double *get_QD() const = 0; + virtual void SwapIndex(int i, int j) const { + std::swap(x_[i], x_[j]); + if (x_square_) { + std::swap(x_square_[i], x_square_[j]); + } + } + + protected: + double (Kernel::*kernel_function)(int i, int j) const; + + private: + const Node **x_; + double *x_square_; + + // SVMParameter + const int kernel_type; + const int degree; + const double gamma; + const double coef0; + + static double Dot(const Node *px, const Node *py); + double KernelLinear(int i, int j) const { + return Dot(x_[i], x_[j]); + } + double KernelPoly(int i, int j) const { + return std::pow(gamma*Dot(x_[i], x_[j])+coef0, degree); + } + double KernelRBF(int i, int j) const { + return exp(-gamma*(x_square_[i]+x_square_[j]-2*Dot(x_[i], x_[j]))); + } + double KernelSigmoid(int i, int j) const { + return tanh(gamma*Dot(x_[i], x_[j])+coef0); + } + double KernelPrecomputed(int i, int j) const { + return x_[i][static_cast(x_[j][0].value)].value; + } +}; + +Kernel::Kernel(int l, Node *const *x, const SVMParameter ¶m) + :kernel_type(param.kernel_type), + degree(param.degree), + gamma(param.gamma), + coef0(param.coef0) { + switch (kernel_type) { + case LINEAR: { + kernel_function = &Kernel::KernelLinear; + break; + } + case POLY: { + kernel_function = &Kernel::KernelPoly; + break; + } + case RBF: { + kernel_function = &Kernel::KernelRBF; + break; + } + case SIGMOID: { + kernel_function = &Kernel::KernelSigmoid; + break; + } + case PRECOMPUTED: { + kernel_function = &Kernel::KernelPrecomputed; + break; + } + default: { + // assert(false); + break; + } + } + + clone(x_, x, l); + + if (kernel_type == RBF) { + x_square_ = new double[l]; + for (int i = 0; i < l; ++i) { + x_square_[i] = Dot(x_[i], x_[i]); + } + } else { + x_square_ = 0; + } +} + +Kernel::~Kernel() { + delete[] x_; + delete[] x_square_; +} + +double Kernel::Dot(const Node *px, const Node *py) { + double sum = 0; + while (px->index != -1 && py->index != -1) { + if (px->index == py->index) { + sum += px->value * py->value; + ++px; + ++py; + } else { + if (px->index > py->index) { + ++py; + } else { + ++px; + } + } + } + + return sum; +} + +double Kernel::KernelFunction(const Node *x, const Node *y, const SVMParameter ¶m) { + switch (param.kernel_type) { + case LINEAR: { + return Dot(x, y); + } + case POLY: { + return std::pow(param.gamma*Dot(x, y)+param.coef0, param.degree); + } + case RBF: { + double sum = 0; + while (x->index != -1 && y->index != -1) { + if (x->index == y->index) { + double d = x->value - y->value; + sum += d*d; + ++x; + ++y; + } else { + if (x->index > y->index) { + sum += y->value * y->value; + ++y; + } else { + sum += x->value * x->value; + ++x; + } + } + } + + while (x->index != -1) { + sum += x->value * x->value; + ++x; + } + + while (y->index != -1) { + sum += y->value * y->value; + ++y; + } + + return exp(-param.gamma*sum); + } + case SIGMOID: { + return tanh(param.gamma*Dot(x, y)+param.coef0); + } + case PRECOMPUTED: { //x: test (validation), y: SV + return x[static_cast(y->value)].value; + } + default: { + // assert(false); + return 0; // Unreachable + } + } +} + +// An SMO algorithm in Fan 
et al., JMLR 6(2005), p. 1889--1918 +// Solves: +// +// min 0.5(\alpha^T Q \alpha) + p^T \alpha +// +// y^T \alpha = \delta +// y_i = +1 or -1 +// 0 <= alpha_i <= Cp for y_i = 1 +// 0 <= alpha_i <= Cn for y_i = -1 +// +// Given: +// +// Q, p, y, Cp, Cn, and an initial feasible point \alpha +// l is the size of vectors and matrices +// eps is the stopping tolerance +// +// solution will be put in \alpha, objective value will be put in obj +// +class Solver { + public: + Solver() {}; + virtual ~Solver() {}; + + struct SolutionInfo { + double obj; + double rho; + double upper_bound_p; + double upper_bound_n; + double r; // for Solver_NU + }; + + void Solve(int l, const QMatrix &Q, const double *p, const schar *y, + double *alpha, double Cp, double Cn, double eps, + SolutionInfo *si, int shrinking); + + protected: + int active_size_; + schar *y_; + double *G_; // gradient of objective function + enum { LOWER_BOUND, UPPER_BOUND, FREE }; + char *alpha_status_; // LOWER_BOUND, UPPER_BOUND, FREE + double *alpha_; + const QMatrix *Q_; + const double *QD_; + double eps_; + double Cp_; + double Cn_; + double *p_; + int *active_set_; + double *G_bar_; // gradient, if we treat free variables as 0 + int l_; + bool unshrink_; // XXX + + double get_C(int i) { + return (y_[i] > 0) ? Cp_ : Cn_; + } + void UpdateAlphaStatus(int i) { + if (alpha_[i] >= get_C(i)) { + alpha_status_[i] = UPPER_BOUND; + } else { + if (alpha_[i] <= 0) { + alpha_status_[i] = LOWER_BOUND; + } else { + alpha_status_[i] = FREE; + } + } + } + bool IsUpperBound(int i) { + return alpha_status_[i] == UPPER_BOUND; + } + bool IsLowerBound(int i) { + return alpha_status_[i] == LOWER_BOUND; + } + bool IsFree(int i) { + return alpha_status_[i] == FREE; + } + void SwapIndex(int i, int j); + void ReconstructGradient(); + virtual int SelectWorkingSet(int &i, int &j); + virtual double CalculateRho(); + virtual void DoShrinking(); + + private: + bool IsShrunk(int i, double Gmax1, double Gmax2); +}; + +void Solver::SwapIndex(int i, int j) { + Q_->SwapIndex(i, j); + std::swap(y_[i], y_[j]); + std::swap(G_[i], G_[j]); + std::swap(alpha_status_[i], alpha_status_[j]); + std::swap(alpha_[i], alpha_[j]); + std::swap(p_[i], p_[j]); + std::swap(active_set_[i], active_set_[j]); + std::swap(G_bar_[i], G_bar_[j]); +} + +void Solver::ReconstructGradient() { + // reconstruct inactive elements of G from G_bar_ and free variables + if (active_size_ == l_) { + return; + } + + int num_free = 0; + + for (int i = active_size_; i < l_; ++i) { + G_[i] = G_bar_[i] + p_[i]; + } + + for (int i = 0; i < active_size_; ++i) { + if (IsFree(i)) { + num_free++; + } + } + + if (2*num_free < active_size_) { + Info("\nWARNING: using -h 0 may be faster\n"); + } + + if (num_free*l_ > 2*active_size_*(l_-active_size_)) { + for (int i = active_size_; i < l_; ++i) { + const Qfloat *Q_i = Q_->get_Q(i, active_size_); + for (int j = 0; j < active_size_; ++j) { + if (IsFree(j)) { + G_[i] += alpha_[j] * Q_i[j]; + } + } + } + } else { + for (int i = 0; i < active_size_; ++i) { + if (IsFree(i)) { + const Qfloat *Q_i = Q_->get_Q(i, l_); + double alpha_i = alpha_[i]; + for (int j = active_size_; j < l_; ++j) { + G_[j] += alpha_i * Q_i[j]; + } + } + } + } +} + +void Solver::Solve(int l, const QMatrix &Q, const double *p, const schar *y, + double *alpha, double Cp, double Cn, double eps, + SolutionInfo *si, int shrinking) { + l_ = l; + Q_ = &Q; + QD_=Q.get_QD(); + clone(p_, p, l); + clone(y_, y, l); + clone(alpha_, alpha, l); + Cp_ = Cp; + Cn_ = Cn; + eps_ = eps; + unshrink_ = false; + + // 
initialize alpha_status_ + alpha_status_ = new char[l]; + for (int i = 0; i < l; ++i) { + UpdateAlphaStatus(i); + } + + // initialize active set (for shrinking) + active_set_ = new int[l]; + for (int i = 0; i < l; ++i) { + active_set_[i] = i; + } + active_size_ = l; + + // initialize gradient + G_ = new double[l]; + G_bar_ = new double[l]; + for (int i = 0; i < l; ++i) { + G_[i] = p_[i]; + G_bar_[i] = 0; + } + for (int i = 0; i < l; ++i) + if (!IsLowerBound(i)) { + const Qfloat *Q_i = Q.get_Q(i,l); + double alpha_i = alpha_[i]; + for (int j = 0; j < l; ++j) { + G_[j] += alpha_i*Q_i[j]; + } + if (IsUpperBound(i)) { + for (int j = 0; j < l; ++j) { + G_bar_[j] += get_C(i) * Q_i[j]; + } + } + } + + // optimization step + int iter = 0; + int max_iter = std::max(10000000, (l>INT_MAX/100) ? (INT_MAX) : (100*l)); + int counter = std::min(l, 1000) + 1; + + while (iter < max_iter) { + // show progress and do shrinking + if (--counter == 0) { + counter = std::min(l, 1000); + if (shrinking) { + DoShrinking(); + } + Info("."); + } + + int i, j; + if (SelectWorkingSet(i, j) != 0) { + // reconstruct the whole gradient + ReconstructGradient(); + // reset active set size and check + active_size_ = l; + Info("*"); + if (SelectWorkingSet(i, j) != 0) { + break; + } else { + counter = 1; // do shrinking next iteration + } + } + + ++iter; + + // update alpha[i] and alpha[j], handle bounds carefully + const Qfloat *Q_i = Q.get_Q(i, active_size_); + const Qfloat *Q_j = Q.get_Q(j, active_size_); + + double C_i = get_C(i); + double C_j = get_C(j); + + double old_alpha_i = alpha_[i]; + double old_alpha_j = alpha_[j]; + + if (y_[i] != y_[j]) { + double quad_coef = QD_[i] + QD_[j] + 2*Q_i[j]; + if (quad_coef <= 0) { + quad_coef = kTau; + } + double delta = (-G_[i]-G_[j]) / quad_coef; + double diff = alpha_[i] - alpha_[j]; + alpha_[i] += delta; + alpha_[j] += delta; + + if (diff > 0) { + if (alpha_[j] < 0) { + alpha_[j] = 0; + alpha_[i] = diff; + } + } else { + if (alpha_[i] < 0) { + alpha_[i] = 0; + alpha_[j] = -diff; + } + } + if (diff > C_i - C_j) { + if (alpha_[i] > C_i) { + alpha_[i] = C_i; + alpha_[j] = C_i - diff; + } + } else { + if (alpha_[j] > C_j) { + alpha_[j] = C_j; + alpha_[i] = C_j + diff; + } + } + } else { + double quad_coef = QD_[i] + QD_[j] - 2*Q_i[j]; + if (quad_coef <= 0) { + quad_coef = kTau; + } + double delta = (G_[i]-G_[j]) / quad_coef; + double sum = alpha_[i] + alpha_[j]; + alpha_[i] -= delta; + alpha_[j] += delta; + + if (sum > C_i) { + if (alpha_[i] > C_i) { + alpha_[i] = C_i; + alpha_[j] = sum - C_i; + } + } else { + if (alpha_[j] < 0) { + alpha_[j] = 0; + alpha_[i] = sum; + } + } + if (sum > C_j) { + if (alpha_[j] > C_j) { + alpha_[j] = C_j; + alpha_[i] = sum - C_j; + } + } else { + if (alpha_[i] < 0) { + alpha_[i] = 0; + alpha_[j] = sum; + } + } + } + + // update G + double delta_alpha_i = alpha_[i] - old_alpha_i; + double delta_alpha_j = alpha_[j] - old_alpha_j; + + for (int k = 0; k < active_size_; ++k) { + G_[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j; + } + + // update alpha_status_ and G_bar_ + bool ui = IsUpperBound(i); + bool uj = IsUpperBound(j); + UpdateAlphaStatus(i); + UpdateAlphaStatus(j); + if (ui != IsUpperBound(i)) { + Q_i = Q.get_Q(i, l); + if (ui) { + for (int k = 0; k < l; ++k) { + G_bar_[k] -= C_i * Q_i[k]; + } + } else { + for (int k = 0; k < l; ++k) { + G_bar_[k] += C_i * Q_i[k]; + } + } + } + + if (uj != IsUpperBound(j)) { + Q_j = Q.get_Q(j, l); + if (uj) { + for (int k = 0; k < l; ++k) { + G_bar_[k] -= C_j * Q_j[k]; + } + } else { + for (int k = 0; k < l; 
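+          // G_bar_[k] caches the gradient contribution of variables at their
+          // upper bound (sum of C_i * Q_i[k] over upper-bounded i), so
+          // ReconstructGradient() can rebuild G_ for shrunken entries by
+          // adding only the free variables' terms.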
++k) {
+          G_bar_[k] += C_j * Q_j[k];
+        }
+      }
+    }
+  }
+
+  if (iter >= max_iter) {
+    if (active_size_ < l) {
+      // reconstruct the whole gradient to calculate objective value
+      ReconstructGradient();
+      active_size_ = l;
+      Info("*");
+    }
+    std::cerr << "\nWARNING: reached the maximum number of iterations" << std::endl;
+  }
+
+  // calculate rho
+  si->rho = CalculateRho();
+
+  // calculate objective value
+  double v = 0;
+  for (int i = 0; i < l; ++i) {
+    v += alpha_[i] * (G_[i] + p_[i]);
+  }
+  si->obj = v / 2;
+
+  // put back the solution
+  for (int i = 0; i < l; ++i) {
+    alpha[active_set_[i]] = alpha_[i];
+  }
+
+  // juggle everything back
+  /*{
+    for (int i = 0; i < l; ++i)
+      while (active_set_[i] != i)
+        SwapIndex(i, active_set_[i]);
+        // or Q.SwapIndex(i, active_set_[i]);
+  }*/
+
+  si->upper_bound_p = Cp;
+  si->upper_bound_n = Cn;
+
+  Info("\noptimization finished, #iter = %d\n", iter);
+
+  delete[] p_;
+  delete[] y_;
+  delete[] alpha_;
+  delete[] alpha_status_;
+  delete[] active_set_;
+  delete[] G_;
+  delete[] G_bar_;
+}
+
+// return 1 if already optimal, return 0 otherwise
+int Solver::SelectWorkingSet(int &out_i, int &out_j) {
+  // return i,j such that
+  // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
+  // j: minimizes the decrease of obj value
+  //    (if quadratic coefficient <= 0, replace it with tau)
+  //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
+
+  double Gmax = -kInf;
+  double Gmax2 = -kInf;
+  int Gmax_idx = -1;
+  int Gmin_idx = -1;
+  double obj_diff_min = kInf;
+
+  for (int t = 0; t < active_size_; ++t)
+    if (y_[t] == +1) {
+      if (!IsUpperBound(t)) {
+        if (-G_[t] >= Gmax) {
+          Gmax = -G_[t];
+          Gmax_idx = t;
+        }
+      }
+    } else {
+      if (!IsLowerBound(t)) {
+        if (G_[t] >= Gmax) {
+          Gmax = G_[t];
+          Gmax_idx = t;
+        }
+      }
+    }
+
+  int i = Gmax_idx;
+  const Qfloat *Q_i = NULL;
+  if (i != -1) {  // NULL Q_i not accessed: Gmax=-kInf if i=-1
+    Q_i = Q_->get_Q(i, active_size_);
+  }
+
+  for (int j = 0; j < active_size_; ++j) {
+    if (y_[j] == +1) {
+      if (!IsLowerBound(j)) {
+        double grad_diff = Gmax + G_[j];
+        if (G_[j] >= Gmax2) {
+          Gmax2 = G_[j];
+        }
+        if (grad_diff > 0) {
+          double obj_diff;
+          double quad_coef = QD_[i] + QD_[j] - 2.0*y_[i]*Q_i[j];
+          if (quad_coef > 0) {
+            obj_diff = -(grad_diff*grad_diff) / quad_coef;
+          } else {
+            obj_diff = -(grad_diff*grad_diff) / kTau;
+          }
+
+          if (obj_diff <= obj_diff_min) {
+            Gmin_idx = j;
+            obj_diff_min = obj_diff;
+          }
+        }
+      }
+    } else {
+      if (!IsUpperBound(j)) {
+        double grad_diff = Gmax - G_[j];
+        if (-G_[j] >= Gmax2) {
+          Gmax2 = -G_[j];
+        }
+        if (grad_diff > 0) {
+          double obj_diff;
+          double quad_coef = QD_[i] + QD_[j] + 2.0*y_[i]*Q_i[j];
+          if (quad_coef > 0) {
+            obj_diff = -(grad_diff*grad_diff) / quad_coef;
+          } else {
+            obj_diff = -(grad_diff*grad_diff) / kTau;
+          }
+          if (obj_diff <= obj_diff_min) {
+            Gmin_idx = j;
+            obj_diff_min = obj_diff;
+          }
+        }
+      }
+    }
+  }
+
+  if (Gmax+Gmax2 < eps_) {
+    return 1;
+  }
+
+  out_i = Gmax_idx;
+  out_j = Gmin_idx;
+
+  return 0;
+}
+
+bool Solver::IsShrunk(int i, double Gmax1, double Gmax2) {
+  if (IsUpperBound(i)) {
+    if (y_[i] == +1) {
+      return (-G_[i] > Gmax1);
+    } else {
+      return (-G_[i] > Gmax2);
+    }
+  } else {
+    if (IsLowerBound(i)) {
+      if (y_[i] == +1) {
+        return (G_[i] > Gmax2);
+      } else {
+        return (G_[i] > Gmax1);
+      }
+    } else {
+      return (false);
+    }
+  }
+}
+
+void Solver::DoShrinking() {
+  double Gmax1 = -kInf;  // max { -y_i * grad(f)_i | i in I_up(\alpha) }
+  double Gmax2 = -kInf;  // max { y_i * grad(f)_i | i in I_low(\alpha) }
+
+  // find maximal violating pair first
+  for (int i = 0; i < active_size_; ++i) {
+    if (y_[i] == +1) {
+      if (!IsUpperBound(i)) {
+        if (-G_[i] >= Gmax1) {
+          Gmax1 = -G_[i];
+        }
+      }
+      if (!IsLowerBound(i)) {
+        if (G_[i] >=
Gmax2) { + Gmax2 = G_[i]; + } + } + } else { + if (!IsUpperBound(i)) { + if (-G_[i] >= Gmax2) { + Gmax2 = -G_[i]; + } + } + if (!IsLowerBound(i)) { + if (G_[i] >= Gmax1) { + Gmax1 = G_[i]; + } + } + } + } + + if ((unshrink_ == false) && (Gmax1 + Gmax2 <= eps_*10)) { + unshrink_ = true; + ReconstructGradient(); + active_size_ = l_; + Info("*"); + } + + for (int i = 0; i < active_size_; ++i) { + if (IsShrunk(i, Gmax1, Gmax2)) { + active_size_--; + while (active_size_ > i) { + if (!IsShrunk(active_size_, Gmax1, Gmax2)) { + SwapIndex(i, active_size_); + break; + } + active_size_--; + } + } + } +} + +double Solver::CalculateRho() { + double r; + int num_free = 0; + double ub = kInf, lb = -kInf, sum_free = 0; + for (int i = 0; i < active_size_; ++i) { + double yG = y_[i] * G_[i]; + + if (IsUpperBound(i)) { + if (y_[i] == -1) { + ub = std::min(ub, yG); + } else { + lb = std::max(lb, yG); + } + } else { + if (IsLowerBound(i)) { + if (y_[i] == +1) { + ub = std::min(ub, yG); + } else { + lb = std::max(lb, yG); + } + } else { + ++num_free; + sum_free += yG; + } + } + } + + if (num_free > 0) { + r = sum_free / num_free; + } else { + r = (ub+lb) / 2; + } + + return r; +} + +// +// Solver for nu-svm classification and regression +// +// additional constraint: e^T \alpha = constant +// +class Solver_NU : public Solver { + public: + Solver_NU() {} + void Solve(int l, const QMatrix& Q, const double *p, const schar *y, + double *alpha, double Cp, double Cn, double eps, + SolutionInfo* si, int shrinking) { + si_ = si; + Solver::Solve(l, Q, p, y, alpha, Cp, Cn, eps, si, shrinking); + } + + private: + SolutionInfo *si_; + int SelectWorkingSet(int &i, int &j); + double CalculateRho(); + bool IsShrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4); + void DoShrinking(); +}; + +// return 1 if already optimal, return 0 otherwise +int Solver_NU::SelectWorkingSet(int &out_i, int &out_j) { + // return i,j such that y_i = y_j and + // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha) + // j: minimizes the decrease of obj value + // (if quadratic coefficeint <= 0, replace it with tau) + // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha) + + double Gmaxp = -kInf; + double Gmaxp2 = -kInf; + int Gmaxp_idx = -1; + + double Gmaxn = -kInf; + double Gmaxn2 = -kInf; + int Gmaxn_idx = -1; + + int Gmin_idx = -1; + double obj_diff_min = kInf; + + for (int t = 0; t < active_size_; ++t) + if (y_[t] == +1) { + if (!IsUpperBound(t)) { + if (-G_[t] >= Gmaxp) { + Gmaxp = -G_[t]; + Gmaxp_idx = t; + } + } + } else { + if (!IsLowerBound(t)) { + if (G_[t] >= Gmaxn) { + Gmaxn = G_[t]; + Gmaxn_idx = t; + } + } + } + + int ip = Gmaxp_idx; + int in = Gmaxn_idx; + const Qfloat *Q_ip = NULL; + const Qfloat *Q_in = NULL; + if (ip != -1) {// NULL Q_ip not accessed: Gmaxp=-kInf if ip=-1 + Q_ip = Q_->get_Q(ip, active_size_); + } + if (in != -1) { + Q_in = Q_->get_Q(in, active_size_); + } + + for (int j = 0; j < active_size_; ++j) { + if (y_[j] == +1) { + if (!IsLowerBound(j)) { + double grad_diff = Gmaxp + G_[j]; + if (G_[j] >= Gmaxp2) { + Gmaxp2 = G_[j]; + } + if (grad_diff > 0) { + double obj_diff; + double quad_coef = QD_[ip] + QD_[j] - 2*Q_ip[j]; + if (quad_coef > 0) { + obj_diff = -(grad_diff*grad_diff) / quad_coef; + } else { + obj_diff = -(grad_diff*grad_diff) / kTau; + } + if (obj_diff <= obj_diff_min) { + Gmin_idx = j; + obj_diff_min = obj_diff; + } + } + } + } else { + if (!IsUpperBound(j)) { + double grad_diff = Gmaxn - G_[j]; + if (-G_[j] >= Gmaxn2) { + Gmaxn2 = -G_[j]; + } + if (grad_diff > 0) { + double obj_diff; + 
double quad_coef = QD_[in] + QD_[j] - 2*Q_in[j]; + if (quad_coef > 0) { + obj_diff = -(grad_diff*grad_diff) / quad_coef; + } else { + obj_diff = -(grad_diff*grad_diff) / kTau; + } + if (obj_diff <= obj_diff_min) { + Gmin_idx = j; + obj_diff_min = obj_diff; + } + } + } + } + } + + if (std::max(Gmaxp+Gmaxp2, Gmaxn+Gmaxn2) < eps_) { + return 1; + } + + if (y_[Gmin_idx] == +1) { + out_i = Gmaxp_idx; + } else { + out_i = Gmaxn_idx; + } + out_j = Gmin_idx; + + return 0; +} + +bool Solver_NU::IsShrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4) { + if (IsUpperBound(i)) { + if (y_[i] == +1) { + return(-G_[i] > Gmax1); + } else { + return(-G_[i] > Gmax4); + } + } else { + if (IsLowerBound(i)) { + if (y_[i] == +1) { + return(G_[i] > Gmax2); + } else { + return(G_[i] > Gmax3); + } + } else { + return (false); + } + } +} + +void Solver_NU::DoShrinking() { + double Gmax1 = -kInf; // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) } + double Gmax2 = -kInf; // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) } + double Gmax3 = -kInf; // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) } + double Gmax4 = -kInf; // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) } + + // find maximal violating pair first + for (int i = 0; i < active_size_; ++i) { + if (!IsUpperBound(i)) { + if (y_[i] == +1) { + if (-G_[i] > Gmax1) { + Gmax1 = -G_[i]; + } + } else { + if (-G_[i] > Gmax4) { + Gmax4 = -G_[i]; + } + } + } + if (!IsLowerBound(i)) { + if (y_[i] == +1) { + if(G_[i] > Gmax2) { + Gmax2 = G_[i]; + } + } else { + if (G_[i] > Gmax3) { + Gmax3 = G_[i]; + } + } + } + } + + if ((unshrink_ == false) && (std::max(Gmax1+Gmax2, Gmax3+Gmax4) <= eps_*10)) { + unshrink_ = true; + ReconstructGradient(); + active_size_ = l_; + } + + for (int i = 0; i < active_size_; ++i) + if (IsShrunk(i, Gmax1, Gmax2, Gmax3, Gmax4)) { + active_size_--; + while (active_size_ > i) { + if (!IsShrunk(active_size_, Gmax1, Gmax2, Gmax3, Gmax4)) { + SwapIndex(i, active_size_); + break; + } + active_size_--; + } + } +} + +double Solver_NU::CalculateRho() { + int num_free1 = 0, num_free2 = 0; + double ub1 = kInf, ub2 = kInf; + double lb1 = -kInf, lb2 = -kInf; + double sum_free1 = 0, sum_free2 = 0; + + for (int i = 0; i < active_size_; ++i) { + if (y_[i] == +1) { + if (IsUpperBound(i)) { + lb1 = std::max(lb1, G_[i]); + } else { + if (IsLowerBound(i)) { + ub1 = std::min(ub1, G_[i]); + } else { + ++num_free1; + sum_free1 += G_[i]; + } + } + } else { + if (IsUpperBound(i)) { + lb2 = std::max(lb2, G_[i]); + } else { + if (IsLowerBound(i)) { + ub2 = std::min(ub2, G_[i]); + } else { + ++num_free2; + sum_free2 += G_[i]; + } + } + } + } + + double r1, r2; + if (num_free1 > 0) { + r1 = sum_free1 / num_free1; + } else { + r1 = (ub1+lb1) / 2; + } + + if (num_free2 > 0) { + r2 = sum_free2 / num_free2; + } else { + r2 = (ub2+lb2) / 2; + } + + si_->r = (r1+r2) / 2; + return ((r1-r2)/2); +} + +// +// Q matrices for various formulations +// +class SVC_Q : public Kernel { + public: + SVC_Q(const Problem &prob, const SVMParameter ¶m, const schar *y) : Kernel(prob.num_ex, prob.x, param) { + clone(y_, y, prob.num_ex); + cache_ = new Cache(prob.num_ex, static_cast(param.cache_size*(1<<20))); + QD_ = new double[prob.num_ex]; + for (int i = 0; i < prob.num_ex; ++i) + QD_[i] = (this->*kernel_function)(i, i); + } + + Qfloat *get_Q(int i, int len) const { + Qfloat *data; + int start = cache_->get_data(i, &data, len); + if (start < len) { + for (int j = start; j < len; ++j) + data[j] = static_cast(y_[i]*y_[j]*(this->*kernel_function)(i, j)); + } 
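+    // Descriptive note (inferred from the code above): Cache::get_data
+    // returns the number of leading entries of row i that were already
+    // cached, and the loop just recomputed the missing tail [start, len) as
+    // Q_ij = y_i * y_j * K(x_i, x_j), so the row is valid up to len here.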
+ return data; + } + + double *get_QD() const { + return QD_; + } + + void SwapIndex(int i, int j) const { + cache_->SwapIndex(i, j); + Kernel::SwapIndex(i, j); + std::swap(y_[i], y_[j]); + std::swap(QD_[i], QD_[j]); + } + + ~SVC_Q() { + delete[] y_; + delete cache_; + delete[] QD_; + } + + private: + schar *y_; + Cache *cache_; + double *QD_; +}; + +// +// construct and solve various formulations +// +static void SolveCSVC(const Problem *prob, const SVMParameter *param, double *alpha, Solver::SolutionInfo *si, double Cp, double Cn) { + int num_ex = prob->num_ex; + double *minus_ones = new double[num_ex]; + schar *y = new schar[num_ex]; + + for (int i = 0; i < num_ex; ++i) { + alpha[i] = 0; + minus_ones[i] = -1; + if (prob->y[i] > 0) { + y[i] = +1; + } else { + y[i] = -1; + } + } + + Solver s; + s.Solve(num_ex, SVC_Q(*prob, *param, y), minus_ones, y, alpha, Cp, Cn, param->eps, si, param->shrinking); + + double sum_alpha=0; + for (int i = 0; i < num_ex; ++i) { + sum_alpha += alpha[i]; + } + + if (Cp == Cn) { + Info("nu = %f\n", sum_alpha/(Cp*prob->num_ex)); + } + + for (int i = 0; i < num_ex; ++i) { + alpha[i] *= y[i]; + } + + delete[] minus_ones; + delete[] y; +} + +static void SolveNuSVC(const Problem *prob, const SVMParameter *param, double *alpha, Solver::SolutionInfo *si) { + int num_ex = prob->num_ex; + double nu = param->nu; + + schar *y = new schar[num_ex]; + + for (int i = 0; i < num_ex; ++i) { + if (prob->y[i] > 0) { + y[i] = +1; + } else { + y[i] = -1; + } + } + + double sum_pos = nu*num_ex/2; + double sum_neg = nu*num_ex/2; + + for (int i = 0; i < num_ex; ++i) { + if (y[i] == +1) { + alpha[i] = std::min(1.0, sum_pos); + sum_pos -= alpha[i]; + } else { + alpha[i] = std::min(1.0, sum_neg); + sum_neg -= alpha[i]; + } + } + + double *zeros = new double[num_ex]; + + for (int i = 0; i < num_ex; ++i) { + zeros[i] = 0; + } + + Solver_NU s; + s.Solve(num_ex, SVC_Q(*prob, *param, y), zeros, y, alpha, 1.0, 1.0, param->eps, si, param->shrinking); + double r = si->r; + + Info("C = %f\n", 1/r); + + for (int i = 0; i < num_ex; ++i) { + alpha[i] *= y[i]/r; + } + + si->rho /= r; + si->obj /= (r*r); + si->upper_bound_p = 1/r; + si->upper_bound_n = 1/r; + + delete[] y; + delete[] zeros; +} + +// +// DecisionFunction +// +struct DecisionFunction { + double *alpha; + double rho; +}; + +static DecisionFunction TrainSingleSVM(const Problem *prob, const SVMParameter *param, double Cp, double Cn) { + double *alpha = new double[prob->num_ex]; + Solver::SolutionInfo si; + switch (param->svm_type) { + case C_SVC: { + SolveCSVC(prob, param, alpha, &si, Cp, Cn); + break; + } + case NU_SVC: { + SolveNuSVC(prob, param, alpha, &si); + break; + } + default: { + // assert{false}; + break; + } + } + + Info("obj = %f, rho = %f\n", si.obj, si.rho); + + // output SVs + int nSV = 0; + int nBSV = 0; + for (int i = 0; i < prob->num_ex; ++i) { + if (fabs(alpha[i]) > 0) { + ++nSV; + if (prob->y[i] > 0) { + if (fabs(alpha[i]) >= si.upper_bound_p) { + ++nBSV; + } + } else { + if (fabs(alpha[i]) >= si.upper_bound_n) { + ++nBSV; + } + } + } + } + + Info("nSV = %d, nBSV = %d\n", nSV, nBSV); + + DecisionFunction f; + f.alpha = alpha; + f.rho = si.rho; + return f; +} + +// +// Interface functions +// +SVMModel *TrainSVM(const Problem *prob, const SVMParameter *param) { + SVMModel *model = new SVMModel; + model->param = *param; + model->free_sv = 0; // XXX + + // classification + int num_ex = prob->num_ex; + int num_classes; + int *labels = NULL; + int *start = NULL; + int *count = NULL; + int *perm = new int[num_ex]; + + // 
group training data of the same class + GroupClasses(prob, &num_classes, &labels, &start, &count, perm); + if (num_classes == 1) { + Info("WARNING: training data in only one class. See README for details.\n"); + } + + Node **x = new Node*[num_ex]; + for (int i = 0; i < num_ex; ++i) { + x[i] = prob->x[perm[i]]; + } + + // calculate weighted C + double *weighted_C = new double[num_classes]; + for (int i = 0; i < num_classes; ++i) { + weighted_C[i] = param->C; + } + for (int i = 0; i < param->num_weights; ++i) { + int j; + for (j = 0; j < num_classes; ++j) { + if (param->weight_labels[i] == labels[j]) { + break; + } + } + if (j == num_classes) { + std::cerr << "WARNING: class label " << param->weight_labels[i] << " specified in weight is not found" << std::endl; + } else { + weighted_C[j] *= param->weights[i]; + } + } + + // train k*(k-1)/2 models + bool *non_zero = new bool[num_ex]; + for (int i = 0; i < num_ex; ++i) { + non_zero[i] = false; + } + DecisionFunction *f = new DecisionFunction[num_classes*(num_classes-1)/2]; + + int p = 0; + for (int i = 0; i < num_classes; ++i) { + for (int j = i+1; j < num_classes; ++j) { + Problem sub_prob; + int si = start[i], sj = start[j]; + int ci = count[i], cj = count[j]; + sub_prob.num_ex = ci+cj; + sub_prob.x = new Node*[sub_prob.num_ex]; + sub_prob.y = new double[sub_prob.num_ex]; + for (int k = 0; k < ci; ++k) { + sub_prob.x[k] = x[si+k]; + sub_prob.y[k] = +1; + } + for (int k = 0; k < cj; ++k) { + sub_prob.x[ci+k] = x[sj+k]; + sub_prob.y[ci+k] = -1; + } + + f[p] = TrainSingleSVM(&sub_prob, param,weighted_C[i], weighted_C[j]); + for (int k = 0; k < ci; ++k) { + if (!non_zero[si+k] && fabs(f[p].alpha[k]) > 0) { + non_zero[si+k] = true; + } + } + for (int k = 0; k < cj; ++k) { + if (!non_zero[sj+k] && fabs(f[p].alpha[ci+k]) > 0) { + non_zero[sj+k] = true; + } + } + delete[] sub_prob.x; + delete[] sub_prob.y; + ++p; + } + } + + // build output + model->num_classes = num_classes; + model->num_ex = num_ex; + + model->labels = new int[num_classes]; + for (int i = 0; i < num_classes; ++i) { + model->labels[i] = labels[i]; + } + + model->rho = new double[num_classes*(num_classes-1)/2]; + for (int i = 0; i < num_classes*(num_classes-1)/2; ++i) { + model->rho[i] = f[i].rho; + } + + int total_sv = 0; + int *nz_count = new int[num_classes]; + model->num_svs = new int[num_classes]; + for (int i = 0; i < num_classes; ++i) { + int num_svs = 0; + for (int j = 0; j < count[i]; ++j) { + if (non_zero[start[i]+j]) { + ++num_svs; + ++total_sv; + } + } + model->num_svs[i] = num_svs; + nz_count[i] = num_svs; + } + + Info("Total nSV = %d\n", total_sv); + + model->total_sv = total_sv; + model->svs = new Node*[total_sv]; + model->sv_indices = new int[total_sv]; + p = 0; + for (int i = 0; i < num_ex; ++i) { + if (non_zero[i]) { + model->svs[p] = x[i]; + model->sv_indices[p] = perm[i] + 1; + ++p; + } + } + + int *nz_start = new int[num_classes]; + nz_start[0] = 0; + for (int i = 1; i < num_classes; ++i) { + nz_start[i] = nz_start[i-1]+nz_count[i-1]; + } + + model->sv_coef = new double*[num_classes-1]; + for (int i = 0; i < num_classes-1; ++i) { + model->sv_coef[i] = new double[total_sv]; + } + + p = 0; + for (int i = 0; i < num_classes; ++i) { + for (int j = i+1; j < num_classes; ++j) { + // classifier (i,j): coefficients with + // i are in sv_coef[j-1][nz_start[i]...], + // j are in sv_coef[i][nz_start[j]...] 
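+      // Worked example of the layout above for k = 3 classes:
+      // classifier (0,1) keeps its class-0 coefficients in
+      // sv_coef[0][nz_start[0]...] and its class-1 coefficients in
+      // sv_coef[0][nz_start[1]...]; classifier (0,2) uses
+      // sv_coef[1][nz_start[0]...] and sv_coef[0][nz_start[2]...];
+      // classifier (1,2) uses sv_coef[1][nz_start[1]...] and
+      // sv_coef[1][nz_start[2]...].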
+ int si = start[i]; + int sj = start[j]; + int ci = count[i]; + int cj = count[j]; + + int q = nz_start[i]; + for (int k = 0; k < ci; ++k) { + if (non_zero[si+k]) { + model->sv_coef[j-1][q++] = f[p].alpha[k]; + } + } + q = nz_start[j]; + for (int k = 0; k < cj; ++k) { + if (non_zero[sj+k]) { + model->sv_coef[i][q++] = f[p].alpha[ci+k]; + } + } + ++p; + } + } + + delete[] labels; + delete[] count; + delete[] perm; + delete[] start; + delete[] x; + delete[] weighted_C; + delete[] non_zero; + for (int i = 0; i < num_classes*(num_classes-1)/2; ++i) { + delete[] f[i].alpha; + } + delete[] f; + delete[] nz_count; + delete[] nz_start; + + return model; +} + +double PredictSVMValues(const SVMModel *model, const Node *x, double *decision_values) { + int num_classes = model->num_classes; + int total_sv = model->total_sv; + + double *kvalue = new double[total_sv]; + for (int i = 0; i < total_sv; ++i) { + kvalue[i] = Kernel::KernelFunction(x, model->svs[i], model->param); + } + + int *start = new int[num_classes]; + start[0] = 0; + for (int i = 1; i < num_classes; ++i) { + start[i] = start[i-1] + model->num_svs[i-1]; + } + + int *vote = new int[num_classes]; + for (int i = 0; i < num_classes; ++i) { + vote[i] = 0; + } + + int p = 0; + for (int i = 0; i < num_classes; ++i) { + for (int j = i+1; j < num_classes; ++j) { + double sum = 0; + int si = start[i]; + int sj = start[j]; + int ci = model->num_svs[i]; + int cj = model->num_svs[j]; + + double *coef1 = model->sv_coef[j-1]; + double *coef2 = model->sv_coef[i]; + for (int k = 0; k < ci; ++k) { + sum += coef1[si+k] * kvalue[si+k]; + } + for (int k = 0; k < cj; ++k) { + sum += coef2[sj+k] * kvalue[sj+k]; + } + sum -= model->rho[p]; + decision_values[p] = sum; + + if (decision_values[p] > 0) { + ++vote[i]; + } else { + ++vote[j]; + } + ++p; + } + } + + int vote_max_idx = 0; + for (int i = 1; i < num_classes; ++i) { + if (vote[i] > vote[vote_max_idx]) { + vote_max_idx = i; + } + } + + delete[] kvalue; + delete[] start; + delete[] vote; + + return model->labels[vote_max_idx]; +} + +double PredictSVM(const SVMModel *model, const Node *x) { + int num_classes = model->num_classes; + double *decision_values = new double[num_classes*(num_classes-1)/2]; + double pred_result = PredictSVMValues(model, x, decision_values); + delete[] decision_values; + return pred_result; +} + +static const char *kSVMTypeTable[] = { "c_svc", "nu_svc", NULL }; + +static const char *kKernelTypeTable[] = { "linear", "polynomial", "rbf", "sigmoid", "precomputed", NULL }; + +int SaveSVMModel(std::ofstream &model_file, const struct SVMModel *model) { + const SVMParameter ¶m = model->param; + + model_file << "svm_model\n"; + model_file << "svm_type " << kSVMTypeTable[param.svm_type] << '\n'; + model_file << "kernel_type " << kKernelTypeTable[param.kernel_type] << '\n'; + + if (param.kernel_type == POLY) { + model_file << "degree " << param.degree << '\n'; + } + if (param.kernel_type == POLY || + param.kernel_type == RBF || + param.kernel_type == SIGMOID) { + model_file << "gamma " << param.gamma << '\n'; + } + if (param.kernel_type == POLY || + param.kernel_type == SIGMOID) { + model_file << "coef0 " << param.coef0 << '\n'; + } + + int num_classes = model->num_classes; + int total_sv = model->total_sv; + model_file << "num_examples " << model->num_ex << '\n'; + model_file << "num_classes " << num_classes << '\n'; + model_file << "total_SV " << total_sv << '\n'; + + if (model->labels) { + model_file << "labels"; + for (int i = 0; i < num_classes; ++i) + model_file << ' ' << 
model->labels[i]; + model_file << '\n'; + } + + if (model->rho) { + model_file << "rho"; + for (int i = 0; i < num_classes*(num_classes-1)/2; ++i) + model_file << ' ' << model->rho[i]; + model_file << '\n'; + } + + if (model->num_svs) { + model_file << "num_SVs"; + for (int i = 0; i < num_classes; ++i) + model_file << ' ' << model->num_svs[i]; + model_file << '\n'; + } + + if (model->sv_indices) { + model_file << "SV_indices\n"; + for (int i = 0; i < total_sv; ++i) + model_file << model->sv_indices[i] << ' '; + model_file << '\n'; + } + + model_file << "SVs\n"; + const double *const *sv_coef = model->sv_coef; + const Node *const *svs = model->svs; + + for (int i = 0; i < total_sv; ++i) { + for (int j = 0; j < num_classes-1; ++j) + model_file << std::setprecision(16) << (sv_coef[j][i]+0.0) << ' '; // add "+0.0" to avoid negative zero in output + + const Node *p = svs[i]; + + if (param.kernel_type == PRECOMPUTED) { + model_file << "0:" << static_cast(p->value) << ' '; + } else { + while (p->index != -1) { + model_file << p->index << ':' << std::setprecision(8) << p->value << ' '; + ++p; + } + } + model_file << '\n'; + } + + return 0; +} + +SVMModel *LoadSVMModel(std::ifstream &model_file) { + SVMModel *model = new SVMModel; + SVMParameter ¶m = model->param; + model->rho = NULL; + model->sv_indices = NULL; + model->labels = NULL; + model->num_svs = NULL; + + char cmd[80]; + while (1) { + model_file >> cmd; + + if (std::strcmp(cmd, "svm_type") == 0) { + model_file >> cmd; + int i; + for (i = 0; kSVMTypeTable[i]; ++i) { + if (std::strcmp(kSVMTypeTable[i], cmd) == 0) { + param.svm_type = i; + break; + } + } + if (kSVMTypeTable[i] == NULL) { + std::cerr << "Unknown SVM type.\n" << std::endl; + return NULL; + } + } else + if (std::strcmp(cmd, "kernel_type") == 0) { + model_file >> cmd; + int i; + for (i = 0; kKernelTypeTable[i]; ++i) { + if (std::strcmp(kKernelTypeTable[i], cmd) == 0) { + param.kernel_type = i; + break; + } + } + if (kKernelTypeTable[i] == NULL) { + std::cerr << "Unknown kernel function.\n" << std::endl; + return NULL; + } + } else + if (std::strcmp(cmd, "degree") == 0) { + model_file >> param.degree; + } else + if (std::strcmp(cmd, "gamma") == 0) { + model_file >> param.gamma; + } else + if (std::strcmp(cmd, "coef0") == 0) { + model_file >> param.coef0; + } else + if (std::strcmp(cmd, "num_examples") == 0) { + model_file >> model->num_ex; + } else + if (std::strcmp(cmd, "num_classes") == 0) { + model_file >> model->num_classes; + } else + if (std::strcmp(cmd, "total_SV") == 0) { + model_file >> model->total_sv; + } else + if (std::strcmp(cmd, "labels") == 0) { + int n = model->num_classes; + model->labels = new int[n]; + for (int i = 0; i < n; ++i) { + model_file >> model->labels[i]; + } + } else + if (std::strcmp(cmd, "rho") == 0) { + int n = model->num_classes*(model->num_classes-1)/2; + model->rho = new double[n]; + for (int i = 0; i < n; ++i) { + model_file >> model->rho[i]; + } + } else + if (std::strcmp(cmd, "num_SVs") == 0) { + int n = model->num_classes; + model->num_svs = new int[n]; + for (int i = 0; i < n; ++i) { + model_file >> model->num_svs[i]; + } + } else + if (std::strcmp(cmd, "SV_indices") == 0) { + int n = model->total_sv; + model->sv_indices = new int[n]; + for (int i = 0; i < n; ++i) { + model_file >> model->sv_indices[i]; + } + } else + if (std::strcmp(cmd, "SVs") == 0) { + std::size_t m = static_cast(model->num_classes)-1; + int total_sv = model->total_sv; + std::string line; + + if (model_file.peek() == '\n') + model_file.get(); + + model->sv_coef = new 
double*[m];
+  for (std::size_t i = 0; i < m; ++i) {
+    model->sv_coef[i] = new double[total_sv];
+  }
+  model->svs = new Node*[total_sv];
+  for (int i = 0; i < total_sv; ++i) {
+    std::vector<std::string> tokens;
+    std::size_t prev = 0, pos;
+
+    std::getline(model_file, line);
+    while ((pos = line.find_first_of(" \t\n", prev)) != std::string::npos) {
+      if (pos > prev)
+        tokens.push_back(line.substr(prev, pos-prev));
+      prev = pos + 1;
+    }
+    if (prev < line.length())
+      tokens.push_back(line.substr(prev, std::string::npos));
+
+    for (std::size_t j = 0; j < m; ++j) {
+      try {
+        std::size_t end;
+        model->sv_coef[j][i] = std::stod(tokens[j], &end);
+        if (end != tokens[j].length()) {
+          throw std::invalid_argument("incomplete conversion");
+        }
+      } catch (std::exception& e) {
+        std::cerr << "Error: " << e.what() << " in SV " << (i+1) << std::endl;
+        delete[] model->svs;
+        for (std::size_t k = 0; k < m; ++k) {
+          delete[] model->sv_coef[k];
+        }
+        delete[] model->sv_coef;
+        std::vector<std::string>(tokens).swap(tokens);
+        exit(EXIT_FAILURE);
+      }  // TODO try not to use exception
+    }
+
+    std::size_t elements = tokens.size() - m + 1;
+    model->svs[i] = new Node[elements];
+    prev = 0;
+    for (std::size_t j = 0; j < elements-1; ++j) {
+      pos = tokens[j+m].find_first_of(':');
+      try {
+        std::size_t end;
+
+        model->svs[i][j].index = std::stoi(tokens[j+m].substr(prev, pos-prev), &end);
+        if (end != (tokens[j+m].substr(prev, pos-prev)).length()) {
+          throw std::invalid_argument("incomplete conversion");
+        }
+        model->svs[i][j].value = std::stod(tokens[j+m].substr(pos+1), &end);
+        if (end != (tokens[j+m].substr(pos+1)).length()) {
+          throw std::invalid_argument("incomplete conversion");
+        }
+      } catch (std::exception& e) {
+        std::cerr << "Error: " << e.what() << " in line " << (i+1) << std::endl;
+        for (std::size_t k = 0; k < m; ++k) {
+          delete[] model->sv_coef[k];
+        }
+        delete[] model->sv_coef;
+        for (int k = 0; k < i+1; ++k) {
+          delete[] model->svs[k];
+        }
+        delete[] model->svs;
+        std::vector<std::string>(tokens).swap(tokens);
+        exit(EXIT_FAILURE);
+      }
+    }
+    model->svs[i][elements-1].index = -1;
+    model->svs[i][elements-1].value = 0;
+  }
+      break;
+    } else {
+      std::cerr << "Unknown text in svm_model file: " << cmd << std::endl;
+      FreeSVMModel(&model);
+      return NULL;
+    }
+  }
+  model->free_sv = 1;
+  return model;
+}
+
+void FreeSVMModelContent(SVMModel *model) {
+  if (model->free_sv && model->total_sv > 0 && model->svs != NULL) {
+    // LoadSVMModel allocates each SV with new Node[], so free them one by one
+    for (int i = 0; i < model->total_sv; ++i) {
+      delete[] model->svs[i];
+    }
+    delete[] model->svs;
+    model->svs = NULL;
+  }
+
+  if (model->sv_coef) {
+    for (int i = 0; i < model->num_classes-1; ++i)
+      delete[] model->sv_coef[i];
+  }
+
+  if (model->svs) {
+    delete[] model->svs;
+    model->svs = NULL;
+  }
+
+  if (model->sv_coef) {
+    delete[] model->sv_coef;
+    model->sv_coef = NULL;
+  }
+
+  if (model->rho) {
+    delete[] model->rho;
+    model->rho = NULL;
+  }
+
+  if (model->labels) {
+    delete[] model->labels;
+    model->labels = NULL;
+  }
+
+  if (model->sv_indices) {
+    delete[] model->sv_indices;
+    model->sv_indices = NULL;
+  }
+
+  if (model->num_svs) {
+    delete[] model->num_svs;
+    model->num_svs = NULL;
+  }
+}
+
+void FreeSVMModel(SVMModel** model) {
+  if (model != NULL && *model != NULL) {
+    FreeSVMModelContent(*model);
+    delete *model;
+    *model = NULL;
+  }
+
+  return;
+}
+
+void FreeSVMParam(SVMParameter* param) {
+  if (param->weight_labels) {
+    delete[] param->weight_labels;
+    param->weight_labels = NULL;
+  }
+  if (param->weights) {
+    delete[] param->weights;
+    param->weights = NULL;
+  }
+  delete param;
+  param = NULL;
+
+  return;
+}
+
+const char *CheckSVMParameter(const SVMParameter *param) {
+  int svm_type = param->svm_type;
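+  // Returns NULL when every check below passes; any other return value is a
+  // short static string describing the first invalid setting.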
if (svm_type != C_SVC && + svm_type != NU_SVC) + return "unknown svm type"; + + int kernel_type = param->kernel_type; + if (kernel_type != LINEAR && + kernel_type != POLY && + kernel_type != RBF && + kernel_type != SIGMOID && + kernel_type != PRECOMPUTED) + return "unknown kernel type"; + + if (param->gamma < 0) + return "gamma < 0"; + + if (param->degree < 0) + return "degree of polynomial kernel < 0"; + + if (param->cache_size <= 0) + return "cache_size <= 0"; + + if (param->eps <= 0) + return "eps <= 0"; + + if (svm_type == C_SVC) + if (param->C <= 0) + return "C <= 0"; + + if (svm_type == NU_SVC) + if (param->nu <= 0 || param->nu > 1) + return "nu <= 0 or nu > 1"; + + if (param->shrinking != 0 && + param->shrinking != 1) + return "shrinking != 0 and shrinking != 1"; + + return NULL; +} + +void InitSVMParam(struct SVMParameter *param) { + param->svm_type = C_SVC; + param->kernel_type = RBF; + param->degree = 3; + param->gamma = 0; // default 1/num_features + param->coef0 = 0; + param->nu = 0.5; + param->cache_size = 100; + param->C = 1; + param->eps = 1e-3; + param->shrinking = 1; + param->num_weights = 0; + param->weight_labels = NULL; + param->weights = NULL; + SetPrintCout(); + + return; +} + +void SetPrintNull() { + PrintString = &PrintNull; +} + +void SetPrintCout() { + PrintString = &PrintCout; +} \ No newline at end of file diff --git a/svm.h b/svm.h new file mode 100644 index 0000000..be0a4ca --- /dev/null +++ b/svm.h @@ -0,0 +1,56 @@ +#ifndef LIBCP_SVM_H_ +#define LIBCP_SVM_H_ + +#include "utilities.h" + +enum { C_SVC, NU_SVC }; // svm_type +enum { LINEAR, POLY, RBF, SIGMOID, PRECOMPUTED }; // kernel_type + +struct SVMParameter { + int svm_type; + int kernel_type; + int degree; // for poly + double gamma; // for poly/rbf/sigmoid + double coef0; // for poly/sigmoid + double cache_size; // in MB + double eps; // stopping criteria + double C; // for C_SVC + int num_weights; // for C_SVC + int *weight_labels; // for C_SVC + double *weights; // for C_SVC + double nu; // for NU_SVC + int shrinking; // use the shrinking heuristics +}; + +struct SVMModel { + struct SVMParameter param; + int num_ex; + int num_classes; // number of classes (k) + int total_sv; // total #SV + struct Node **svs; // SVs (SV[total_sv]) + double **sv_coef; // coefficients for SVs in decision functions (sv_coef[k-1][total_sv]) + double *rho; // constants in decision functions (rho[k*(k-1)/2]) + int *sv_indices; // sv_indices[0,...,nSV-1] are values in [1,...,num_traning_data] to indicate SVs in the training set + int *labels; // label of each class (label[k]) + int *num_svs; // number of SVs for each class (nSV[k]) + // nSV[0] + nSV[1] + ... 
+ nSV[k-1] = total_sv
+  int free_sv;  // 1 if SVMModel is created by LoadSVMModel
+                // 0 if SVMModel is created by TrainSVM
+};
+
+SVMModel *TrainSVM(const struct Problem *prob, const struct SVMParameter *param);
+double PredictSVMValues(const struct SVMModel *model, const struct Node *x, double *decision_values);
+double PredictSVM(const struct SVMModel *model, const struct Node *x);
+
+int SaveSVMModel(std::ofstream &model_file, const struct SVMModel *model);
+SVMModel *LoadSVMModel(std::ifstream &model_file);
+void FreeSVMModel(struct SVMModel **model);
+
+void FreeSVMParam(struct SVMParameter *param);
+void InitSVMParam(struct SVMParameter *param);
+const char *CheckSVMParameter(const struct SVMParameter *param);
+
+void SetPrintNull();
+void SetPrintCout();
+
+#endif  // LIBCP_SVM_H_
\ No newline at end of file
diff --git a/utilities.cpp b/utilities.cpp
new file mode 100644
index 0000000..1ddfee2
--- /dev/null
+++ b/utilities.cpp
@@ -0,0 +1,208 @@
+#include "utilities.h"
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+Problem *ReadProblem(const char *file_name) {
+  std::ifstream input_file(file_name);
+  if (!input_file.is_open()) {
+    std::cerr << "Unable to open input file: " << file_name << std::endl;
+    exit(EXIT_FAILURE);
+  }
+
+  int max_index, current_max_index;
+  std::string line;
+  Problem *problem = new Problem;
+  problem->num_ex = 0;
+
+  while (std::getline(input_file, line)) {
+    ++problem->num_ex;
+  }
+  input_file.clear();
+  input_file.seekg(0);
+
+  problem->y = new double[problem->num_ex];
+  problem->x = new Node*[problem->num_ex];
+
+  max_index = 0;
+  for (int i = 0; i < problem->num_ex; ++i) {
+    std::vector<std::string> tokens;
+    std::size_t prev = 0, pos;
+
+    current_max_index = -1;
+    std::getline(input_file, line);
+    while ((pos = line.find_first_of(" \t\n", prev)) != std::string::npos) {
+      if (pos > prev) {
+        tokens.push_back(line.substr(prev, pos-prev));
+      }
+      prev = pos + 1;
+    }
+    if (prev < line.length()) {
+      tokens.push_back(line.substr(prev, std::string::npos));
+    }
+
+    try {
+      std::size_t end;
+
+      problem->y[i] = std::stod(tokens[0], &end);
+      if (end != tokens[0].length()) {
+        throw std::invalid_argument("incomplete conversion");
+      }
+    } catch (std::exception& e) {
+      std::cerr << "Error: " << e.what() << " in line " << (i+1) << std::endl;
+      delete[] problem->y;
+      for (int j = 0; j < i; ++j) {
+        delete[] problem->x[j];
+      }
+      delete[] problem->x;
+      delete problem;
+      std::vector<std::string>(tokens).swap(tokens);
+      input_file.close();
+      exit(EXIT_FAILURE);
+    }  // TODO try not to use exception
+
+    std::size_t elements = tokens.size();
+    problem->x[i] = new Node[elements];
+    prev = 0;
+    for (std::size_t j = 0; j < elements-1; ++j) {
+      pos = tokens[j+1].find_first_of(':');
+      try {
+        std::size_t end;
+        problem->x[i][j].index = std::stoi(tokens[j+1].substr(prev, pos-prev), &end);
+        if (end != (tokens[j+1].substr(prev, pos-prev)).length()) {
+          throw std::invalid_argument("incomplete conversion");
+        }
+        problem->x[i][j].value = std::stod(tokens[j+1].substr(pos+1), &end);
+        if (end != (tokens[j+1].substr(pos+1)).length()) {
+          throw std::invalid_argument("incomplete conversion");
+        }
+      } catch (std::exception& e) {
+        std::cerr << "Error: " << e.what() << " in line " << (i+1) << std::endl;
+        delete[] problem->y;
+        for (int k = 0; k < i+1; ++k) {
+          delete[] problem->x[k];
+        }
+        delete[] problem->x;
+        delete problem;
+        std::vector<std::string>(tokens).swap(tokens);
+        input_file.close();
+        exit(EXIT_FAILURE);
+      }
+      current_max_index = problem->x[i][j].index;
+    }
+
+    if (current_max_index >
max_index) { + max_index = current_max_index; + } + problem->x[i][elements-1].index = -1; + problem->x[i][elements-1].value = 0; + } + problem->max_index = max_index; + + // TODO add precomputed kernel check + + input_file.close(); + + return problem; +} + +void FreeProblem(struct Problem *problem) { + if (problem->y != NULL) { + delete[] problem->y; + } + + for (int i = 0; i < problem->num_ex; ++i) { + if (problem->x[i] != NULL) { + delete[] problem->x[i]; + } + } + if (problem->x != NULL) { + delete[] problem->x; + } + if (problem != NULL) { + delete problem; + } + + return; +} + +// label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data +// perm, length l, must be allocated before calling this subroutine +void GroupClasses(const Problem *prob, int *num_classes_ret, int **labels_ret, int **start_ret, int **count_ret, int *perm) { + int num_ex = prob->num_ex; + int max_num_classes = 16; + int num_classes = 0; + int *labels = new int[max_num_classes]; + int *count = new int[max_num_classes]; + int *data_labels = new int[num_ex]; + + for (int i = 0; i < num_ex; ++i) { + int this_label = static_cast(prob->y[i]); + int j; + for (j = 0; j < num_classes; ++j) { + if (this_label == labels[j]) { + ++count[j]; + break; + } + } + data_labels[i] = j; + if (j == num_classes) { + if (num_classes == max_num_classes) { + max_num_classes *= 2; + labels = (int *)realloc(labels, (unsigned long)max_num_classes*sizeof(int)); + count = (int *)realloc(count, (unsigned long)max_num_classes*sizeof(int)); + } + labels[num_classes] = this_label; + count[num_classes] = 1; + ++num_classes; + } + } + + // + // Labels are ordered by their first occurrence in the training set. + // However, for two-class sets with -1/+1 labels and -1 appears first, + // we swap labels to ensure that internally the binary SVM has positive data corresponding to the +1 instances. 
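+  // e.g. if the first example carries label -1, the -1 examples would map to
+  // internal class 0, which is the +1 side of each binary subproblem; the
+  // swap below makes the +1 examples class 0 instead.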
+  //
+  if (num_classes == 2 && labels[0] == -1 && labels[1] == 1) {
+    std::swap(labels[0], labels[1]);
+    std::swap(count[0], count[1]);
+    for (int i = 0; i < num_ex; ++i) {
+      if (data_labels[i] == 0) {
+        data_labels[i] = 1;
+      } else {
+        data_labels[i] = 0;
+      }
+    }
+  }
+
+  int *start = new int[num_classes];
+  start[0] = 0;
+  for (int i = 1; i < num_classes; ++i) {
+    start[i] = start[i-1] + count[i-1];
+  }
+  for (int i = 0; i < num_ex; ++i) {
+    perm[start[data_labels[i]]] = i;
+    ++start[data_labels[i]];
+  }
+  start[0] = 0;
+  for (int i = 1; i < num_classes; ++i) {
+    start[i] = start[i-1] + count[i-1];
+  }
+
+  *num_classes_ret = num_classes;
+  *labels_ret = labels;
+  *start_ret = start;
+  *count_ret = count;
+  delete[] data_labels;
+
+  return;
+}
\ No newline at end of file
diff --git a/utilities.h b/utilities.h
new file mode 100644
index 0000000..94fa1c1
--- /dev/null
+++ b/utilities.h
@@ -0,0 +1,92 @@
+#ifndef LIBCP_UTILITIES_H_
+#define LIBCP_UTILITIES_H_
+
+#include <cmath>
+#include <iostream>
+#include <map>
+#include <vector>
+
+const double kInf = HUGE_VAL;
+const double kTau = 1e-12;
+const double kEpsilon = 1e-15;
+
+struct Node {
+  int index;
+  double value;
+};
+
+struct Problem {
+  int num_ex;  // number of examples
+  int max_index;
+  double *y;
+  struct Node **x;
+};
+
+template <typename T>
+T FindMostFrequent(T *array, int size) {
+  std::vector<T> v(array, array+size);
+  std::map<T, int> frequency_map;
+  int max_frequency = 0;
+  T most_frequent_element = T();
+
+  for (std::size_t i = 0; i < v.size(); ++i) {
+    if (v[i] != -1) {
+      int cur_frequency = ++frequency_map[v[i]];
+      if (cur_frequency > max_frequency) {
+        max_frequency = cur_frequency;
+        most_frequent_element = v[i];
+      }
+    }
+  }
+
+  return most_frequent_element;
+}
+
+template <typename T, typename S>
+static inline void clone(T *&dest, S *src, int size) {
+  dest = new T[size];
+  if (sizeof(T) < sizeof(S))
+    std::cerr << "WARNING: destination type is smaller than source type, data will be truncated."
<< std::endl; + std::copy(src, src+size, dest); + + return; +} + +template +void QuickSortIndex(T array[], size_t index[], size_t left, size_t right) { + size_t i = left, j = right; + size_t p = left + (right-left)/2; + size_t ind = index[p]; + T pivot = array[p]; + for ( ; i < j; ) { + while ((i < p) && (pivot >= array[i])) + ++i; + if (i < p) { + array[p] = array[i]; + index[p] = index[i]; + p = i; + } + while ((j > p) && (array[j] >= pivot)) + --j; + if (j > p) { + array[p] = array[j]; + index[p] = index[j]; + p = j; + } + } + array[p] = pivot; + index[p] = ind; + if (p - left > 1) + QuickSortIndex(array, index, left, p - 1); + if (right - p > 1) + QuickSortIndex(array, index, p + 1, right); + + return; +} + +Problem *ReadProblem(const char *file_name); +void FreeProblem(struct Problem *problem); +void GroupClasses(const Problem *prob, int *num_classes_ret, int **labels_ret, int **start_ret, int **count_ret, int *perm); + +#endif // LIBCP_UTILITIES_H_ \ No newline at end of file From 4265c66c05619020f738064e8f97cc0d9f5e3d07 Mon Sep 17 00:00:00 2001 From: fated Date: Wed, 29 Oct 2014 00:18:25 +0000 Subject: [PATCH 2/8] add makefile --- Makefile | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 Makefile diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..d78d1cc --- /dev/null +++ b/Makefile @@ -0,0 +1,30 @@ +CXX ?= g++ +CFLAGS = -Wall -Wconversion -O3 -fPIC +SHVER = 2 +OS = $(shell uname) + +all: cp-offline cp-online cp-cv + +cp-offline: cp-offline.cpp utilities.o knn.o svm.o cp.o + $(CXX) $(CFLAGS) cp-offline.cpp utilities.o knn.o svm.o cp.o -o cp-offline -lm + +cp-online: cp-online.cpp utilities.o knn.o svm.o cp.o + $(CXX) $(CFLAGS) cp-online.cpp utilities.o knn.o svm.o cp.o -o cp-online -lm + +cp-cv: cp-cv.cpp utilities.o knn.o svm.o cp.o + $(CXX) $(CFLAGS) cp-cv.cpp utilities.o knn.o svm.o cp.o -o cp-cv -lm + +utilities.o: utilities.cpp utilities.h + $(CXX) $(CFLAGS) -c utilities.cpp + +svm.o: knn.cpp knn.h + $(CXX) $(CFLAGS) -c knn.cpp + +svm.o: svm.cpp svm.h + $(CXX) $(CFLAGS) -c svm.cpp + +cp.o: cp.cpp cp.h + $(CXX) $(CFLAGS) -c cp.cpp + +clean: + rm -f utilities.o knn.o svm.o cp.o cp-offline cp-online cp-cv From 615648b98f969b12569b0ce51eda478ac0dcc5ca Mon Sep 17 00:00:00 2001 From: fated Date: Thu, 26 Feb 2015 13:25:52 +0000 Subject: [PATCH 3/8] simple offline cp for knn added --- Makefile | 17 +- cp-offline.cpp | 204 ++--- cp.cpp | 839 +++----------------- cp.h | 16 +- knn.cpp | 145 +++- knn.h | 3 + svm.cpp | 2017 ------------------------------------------------ svm.h | 56 -- utilities.cpp | 93 ++- utilities.h | 8 + 10 files changed, 372 insertions(+), 3026 deletions(-) delete mode 100644 svm.cpp delete mode 100644 svm.h diff --git a/Makefile b/Makefile index d78d1cc..2689554 100644 --- a/Makefile +++ b/Makefile @@ -3,16 +3,10 @@ CFLAGS = -Wall -Wconversion -O3 -fPIC SHVER = 2 OS = $(shell uname) -all: cp-offline cp-online cp-cv +all: cp-offline -cp-offline: cp-offline.cpp utilities.o knn.o svm.o cp.o - $(CXX) $(CFLAGS) cp-offline.cpp utilities.o knn.o svm.o cp.o -o cp-offline -lm - -cp-online: cp-online.cpp utilities.o knn.o svm.o cp.o - $(CXX) $(CFLAGS) cp-online.cpp utilities.o knn.o svm.o cp.o -o cp-online -lm - -cp-cv: cp-cv.cpp utilities.o knn.o svm.o cp.o - $(CXX) $(CFLAGS) cp-cv.cpp utilities.o knn.o svm.o cp.o -o cp-cv -lm +cp-offline: cp-offline.cpp utilities.o knn.o cp.o + $(CXX) $(CFLAGS) cp-offline.cpp utilities.o knn.o cp.o -o cp-offline -lm utilities.o: utilities.cpp utilities.h $(CXX) $(CFLAGS) -c 
utilities.cpp @@ -20,11 +14,8 @@ utilities.o: utilities.cpp utilities.h svm.o: knn.cpp knn.h $(CXX) $(CFLAGS) -c knn.cpp -svm.o: svm.cpp svm.h - $(CXX) $(CFLAGS) -c svm.cpp - cp.o: cp.cpp cp.h $(CXX) $(CFLAGS) -c cp.cpp clean: - rm -f utilities.o knn.o svm.o cp.o cp-offline cp-online cp-cv + rm -f utilities.o knn.o cp.o cp-offline diff --git a/cp-offline.cpp b/cp-offline.cpp index 604155e..f412895 100644 --- a/cp-offline.cpp +++ b/cp-offline.cpp @@ -3,6 +3,7 @@ #include #include #include +#include void ExitWithHelp(); void ParseCommandLine(int argc, char *argv[], char *train_file_name, char *test_file_name, char *output_file_name, char *model_file_name); @@ -16,8 +17,8 @@ int main(int argc, char *argv[]) { char model_file_name[256]; struct Problem *train, *test; struct Model *model; - int num_correct = 0; - double avg_lower_bound = 0, avg_upper_bound = 0, avg_brier = 0, avg_logloss = 0; + int num_correct = 0, num_empty = 0, num_multi = 0, num_incl = 0; + double avg_conf = 0, avg_cred = 0; const char *error_message; ParseCommandLine(argc, argv, train_file_name, test_file_name, output_file_name, model_file_name); @@ -31,13 +32,6 @@ int main(int argc, char *argv[]) { train = ReadProblem(train_file_name); test = ReadProblem(test_file_name); - if ((param.measure_type == SVM_EL || - param.measure_type == SVM_ES || - param.measure_type == SVM_KM) && - param.svm_param->gamma == 0) { - param.svm_param->gamma = 1.0 / train->max_index; - } - std::ofstream output_file(output_file_name); if (!output_file.is_open()) { std::cerr << "Unable to open output file: " << output_file_name << std::endl; @@ -61,59 +55,54 @@ int main(int argc, char *argv[]) { } } - if (param.probability == 1) { - output_file << " "; - for (int i = 0; i < model->num_classes; ++i) { - output_file << model->labels[i] << " "; - } - output_file << '\n'; - } - for (int i = 0; i < test->num_ex; ++i) { - double predict_label, lower_bound, upper_bound, logloss, brier = 0, *avg_prob = NULL; + double conf, cred; + std::vector predict_label; - predict_label = PredictCP(train, model, test->x[i], lower_bound, upper_bound, &avg_prob); + predict_label = PredictCP(train, model, test->x[i], conf, cred); - for (int j = 0; j < model->num_classes; ++j) { - if (model->labels[j] == test->y[i]) { - brier += (1-avg_prob[j])*(1-avg_prob[j]); - double tmp = std::fmax(std::fmin(avg_prob[j], 1-kEpsilon), kEpsilon); - logloss = - std::log(tmp); - } else { - brier += avg_prob[j]*avg_prob[j]; - } + avg_conf += conf; + avg_cred += cred; + + output_file << std::resetiosflags(std::ios::fixed) << test->y[i] << ' ' << predict_label[0] << ' ' + << std::setiosflags(std::ios::fixed) << conf << ' ' << cred; + if (predict_label[0] == test->y[i]) { + ++num_correct; } - avg_lower_bound += lower_bound; - avg_upper_bound += upper_bound; - avg_brier += brier; - avg_logloss += logloss; - output_file << std::resetiosflags(std::ios::fixed) << test->y[i] << ' ' << predict_label << ' ' - << std::setiosflags(std::ios::fixed) << lower_bound << ' ' << upper_bound; - if (param.probability == 1) { - for (int j = 0; j < model->num_classes; ++j) { - output_file << ' ' << avg_prob[j]; + if (predict_label.size() == 1) { + ++num_empty; + output_file << " Empty\n"; + } else { + output_file << " set:"; + for (size_t j = 1; j < predict_label.size(); ++j) { + output_file << ' ' << predict_label[j]; + if (predict_label[j] == test->y[i]) { + ++num_incl; + } + } + if (predict_label.size() > 2) { + ++num_multi; + output_file << " Multi\n"; + } else { + output_file << " Single\n"; } } - output_file 
<< '\n'; - if (predict_label == test->y[i]) { - ++num_correct; - } - delete[] avg_prob; + std::vector().swap(predict_label); } - avg_lower_bound /= test->num_ex; - avg_upper_bound /= test->num_ex; - avg_brier /= test->num_ex; - avg_logloss /= test->num_ex; + avg_conf /= test->num_ex; + avg_cred /= test->num_ex; std::chrono::time_point end_time = std::chrono::high_resolution_clock::now(); - std::cout << "Accuracy: " << 100.0*num_correct/test->num_ex << '%' + std::cout << "Simple Accuracy: " << 100.0*num_correct/test->num_ex << '%' << " (" << num_correct << '/' << test->num_ex << ") " - << "Probabilities: [" << std::fixed << std::setprecision(4) << 100*avg_lower_bound << "%, " - << 100*avg_upper_bound << "%] " - << "Brier Score: " << avg_brier << ' ' - << "Logarithmic Loss: " << avg_logloss << '\n'; + << "Mean Confidence: " << std::fixed << std::setprecision(4) << 100*avg_conf << "%, " + << "Mean Credibility: " << 100*avg_cred << "% " << '\n'; + std::cout << "Accuracy: " << 100.0*num_incl/test->num_ex << '%' + << " (" << num_incl << '/' << test->num_ex << ") " + << "Multi Prediction: " << std::fixed << std::setprecision(4) << 100.0*num_multi/test->num_ex << "%, " + << "Empty Prediction: " << 100.0*num_empty/test->num_ex << "% " << '\n'; output_file.close(); std::cout << "Time cost: " << std::chrono::duration_cast(end_time - start_time).count()/1000.0 << " s\n"; @@ -131,33 +120,10 @@ void ExitWithHelp() { << "options:\n" << " -t non-conformity measure : set type of NCM (default 0)\n" << " 0 -- k-nearest neighbors (KNN)\n" - << " 1 -- support vector machine with equal length (SVM_EL)\n" - << " 2 -- support vector machine with equal size (SVM_ES)\n" - << " 3 -- support vector machine with k-means clustering (SVM_KM)\n" << " -k num_neighbors : set number of neighbors in kNN (default 1)\n" << " -s model_file_name : save model\n" << " -l model_file_name : load model\n" - << " -b probability estimates : whether to output probability estimates for all labels, 0 or 1 (default 0)\n" - << " -p : prefix of options to set parameters for SVM\n" - << " -ps svm_type : set type of SVM (default 0)\n" - << " 0 -- C-SVC (multi-class classification)\n" - << " 1 -- nu-SVC (multi-class classification)\n" - << " -pt kernel_type : set type of kernel function (default 2)\n" - << " 0 -- linear: u'*v\n" - << " 1 -- polynomial: (gamma*u'*v + coef0)^degree\n" - << " 2 -- radial basis function: exp(-gamma*|u-v|^2)\n" - << " 3 -- sigmoid: tanh(gamma*u'*v + coef0)\n" - << " 4 -- precomputed kernel (kernel values in training_set_file)\n" - << " -pd degree : set degree in kernel function (default 3)\n" - << " -pg gamma : set gamma in kernel function (default 1/num_features)\n" - << " -pr coef0 : set coef0 in kernel function (default 0)\n" - << " -pc cost : set the parameter C of C-SVC (default 1)\n" - << " -pn nu : set the parameter nu of nu-SVC (default 0.5)\n" - << " -pm cachesize : set cache memory size in MB (default 100)\n" - << " -pe epsilon : set tolerance of termination criterion (default 0.001)\n" - << " -ph shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1)\n" - << " -pwi weights : set the parameter C of class i to weight*C, for C-SVC (default 1)\n" - << " -pq : quiet mode (no outputs)\n"; + << " -e epsilon : set significance level (default 0.05)\n"; exit(EXIT_FAILURE); } @@ -166,9 +132,8 @@ void ParseCommandLine(int argc, char **argv, char *train_file_name, char *test_f param.measure_type = KNN; param.save_model = 0; param.load_model = 0; - param.probability = 0; + param.epsilon = 0.05; 
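+  // epsilon is the significance level for conformal prediction: every label
+  // whose p-value exceeds epsilon is kept in the prediction set (see
+  // PredictCP), targeting an error rate of at most epsilon under the usual
+  // exchangeability assumption.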
param.knn_param = new KNNParameter; - param.svm_param = NULL; InitKNNParam(param.knn_param); for (i = 1; i < argc; ++i) { @@ -179,14 +144,6 @@ void ParseCommandLine(int argc, char **argv, char *train_file_name, char *test_f case 't': { ++i; param.measure_type = std::atoi(argv[i]); - if (param.measure_type == SVM_EL || - param.measure_type == SVM_ES || - param.measure_type == SVM_KM) { - FreeKNNParam(param.knn_param); - delete param.knn_param; - param.svm_param = new SVMParameter; - InitSVMParam(param.svm_param); - } break; } case 'k': { @@ -206,84 +163,9 @@ void ParseCommandLine(int argc, char **argv, char *train_file_name, char *test_f std::strcpy(model_file_name, argv[i]); break; } - case 'b': { + case 'e': { ++i; - param.probability = std::atoi(argv[i]); - break; - } - case 'p': { - if (argv[i][2]) { - switch (argv[i][2]) { - case 's': { - ++i; - param.svm_param->svm_type = std::atoi(argv[i]); - break; - } - case 't': { - ++i; - param.svm_param->kernel_type = std::atoi(argv[i]); - break; - } - case 'd': { - ++i; - param.svm_param->degree = std::atoi(argv[i]); - break; - } - case 'g': { - ++i; - param.svm_param->gamma = std::atof(argv[i]); - break; - } - case 'r': { - ++i; - param.svm_param->coef0 = std::atof(argv[i]); - break; - } - case 'n': { - ++i; - param.svm_param->nu = std::atof(argv[i]); - break; - } - case 'm': { - ++i; - param.svm_param->cache_size = std::atof(argv[i]); - break; - } - case 'c': { - ++i; - param.svm_param->C = std::atof(argv[i]); - break; - } - case 'e': { - ++i; - param.svm_param->eps = std::atof(argv[i]); - break; - } - case 'h': { - ++i; - param.svm_param->shrinking = std::atoi(argv[i]); - break; - } - case 'q': { - SetPrintNull(); - break; - } - case 'w': { // weights [option]: '-w1' means weight of '1' - ++i; - ++param.svm_param->num_weights; - param.svm_param->weight_labels = (int *)realloc(param.svm_param->weight_labels, sizeof(int)*static_cast(param.svm_param->num_weights)); - param.svm_param->weights = (double *)realloc(param.svm_param->weights, sizeof(double)*static_cast(param.svm_param->num_weights)); - param.svm_param->weight_labels[param.svm_param->num_weights-1] = std::atoi(&argv[i-1][3]); // get and convert 'i' to int - param.svm_param->weights[param.svm_param->num_weights-1] = std::atof(argv[i]); - break; - // TODO: change realloc function - } - default: { - std::cerr << "Unknown SVM option: " << argv[i] << std::endl; - ExitWithHelp(); - } - } - } + param.epsilon = std::atof(argv[i]); break; } default: { diff --git a/cp.cpp b/cp.cpp index 51c970f..7c14077 100644 --- a/cp.cpp +++ b/cp.cpp @@ -4,703 +4,148 @@ #include #include +static double CalcAlpha(double *min_same, double *min_diff, int num_neighbors) { + double alpha; + double sum_same = 0, sum_diff = 0; + + for (int i = 0; i < num_neighbors; ++i) { + sum_same += min_same[i]; + sum_diff += min_diff[i]; + } + if (sum_diff == 0.0) { + if (sum_same == 0.0) { + alpha = 0.0; + } else { + alpha = kInf; + } + } else { + alpha = sum_same / sum_diff; + } + + return alpha; +} + Model *TrainCP(const struct Problem *train, const struct Parameter *param) { Model *model = new Model; model->param = *param; int num_ex = train->num_ex; - if (param->taxonomy_type == KNN) { - int num_neighbors = param->knn_param->num_neighbors; - - int *categories = new int[num_ex]; - for (int i = 0; i < num_ex; ++i) { - categories[i] = -1; - } - + if (param->measure_type == KNN) { model->knn_model = TrainKNN(train, param->knn_param); + int num_neighbors = param->knn_param->num_neighbors; int num_classes = 
model->knn_model->num_classes; - int num_categories = param->num_categories; - if (num_categories != num_classes) { - std::cerr << "WARNING: number of categories should be the same as number of classes in KNN. See README for details." << std::endl; - num_categories = num_classes; - } + double *alpha = new double[num_ex]; for (int i = 0; i < num_ex; ++i) { - categories[i] = FindMostFrequent(model->knn_model->label_neighbors[i], num_neighbors); + alpha[i] = CalcAlpha(model->knn_model->min_same[i], model->knn_model->min_diff[i], num_neighbors); } model->num_classes = num_classes; model->num_ex = num_ex; - model->num_categories = num_categories; - model->categories = categories; + model->alpha = alpha; clone(model->labels, model->knn_model->labels, num_classes); } - if (param->taxonomy_type == SVM_EL || - param->taxonomy_type == SVM_ES || - param->taxonomy_type == SVM_KM) { - int num_categories = param->num_categories; - int *categories = new int[num_ex]; - double *combined_decision_values = new double[num_ex]; - - for (int i = 0; i < num_ex; ++i) { - categories[i] = -1; - combined_decision_values[i] = 0; - } - - model->svm_model = TrainSVM(train, param->svm_param); - - int num_classes = model->svm_model->num_classes; - if (num_classes == 1) { - std::cerr << "WARNING: training set only has one class. See README for details." << std::endl; - } - if (num_classes > 2 && num_categories < num_classes) { - std::cerr << "WARNING: number of categories should be the same as number of classes in Multi-Class case. See README for details." << std::endl; - num_categories = num_classes; - } - - for (int i = 0; i < num_ex; ++i) { - double *decision_values = new double[num_classes*(num_classes-1)/2]; - int label = 0; - double predict_label = PredictSVMValues(model->svm_model, train->x[i], decision_values); - - for (int j = 0; j < num_classes; ++j) { - if (predict_label == model->svm_model->labels[j]) { - label = j; - break; - } - } - combined_decision_values[i] = CalcCombinedDecisionValues(decision_values, num_classes, label); - delete[] decision_values; - } - - if (param->taxonomy_type == SVM_EL) { - for (int i = 0; i < num_ex; ++i) { - categories[i] = GetEqualLengthCategory(combined_decision_values[i], num_categories, num_classes); - } - } - if (param->taxonomy_type == SVM_ES) { - if (num_classes == 1) { - for (int i = 0; i < num_ex; ++i) { - categories[i] = 0; - } - model->points = new double[num_categories]; - for (int i = 0; i < num_categories; ++i) { - model->points[i] = 0; - } - } else { - double *points; - points = GetEqualSizeCategory(combined_decision_values, categories, num_categories, num_ex); - clone(model->points, points, num_categories); - delete[] points; - } - } - if (param->taxonomy_type == SVM_KM) { - double *points; - points = GetKMeansCategory(combined_decision_values, categories, num_categories, num_ex, kEpsilon); - clone(model->points, points, num_categories); - delete[] points; - } - delete[] combined_decision_values; - model->num_classes = num_classes; - model->num_ex = num_ex; - model->categories = categories; - model->num_categories = num_categories; - clone(model->labels, model->svm_model->labels, num_classes); - } - return model; } -double PredictCP(const struct Problem *train, const struct Model *model, const struct Node *x, double &lower, double &upper, double **avg_prob) { +std::vector PredictCP(const struct Problem *train, const struct Model *model, const struct Node *x, double &conf, double &cred) { const Parameter& param = model->param; int num_ex = model->num_ex; int 
num_classes = model->num_classes; - int num_categories = model->num_categories; int *labels = model->labels; - double predict_label; - int **f_matrix = new int*[num_classes]; - int *alter_labels = new int[num_ex]; - - for (int i = 0; i < num_classes; ++i) { - for (int j = 0; j < num_ex; ++j) { - if (labels[i] == train->y[j]) { - alter_labels[j] = i; - } - } - } + std::vector predict_label; + int *p_values = new int[num_classes]; - if (param.taxonomy_type == KNN) { + if (param.measure_type == KNN) { int num_neighbors = param.knn_param->num_neighbors; + for (int i = 0; i < num_classes; ++i) { - int *categories = new int[num_ex+1]; - double **dist_neighbors = new double*[num_ex+1]; - int **label_neighbors = new int*[num_ex+1]; - f_matrix[i] = new int[num_classes]; - for (int j = 0; j < num_classes; ++j) { - f_matrix[i][j] = 0; - } + int y = labels[i]; + p_values[i] = 0; + + double **min_same = new double*[num_ex+1]; + double **min_diff = new double*[num_ex+1]; + double *alpha = new double[num_ex+1]; for (int j = 0; j < num_ex; ++j) { - clone(dist_neighbors[j], model->knn_model->dist_neighbors[j], num_neighbors); - clone(label_neighbors[j], model->knn_model->label_neighbors[j], num_neighbors); - categories[j] = model->categories[j]; + clone(min_same[j], model->knn_model->min_same[j], num_neighbors); + clone(min_diff[j], model->knn_model->min_diff[j], num_neighbors); + alpha[j] = model->alpha[j]; } - dist_neighbors[num_ex] = new double[num_neighbors]; - label_neighbors[num_ex] = new int[num_neighbors]; + min_same[num_ex] = new double[num_neighbors]; + min_diff[num_ex] = new double[num_neighbors]; for (int j = 0; j < num_neighbors; ++j) { - dist_neighbors[num_ex][j] = kInf; - label_neighbors[num_ex][j] = -1; + min_same[num_ex][j] = kInf; + min_diff[num_ex][j] = kInf; } - categories[num_ex] = -1; + alpha[num_ex] = 0; for (int j = 0; j < num_ex; ++j) { double dist = CalcDist(train->x[j], x); - int index; - index = CompareDist(dist_neighbors[j], dist, num_neighbors); - if (index < num_neighbors) { - InsertLabel(label_neighbors[j], i, num_neighbors, index); - } - index = CompareDist(dist_neighbors[num_ex], dist, num_neighbors); - if (index < num_neighbors) { - InsertLabel(label_neighbors[num_ex], alter_labels[j], num_neighbors, index); + + if (train->y[j] == y) { + int index = CompareDist(min_same[j], dist, num_neighbors); + if (index < num_neighbors) { + alpha[j] = CalcAlpha(min_same[j], min_diff[j], num_neighbors); + } + CompareDist(min_same[num_ex], dist, num_neighbors); + } else { + int index = CompareDist(min_diff[j], dist, num_neighbors); + if (index < num_neighbors) { + alpha[j] = CalcAlpha(min_same[j], min_diff[j], num_neighbors); + } + CompareDist(min_diff[num_ex], dist, num_neighbors); } } + alpha[num_ex] = CalcAlpha(min_same[num_ex], min_diff[num_ex], num_neighbors); for (int j = 0; j < num_ex+1; ++j) { - categories[j] = FindMostFrequent(label_neighbors[j], num_neighbors); - } - - for (int j = 0; j < num_ex; ++j) { - if (categories[j] == categories[num_ex]) { - ++f_matrix[i][alter_labels[j]]; + if (alpha[j] >= alpha[num_ex]) { + ++p_values[i]; } } - f_matrix[i][i]++; for (int j = 0; j < num_ex+1; ++j) { - delete[] dist_neighbors[j]; - delete[] label_neighbors[j]; - } - - delete[] dist_neighbors; - delete[] label_neighbors; - delete[] categories; - } - } - - if (param.taxonomy_type == SVM_EL || - param.taxonomy_type == SVM_ES || - param.taxonomy_type == SVM_KM) { - for (int i = 0; i < num_classes; ++i) { - int *categories = new int[num_ex+1]; - f_matrix[i] = new int[num_classes]; - for 

       for (int j = 0; j < num_ex+1; ++j) {
-        categories[j] = FindMostFrequent(label_neighbors[j], num_neighbors);
-      }
-
-      for (int j = 0; j < num_ex; ++j) {
-        if (categories[j] == categories[num_ex]) {
-          ++f_matrix[i][alter_labels[j]];
+        if (alpha[j] >= alpha[num_ex]) {
+          ++p_values[i];
         }
       }
-      f_matrix[i][i]++;

       for (int j = 0; j < num_ex+1; ++j) {
-        delete[] dist_neighbors[j];
-        delete[] label_neighbors[j];
-      }
-
-      delete[] dist_neighbors;
-      delete[] label_neighbors;
-      delete[] categories;
-    }
-  }
-
-  if (param.taxonomy_type == SVM_EL ||
-      param.taxonomy_type == SVM_ES ||
-      param.taxonomy_type == SVM_KM) {
-    for (int i = 0; i < num_classes; ++i) {
-      int *categories = new int[num_ex+1];
-      f_matrix[i] = new int[num_classes];
-      for (int j = 0; j < num_classes; ++j) {
-        f_matrix[i][j] = 0;
-      }
-
-      for (int j = 0; j < num_ex; ++j) {
-        categories[j] = model->categories[j];
-      }
-      categories[num_ex] = -1;
-
-      double *decision_values = new double[num_classes*(num_classes-1)/2];
-      int label = 0;
-      double predict_label = PredictSVMValues(model->svm_model, x, decision_values);
-      for (int j = 0; j < num_classes; ++j) {
-        if (predict_label == labels[j]) {
-          label = j;
-          break;
-        }
-      }
-      double combined_decision_values = CalcCombinedDecisionValues(decision_values, num_classes, label);
-      if (param.taxonomy_type == SVM_EL) {
-        categories[num_ex] = GetEqualLengthCategory(combined_decision_values, num_categories, num_classes);
-      }
-      if (param.taxonomy_type == SVM_ES) {
-        if (num_classes == 1) {
-          categories[num_ex] = 0;
-        } else {
-          int j;
-          for (j = 0; j < num_categories; ++j) {
-            if (combined_decision_values <= model->points[j]) {
-              categories[num_ex] = j;
-              break;
-            }
-          }
-          if (j == num_categories) {
-            categories[num_ex] = num_categories - 1;
-          }
-        }
-      }
-      if (param.taxonomy_type == SVM_KM) {
-        categories[num_ex] = AssignCluster(num_categories, combined_decision_values, model->points);
+        delete[] min_same[j];
+        delete[] min_diff[j];
       }
-      delete[] decision_values;

-      for (int j = 0; j < num_ex; ++j) {
-        if (categories[j] == categories[num_ex]) {
-          ++f_matrix[i][alter_labels[j]];
-        }
-      }
-      f_matrix[i][i]++;
-
-      delete[] categories;
-    }
-  }
-
-  double **matrix = new double*[num_classes];
-  for (int i = 0; i < num_classes; ++i) {
-    matrix[i] = new double[num_classes];
-    int sum = 0;
-    for (int j = 0; j < num_classes; ++j) {
-      sum += f_matrix[i][j];
+      delete[] min_same;
+      delete[] min_diff;
+      delete[] alpha;
     }
-    for (int j = 0; j < num_classes; ++j) {
-      matrix[i][j] = static_cast<double>(f_matrix[i][j]) / sum;
-    }
-  }
-  double *quality = new double[num_classes];
-  *avg_prob = new double[num_classes];
-  for (int j = 0; j < num_classes; ++j) {
-    quality[j] = matrix[0][j];
-    (*avg_prob)[j] = matrix[0][j];
-    for (int i = 1; i < num_classes; ++i) {
-      if (matrix[i][j] < quality[j]) {
-        quality[j] = matrix[i][j];
-      }
-      (*avg_prob)[j] += matrix[i][j];
-    }
-    (*avg_prob)[j] /= num_classes;
   }

   int best = 0;
+  cred = p_values[0];
+  conf = (num_classes > 1) ? p_values[1] : 0;  // guard: no runner-up p-value with a single class
   for (int i = 1; i < num_classes; ++i) {
-    if (quality[i] > quality[best]) {
+    if (p_values[i] > cred) {
+      conf = cred;
+      cred = p_values[i];
       best = i;
+    } else if (p_values[i] < cred && p_values[i] > conf) {
+      conf = p_values[i];
     }
   }
+  cred = cred / (num_ex+1);
+  conf = 1 - conf / (num_ex+1);
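+  // Credibility is the largest p-value; confidence is one minus the second
+  // largest, i.e. how strongly every other label can be rejected.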

-  lower = quality[best];
-  upper = matrix[0][best];
-  for (int i = 1; i < num_classes; ++i) {
-    if (matrix[i][best] > upper) {
-      upper = matrix[i][best];
-    }
-  }
-
-  predict_label = labels[best];
-
-  delete[] alter_labels;
-  delete[] quality;
+  predict_label.push_back(labels[best]);

   for (int i = 0; i < num_classes; ++i) {
-    delete[] f_matrix[i];
-    delete[] matrix[i];
-  }
-  delete[] f_matrix;
-  delete[] matrix;
-
-  return predict_label;
-}
-
-void CrossValidation(const struct Problem *prob, const struct Parameter *param,
-    double *predict_labels, double *lower_bounds, double *upper_bounds,
-    double *brier, double *logloss) {
-  int num_folds = param->num_folds;
-  int num_ex = prob->num_ex;
-  int num_classes;
-
-  int *fold_start;
-  int *perm = new int[num_ex];
-
-  if (num_folds > num_ex) {
-    num_folds = num_ex;
-    std::cerr << "WARNING: number of folds > number of data. Will use number of folds = number of data instead (i.e., leave-one-out cross validation)" << std::endl;
-  }
-  fold_start = new int[num_folds+1];
-
-  if (num_folds < num_ex) {
-    int *start = NULL;
-    int *label = NULL;
-    int *count = NULL;
-    GroupClasses(prob, &num_classes, &label, &start, &count, perm);
-
-    int *fold_count = new int[num_folds];
-    int *index = new int[num_ex];
-
-    for (int i = 0; i < num_ex; ++i) {
-      index[i] = perm[i];
-    }
-    std::random_device rd;
-    std::mt19937 g(rd());
-    for (int i = 0; i < num_classes; ++i) {
-      std::shuffle(index+start[i], index+start[i]+count[i], g);
-    }
-
-    for (int i = 0; i < num_folds; ++i) {
-      fold_count[i] = 0;
-      for (int c = 0; c < num_classes; ++c) {
-        fold_count[i] += (i+1)*count[c]/num_folds - i*count[c]/num_folds;
-      }
-    }
-
-    fold_start[0] = 0;
-    for (int i = 1; i <= num_folds; ++i) {
-      fold_start[i] = fold_start[i-1] + fold_count[i-1];
-    }
-    for (int c = 0; c < num_classes; ++c) {
-      for (int i = 0; i < num_folds; ++i) {
-        int begin = start[c] + i*count[c]/num_folds;
-        int end = start[c] + (i+1)*count[c]/num_folds;
-        for (int j = begin; j < end; ++j) {
-          perm[fold_start[i]] = index[j];
-          fold_start[i]++;
-        }
-      }
-    }
-    fold_start[0] = 0;
-    for (int i = 1; i <= num_folds; ++i) {
-      fold_start[i] = fold_start[i-1] + fold_count[i-1];
-    }
-    delete[] start;
-    delete[] label;
-    delete[] count;
-    delete[] index;
-    delete[] fold_count;
-
-  } else {
-
-    for (int i = 0; i < num_ex; ++i) {
-      perm[i] = i;
-    }
-    std::random_device rd;
-    std::mt19937 g(rd());
-    std::shuffle(perm, perm+num_ex, g);
-    fold_start[0] = 0;
-    for (int i = 1; i <= num_folds; ++i) {
-      fold_start[i] = fold_start[i-1] + (i+1)*num_ex/num_folds - i*num_ex/num_folds;
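+    // Build the prediction region: every label whose p-value exceeds the
+    // significance level epsilon is included. predict_label[0] always holds
+    // the point prediction, so the best label may appear twice.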
+    if (static_cast<double>(p_values[i])/(num_ex+1) > param.epsilon) {
+      predict_label.push_back(labels[i]);
     }
   }
-  for (int i = 0; i < num_folds; ++i) {
-    int begin = fold_start[i];
-    int end = fold_start[i+1];
-    int k = 0;
-    struct Problem subprob;
+  delete[] p_values;

-    subprob.num_ex = num_ex - (end-begin);
-    subprob.x = new Node*[subprob.num_ex];
-    subprob.y = new double[subprob.num_ex];
-
-    for (int j = 0; j < begin; ++j) {
-      subprob.x[k] = prob->x[perm[j]];
-      subprob.y[k] = prob->y[perm[j]];
-      ++k;
-    }
-    for (int j = end; j < num_ex; ++j) {
-      subprob.x[k] = prob->x[perm[j]];
-      subprob.y[k] = prob->y[perm[j]];
-      ++k;
-    }
-
-    struct Model *submodel = TrainCP(&subprob, param);
-
-    if (param->probability == 1) {
-      for (int j = 0; j < submodel->num_classes; ++j) {
-        std::cout << submodel->labels[j] << " ";
-      }
-      std::cout << '\n';
-    }
-
-    for (int j = begin; j < end; ++j) {
-      double *avg_prob = NULL;
-      brier[perm[j]] = 0;
-
-      predict_labels[perm[j]] = PredictCP(&subprob, submodel, prob->x[perm[j]], lower_bounds[perm[j]], upper_bounds[perm[j]], &avg_prob);
-
-      for (k = 0; k < submodel->num_classes; ++k) {
-        if (submodel->labels[k] == prob->y[perm[j]]) {
-          brier[perm[j]] += (1-avg_prob[k]) * (1-avg_prob[k]);
-          double tmp = std::fmax(std::fmin(avg_prob[k], 1-kEpsilon), kEpsilon);
-          logloss[perm[j]] = - std::log(tmp);
-        } else {
-          brier[perm[j]] += avg_prob[k] * avg_prob[k];
-        }
-      }
-      if (param->probability == 1) {
-        for (k = 0; k < submodel->num_classes; ++k) {
-          std::cout << avg_prob[k] << ' ';
-        }
-        std::cout << '\n';
-      }
-      delete[] avg_prob;
-    }
-    FreeModel(submodel);
-    delete[] subprob.x;
-    delete[] subprob.y;
-  }
-  delete[] fold_start;
-  delete[] perm;
-
-  return;
-}
-
-void OnlinePredict(const struct Problem *prob, const struct Parameter *param,
-    double *predict_labels, int *indices,
-    double *lower_bounds, double *upper_bounds,
-    double *brier, double *logloss) {
-  int num_ex = prob->num_ex;
-  int num_classes = 0;
-
-  for (int i = 0; i < num_ex; ++i) {
-    indices[i] = i;
-  }
-  std::random_device rd;
-  std::mt19937 g(rd());
-  std::shuffle(indices, indices+num_ex, g);
-
-  if (param->taxonomy_type == KNN) {
-    int num_neighbors = param->knn_param->num_neighbors;
-    int *alter_labels = new int[num_ex];
-    std::vector<int> labels;
-
-    int *categories = new int[num_ex];
-    double **dist_neighbors = new double*[num_ex];
-    int **label_neighbors = new int*[num_ex];
-
-    for (int i = 0; i < num_ex; ++i) {
-      dist_neighbors[i] = new double[num_neighbors];
-      label_neighbors[i] = new int[num_neighbors];
-      for (int j = 0; j < num_neighbors; ++j) {
-        dist_neighbors[i][j] = kInf;
-        label_neighbors[i][j] = -1;
-      }
-      categories[i] = -1;
-    }
-
-    int this_label = static_cast<int>(prob->y[indices[0]]);
-    labels.push_back(this_label);
-    alter_labels[0] = 0;
-    num_classes = 1;
-
-    for (int i = 1; i < num_ex; ++i) {
-      if (num_classes == 1)
-        std::cerr <<
-          "WARNING: training set only has one class. See README for details."
-          << std::endl;
-
-      int **f_matrix = new int*[num_classes];
-
-      for (int j = 0; j < num_classes; ++j) {
-        f_matrix[j] = new int[num_classes];
-        for (int k = 0; k < num_classes; ++k) {
-          f_matrix[j][k] = 0;
-        }
-
-        double **dist_neighbors_ = new double*[i+1];
-        int **label_neighbors_ = new int*[i+1];
-
-        for (int j = 0; j < i; ++j) {
-          clone(dist_neighbors_[j], dist_neighbors[j], num_neighbors);
-          clone(label_neighbors_[j], label_neighbors[j], num_neighbors);
-        }
-        dist_neighbors_[i] = new double[num_neighbors];
-        label_neighbors_[i] = new int[num_neighbors];
-        for (int j = 0; j < num_neighbors; ++j) {
-          dist_neighbors_[i][j] = kInf;
-          label_neighbors_[i][j] = -1;
-        }
-
-        for (int k = 0; k < i; ++k) {
-          double dist = CalcDist(prob->x[indices[k]], prob->x[indices[i]]);
-          int index;
-
-          index = CompareDist(dist_neighbors_[i], dist, num_neighbors);
-          if (index < num_neighbors) {
-            InsertLabel(label_neighbors_[i], alter_labels[k], num_neighbors, index);
-          }
-          index = CompareDist(dist_neighbors_[k], dist, num_neighbors);
-          if (index < num_neighbors) {
-            InsertLabel(label_neighbors_[k], j, num_neighbors, index);
-          }
-        }
-        for (int k = 0; k <= i; ++k) {
-          categories[k] = FindMostFrequent(label_neighbors_[k], num_neighbors);
-        }
-
-        for (int k = 0; k < i; ++k) {
-          if (categories[k] == categories[i]) {
-            ++f_matrix[j][alter_labels[k]];
-          }
-        }
-        f_matrix[j][j]++;
-
-        for (int j = 0; j < num_neighbors; ++j) {
-          dist_neighbors[i][j] = dist_neighbors_[i][j];
-          label_neighbors[i][j] = label_neighbors_[i][j];
-        }
-        for (int j = 0; j < i+1; ++j) {
-          delete[] dist_neighbors_[j];
-          delete[] label_neighbors_[j];
-        }
-        delete[] dist_neighbors_;
-        delete[] label_neighbors_;
-      }
-
-      double **matrix = new double*[num_classes];
-      for (int j = 0; j < num_classes; ++j) {
-        matrix[j] = new double[num_classes];
-        int sum = 0;
-        for (int k = 0; k < num_classes; ++k)
-          sum += f_matrix[j][k];
-        for (int k = 0; k < num_classes; ++k)
-          matrix[j][k] = static_cast<double>(f_matrix[j][k]) / sum;
-      }
-
-      double *quality = new double[num_classes];
-      double *avg_prob = new double[num_classes];
-      for (int j = 0; j < num_classes; ++j) {
-        quality[j] = matrix[0][j];
-        avg_prob[j] = matrix[0][j];
-        for (int k = 1; k < num_classes; ++k) {
-          if (matrix[k][j] < quality[j]) {
-            quality[j] = matrix[k][j];
-          }
-          avg_prob[j] += matrix[k][j];
-        }
-        avg_prob[j] /= num_classes;
-      }
-
-      int best = 0;
-      for (int j = 1; j < num_classes; ++j) {
-        if (quality[j] > quality[best]) {
-          best = j;
-        }
-      }
-
-      lower_bounds[i] = quality[best];
-      upper_bounds[i] = matrix[0][best];
-      for (int j = 1; j < num_classes; ++j) {
-        if (matrix[j][best] > upper_bounds[i]) {
-          upper_bounds[i] = matrix[j][best];
-        }
-      }
-
-      brier[i] = 0;
-      for (int j = 0; j < num_classes; ++j) {
-        if (labels[static_cast<std::size_t>(j)] == prob->y[indices[i]]) {
-          brier[i] += (1-avg_prob[j])*(1-avg_prob[j]);
-          double tmp = std::fmax(std::fmin(avg_prob[j], 1-kEpsilon), kEpsilon);
-          logloss[i] = - std::log(tmp);
-        } else {
-          brier[i] += avg_prob[j]*avg_prob[j];
-        }
-      }
-
-      if (param->probability == 1) {
-        for (int j = 0; j < num_classes; ++j) {
-          std::cout << avg_prob[j] << ' ';
-        }
-        std::cout << '\n';
-      }
-
-      delete[] avg_prob;
-
-      predict_labels[i] = labels[static_cast<std::size_t>(best)];
-
-      delete[] quality;
-      for (int j = 0; j < num_classes; ++j) {
-        delete[] f_matrix[j];
-        delete[] matrix[j];
-      }
-      delete[] f_matrix;
-      delete[] matrix;
-
-      this_label = static_cast<int>(prob->y[indices[i]]);
-      std::size_t j;
-      for (j = 0; j < num_classes; ++j) {
-        if (this_label == labels[j]) break;
-      }
-      alter_labels[i] = static_cast<int>(j);
-      if (j == num_classes) {
-        labels.push_back(this_label);
-        ++num_classes;
-      }
-
-      for (int j = 0; j < i; ++j) {
-        double dist = CalcDist(prob->x[indices[j]], prob->x[indices[i]]);
-        int index = CompareDist(dist_neighbors[j], dist, num_neighbors);
-        if (index < num_neighbors) {
-          InsertLabel(label_neighbors[j], alter_labels[i], num_neighbors, index);
-        }
-      }
-
-    }
-    if (param->probability == 1) {
-      for (std::size_t j = 0; j < num_classes; ++j) {
-        std::cout << labels[j] << " ";
-      }
-      std::cout << '\n';
-    }
-
-    for (int i = 0; i < num_ex; ++i) {
-      delete[] dist_neighbors[i];
-      delete[] label_neighbors[i];
-    }
-
-    delete[] dist_neighbors;
-    delete[] label_neighbors;
-    delete[] categories;
-    delete[] alter_labels;
-    std::vector<int>(labels).swap(labels);
-  }
-
-  if (param->taxonomy_type == SVM_EL ||
-      param->taxonomy_type == SVM_ES ||
-      param->taxonomy_type == SVM_KM) {
-    Problem subprob;
-    subprob.x = new Node*[num_ex];
-    subprob.y = new double[num_ex];
-
-    for (int i = 0; i < num_ex; ++i) {
-      subprob.x[i] = prob->x[indices[i]];
-      subprob.y[i] = prob->y[indices[i]];
-    }
-
-    for (int i = 1; i < num_ex; ++i) {
-      double *avg_prob = NULL;
-      brier[i] = 0;
-      subprob.num_ex = i;
-      Model *submodel = TrainCP(&subprob, param);
-      predict_labels[i] = PredictCP(&subprob, submodel, subprob.x[i],
-          lower_bounds[i], upper_bounds[i], &avg_prob);
-      for (int j = 0; j < submodel->num_classes; ++j) {
-        if (submodel->labels[j] == subprob.y[i]) {
-          brier[i] += (1-avg_prob[j]) * (1-avg_prob[j]);
-          double tmp = std::fmax(std::fmin(avg_prob[j], 1-kEpsilon), kEpsilon);
-          logloss[i] = - std::log(tmp);
-        } else {
-          brier[i] += avg_prob[j] * avg_prob[j];
-        }
-      }
-      if (param->probability == 1) {
-        for (int j = 0; j < submodel->num_classes; ++j) {
-          std::cout << avg_prob[j] << ' ';
-        }
-        std::cout << '\n';
-      }
-      FreeModel(submodel);
-      delete[] avg_prob;
-    }
-    delete[] subprob.x;
-    delete[] subprob.y;
-  }
-
-  return;
+  return predict_label;
 }

-static const char *kTaxonomyTypeTable[] = { "knn", "svm_el", "svm_es", "svm_km", NULL };
+static const char *kMeasureTypeTable[] = { "knn", NULL };

 int SaveModel(const char *model_file_name, const struct Model *model) {
   std::ofstream model_file(model_file_name);
@@ -711,32 +156,17 @@ int SaveModel(const char *model_file_name, const struct Model *model) {

   const Parameter &param = model->param;

-  model_file << "taxonomy_type " << kTaxonomyTypeTable[param.taxonomy_type] << '\n';
-  model_file << "num_categories " << model->num_categories << '\n';
+  model_file << "measure_type " << kMeasureTypeTable[param.measure_type] << '\n';
+  model_file << "epsilon " << param.epsilon << '\n';

-  if (param.taxonomy_type == KNN) {
+  if (param.measure_type == KNN) {
     SaveKNNModel(model_file, model->knn_model);
   }
-  if (param.taxonomy_type == SVM_EL ||
-      param.taxonomy_type == SVM_ES ||
-      param.taxonomy_type == SVM_KM) {
-    SaveSVMModel(model_file, model->svm_model);
-  }
-
-  if ((param.taxonomy_type == SVM_ES ||
-       param.taxonomy_type == SVM_KM) &&
-      model->points) {
-    model_file << "points\n";
-    for (int i = 0; i < model->num_categories; ++i) {
-      model_file << model->points[i] << ' ';
-    }
-    model_file << '\n';
-  }

-  if (model->categories) {
-    model_file << "categories\n";
+  if (model->alpha) {
+    model_file << "alpha\n";
     for (int i = 0; i < model->num_ex; ++i) {
-      model_file << model->categories[i] << ' ';
+      model_file << model->alpha[i] << ' ';
     }
     model_file << '\n';
   }
@@ -763,32 +193,31 @@ Model *LoadModel(const char *model_file_name) {
   Parameter &param = model->param;
   param.load_model = 1;
   model->labels = NULL;
-  model->categories = NULL;
+  model->alpha = NULL;

   char cmd[80];
   while (1) {
     model_file >> cmd;

-    if (std::strcmp(cmd, "taxonomy_type") == 0) {
+    if (std::strcmp(cmd, "measure_type") == 0) {
       model_file >> cmd;
       int i;
-      for (i = 0; kTaxonomyTypeTable[i]; ++i) {
-        if (std::strcmp(kTaxonomyTypeTable[i], cmd) == 0) {
-          param.taxonomy_type = i;
+      for (i = 0; kMeasureTypeTable[i]; ++i) {
+        if (std::strcmp(kMeasureTypeTable[i], cmd) == 0) {
+          param.measure_type = i;
           break;
         }
       }
-      if (kTaxonomyTypeTable[i] == NULL) {
-        std::cerr << "Unknown taxonomy type.\n" << std::endl;
+      if (kMeasureTypeTable[i] == NULL) {
+        std::cerr << "Unknown measure type.\n" << std::endl;
         FreeModel(model);
         delete model;
         model_file.close();
         return NULL;
       }
     } else
-    if (std::strcmp(cmd, "num_categories") == 0) {
-      model_file >> param.num_categories;
-      model->num_categories = param.num_categories;
+    if (std::strcmp(cmd, "epsilon") == 0) {
+      model_file >> param.epsilon;
     } else
     if (std::strcmp(cmd, "knn_model") == 0) {
       model->knn_model = LoadKNNModel(model_file);
@@ -803,31 +232,11 @@ Model *LoadModel(const char *model_file_name) {
       clone(model->labels, model->knn_model->labels, model->num_classes);
       model->param.knn_param = &model->knn_model->param;
     } else
-    if (std::strcmp(cmd, "svm_model") == 0) {
-      model->svm_model = LoadSVMModel(model_file);
-      if (model->svm_model == NULL) {
-        FreeModel(model);
-        delete model;
-        model_file.close();
-        return NULL;
-      }
-      model->num_ex = model->svm_model->num_ex;
-      model->num_classes = model->svm_model->num_classes;
-      clone(model->labels, model->svm_model->labels, model->num_classes);
-      model->param.svm_param = &model->svm_model->param;
-    } else
-    if (std::strcmp(cmd, "points") == 0) {
-      int num_categories = model->num_categories;
-      model->points = new double[num_categories];
-      for (int i = 0; i < num_categories; ++i) {
-        model_file >> model->points[i];
-      }
-    } else
-    if (std::strcmp(cmd, "categories") == 0) {
+    if (std::strcmp(cmd, "alpha") == 0) {
       int num_ex = model->num_ex;
-      model->categories = new int[num_ex];
+      model->alpha = new double[num_ex];
       for (int i = 0; i < num_ex; ++i) {
-        model_file >> model->categories[i];
+        model_file >> model->alpha[i];
       }
       break;
     } else {
@@ -844,35 +253,20 @@ Model *LoadModel(const char *model_file_name) {
 }

 void FreeModel(struct Model *model) {
-  if (model->param.taxonomy_type == KNN &&
+  if (model->param.measure_type == KNN &&
       model->knn_model != NULL) {
     FreeKNNModel(model->knn_model);
-    delete model->knn_model;
     model->knn_model = NULL;
   }

-  if ((model->param.taxonomy_type == SVM_EL ||
-       model->param.taxonomy_type == SVM_ES ||
-       model->param.taxonomy_type == SVM_KM) &&
-      model->svm_model != NULL) {
-    FreeSVMModel(&(model->svm_model));
-    delete model->svm_model;
-    model->svm_model = NULL;
-  }
-
   if (model->labels != NULL) {
     delete[] model->labels;
     model->labels = NULL;
   }

-  if (model->param.taxonomy_type == SVM_ES &&
-      model->points != NULL) {
-    delete[] model->points;
-    model->points = NULL;
-  }
-
-  if (model->categories != NULL) {
-    delete[] model->categories;
-    model->categories = NULL;
+  if (model->alpha != NULL) {
+    delete[] model->alpha;
+    model->alpha = NULL;
   }

   delete model;
@@ -882,20 +276,12 @@ void FreeModel(struct Model *model) {
 }

 void FreeParam(struct Parameter *param) {
-  if (param->taxonomy_type == KNN &&
+  if (param->measure_type == KNN &&
       param->knn_param != NULL) {
     FreeKNNParam(param->knn_param);
     param->knn_param = NULL;
   }

-  if ((param->taxonomy_type == SVM_EL ||
-       param->taxonomy_type == SVM_ES ||
-       param->taxonomy_type == SVM_KM) &&
-      param->svm_param != NULL) {
-    FreeSVMParam(param->svm_param);
-    param->svm_param = NULL;
-  }
-
   return;
 }

@@ -904,27 +290,18 @@ const char *CheckParameter(const struct Parameter *param) {
     return "cannot save and load model at the same time";
   }

-  if (param->num_categories == 0) {
-    return "no. of categories cannot be less than 1";
+  if (param->epsilon < 0 || param->epsilon > 1) {
+    return "epsilon should be between 0 and 1";
   }

-  if (param->taxonomy_type == KNN) {
+  if (param->measure_type == KNN) {
     if (param->knn_param == NULL) {
       return "no knn parameter";
     }
     return CheckKNNParameter(param->knn_param);
   }

-  if (param->taxonomy_type == SVM_EL ||
-      param->taxonomy_type == SVM_ES ||
-      param->taxonomy_type == SVM_KM) {
-    if (param->svm_param == NULL) {
-      return "no svm parameter";
-    }
-    return CheckSVMParameter(param->svm_param);
-  }
-
-  if (param->taxonomy_type > 3) {
-    return "no such taxonomy type";
+  if (param->measure_type > 0) {
+    return "no such measure type";
   }

diff --git a/cp.h b/cp.h
index df33947..b3d6801 100644
--- a/cp.h
+++ b/cp.h
@@ -3,33 +3,31 @@

 #include "utilities.h"
 #include "knn.h"
-#include "svm.h"
+#include <vector>

-enum { KNN, SVM_EL, SVM_ES, SVM_KM };
+enum { KNN };

 struct Parameter {
   struct KNNParameter *knn_param;
-  struct SVMParameter *svm_param;
+  int cp_type;
+  int measure_type;
   int save_model;
   int load_model;
-  int measure_type;
   int num_folds;
-  int probability;
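+  // significance level in [0, 1]; used as the p-value cut-off when building
+  // the prediction region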
+  double epsilon;
 };

 struct Model {
   struct Parameter param;
-  struct SVMModel *svm_model;
   struct KNNModel *knn_model;
   int num_ex;
   int num_classes;
   int *labels;
+  double *alpha;
 };

 Model *TrainCP(const struct Problem *train, const struct Parameter *param);
-double PredictCP(const struct Problem *train, const struct Model *model, const struct Node *x, double &lower, double &upper, double **avg_prob);
-void CrossValidation(const struct Problem *prob, const struct Parameter *param, double *predict_labels, double *lower_bounds, double *upper_bounds, double *brier, double *logloss);
-void OnlinePredict(const struct Problem *prob, const struct Parameter *param, double *predict_labels, int *indices, double *lower_bounds, double *upper_bounds, double *brier, double *logloss);
+std::vector<int> PredictCP(const struct Problem *train, const struct Model *model, const struct Node *x, double &conf, double &cred);

 int SaveModel(const char *model_file_name, const struct Model *model);
 Model *LoadModel(const char *model_file_name);

diff --git a/knn.cpp b/knn.cpp
index bf1eb06..9678a2b 100644
--- a/knn.cpp
+++ b/knn.cpp
@@ -59,6 +59,8 @@ KNNModel *TrainKNN(const struct Problem *prob, const struct KNNParameter *param)
   model->labels = NULL;
   model->dist_neighbors = NULL;
   model->label_neighbors = NULL;
+  model->min_same = NULL;
+  model->min_diff = NULL;

   int num_classes = 0;
   int num_ex = prob->num_ex;
@@ -89,39 +91,74 @@ KNNModel *TrainKNN(const struct Problem *prob, const struct KNNParameter *param)
     std::cerr << "WARNING: training set only has one class. See README for details." << std::endl;
   }

-  double **dist_neighbors = new double*[num_ex];
-  int **label_neighbors = new int*[num_ex];
+  if (param->is_cp == 0) {
+    double **dist_neighbors = new double*[num_ex];
+    int **label_neighbors = new int*[num_ex];

-  for (int i = 0; i < num_ex; ++i) {
-    dist_neighbors[i] = new double[num_neighbors];
-    label_neighbors[i] = new int[num_neighbors];
-    for (int j = 0; j < num_neighbors; ++j) {
-      dist_neighbors[i][j] = INF;
-      label_neighbors[i][j] = -1;
+    for (int i = 0; i < num_ex; ++i) {
+      dist_neighbors[i] = new double[num_neighbors];
+      label_neighbors[i] = new int[num_neighbors];
+      for (int j = 0; j < num_neighbors; ++j) {
+        dist_neighbors[i][j] = kInf;
+        label_neighbors[i][j] = -1;
+      }
     }
-  }

-  for (int i = 0; i < num_ex-1; ++i) {
-    for (int j = i+1; j < num_ex; ++j) {
-      double dist = CalcDist(prob->x[i], prob->x[j]);
-      int index;
-      index = CompareDist(dist_neighbors[i], dist, num_neighbors);
-      if (index < num_neighbors) {
-        InsertLabel(label_neighbors[i], alter_labels[j], num_neighbors, index);
+    for (int i = 0; i < num_ex-1; ++i) {
+      for (int j = i+1; j < num_ex; ++j) {
+        double dist = CalcDist(prob->x[i], prob->x[j]);
+        int index;
+        index = CompareDist(dist_neighbors[i], dist, num_neighbors);
+        if (index < num_neighbors) {
+          InsertLabel(label_neighbors[i], alter_labels[j], num_neighbors, index);
+        }
+        index = CompareDist(dist_neighbors[j], dist, num_neighbors);
+        if (index < num_neighbors) {
+          InsertLabel(label_neighbors[j], alter_labels[i], num_neighbors, index);
+        }
+      }
+    }
+
+    model->dist_neighbors = dist_neighbors;
+    model->label_neighbors = label_neighbors;
+
+  } else {
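+    // CP mode: for each training example keep only the num_neighbors smallest
+    // distances to same-class (min_same) and different-class (min_diff)
+    // examples; neighbor labels are not needed to compute the alphas.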
+    double **min_same = new double*[num_ex];
+    double **min_diff = new double*[num_ex];
+
+    for (int i = 0; i < num_ex; ++i) {
+      min_same[i] = new double[num_neighbors];
+      min_diff[i] = new double[num_neighbors];
+      for (int j = 0; j < num_neighbors; ++j) {
+        min_same[i][j] = kInf;
+        min_diff[i][j] = kInf;
       }
-      index = CompareDist(dist_neighbors[j], dist, num_neighbors);
-      if (index < num_neighbors) {
-        InsertLabel(label_neighbors[j], alter_labels[i], num_neighbors, index);
+    }
+
+    for (int i = 0; i < num_ex-1; ++i) {
+      for (int j = i+1; j < num_ex; ++j) {
+        double dist = CalcDist(prob->x[i], prob->x[j]);
+
+        if (prob->y[i] == prob->y[j]) {
+          CompareDist(min_same[i], dist, num_neighbors);
+          CompareDist(min_same[j], dist, num_neighbors);
+        } else {
+          CompareDist(min_diff[i], dist, num_neighbors);
+          CompareDist(min_diff[j], dist, num_neighbors);
+        }
       }
     }
+
+    model->min_same = min_same;
+    model->min_diff = min_diff;
   }
+
   delete[] alter_labels;

   model->num_ex = num_ex;
   model->num_classes = num_classes;
   model->labels = labels;
-  model->dist_neighbors = dist_neighbors;
-  model->label_neighbors = label_neighbors;
   model->param = *param;

   return model;
@@ -132,7 +169,7 @@ double PredictKNN(struct Problem *train, struct Node *x, const int num_neighbors)
   double labels[num_neighbors];

   for (int i = 0; i < num_neighbors; ++i) {
-    neighbors[i] = INF;
+    neighbors[i] = kInf;
     labels[i] = -1;
   }
   for (int i = 0; i < train->num_ex; ++i) {
@@ -181,6 +218,26 @@ int SaveKNNModel(std::ofstream &model_file, const struct KNNModel *model) {
     model_file << '\n';
   }

+  if (model->min_same) {
+    model_file << "min_same\n";
+    for (int i = 0; i < model->num_ex; ++i) {
+      for (int j = 0; j < model->param.num_neighbors; ++j) {
+        model_file << model->min_same[i][j] << ' ';
+      }
+    }
+    model_file << '\n';
+  }
+
+  if (model->min_diff) {
+    model_file << "min_diff\n";
+    for (int i = 0; i < model->num_ex; ++i) {
+      for (int j = 0; j < model->param.num_neighbors; ++j) {
+        model_file << model->min_diff[i][j] << ' ';
+      }
+    }
+    model_file << '\n';
+  }
+
   return 0;
 }

@@ -229,6 +286,28 @@ KNNModel *LoadKNNModel(std::ifstream &model_file) {
         model_file >> model->label_neighbors[i][j];
       }
     }
+  } else
+  if (std::strcmp(cmd, "min_same") == 0) {
+    int n = param.num_neighbors;
+    int num_ex = model->num_ex;
+    model->min_same = new double*[num_ex];
+    for (int i = 0; i < num_ex; ++i) {
+      model->min_same[i] = new double[n];
+      for (int j = 0; j < n; ++j) {
+        model_file >> model->min_same[i][j];
+      }
+    }
+  } else
+  if (std::strcmp(cmd, "min_diff") == 0) {
+    int n = param.num_neighbors;
+    int num_ex = model->num_ex;
+    model->min_diff = new double*[num_ex];
+    for (int i = 0; i < num_ex; ++i) {
+      model->min_diff[i] = new double[n];
+      for (int j = 0; j < n; ++j) {
+        model_file >> model->min_diff[i][j];
+      }
+    }
     break;
   } else {
     std::cerr << "Unknown text in knn_model file: " << cmd << std::endl;
@@ -262,6 +341,27 @@ void FreeKNNModel(struct KNNModel *model) {
     model->label_neighbors = NULL;
   }

+  if (model->min_same != NULL) {
+    for (int i = 0; i < model->num_ex; ++i) {
+      delete[] model->min_same[i];
+    }
+    delete[] model->min_same;
+    model->min_same = NULL;
+  }
+
+  if (model->min_diff != NULL) {
+    for (int i = 0; i < model->num_ex; ++i) {
+      delete[] model->min_diff[i];
+    }
+    delete[] model->min_diff;
+    model->min_diff = NULL;
+  }
+
+  if (model != NULL) {
+    delete model;
+    model = NULL;
+  }
+
   return;
 }

@@ -271,6 +371,7 @@ void FreeKNNParam(struct KNNParameter *param) {

 void InitKNNParam(struct KNNParameter *param) {
   param->num_neighbors = 1;
+  param->is_cp = 1;

   return;
 }

diff --git a/knn.h b/knn.h
index c39f6d5..0b1e917 100644
--- a/knn.h
+++ b/knn.h
@@ -5,6 +5,7 @@

 struct KNNParameter {
   int num_neighbors;
+  int is_cp;
 };

 struct KNNModel {
@@ -14,6 +15,8 @@ struct KNNModel {
   int *labels;  // label of each class (label[k])
   double **dist_neighbors;
   int **label_neighbors;
+  double **min_same;
+  double **min_diff;
 };

 template <typename T>

diff --git a/svm.cpp b/svm.cpp
deleted file mode 100644
index 2128c1e..0000000
--- a/svm.cpp
+++ /dev/null
@@ -1,2017 +0,0 @@
-#include "svm.h"
-#include <iostream>
-#include <iomanip>
-#include <fstream>
-#include <cstring>
-#include <cstdarg>
-#include <cstdlib>
-#include <climits>
-#include <cmath>
-
-typedef float Qfloat;
-typedef signed char schar;
-
-static void PrintCout(const char *s) {
-  std::cout << s;
-  std::cout.flush();
-}
-
-static void PrintNull(const char *s) {}
-
-static void (*PrintString) (const char *) = &PrintNull;
-
-static void Info(const char *format, ...) {
-  char buffer[BUFSIZ];
-  va_list ap;
-  va_start(ap, format);
-  vsprintf(buffer, format, ap);
-  va_end(ap);
-  (*PrintString)(buffer);
-}
-
-//
-// Kernel Cache
-//
-// l is the number of total data items
-// size is the cache size limit in bytes
-//
-class Cache {
- public:
-  Cache(int l, long int size);
-  ~Cache();
-
-  // request data [0,len)
-  // return some position p where [p,len) need to be filled
-  // (p >= len if nothing needs to be filled)
-  int get_data(const int index, Qfloat **data, int len);
-  void SwapIndex(int i, int j);
-
- private:
-  int l_;
-  long int size_;
-  struct Head {
-    Head *prev, *next;  // a circular list
-    Qfloat *data;
-    int len;  // data[0,len) is cached in this entry
-  };
-
-  Head *head_;
-  Head lru_head_;
-  void DeleteLRU(Head *h);
-  void InsertLRU(Head *h);
-};
-
-Cache::Cache(int l, long int size) : l_(l), size_(size) {
-  head_ = (Head *)calloc(static_cast<size_t>(l_), sizeof(Head));  // initialized to 0
-  size_ /= sizeof(Qfloat);
-  size_ -= static_cast<long int>(l_) * sizeof(Head) / sizeof(Qfloat);
-  size_ = std::max(size_, 2 * static_cast<long int>(l_));  // cache must be large enough for two columns
-  lru_head_.next = lru_head_.prev = &lru_head_;
-}
-
-Cache::~Cache() {
-  for (Head *h = lru_head_.next; h != &lru_head_; h=h->next) {
-    delete[] h->data;
-  }
-  delete[] head_;
-}
-
-void Cache::DeleteLRU(Head *h) {
-  // delete from current location
-  h->prev->next = h->next;
-  h->next->prev = h->prev;
-}
-
-void Cache::InsertLRU(Head *h) {
-  // insert to last position
-  h->next = &lru_head_;
-  h->prev = lru_head_.prev;
-  h->prev->next = h;
-  h->next->prev = h;
-}
-
-int Cache::get_data(const int index, Qfloat **data, int len) {
-  Head *h = &head_[index];
-  if (h->len) {
-    DeleteLRU(h);
-  }
-  int more = len - h->len;
-
-  if (more > 0) {
-    // free old space
-    while (size_ < more) {
-      Head *old = lru_head_.next;
-      DeleteLRU(old);
-      delete[] old->data;
-      size_ += old->len;
-      old->data = 0;
-      old->len = 0;
-    }
-
-    // allocate new space
-    h->data = (Qfloat *)realloc(h->data, sizeof(Qfloat)*static_cast<size_t>(len));
-    size_ -= more;
-    std::swap(h->len, len);
-  }
-
-  InsertLRU(h);
-  *data = h->data;
-
-  return len;
-}
-
-void Cache::SwapIndex(int i, int j) {
-  if (i == j) {
-    return;
-  }
-
-  if (head_[i].len) {
-    DeleteLRU(&head_[i]);
-  }
-  if (head_[j].len) {
-    DeleteLRU(&head_[j]);
-  }
-  std::swap(head_[i].data, head_[j].data);
-  std::swap(head_[i].len, head_[j].len);
-  if (head_[i].len) {
-    InsertLRU(&head_[i]);
-  }
-  if (head_[j].len) {
-    InsertLRU(&head_[j]);
-  }
-
-  if (i > j) {
-    std::swap(i, j);
-  }
-  for (Head *h = lru_head_.next; h != &lru_head_; h = h->next) {
-    if (h->len > i) {
-      if (h->len > j) {
-        std::swap(h->data[i], h->data[j]);
-      } else {
-        // give up
-        DeleteLRU(h);
-        delete[] h->data;
-        size_ += h->len;
-        h->data = 0;
-        h->len = 0;
-      }
-    }
-  }
-}
-
-//
-// Kernel evaluation
-//
-// the static method KernelFunction is for doing single kernel evaluation
-// the constructor of Kernel prepares to calculate the l*l kernel matrix
-// the member function get_Q is for getting one column from the Q Matrix
-//
-class QMatrix {
- public:
-  virtual Qfloat *get_Q(int column, int len) const = 0;
-  virtual double *get_QD() const = 0;
-  virtual void SwapIndex(int i, int j) const = 0;
-  virtual ~QMatrix() {}
-};
-
-class Kernel : public QMatrix {
- public:
-  Kernel(int l, Node *const *x, const SVMParameter& param);
-  virtual ~Kernel();
-  static double KernelFunction(const Node *x, const Node *y, const SVMParameter& param);
-  virtual Qfloat *get_Q(int column, int len) const = 0;
-  virtual double *get_QD() const = 0;
-  virtual void SwapIndex(int i, int j) const {
-    std::swap(x_[i], x_[j]);
-    if (x_square_) {
-      std::swap(x_square_[i], x_square_[j]);
-    }
-  }
-
- protected:
-  double (Kernel::*kernel_function)(int i, int j) const;
-
- private:
-  const Node **x_;
-  double *x_square_;
-
-  // SVMParameter
-  const int kernel_type;
-  const int degree;
-  const double gamma;
-  const double coef0;
-
-  static double Dot(const Node *px, const Node *py);
-  double KernelLinear(int i, int j) const {
-    return Dot(x_[i], x_[j]);
-  }
-  double KernelPoly(int i, int j) const {
-    return std::pow(gamma*Dot(x_[i], x_[j])+coef0, degree);
-  }
-  double KernelRBF(int i, int j) const {
-    return exp(-gamma*(x_square_[i]+x_square_[j]-2*Dot(x_[i], x_[j])));
-  }
-  double KernelSigmoid(int i, int j) const {
-    return tanh(gamma*Dot(x_[i], x_[j])+coef0);
-  }
-  double KernelPrecomputed(int i, int j) const {
-    return x_[i][static_cast<int>(x_[j][0].value)].value;
-  }
-};
-
-Kernel::Kernel(int l, Node *const *x, const SVMParameter &param)
-    :kernel_type(param.kernel_type),
-     degree(param.degree),
-     gamma(param.gamma),
-     coef0(param.coef0) {
-  switch (kernel_type) {
-    case LINEAR: {
-      kernel_function = &Kernel::KernelLinear;
-      break;
-    }
-    case POLY: {
-      kernel_function = &Kernel::KernelPoly;
-      break;
-    }
-    case RBF: {
-      kernel_function = &Kernel::KernelRBF;
-      break;
-    }
-    case SIGMOID: {
-      kernel_function = &Kernel::KernelSigmoid;
-      break;
-    }
-    case PRECOMPUTED: {
-      kernel_function = &Kernel::KernelPrecomputed;
-      break;
-    }
-    default: {
-      // assert(false);
-      break;
-    }
-  }
-
-  clone(x_, x, l);
-
-  if (kernel_type == RBF) {
-    x_square_ = new double[l];
-    for (int i = 0; i < l; ++i) {
-      x_square_[i] = Dot(x_[i], x_[i]);
-    }
-  } else {
-    x_square_ = 0;
-  }
-}
-
-Kernel::~Kernel() {
-  delete[] x_;
-  delete[] x_square_;
-}
-
-double Kernel::Dot(const Node *px, const Node *py) {
-  double sum = 0;
-  while (px->index != -1 && py->index != -1) {
-    if (px->index == py->index) {
-      sum += px->value * py->value;
-      ++px;
-      ++py;
-    } else {
-      if (px->index > py->index) {
-        ++py;
-      } else {
-        ++px;
-      }
-    }
-  }
-
-  return sum;
-}
-
-double Kernel::KernelFunction(const Node *x, const Node *y, const SVMParameter &param) {
-  switch (param.kernel_type) {
-    case LINEAR: {
-      return Dot(x, y);
-    }
-    case POLY: {
-      return std::pow(param.gamma*Dot(x, y)+param.coef0, param.degree);
-    }
-    case RBF: {
-      double sum = 0;
-      while (x->index != -1 && y->index != -1) {
-        if (x->index == y->index) {
-          double d = x->value - y->value;
-          sum += d*d;
-          ++x;
-          ++y;
-        } else {
-          if (x->index > y->index) {
-            sum += y->value * y->value;
-            ++y;
-          } else {
-            sum += x->value * x->value;
-            ++x;
-          }
-        }
-      }
-
-      while (x->index != -1) {
-        sum += x->value * x->value;
-        ++x;
-      }
-
-      while (y->index != -1) {
-        sum += y->value * y->value;
-        ++y;
-      }
-
-      return exp(-param.gamma*sum);
-    }
-    case SIGMOID: {
-      return tanh(param.gamma*Dot(x, y)+param.coef0);
-    }
-    case PRECOMPUTED: {  // x: test (validation), y: SV
-      return x[static_cast<int>(y->value)].value;
-    }
-    default: {
-      // assert(false);
-      return 0;  // Unreachable
-    }
-  }
-}
-
-// An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918
-// Solves:
-//
-//  min 0.5(\alpha^T Q \alpha) + p^T \alpha
-//
-//    y^T \alpha = \delta
-//    y_i = +1 or -1
-//    0 <= alpha_i <= Cp for y_i = 1
-//    0 <= alpha_i <= Cn for y_i = -1
-//
-// Given:
-//
-//  Q, p, y, Cp, Cn, and an initial feasible point \alpha
-//  l is the size of vectors and matrices
-//  eps is the stopping tolerance
-//
-// solution will be put in \alpha, objective value will be put in obj
-//
-class Solver {
- public:
-  Solver() {};
-  virtual ~Solver() {};
-
-  struct SolutionInfo {
-    double obj;
-    double rho;
-    double upper_bound_p;
-    double upper_bound_n;
-    double r;  // for Solver_NU
-  };
-
-  void Solve(int l, const QMatrix &Q, const double *p, const schar *y,
-      double *alpha, double Cp, double Cn, double eps,
-      SolutionInfo *si, int shrinking);
-
- protected:
-  int active_size_;
-  schar *y_;
-  double *G_;  // gradient of objective function
-  enum { LOWER_BOUND, UPPER_BOUND, FREE };
-  char *alpha_status_;  // LOWER_BOUND, UPPER_BOUND, FREE
-  double *alpha_;
-  const QMatrix *Q_;
-  const double *QD_;
-  double eps_;
-  double Cp_;
-  double Cn_;
-  double *p_;
-  int *active_set_;
-  double *G_bar_;  // gradient, if we treat free variables as 0
-  int l_;
-  bool unshrink_;  // XXX
-
-  double get_C(int i) {
-    return (y_[i] > 0) ? Cp_ : Cn_;
-  }
-  void UpdateAlphaStatus(int i) {
-    if (alpha_[i] >= get_C(i)) {
-      alpha_status_[i] = UPPER_BOUND;
-    } else {
-      if (alpha_[i] <= 0) {
-        alpha_status_[i] = LOWER_BOUND;
-      } else {
-        alpha_status_[i] = FREE;
-      }
-    }
-  }
-  bool IsUpperBound(int i) {
-    return alpha_status_[i] == UPPER_BOUND;
-  }
-  bool IsLowerBound(int i) {
-    return alpha_status_[i] == LOWER_BOUND;
-  }
-  bool IsFree(int i) {
-    return alpha_status_[i] == FREE;
-  }
-  void SwapIndex(int i, int j);
-  void ReconstructGradient();
-  virtual int SelectWorkingSet(int &i, int &j);
-  virtual double CalculateRho();
-  virtual void DoShrinking();
-
- private:
-  bool IsShrunk(int i, double Gmax1, double Gmax2);
-};
-
-void Solver::SwapIndex(int i, int j) {
-  Q_->SwapIndex(i, j);
-  std::swap(y_[i], y_[j]);
-  std::swap(G_[i], G_[j]);
-  std::swap(alpha_status_[i], alpha_status_[j]);
-  std::swap(alpha_[i], alpha_[j]);
-  std::swap(p_[i], p_[j]);
-  std::swap(active_set_[i], active_set_[j]);
-  std::swap(G_bar_[i], G_bar_[j]);
-}
-
-void Solver::ReconstructGradient() {
-  // reconstruct inactive elements of G from G_bar_ and free variables
-  if (active_size_ == l_) {
-    return;
-  }
-
-  int num_free = 0;
-
-  for (int i = active_size_; i < l_; ++i) {
-    G_[i] = G_bar_[i] + p_[i];
-  }
-
-  for (int i = 0; i < active_size_; ++i) {
-    if (IsFree(i)) {
-      num_free++;
-    }
-  }
-
-  if (2*num_free < active_size_) {
-    Info("\nWARNING: using -h 0 may be faster\n");
-  }
-
-  if (num_free*l_ > 2*active_size_*(l_-active_size_)) {
-    for (int i = active_size_; i < l_; ++i) {
-      const Qfloat *Q_i = Q_->get_Q(i, active_size_);
-      for (int j = 0; j < active_size_; ++j) {
-        if (IsFree(j)) {
-          G_[i] += alpha_[j] * Q_i[j];
-        }
-      }
-    }
-  } else {
-    for (int i = 0; i < active_size_; ++i) {
-      if (IsFree(i)) {
-        const Qfloat *Q_i = Q_->get_Q(i, l_);
-        double alpha_i = alpha_[i];
-        for (int j = active_size_; j < l_; ++j) {
-          G_[j] += alpha_i * Q_i[j];
-        }
-      }
-    }
-  }
-}
-
-void Solver::Solve(int l, const QMatrix &Q, const double *p, const schar *y,
-    double *alpha, double Cp, double Cn, double eps,
-    SolutionInfo *si, int shrinking) {
-  l_ = l;
-  Q_ = &Q;
-  QD_=Q.get_QD();
-  clone(p_, p, l);
-  clone(y_, y, l);
-  clone(alpha_, alpha, l);
-  Cp_ = Cp;
-  Cn_ = Cn;
-  eps_ = eps;
-  unshrink_ = false;
-
-  // initialize alpha_status_
-  alpha_status_ = new char[l];
-  for (int i = 0; i < l; ++i) {
-    UpdateAlphaStatus(i);
-  }
-
-  // initialize active set (for shrinking)
-  active_set_ = new int[l];
-  for (int i = 0; i < l; ++i) {
-    active_set_[i] = i;
-  }
-  active_size_ = l;
-
-  // initialize gradient
-  G_ = new double[l];
-  G_bar_ = new double[l];
-  for (int i = 0; i < l; ++i) {
-    G_[i] = p_[i];
-    G_bar_[i] = 0;
-  }
-  for (int i = 0; i < l; ++i)
-    if (!IsLowerBound(i)) {
-      const Qfloat *Q_i = Q.get_Q(i,l);
-      double alpha_i = alpha_[i];
-      for (int j = 0; j < l; ++j) {
-        G_[j] += alpha_i*Q_i[j];
-      }
-      if (IsUpperBound(i)) {
-        for (int j = 0; j < l; ++j) {
-          G_bar_[j] += get_C(i) * Q_i[j];
-        }
-      }
-    }
-
-  // optimization step
-  int iter = 0;
-  int max_iter = std::max(10000000, (l>INT_MAX/100) ? (INT_MAX) : (100*l));
-  int counter = std::min(l, 1000) + 1;
-
-  while (iter < max_iter) {
-    // show progress and do shrinking
-    if (--counter == 0) {
-      counter = std::min(l, 1000);
-      if (shrinking) {
-        DoShrinking();
-      }
-      Info(".");
-    }
-
-    int i, j;
-    if (SelectWorkingSet(i, j) != 0) {
-      // reconstruct the whole gradient
-      ReconstructGradient();
-      // reset active set size and check
-      active_size_ = l;
-      Info("*");
-      if (SelectWorkingSet(i, j) != 0) {
-        break;
-      } else {
-        counter = 1;  // do shrinking next iteration
-      }
-    }
-
-    ++iter;
-
-    // update alpha[i] and alpha[j], handle bounds carefully
-    const Qfloat *Q_i = Q.get_Q(i, active_size_);
-    const Qfloat *Q_j = Q.get_Q(j, active_size_);
-
-    double C_i = get_C(i);
-    double C_j = get_C(j);
-
-    double old_alpha_i = alpha_[i];
-    double old_alpha_j = alpha_[j];
-
-    if (y_[i] != y_[j]) {
-      double quad_coef = QD_[i] + QD_[j] + 2*Q_i[j];
-      if (quad_coef <= 0) {
-        quad_coef = kTau;
-      }
-      double delta = (-G_[i]-G_[j]) / quad_coef;
-      double diff = alpha_[i] - alpha_[j];
-      alpha_[i] += delta;
-      alpha_[j] += delta;
-
-      if (diff > 0) {
-        if (alpha_[j] < 0) {
-          alpha_[j] = 0;
-          alpha_[i] = diff;
-        }
-      } else {
-        if (alpha_[i] < 0) {
-          alpha_[i] = 0;
-          alpha_[j] = -diff;
-        }
-      }
-      if (diff > C_i - C_j) {
-        if (alpha_[i] > C_i) {
-          alpha_[i] = C_i;
-          alpha_[j] = C_i - diff;
-        }
-      } else {
-        if (alpha_[j] > C_j) {
-          alpha_[j] = C_j;
-          alpha_[i] = C_j + diff;
-        }
-      }
-    } else {
-      double quad_coef = QD_[i] + QD_[j] - 2*Q_i[j];
-      if (quad_coef <= 0) {
-        quad_coef = kTau;
-      }
-      double delta = (G_[i]-G_[j]) / quad_coef;
-      double sum = alpha_[i] + alpha_[j];
-      alpha_[i] -= delta;
-      alpha_[j] += delta;
-
-      if (sum > C_i) {
-        if (alpha_[i] > C_i) {
-          alpha_[i] = C_i;
-          alpha_[j] = sum - C_i;
-        }
-      } else {
-        if (alpha_[j] < 0) {
-          alpha_[j] = 0;
-          alpha_[i] = sum;
-        }
-      }
-      if (sum > C_j) {
-        if (alpha_[j] > C_j) {
-          alpha_[j] = C_j;
-          alpha_[i] = sum - C_j;
-        }
-      } else {
-        if (alpha_[i] < 0) {
-          alpha_[i] = 0;
-          alpha_[j] = sum;
-        }
-      }
-    }
-
-    // update G
-    double delta_alpha_i = alpha_[i] - old_alpha_i;
-    double delta_alpha_j = alpha_[j] - old_alpha_j;
-
-    for (int k = 0; k < active_size_; ++k) {
-      G_[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
-    }
-
-    // update alpha_status_ and G_bar_
-    bool ui = IsUpperBound(i);
-    bool uj = IsUpperBound(j);
-    UpdateAlphaStatus(i);
-    UpdateAlphaStatus(j);
-    if (ui != IsUpperBound(i)) {
-      Q_i = Q.get_Q(i, l);
-      if (ui) {
-        for (int k = 0; k < l; ++k) {
-          G_bar_[k] -= C_i * Q_i[k];
-        }
-      } else {
-        for (int k = 0; k < l; ++k) {
-          G_bar_[k] += C_i * Q_i[k];
-        }
-      }
-    }
-
-    if (uj != IsUpperBound(j)) {
-      Q_j = Q.get_Q(j, l);
-      if (uj) {
-        for (int k = 0; k < l; ++k) {
-          G_bar_[k] -= C_j * Q_j[k];
-        }
-      } else {
-        for (int k = 0; k < l; ++k) {
-          G_bar_[k] += C_j * Q_j[k];
-        }
-      }
-    }
-  }
-
-  if (iter >= max_iter) {
-    if (active_size_ < l) {
-      // reconstruct the whole gradient to calculate objective value
-      ReconstructGradient();
-      active_size_ = l;
-      Info("*");
-    }
-    std::cerr << "\nWARNING: reaching max number of iterations" << std::endl;
-  }
-
-  // calculate rho
-  si->rho = CalculateRho();
-
-  // calculate objective value
-  double v = 0;
-  for (int i = 0; i < l; ++i) {
-    v += alpha_[i] * (G_[i] + p_[i]);
-  }
-  si->obj = v / 2;
-
-  // put back the solution
-  for (int i = 0; i < l; ++i) {
-    alpha[active_set_[i]] = alpha_[i];
-  }
-
-  // juggle everything back
-  /*{
-    for(int i=0;i<l;i++)
-      while(active_set_[i] != i)
-        SwapIndex(i,active_set_[i]);
-  }*/
-
-  si->upper_bound_p = Cp;
-  si->upper_bound_n = Cn;
-
-  Info("\noptimization finished, #iter = %d\n", iter);
-
-  delete[] p_;
-  delete[] y_;
-  delete[] alpha_;
-  delete[] alpha_status_;
-  delete[] active_set_;
-  delete[] G_;
-  delete[] G_bar_;
-}
-
-// return 1 if already optimal, return 0 otherwise
-int Solver::SelectWorkingSet(int &out_i, int &out_j) {
-  // return i,j such that
-  // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
-  // j: minimizes the decrease of obj value
-  //    (if quadratic coefficeint <= 0, replace it with tau)
-  //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
-
-  double Gmax = -kInf;
-  double Gmax2 = -kInf;
-  int Gmax_idx = -1;
-  int Gmin_idx = -1;
-  double obj_diff_min = kInf;
-
-  for (int t = 0; t < active_size_; ++t)
-    if (y_[t] == +1) {
-      if (!IsUpperBound(t)) {
-        if (-G_[t] >= Gmax) {
-          Gmax = -G_[t];
-          Gmax_idx = t;
-        }
-      }
-    } else {
-      if (!IsLowerBound(t)) {
-        if (G_[t] >= Gmax) {
-          Gmax = G_[t];
-          Gmax_idx = t;
-        }
-      }
-    }
-
-  int i = Gmax_idx;
-  const Qfloat *Q_i = NULL;
-  if (i != -1) {  // NULL Q_i not accessed: Gmax=-kInf if i=-1
-    Q_i = Q_->get_Q(i, active_size_);
-  }
-
-  for (int j = 0; j < active_size_; ++j) {
-    if (y_[j] == +1) {
-      if (!IsLowerBound(j)) {
-        double grad_diff = Gmax + G_[j];
-        if (G_[j] >= Gmax2) {
-          Gmax2 = G_[j];
-        }
-        if (grad_diff > 0) {
-          double obj_diff;
-          double quad_coef = QD_[i] + QD_[j] - 2.0*y_[i]*Q_i[j];
-          if (quad_coef > 0) {
-            obj_diff = -(grad_diff*grad_diff) / quad_coef;
-          } else {
-            obj_diff = -(grad_diff*grad_diff) / kTau;
-          }
-
-          if (obj_diff <= obj_diff_min) {
-            Gmin_idx = j;
-            obj_diff_min = obj_diff;
-          }
-        }
-      }
-    } else {
-      if (!IsUpperBound(j)) {
-        double grad_diff = Gmax - G_[j];
-        if (-G_[j] >= Gmax2) {
-          Gmax2 = -G_[j];
-        }
-        if (grad_diff > 0) {
-          double obj_diff;
-          double quad_coef = QD_[i] + QD_[j] + 2.0*y_[i]*Q_i[j];
-          if (quad_coef > 0) {
-            obj_diff = -(grad_diff*grad_diff) / quad_coef;
-          } else {
-            obj_diff = -(grad_diff*grad_diff) / kTau;
-          }
-          if (obj_diff <= obj_diff_min) {
-            Gmin_idx = j;
-            obj_diff_min = obj_diff;
-          }
-        }
-      }
-    }
-  }
-
-  if (Gmax+Gmax2 < eps_) {
-    return 1;
-  }
-
-  out_i = Gmax_idx;
-  out_j = Gmin_idx;
-
-  return 0;
-}
-
-bool Solver::IsShrunk(int i, double Gmax1, double Gmax2) {
-  if (IsUpperBound(i)) {
-    if (y_[i] == +1) {
-      return(-G_[i] > Gmax1);
-    } else {
-      return(-G_[i] > Gmax2);
-    }
-  } else {
-    if (IsLowerBound(i)) {
-      if (y_[i] == +1) {
-        return(G_[i] > Gmax2);
-      } else {
-        return(G_[i] > Gmax1);
-      }
-    } else {
-      return (false);
-    }
-  }
-}
-
-void Solver::DoShrinking() {
-  double Gmax1 = -kInf;  // max { -y_i * grad(f)_i | i in I_up(\alpha) }
-  double Gmax2 = -kInf;  // max { y_i * grad(f)_i | i in I_low(\alpha) }
-
-  // find maximal violating pair first
-  for (int i = 0; i < active_size_; ++i) {
-    if (y_[i] == +1) {
-      if (!IsUpperBound(i)) {
-        if (-G_[i] >= Gmax1) {
-          Gmax1 = -G_[i];
-        }
-      }
-      if (!IsLowerBound(i)) {
-        if (G_[i] >= Gmax2) {
-          Gmax2 = G_[i];
-        }
-      }
-    } else {
-      if (!IsUpperBound(i)) {
-        if (-G_[i] >= Gmax2) {
-          Gmax2 = -G_[i];
-        }
-      }
-      if (!IsLowerBound(i)) {
-        if (G_[i] >= Gmax1) {
-          Gmax1 = G_[i];
-        }
-      }
-    }
-  }
-
-  if ((unshrink_ == false) && (Gmax1 + Gmax2 <= eps_*10)) {
-    unshrink_ = true;
-    ReconstructGradient();
-    active_size_ = l_;
-    Info("*");
-  }
-
-  for (int i = 0; i < active_size_; ++i) {
-    if (IsShrunk(i, Gmax1, Gmax2)) {
-      active_size_--;
-      while (active_size_ > i) {
-        if (!IsShrunk(active_size_, Gmax1, Gmax2)) {
-          SwapIndex(i, active_size_);
-          break;
-        }
-        active_size_--;
-      }
-    }
-  }
-}
-
-double Solver::CalculateRho() {
-  double r;
-  int num_free = 0;
-  double ub = kInf, lb = -kInf, sum_free = 0;
-  for (int i = 0; i < active_size_; ++i) {
-    double yG = y_[i] * G_[i];
-
-    if (IsUpperBound(i)) {
-      if (y_[i] == -1) {
-        ub = std::min(ub, yG);
-      } else {
-        lb = std::max(lb, yG);
-      }
-    } else {
-      if (IsLowerBound(i)) {
-        if (y_[i] == +1) {
-          ub = std::min(ub, yG);
-        } else {
-          lb = std::max(lb, yG);
-        }
-      } else {
-        ++num_free;
-        sum_free += yG;
-      }
-    }
-  }
-
-  if (num_free > 0) {
-    r = sum_free / num_free;
-  } else {
-    r = (ub+lb) / 2;
-  }
-
-  return r;
-}
-
-//
-// Solver for nu-svm classification and regression
-//
-// additional constraint: e^T \alpha = constant
-//
-class Solver_NU : public Solver {
- public:
-  Solver_NU() {}
-  void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
-      double *alpha, double Cp, double Cn, double eps,
-      SolutionInfo* si, int shrinking) {
-    si_ = si;
-    Solver::Solve(l, Q, p, y, alpha, Cp, Cn, eps, si, shrinking);
-  }
-
- private:
-  SolutionInfo *si_;
-  int SelectWorkingSet(int &i, int &j);
-  double CalculateRho();
-  bool IsShrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
-  void DoShrinking();
-};
-
-// return 1 if already optimal, return 0 otherwise
-int Solver_NU::SelectWorkingSet(int &out_i, int &out_j) {
-  // return i,j such that y_i = y_j and
-  // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
-  // j: minimizes the decrease of obj value
-  //    (if quadratic coefficeint <= 0, replace it with tau)
-  //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
-
-  double Gmaxp = -kInf;
-  double Gmaxp2 = -kInf;
-  int Gmaxp_idx = -1;
-
-  double Gmaxn = -kInf;
-  double Gmaxn2 = -kInf;
-  int Gmaxn_idx = -1;
-
-  int Gmin_idx = -1;
-  double obj_diff_min = kInf;
-
-  for (int t = 0; t < active_size_; ++t)
-    if (y_[t] == +1) {
-      if (!IsUpperBound(t)) {
-        if (-G_[t] >= Gmaxp) {
-          Gmaxp = -G_[t];
-          Gmaxp_idx = t;
-        }
-      }
-    } else {
-      if (!IsLowerBound(t)) {
-        if (G_[t] >= Gmaxn) {
-          Gmaxn = G_[t];
-          Gmaxn_idx = t;
-        }
-      }
-    }
-
-  int ip = Gmaxp_idx;
-  int in = Gmaxn_idx;
-  const Qfloat *Q_ip = NULL;
-  const Qfloat *Q_in = NULL;
-  if (ip != -1) {  // NULL Q_ip not accessed: Gmaxp=-kInf if ip=-1
-    Q_ip = Q_->get_Q(ip, active_size_);
-  }
-  if (in != -1) {
-    Q_in = Q_->get_Q(in, active_size_);
-  }
-
-  for (int j = 0; j < active_size_; ++j) {
-    if (y_[j] == +1) {
-      if (!IsLowerBound(j)) {
-        double grad_diff = Gmaxp + G_[j];
-        if (G_[j] >= Gmaxp2) {
-          Gmaxp2 = G_[j];
-        }
-        if (grad_diff > 0) {
-          double obj_diff;
-          double quad_coef = QD_[ip] + QD_[j] - 2*Q_ip[j];
-          if (quad_coef > 0) {
-            obj_diff = -(grad_diff*grad_diff) / quad_coef;
-          } else {
-            obj_diff = -(grad_diff*grad_diff) / kTau;
-          }
-          if (obj_diff <= obj_diff_min) {
-            Gmin_idx = j;
-            obj_diff_min = obj_diff;
-          }
-        }
-      }
-    } else {
-      if (!IsUpperBound(j)) {
-        double grad_diff = Gmaxn - G_[j];
-        if (-G_[j] >= Gmaxn2) {
-          Gmaxn2 = -G_[j];
-        }
-        if (grad_diff > 0) {
-          double obj_diff;
-          double quad_coef = QD_[in] + QD_[j] - 2*Q_in[j];
-          if (quad_coef > 0) {
-            obj_diff = -(grad_diff*grad_diff) / quad_coef;
-          } else {
-            obj_diff = -(grad_diff*grad_diff) / kTau;
-          }
-          if (obj_diff <= obj_diff_min) {
-            Gmin_idx = j;
-            obj_diff_min = obj_diff;
-          }
-        }
-      }
-    }
-  }
-
-  if (std::max(Gmaxp+Gmaxp2, Gmaxn+Gmaxn2) < eps_) {
-    return 1;
-  }
-
-  if (y_[Gmin_idx] == +1) {
-    out_i = Gmaxp_idx;
-  } else {
-    out_i = Gmaxn_idx;
-  }
-  out_j = Gmin_idx;
-
-  return 0;
-}
-
-bool Solver_NU::IsShrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4) {
-  if (IsUpperBound(i)) {
-    if (y_[i] == +1) {
-      return(-G_[i] > Gmax1);
-    } else {
-      return(-G_[i] > Gmax4);
-    }
-  } else {
-    if (IsLowerBound(i)) {
-      if (y_[i] == +1) {
-        return(G_[i] > Gmax2);
-      } else {
-        return(G_[i] > Gmax3);
-      }
-    } else {
-      return (false);
-    }
-  }
-}
-
-void Solver_NU::DoShrinking() {
-  double Gmax1 = -kInf;  // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
-  double Gmax2 = -kInf;  // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
-  double Gmax3 = -kInf;  // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
-  double Gmax4 = -kInf;  // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }
-
-  // find maximal violating pair first
-  for (int i = 0; i < active_size_; ++i) {
-    if (!IsUpperBound(i)) {
-      if (y_[i] == +1) {
-        if (-G_[i] > Gmax1) {
-          Gmax1 = -G_[i];
-        }
-      } else {
-        if (-G_[i] > Gmax4) {
-          Gmax4 = -G_[i];
-        }
-      }
-    }
-    if (!IsLowerBound(i)) {
-      if (y_[i] == +1) {
-        if (G_[i] > Gmax2) {
-          Gmax2 = G_[i];
-        }
-      } else {
-        if (G_[i] > Gmax3) {
-          Gmax3 = G_[i];
-        }
-      }
-    }
-  }
-
-  if ((unshrink_ == false) && (std::max(Gmax1+Gmax2, Gmax3+Gmax4) <= eps_*10)) {
-    unshrink_ = true;
-    ReconstructGradient();
-    active_size_ = l_;
-  }
-
-  for (int i = 0; i < active_size_; ++i)
-    if (IsShrunk(i, Gmax1, Gmax2, Gmax3, Gmax4)) {
-      active_size_--;
-      while (active_size_ > i) {
-        if (!IsShrunk(active_size_, Gmax1, Gmax2, Gmax3, Gmax4)) {
-          SwapIndex(i, active_size_);
-          break;
-        }
-        active_size_--;
-      }
-    }
-}
-
-double Solver_NU::CalculateRho() {
-  int num_free1 = 0, num_free2 = 0;
-  double ub1 = kInf, ub2 = kInf;
-  double lb1 = -kInf, lb2 = -kInf;
-  double sum_free1 = 0, sum_free2 = 0;
-
-  for (int i = 0; i < active_size_; ++i) {
-    if (y_[i] == +1) {
-      if (IsUpperBound(i)) {
-        lb1 = std::max(lb1, G_[i]);
-      } else {
-        if (IsLowerBound(i)) {
-          ub1 = std::min(ub1, G_[i]);
-        } else {
-          ++num_free1;
-          sum_free1 += G_[i];
-        }
-      }
-    } else {
-      if (IsUpperBound(i)) {
-        lb2 = std::max(lb2, G_[i]);
-      } else {
-        if (IsLowerBound(i)) {
-          ub2 = std::min(ub2, G_[i]);
-        } else {
-          ++num_free2;
-          sum_free2 += G_[i];
-        }
-      }
-    }
-  }
-
-  double r1, r2;
-  if (num_free1 > 0) {
-    r1 = sum_free1 / num_free1;
-  } else {
-    r1 = (ub1+lb1) / 2;
-  }
-
-  if (num_free2 > 0) {
-    r2 = sum_free2 / num_free2;
-  } else {
-    r2 = (ub2+lb2) / 2;
-  }
-
-  si_->r = (r1+r2) / 2;
-  return ((r1-r2)/2);
-}
-
-//
-// Q matrices for various formulations
-//
-class SVC_Q : public Kernel {
- public:
-  SVC_Q(const Problem &prob, const SVMParameter &param, const schar *y) : Kernel(prob.num_ex, prob.x, param) {
-    clone(y_, y, prob.num_ex);
-    cache_ = new Cache(prob.num_ex, static_cast<long int>(param.cache_size*(1<<20)));
-    QD_ = new double[prob.num_ex];
-    for (int i = 0; i < prob.num_ex; ++i)
-      QD_[i] = (this->*kernel_function)(i, i);
-  }
-
-  Qfloat *get_Q(int i, int len) const {
-    Qfloat *data;
-    int start = cache_->get_data(i, &data, len);
-    if (start < len) {
-      for (int j = start; j < len; ++j)
-        data[j] = static_cast<Qfloat>(y_[i]*y_[j]*(this->*kernel_function)(i, j));
-    }
-    return data;
-  }
-
-  double *get_QD() const {
-    return QD_;
-  }
-
-  void SwapIndex(int i, int j) const {
-    cache_->SwapIndex(i, j);
-    Kernel::SwapIndex(i, j);
-    std::swap(y_[i], y_[j]);
-    std::swap(QD_[i], QD_[j]);
-  }
-
-  ~SVC_Q() {
-    delete[] y_;
-    delete cache_;
-    delete[] QD_;
-  }
-
- private:
-  schar *y_;
-  Cache *cache_;
-  double *QD_;
-};
-
-//
-// construct and solve various formulations
-//
-static void SolveCSVC(const Problem *prob, const SVMParameter *param, double *alpha, Solver::SolutionInfo *si, double Cp, double Cn) {
-  int num_ex = prob->num_ex;
-  double *minus_ones = new double[num_ex];
-  schar *y = new schar[num_ex];
-
-  for (int i = 0; i < num_ex; ++i) {
-    alpha[i] = 0;
-    minus_ones[i] = -1;
-    if (prob->y[i] > 0) {
-      y[i] = +1;
-    } else {
-      y[i] = -1;
-    }
-  }
-
-  Solver s;
-  s.Solve(num_ex, SVC_Q(*prob, *param, y), minus_ones, y, alpha, Cp, Cn, param->eps, si, param->shrinking);
-
-  double sum_alpha=0;
-  for (int i = 0; i < num_ex; ++i) {
-    sum_alpha += alpha[i];
-  }
-
-  if (Cp == Cn) {
-    Info("nu = %f\n", sum_alpha/(Cp*prob->num_ex));
-  }
-
-  for (int i = 0; i < num_ex; ++i) {
-    alpha[i] *= y[i];
-  }
-
-  delete[] minus_ones;
-  delete[] y;
-}
-
-static void SolveNuSVC(const Problem *prob, const SVMParameter *param, double *alpha, Solver::SolutionInfo *si) {
-  int num_ex = prob->num_ex;
-  double nu = param->nu;
-
-  schar *y = new schar[num_ex];
-
-  for (int i = 0; i < num_ex; ++i) {
-    if (prob->y[i] > 0) {
-      y[i] = +1;
-    } else {
-      y[i] = -1;
-    }
-  }
-
-  double sum_pos = nu*num_ex/2;
-  double sum_neg = nu*num_ex/2;
-
-  for (int i = 0; i < num_ex; ++i) {
-    if (y[i] == +1) {
-      alpha[i] = std::min(1.0, sum_pos);
-      sum_pos -= alpha[i];
-    } else {
-      alpha[i] = std::min(1.0, sum_neg);
-      sum_neg -= alpha[i];
-    }
-  }
-
-  double *zeros = new double[num_ex];
-
-  for (int i = 0; i < num_ex; ++i) {
-    zeros[i] = 0;
-  }
-
-  Solver_NU s;
-  s.Solve(num_ex, SVC_Q(*prob, *param, y), zeros, y, alpha, 1.0, 1.0, param->eps, si, param->shrinking);
-  double r = si->r;
-
-  Info("C = %f\n", 1/r);
-
-  for (int i = 0; i < num_ex; ++i) {
-    alpha[i] *= y[i]/r;
-  }
-
-  si->rho /= r;
-  si->obj /= (r*r);
-  si->upper_bound_p = 1/r;
-  si->upper_bound_n = 1/r;
-
-  delete[] y;
-  delete[] zeros;
-}
-
-//
-// DecisionFunction
-//
-struct DecisionFunction {
-  double *alpha;
-  double rho;
-};
-
-static DecisionFunction TrainSingleSVM(const Problem *prob, const SVMParameter *param, double Cp, double Cn) {
-  double *alpha = new double[prob->num_ex];
-  Solver::SolutionInfo si;
-  switch (param->svm_type) {
-    case C_SVC: {
-      SolveCSVC(prob, param, alpha, &si, Cp, Cn);
-      break;
-    }
-    case NU_SVC: {
-      SolveNuSVC(prob, param, alpha, &si);
-      break;
-    }
-    default: {
-      // assert{false};
-      break;
-    }
-  }
-
-  Info("obj = %f, rho = %f\n", si.obj, si.rho);
-
-  // output SVs
-  int nSV = 0;
-  int nBSV = 0;
-  for (int i = 0; i < prob->num_ex; ++i) {
-    if (fabs(alpha[i]) > 0) {
-      ++nSV;
-      if (prob->y[i] > 0) {
-        if (fabs(alpha[i]) >= si.upper_bound_p) {
-          ++nBSV;
-        }
-      } else {
-        if (fabs(alpha[i]) >= si.upper_bound_n) {
-          ++nBSV;
-        }
-      }
-    }
-  }
-
-  Info("nSV = %d, nBSV = %d\n", nSV, nBSV);
-
-  DecisionFunction f;
-  f.alpha = alpha;
-  f.rho = si.rho;
-  return f;
-}
-
-//
-// Interface functions
-//
-SVMModel *TrainSVM(const Problem *prob, const SVMParameter *param) {
-  SVMModel *model = new SVMModel;
-  model->param = *param;
-  model->free_sv = 0;  // XXX
-
-  // classification
-  int num_ex = prob->num_ex;
-  int num_classes;
-  int *labels = NULL;
-  int *start = NULL;
-  int *count = NULL;
-  int *perm = new int[num_ex];
-
-  // group training data of the same class
-  GroupClasses(prob, &num_classes, &labels, &start, &count, perm);
-  if (num_classes == 1) {
-    Info("WARNING: training data in only one class. See README for details.\n");
-  }
-
-  Node **x = new Node*[num_ex];
-  for (int i = 0; i < num_ex; ++i) {
-    x[i] = prob->x[perm[i]];
-  }
-
-  // calculate weighted C
-  double *weighted_C = new double[num_classes];
-  for (int i = 0; i < num_classes; ++i) {
-    weighted_C[i] = param->C;
-  }
-  for (int i = 0; i < param->num_weights; ++i) {
-    int j;
-    for (j = 0; j < num_classes; ++j) {
-      if (param->weight_labels[i] == labels[j]) {
-        break;
-      }
-    }
-    if (j == num_classes) {
-      std::cerr << "WARNING: class label " << param->weight_labels[i] << " specified in weight is not found" << std::endl;
-    } else {
-      weighted_C[j] *= param->weights[i];
-    }
-  }
-
-  // train k*(k-1)/2 models
-  bool *non_zero = new bool[num_ex];
-  for (int i = 0; i < num_ex; ++i) {
-    non_zero[i] = false;
-  }
-  DecisionFunction *f = new DecisionFunction[num_classes*(num_classes-1)/2];
-
-  int p = 0;
-  for (int i = 0; i < num_classes; ++i) {
-    for (int j = i+1; j < num_classes; ++j) {
-      Problem sub_prob;
-      int si = start[i], sj = start[j];
-      int ci = count[i], cj = count[j];
-      sub_prob.num_ex = ci+cj;
-      sub_prob.x = new Node*[sub_prob.num_ex];
-      sub_prob.y = new double[sub_prob.num_ex];
-      for (int k = 0; k < ci; ++k) {
-        sub_prob.x[k] = x[si+k];
-        sub_prob.y[k] = +1;
-      }
-      for (int k = 0; k < cj; ++k) {
-        sub_prob.x[ci+k] = x[sj+k];
-        sub_prob.y[ci+k] = -1;
-      }
-
-      f[p] = TrainSingleSVM(&sub_prob, param,weighted_C[i], weighted_C[j]);
-      for (int k = 0; k < ci; ++k) {
-        if (!non_zero[si+k] && fabs(f[p].alpha[k]) > 0) {
-          non_zero[si+k] = true;
-        }
-      }
-      for (int k = 0; k < cj; ++k) {
-        if (!non_zero[sj+k] && fabs(f[p].alpha[ci+k]) > 0) {
-          non_zero[sj+k] = true;
-        }
-      }
-      delete[] sub_prob.x;
-      delete[] sub_prob.y;
-      ++p;
-    }
-  }
-
-  // build output
-  model->num_classes = num_classes;
-  model->num_ex = num_ex;
-
-  model->labels = new int[num_classes];
-  for (int i = 0; i < num_classes; ++i) {
-    model->labels[i] = labels[i];
-  }
-
-  model->rho = new double[num_classes*(num_classes-1)/2];
-  for (int i = 0; i < num_classes*(num_classes-1)/2; ++i) {
-    model->rho[i] = f[i].rho;
-  }
-
-  int total_sv = 0;
-  int *nz_count = new int[num_classes];
-  model->num_svs = new int[num_classes];
-  for (int i = 0; i < num_classes; ++i) {
-    int num_svs = 0;
-    for (int j = 0; j < count[i]; ++j) {
-      if (non_zero[start[i]+j]) {
-        ++num_svs;
-        ++total_sv;
-      }
-    }
-    model->num_svs[i] = num_svs;
-    nz_count[i] = num_svs;
-  }
-
-  Info("Total nSV = %d\n", total_sv);
-
-  model->total_sv = total_sv;
-  model->svs = new Node*[total_sv];
-  model->sv_indices = new int[total_sv];
-  p = 0;
-  for (int i = 0; i < num_ex; ++i) {
-    if (non_zero[i]) {
-      model->svs[p] = x[i];
-      model->sv_indices[p] = perm[i] + 1;
-      ++p;
-    }
-  }
-
-  int *nz_start = new int[num_classes];
-  nz_start[0] = 0;
-  for (int i = 1; i < num_classes; ++i) {
-    nz_start[i] = nz_start[i-1]+nz_count[i-1];
-  }
-
-  model->sv_coef = new double*[num_classes-1];
-  for (int i = 0; i < num_classes-1; ++i) {
-    model->sv_coef[i] = new double[total_sv];
-  }
-
-  p = 0;
-  for (int i = 0; i < num_classes; ++i) {
-    for (int j = i+1; j < num_classes; ++j) {
-      // classifier (i,j): coefficients with
-      // i are in sv_coef[j-1][nz_start[i]...],
-      // j are in sv_coef[i][nz_start[j]...]
- int si = start[i]; - int sj = start[j]; - int ci = count[i]; - int cj = count[j]; - - int q = nz_start[i]; - for (int k = 0; k < ci; ++k) { - if (non_zero[si+k]) { - model->sv_coef[j-1][q++] = f[p].alpha[k]; - } - } - q = nz_start[j]; - for (int k = 0; k < cj; ++k) { - if (non_zero[sj+k]) { - model->sv_coef[i][q++] = f[p].alpha[ci+k]; - } - } - ++p; - } - } - - delete[] labels; - delete[] count; - delete[] perm; - delete[] start; - delete[] x; - delete[] weighted_C; - delete[] non_zero; - for (int i = 0; i < num_classes*(num_classes-1)/2; ++i) { - delete[] f[i].alpha; - } - delete[] f; - delete[] nz_count; - delete[] nz_start; - - return model; -} - -double PredictSVMValues(const SVMModel *model, const Node *x, double *decision_values) { - int num_classes = model->num_classes; - int total_sv = model->total_sv; - - double *kvalue = new double[total_sv]; - for (int i = 0; i < total_sv; ++i) { - kvalue[i] = Kernel::KernelFunction(x, model->svs[i], model->param); - } - - int *start = new int[num_classes]; - start[0] = 0; - for (int i = 1; i < num_classes; ++i) { - start[i] = start[i-1] + model->num_svs[i-1]; - } - - int *vote = new int[num_classes]; - for (int i = 0; i < num_classes; ++i) { - vote[i] = 0; - } - - int p = 0; - for (int i = 0; i < num_classes; ++i) { - for (int j = i+1; j < num_classes; ++j) { - double sum = 0; - int si = start[i]; - int sj = start[j]; - int ci = model->num_svs[i]; - int cj = model->num_svs[j]; - - double *coef1 = model->sv_coef[j-1]; - double *coef2 = model->sv_coef[i]; - for (int k = 0; k < ci; ++k) { - sum += coef1[si+k] * kvalue[si+k]; - } - for (int k = 0; k < cj; ++k) { - sum += coef2[sj+k] * kvalue[sj+k]; - } - sum -= model->rho[p]; - decision_values[p] = sum; - - if (decision_values[p] > 0) { - ++vote[i]; - } else { - ++vote[j]; - } - ++p; - } - } - - int vote_max_idx = 0; - for (int i = 1; i < num_classes; ++i) { - if (vote[i] > vote[vote_max_idx]) { - vote_max_idx = i; - } - } - - delete[] kvalue; - delete[] start; - delete[] vote; - - return model->labels[vote_max_idx]; -} - -double PredictSVM(const SVMModel *model, const Node *x) { - int num_classes = model->num_classes; - double *decision_values = new double[num_classes*(num_classes-1)/2]; - double pred_result = PredictSVMValues(model, x, decision_values); - delete[] decision_values; - return pred_result; -} - -static const char *kSVMTypeTable[] = { "c_svc", "nu_svc", NULL }; - -static const char *kKernelTypeTable[] = { "linear", "polynomial", "rbf", "sigmoid", "precomputed", NULL }; - -int SaveSVMModel(std::ofstream &model_file, const struct SVMModel *model) { - const SVMParameter ¶m = model->param; - - model_file << "svm_model\n"; - model_file << "svm_type " << kSVMTypeTable[param.svm_type] << '\n'; - model_file << "kernel_type " << kKernelTypeTable[param.kernel_type] << '\n'; - - if (param.kernel_type == POLY) { - model_file << "degree " << param.degree << '\n'; - } - if (param.kernel_type == POLY || - param.kernel_type == RBF || - param.kernel_type == SIGMOID) { - model_file << "gamma " << param.gamma << '\n'; - } - if (param.kernel_type == POLY || - param.kernel_type == SIGMOID) { - model_file << "coef0 " << param.coef0 << '\n'; - } - - int num_classes = model->num_classes; - int total_sv = model->total_sv; - model_file << "num_examples " << model->num_ex << '\n'; - model_file << "num_classes " << num_classes << '\n'; - model_file << "total_SV " << total_sv << '\n'; - - if (model->labels) { - model_file << "labels"; - for (int i = 0; i < num_classes; ++i) - model_file << ' ' << 
model->labels[i]; - model_file << '\n'; - } - - if (model->rho) { - model_file << "rho"; - for (int i = 0; i < num_classes*(num_classes-1)/2; ++i) - model_file << ' ' << model->rho[i]; - model_file << '\n'; - } - - if (model->num_svs) { - model_file << "num_SVs"; - for (int i = 0; i < num_classes; ++i) - model_file << ' ' << model->num_svs[i]; - model_file << '\n'; - } - - if (model->sv_indices) { - model_file << "SV_indices\n"; - for (int i = 0; i < total_sv; ++i) - model_file << model->sv_indices[i] << ' '; - model_file << '\n'; - } - - model_file << "SVs\n"; - const double *const *sv_coef = model->sv_coef; - const Node *const *svs = model->svs; - - for (int i = 0; i < total_sv; ++i) { - for (int j = 0; j < num_classes-1; ++j) - model_file << std::setprecision(16) << (sv_coef[j][i]+0.0) << ' '; // add "+0.0" to avoid negative zero in output - - const Node *p = svs[i]; - - if (param.kernel_type == PRECOMPUTED) { - model_file << "0:" << static_cast(p->value) << ' '; - } else { - while (p->index != -1) { - model_file << p->index << ':' << std::setprecision(8) << p->value << ' '; - ++p; - } - } - model_file << '\n'; - } - - return 0; -} - -SVMModel *LoadSVMModel(std::ifstream &model_file) { - SVMModel *model = new SVMModel; - SVMParameter ¶m = model->param; - model->rho = NULL; - model->sv_indices = NULL; - model->labels = NULL; - model->num_svs = NULL; - - char cmd[80]; - while (1) { - model_file >> cmd; - - if (std::strcmp(cmd, "svm_type") == 0) { - model_file >> cmd; - int i; - for (i = 0; kSVMTypeTable[i]; ++i) { - if (std::strcmp(kSVMTypeTable[i], cmd) == 0) { - param.svm_type = i; - break; - } - } - if (kSVMTypeTable[i] == NULL) { - std::cerr << "Unknown SVM type.\n" << std::endl; - return NULL; - } - } else - if (std::strcmp(cmd, "kernel_type") == 0) { - model_file >> cmd; - int i; - for (i = 0; kKernelTypeTable[i]; ++i) { - if (std::strcmp(kKernelTypeTable[i], cmd) == 0) { - param.kernel_type = i; - break; - } - } - if (kKernelTypeTable[i] == NULL) { - std::cerr << "Unknown kernel function.\n" << std::endl; - return NULL; - } - } else - if (std::strcmp(cmd, "degree") == 0) { - model_file >> param.degree; - } else - if (std::strcmp(cmd, "gamma") == 0) { - model_file >> param.gamma; - } else - if (std::strcmp(cmd, "coef0") == 0) { - model_file >> param.coef0; - } else - if (std::strcmp(cmd, "num_examples") == 0) { - model_file >> model->num_ex; - } else - if (std::strcmp(cmd, "num_classes") == 0) { - model_file >> model->num_classes; - } else - if (std::strcmp(cmd, "total_SV") == 0) { - model_file >> model->total_sv; - } else - if (std::strcmp(cmd, "labels") == 0) { - int n = model->num_classes; - model->labels = new int[n]; - for (int i = 0; i < n; ++i) { - model_file >> model->labels[i]; - } - } else - if (std::strcmp(cmd, "rho") == 0) { - int n = model->num_classes*(model->num_classes-1)/2; - model->rho = new double[n]; - for (int i = 0; i < n; ++i) { - model_file >> model->rho[i]; - } - } else - if (std::strcmp(cmd, "num_SVs") == 0) { - int n = model->num_classes; - model->num_svs = new int[n]; - for (int i = 0; i < n; ++i) { - model_file >> model->num_svs[i]; - } - } else - if (std::strcmp(cmd, "SV_indices") == 0) { - int n = model->total_sv; - model->sv_indices = new int[n]; - for (int i = 0; i < n; ++i) { - model_file >> model->sv_indices[i]; - } - } else - if (std::strcmp(cmd, "SVs") == 0) { - std::size_t m = static_cast(model->num_classes)-1; - int total_sv = model->total_sv; - std::string line; - - if (model_file.peek() == '\n') - model_file.get(); - - model->sv_coef = new 
double*[m]; - for (int i = 0; i < m; ++i) { - model->sv_coef[i] = new double[total_sv]; - } - model->svs = new Node*[total_sv]; - for (int i = 0; i < total_sv; ++i) { - std::vector tokens; - std::size_t prev = 0, pos; - - std::getline(model_file, line); - while ((pos = line.find_first_of(" \t\n", prev)) != std::string::npos) { - if (pos > prev) - tokens.push_back(line.substr(prev, pos-prev)); - prev = pos + 1; - } - if (prev < line.length()) - tokens.push_back(line.substr(prev, std::string::npos)); - - for (std::size_t j = 0; j < m; ++j) { - try - { - std::size_t end; - model->sv_coef[j][i] = std::stod(tokens[j], &end); - if (end != tokens[j].length()) { - throw std::invalid_argument("incomplete convention"); - } - } - catch(std::exception& e) - { - std::cerr << "Error: " << e.what() << " in SV " << (i+1) << std::endl; - delete[] model->svs; - for (int j = 0; j < m; ++j) { - delete[] model->sv_coef[j]; - } - delete[] model->sv_coef; - std::vector(tokens).swap(tokens); - exit(EXIT_FAILURE); - } // TODO try not to use exception - } - - std::size_t elements = tokens.size() - m + 1; - model->svs[i] = new Node[elements]; - prev = 0; - for (std::size_t j = 0; j < elements-1; ++j) { - pos = tokens[j+m].find_first_of(':'); - try - { - std::size_t end; - - model->svs[i][j].index = std::stoi(tokens[j+m].substr(prev, pos-prev), &end); - if (end != (tokens[j+m].substr(prev, pos-prev)).length()) { - throw std::invalid_argument("incomplete convention"); - } - model->svs[i][j].value = std::stod(tokens[j+m].substr(pos+1), &end); - if (end != (tokens[j+m].substr(pos+1)).length()) { - throw std::invalid_argument("incomplete convention"); - } - } - catch(std::exception& e) - { - std::cerr << "Error: " << e.what() << " in line " << (i+1) << std::endl; - for (int k = 0; k < m; ++k) { - delete[] model->sv_coef[k]; - } - delete[] model->sv_coef; - for (int k = 0; k < i+1; ++k) { - delete[] model->svs[k]; - } - delete[] model->svs; - std::vector(tokens).swap(tokens); - exit(EXIT_FAILURE); - } - } - model->svs[i][elements-1].index = -1; - model->svs[i][elements-1].value = 0; - } - break; - } else { - std::cerr << "Unknown text in knn_model file: " << cmd << std::endl; - FreeSVMModel(&model); - return NULL; - } - } - model->free_sv = 1; - return model; -} - -void FreeSVMModelContent(SVMModel *model) { - if (model->free_sv && model->total_sv > 0 && model->svs != NULL) { - delete[] model->svs; - model->svs = NULL; - } - - if (model->sv_coef) { - for (int i = 0; i < model->num_classes-1; ++i) - delete[] model->sv_coef[i]; - } - - if (model->svs) { - delete[] model->svs; - model->svs = NULL; - } - - if (model->sv_coef) { - delete[] model->sv_coef; - model->sv_coef = NULL; - } - - if (model->rho) { - delete[] model->rho; - model->rho = NULL; - } - - if (model->labels) { - delete[] model->labels; - model->labels= NULL; - } - - if (model->sv_indices) { - delete[] model->sv_indices; - model->sv_indices = NULL; - } - - if (model->num_svs) { - delete[] model->num_svs; - model->num_svs = NULL; - } -} - -void FreeSVMModel(SVMModel** model) -{ - if (model != NULL && *model != NULL) { - FreeSVMModelContent(*model); - delete *model; - *model = NULL; - } - - return; -} - -void FreeSVMParam(SVMParameter* param) { - if (param->weight_labels) { - delete[] param->weight_labels; - param->weight_labels = NULL; - } - if (param->weights) { - delete[] param->weights; - param->weights = NULL; - } - delete param; - param = NULL; - - return; -} - -const char *CheckSVMParameter(const SVMParameter *param) { - int svm_type = param->svm_type; - 
if (svm_type != C_SVC && - svm_type != NU_SVC) - return "unknown svm type"; - - int kernel_type = param->kernel_type; - if (kernel_type != LINEAR && - kernel_type != POLY && - kernel_type != RBF && - kernel_type != SIGMOID && - kernel_type != PRECOMPUTED) - return "unknown kernel type"; - - if (param->gamma < 0) - return "gamma < 0"; - - if (param->degree < 0) - return "degree of polynomial kernel < 0"; - - if (param->cache_size <= 0) - return "cache_size <= 0"; - - if (param->eps <= 0) - return "eps <= 0"; - - if (svm_type == C_SVC) - if (param->C <= 0) - return "C <= 0"; - - if (svm_type == NU_SVC) - if (param->nu <= 0 || param->nu > 1) - return "nu <= 0 or nu > 1"; - - if (param->shrinking != 0 && - param->shrinking != 1) - return "shrinking != 0 and shrinking != 1"; - - return NULL; -} - -void InitSVMParam(struct SVMParameter *param) { - param->svm_type = C_SVC; - param->kernel_type = RBF; - param->degree = 3; - param->gamma = 0; // default 1/num_features - param->coef0 = 0; - param->nu = 0.5; - param->cache_size = 100; - param->C = 1; - param->eps = 1e-3; - param->shrinking = 1; - param->num_weights = 0; - param->weight_labels = NULL; - param->weights = NULL; - SetPrintCout(); - - return; -} - -void SetPrintNull() { - PrintString = &PrintNull; -} - -void SetPrintCout() { - PrintString = &PrintCout; -} \ No newline at end of file diff --git a/svm.h b/svm.h deleted file mode 100644 index be0a4ca..0000000 --- a/svm.h +++ /dev/null @@ -1,56 +0,0 @@ -#ifndef LIBCP_SVM_H_ -#define LIBCP_SVM_H_ - -#include "utilities.h" - -enum { C_SVC, NU_SVC }; // svm_type -enum { LINEAR, POLY, RBF, SIGMOID, PRECOMPUTED }; // kernel_type - -struct SVMParameter { - int svm_type; - int kernel_type; - int degree; // for poly - double gamma; // for poly/rbf/sigmoid - double coef0; // for poly/sigmoid - double cache_size; // in MB - double eps; // stopping criteria - double C; // for C_SVC - int num_weights; // for C_SVC - int *weight_labels; // for C_SVC - double *weights; // for C_SVC - double nu; // for NU_SVC - int shrinking; // use the shrinking heuristics -}; - -struct SVMModel { - struct SVMParameter param; - int num_ex; - int num_classes; // number of classes (k) - int total_sv; // total #SV - struct Node **svs; // SVs (SV[total_sv]) - double **sv_coef; // coefficients for SVs in decision functions (sv_coef[k-1][total_sv]) - double *rho; // constants in decision functions (rho[k*(k-1)/2]) - int *sv_indices; // sv_indices[0,...,nSV-1] are values in [1,...,num_traning_data] to indicate SVs in the training set - int *labels; // label of each class (label[k]) - int *num_svs; // number of SVs for each class (nSV[k]) - // nSV[0] + nSV[1] + ... 
+ nSV[k-1] = total_sv
-  int free_sv;    // 1 if SVMModel is created by LoadSVMModel
-                  // 0 if SVMModel is created by TrainSVM
-};
-
-SVMModel *TrainSVM(const struct Problem *prob, const struct SVMParameter *param);
-double PredictSVMValues(const struct SVMModel *model, const struct Node *x, double *decision_values);
-double PredictSVM(const struct SVMModel *model, const struct Node *x);
-
-int SaveSVMModel(std::ofstream &model_file, const struct SVMModel *model);
-SVMModel *LoadSVMModel(std::ifstream &model_file);
-void FreeSVMModel(struct SVMModel **model);
-
-void FreeSVMParam(struct SVMParameter *param);
-void InitSVMParam(struct SVMParameter *param);
-const char *CheckSVMParameter(const struct SVMParameter *param);
-
-void SetPrintNull();
-void SetPrintCout();
-
-#endif  // LIBCP_SVM_H_
\ No newline at end of file
diff --git a/utilities.cpp b/utilities.cpp
index 1ddfee2..31ab32a 100644
--- a/utilities.cpp
+++ b/utilities.cpp
@@ -6,6 +6,32 @@
 #include
 #include

+void (*PrintString) (const char *) = &PrintNull;
+
+void PrintCout(const char *s) {
+  std::cout << s;
+  std::cout.flush();
+}
+
+void PrintNull(const char *s) {}
+
+void Info(const char *format, ...) {
+  char buffer[BUFSIZ];
+  va_list ap;
+  va_start(ap, format);
+  vsprintf(buffer, format, ap);
+  va_end(ap);
+  (*PrintString)(buffer);
+}
+
+void SetPrintNull() {
+  PrintString = &PrintNull;
+}
+
+void SetPrintCout() {
+  PrintString = &PrintCout;
+}
+
 Problem *ReadProblem(const char *file_name) {
   std::ifstream input_file(file_name);
   if (!input_file.is_open()) {
@@ -34,7 +60,7 @@ Problem *ReadProblem(const char *file_name) {
     current_max_index = -1;
     std::getline(input_file, line);

-    while ((pos = line.find_first_of(" \t\n", prev)) != std::string::npos) {
+    while ((pos = line.find_first_of(" \t\n\r", prev)) != std::string::npos) {
       if (pos > prev) {
         tokens.push_back(line.substr(prev, pos-prev));
       }
@@ -55,7 +81,7 @@ Problem *ReadProblem(const char *file_name) {
     }
     catch(std::exception& e)
     {
-      std::cerr << "Error: " << e.what() << " in line " << (i+1) << std::endl;
+      std::cerr << "Error: " << e.what() << " in line " << (i+1) << " tokens " << tokens[0] << std::endl;
       delete[] problem->y;
       for (int j = 0; j < i; ++j) {
         delete[] problem->x[j];
       }
@@ -86,7 +112,7 @@
       }
       catch(std::exception& e)
       {
-        std::cerr << "Error: " << e.what() << " in line " << (i+1) << std::endl;
+        std::cerr << "Error: " << e.what() << " in line " << (i+1) << " tokens " << tokens[j+1] << std::endl;
         delete[] problem->y;
         for (int j = 0; j < i+1; ++j) {
           delete[] problem->x[j];
         }
@@ -139,33 +165,37 @@ void FreeProblem(struct Problem *problem) {
 // perm, length l, must be allocated before calling this subroutine
 void GroupClasses(const Problem *prob, int *num_classes_ret, int **labels_ret, int **start_ret, int **count_ret, int *perm) {
   int num_ex = prob->num_ex;
-  int max_num_classes = 16;
   int num_classes = 0;
-  int *labels = new int[max_num_classes];
-  int *count = new int[max_num_classes];
+  int *labels = NULL;
+  int *count = NULL;
   int *data_labels = new int[num_ex];
+  std::vector<int> unique_labels;
+  std::vector<int> counts;

   for (int i = 0; i < num_ex; ++i) {
     int this_label = static_cast<int>(prob->y[i]);
-    int j;
+    std::size_t j;
     for (j = 0; j < num_classes; ++j) {
-      if (this_label == labels[j]) {
-        ++count[j];
+      if (this_label == unique_labels[j]) {
+        ++counts[j];
         break;
       }
     }
-    data_labels[i] = j;
+    data_labels[i] = static_cast<int>(j);
     if (j == num_classes) {
-      if (num_classes == max_num_classes) {
-        max_num_classes *= 2;
-        labels = (int *)realloc(labels, (unsigned long)max_num_classes*sizeof(int));
-        count = (int *)realloc(count, (unsigned long)max_num_classes*sizeof(int));
-      }
-      labels[num_classes] = this_label;
-      count[num_classes] = 1;
+      unique_labels.push_back(this_label);
+      counts.push_back(1);
       ++num_classes;
     }
   }
+  labels = new int[num_classes];
+  count = new int[num_classes];
+  for (std::size_t i = 0; i < unique_labels.size(); ++i) {
+    labels[i] = unique_labels[i];
+    count[i] = counts[i];
+  }
+  std::vector<int>(unique_labels).swap(unique_labels);
+  std::vector<int>(counts).swap(counts);

   //
   // Labels are ordered by their first occurrence in the training set.
@@ -205,4 +235,33 @@ void GroupClasses(const Problem *prob, int *num_classes_ret, int **labels_ret, i
   delete[] data_labels;

   return;
+}
+
+int *GetLabels(const Problem *prob, int *num_classes_ret) {
+  int num_ex = prob->num_ex;
+  int num_classes = 0;
+  int *labels = NULL;
+  std::vector<int> unique_labels;
+
+  for (int i = 0; i < num_ex; ++i) {
+    int this_label = static_cast<int>(prob->y[i]);
+    std::size_t j;
+    for (j = 0; j < num_classes; ++j) {
+      if (this_label == unique_labels[j]) {
+        break;
+      }
+    }
+    if (j == num_classes) {
+      unique_labels.push_back(this_label);
+      ++num_classes;
+    }
+  }
+  labels = new int[num_classes];
+  for (std::size_t i = 0; i < unique_labels.size(); ++i) {
+    labels[i] = unique_labels[i];
+  }
+  std::vector<int>(unique_labels).swap(unique_labels);
+  *num_classes_ret = num_classes;
+
+  return labels;
 }
\ No newline at end of file
diff --git a/utilities.h b/utilities.h
index 94fa1c1..0620fae 100644
--- a/utilities.h
+++ b/utilities.h
@@ -5,6 +5,7 @@
 #include
 #include
 #include
+#include

 const double kInf = HUGE_VAL;
 const double kTau = 1e-12;
@@ -22,6 +23,12 @@ struct Problem {
   struct Node **x;
 };

+void PrintCout(const char *s);
+void PrintNull(const char *s);
+void Info(const char *format, ...);
+void SetPrintNull();
+void SetPrintCout();
+
 template <typename T>
 T FindMostFrequent(T *array, int size) {
   std::vector<T> v(array, array+size);
@@ -88,5 +95,6 @@ void QuickSortIndex(T array[], size_t index[], size_t left, size_t right) {
 Problem *ReadProblem(const char *file_name);
 void FreeProblem(struct Problem *problem);
 void GroupClasses(const Problem *prob, int *num_classes_ret, int **labels_ret, int **start_ret, int **count_ret, int *perm);
+int *GetLabels(const Problem *prob, int *num_classes_ret);

 #endif  // LIBCP_UTILITIES_H_
\ No newline at end of file
From a96027ada6cbef6178a39658c46839bb92676bf9 Mon Sep 17 00:00:00 2001
From: fated
Date: Thu, 26 Feb 2015 17:41:57 +0000
Subject: [PATCH 4/8] cp-cv.cpp file added
---
 Makefile       | 7 +++++--
 cp-offline.cpp | 4 ++--
 cp.cpp         | 4 ++++
 cp.h           | 1 +
 4 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/Makefile b/Makefile
index 2689554..f9b0793 100644
--- a/Makefile
+++ b/Makefile
@@ -3,11 +3,14 @@ CFLAGS = -Wall -Wconversion -O3 -fPIC
 SHVER = 2
 OS = $(shell uname)

-all: cp-offline
+all: cp-offline cp-cv

 cp-offline: cp-offline.cpp utilities.o knn.o cp.o
 	$(CXX) $(CFLAGS) cp-offline.cpp utilities.o knn.o cp.o -o cp-offline -lm

+cp-cv: cp-cv.cpp utilities.o knn.o cp.o
+	$(CXX) $(CFLAGS) cp-cv.cpp utilities.o knn.o cp.o -o cp-cv -lm
+
 utilities.o: utilities.cpp utilities.h
 	$(CXX) $(CFLAGS) -c utilities.cpp

@@ -18,4 +21,4 @@ cp.o: cp.cpp cp.h
 	$(CXX) $(CFLAGS) -c cp.cpp

 clean:
-	rm -f utilities.o knn.o cp.o cp-offline
+	rm -f utilities.o knn.o cp.o cp-offline cp-cv
diff --git a/cp-offline.cpp b/cp-offline.cpp
index f412895..4b80f37 100644
--- a/cp-offline.cpp
+++ b/cp-offline.cpp
@@ -98,11 +98,11 @@ int main(int argc, char *argv[]) {
   std::cout << "Simple Accuracy: " << 100.0*num_correct/test->num_ex << '%'
             << " (" << num_correct << '/' << test->num_ex << ") "
             << "Mean Confidence: " << std::fixed << std::setprecision(4) << 100*avg_conf << "%, "
-            << "Mean Credibility: " << 100*avg_cred << "% " << '\n';
+            << "Mean Credibility: " << 100*avg_cred << "%\n";
   std::cout << "Accuracy: " << 100.0*num_incl/test->num_ex << '%'
             << " (" << num_incl << '/' << test->num_ex << ") "
             << "Multi Prediction: " << std::fixed << std::setprecision(4) << 100.0*num_multi/test->num_ex << "%, "
-            << "Empty Prediction: " << 100.0*num_empty/test->num_ex << "% " << '\n';
+            << "Empty Prediction: " << 100.0*num_empty/test->num_ex << "%\n";
   output_file.close();

   std::cout << "Time cost: " << std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time).count()/1000.0 << " s\n";
diff --git a/cp.cpp b/cp.cpp
index 7c14077..e5a42ad 100644
--- a/cp.cpp
+++ b/cp.cpp
@@ -145,6 +145,10 @@ std::vector<double> PredictCP(const struct Problem *train, const struct Model *mode
   return predict_label;
 }

+void CrossValidation(const struct Problem *prob, const struct Parameter *param, std::vector<double> *predict_labels, double *conf, double *cred) {
+  return;
+}
+
 static const char *kMeasureTypeTable[] = { "knn", NULL };

 int SaveModel(const char *model_file_name, const struct Model *model) {
diff --git a/cp.h b/cp.h
index b3d6801..4c4bae0 100644
--- a/cp.h
+++ b/cp.h
@@ -28,6 +28,7 @@ struct Model {

 Model *TrainCP(const struct Problem *train, const struct Parameter *param);
 std::vector<double> PredictCP(const struct Problem *train, const struct Model *model, const struct Node *x, double &conf, double &cred);
+void CrossValidation(const struct Problem *prob, const struct Parameter *param, std::vector<double> *predict_labels, double *conf, double *cred);

 int SaveModel(const char *model_file_name, const struct Model *model);
 Model *LoadModel(const char *model_file_name);

From d914e6f6b00c419ff499b823504b7aae33b5e1cf Mon Sep 17 00:00:00 2001
From: fated
Date: Thu, 26 Feb 2015 18:12:01 +0000
Subject: [PATCH 5/8] cv mode for cp added
---
 cp-cv.cpp | 174 ++++++++++++++++++++++++++++++++++++++++++++++++++
 cp.cpp    | 112 ++++++++++++++++++++++++++++++++++-
 2 files changed, 285 insertions(+), 1 deletion(-)
 create mode 100644 cp-cv.cpp

diff --git a/cp-cv.cpp b/cp-cv.cpp
new file mode 100644
index 0000000..af96688
--- /dev/null
+++ b/cp-cv.cpp
@@ -0,0 +1,174 @@
+#include "cp.h"
+#include
+#include
+#include
+
+void ExitWithHelp();
+void ParseCommandLine(int argc, char *argv[], char *data_file_name, char *output_file_name);
+
+struct Parameter param;
+
+int main(int argc, char *argv[]) {
+  char data_file_name[256];
+  char output_file_name[256];
+  struct Problem *prob;
+  int num_correct = 0, num_empty = 0, num_multi = 0, num_incl = 0;
+  double avg_conf = 0, avg_cred = 0;
+  double *conf = NULL, *cred = NULL;
+  std::vector<double> *predict_labels = NULL;
+  const char *error_message;
+
+  ParseCommandLine(argc, argv, data_file_name, output_file_name);
+  error_message = CheckParameter(&param);
+
+  if (error_message != NULL) {
+    std::cerr << error_message << std::endl;
+    exit(EXIT_FAILURE);
+  }
+
+  prob = ReadProblem(data_file_name);
+
+  std::ofstream output_file(output_file_name);
+  if (!output_file.is_open()) {
+    std::cerr << "Unable to open output file: " << output_file_name << std::endl;
+    exit(EXIT_FAILURE);
+  }
+
+  predict_labels = new std::vector<double>[prob->num_ex];
+  conf = new double[prob->num_ex];
+  cred = new double[prob->num_ex];
+
+  std::chrono::time_point<std::chrono::high_resolution_clock> start_time = std::chrono::high_resolution_clock::now();
+
+  CrossValidation(prob, &param, predict_labels, conf, cred);
+
+  std::chrono::time_point<std::chrono::high_resolution_clock> end_time = std::chrono::high_resolution_clock::now();
+
+  for (int i = 0; i < prob->num_ex; ++i) {
+    avg_conf += conf[i];
+    avg_cred += cred[i];
+
+    output_file << std::resetiosflags(std::ios::fixed) << prob->y[i] << ' ' << predict_labels[i][0] << ' '
+                << std::setiosflags(std::ios::fixed) << conf[i] << ' ' << cred[i];
+    if (predict_labels[i][0] == prob->y[i]) {
+      ++num_correct;
+    }
+
+    if (predict_labels[i].size() == 1) {
+      ++num_empty;
+      output_file << " Empty\n";
+    } else {
+      output_file << " set:";
+      for (size_t j = 1; j < predict_labels[i].size(); ++j) {
+        output_file << ' ' << predict_labels[i][j];
+        if (predict_labels[i][j] == prob->y[i]) {
+          ++num_incl;
+        }
+      }
+      if (predict_labels[i].size() > 2) {
+        ++num_multi;
+        output_file << " Multi\n";
+      } else {
+        output_file << " Single\n";
+      }
+    }
+    std::vector<double>().swap(predict_labels[i]);
+  }
+  avg_conf /= prob->num_ex;
+  avg_cred /= prob->num_ex;
+
+  std::cout << "CV Accuracy: " << 100.0*num_correct/(prob->num_ex) << '%'
+            << " (" << num_correct << '/' << prob->num_ex << ") "
+            << "Mean Confidence: " << std::fixed << std::setprecision(4) << 100*avg_conf << "%, "
+            << "Mean Credibility: " << 100*avg_cred << "%\n";
+  std::cout << "Accuracy: " << 100.0*num_incl/prob->num_ex << '%'
+            << " (" << num_incl << '/' << prob->num_ex << ") "
+            << "Multi Prediction: " << std::fixed << std::setprecision(4) << 100.0*num_multi/prob->num_ex << "%, "
+            << "Empty Prediction: " << 100.0*num_empty/prob->num_ex << "%\n";
+  output_file.close();
+
+  std::cout << "Time cost: " << std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time).count()/1000.0 << " s\n";
+
+  FreeProblem(prob);
+  FreeParam(&param);
+  delete[] predict_labels;
+  delete[] conf;
+  delete[] cred;
+
+  return 0;
+}
+
+void ExitWithHelp() {
+  std::cout << "Usage: cp-cv [options] data_file [output_file]\n"
+            << "options:\n"
+            << "  -t non-conformity measure : set type of NCM (default 0)\n"
+            << "    0 -- k-nearest neighbors (KNN)\n"
+            << "  -k num_neighbors : set number of neighbors in kNN (default 1)\n"
+            << "  -v num_folds : set number of folds in cross validation (default 5)\n"
+            << "  -e epsilon : set significance level (default 0.05)\n";
+  exit(EXIT_FAILURE);
+}
+
+void ParseCommandLine(int argc, char **argv, char *data_file_name, char *output_file_name) {
+  int i;
+  param.measure_type = KNN;
+  param.save_model = 0;
+  param.load_model = 0;
+  param.num_folds = 5;
+  param.epsilon = 0.05;
+  param.knn_param = new KNNParameter;
+  InitKNNParam(param.knn_param);
+
+  for (i = 1; i < argc; ++i) {
+    if (argv[i][0] != '-') break;
+    if ((i+1) >= argc)
+      ExitWithHelp();
+    switch (argv[i][1]) {
+      case 't': {
+        ++i;
+        param.measure_type = std::atoi(argv[i]);
+        break;
+      }
+      case 'k': {
+        ++i;
+        param.knn_param->num_neighbors = std::atoi(argv[i]);
+        break;
+      }
+      case 'v': {
+        ++i;
+        param.num_folds = std::atoi(argv[i]);
+        if (param.num_folds < 2) {
+          std::cerr << "n-fold cross validation: n must be >= 2" << std::endl;
+          exit(EXIT_FAILURE);
+        }
+        break;
+      }
+      case 'e': {
+        ++i;
+        param.epsilon = std::atof(argv[i]);
+        break;
+      }
+      default: {
+        std::cerr << "Unknown option: -" << argv[i][1] << std::endl;
+        ExitWithHelp();
+      }
+    }
+  }
+
+  if (i >= argc)
+    ExitWithHelp();
+  std::strcpy(data_file_name, argv[i]);
+  if ((i+1) < argc) {
+    std::strcpy(output_file_name, argv[i+1]);
+  } else {
+    char *p = std::strrchr(argv[i],'/');
+    if (p == NULL) {
+      p = argv[i];
+    } else {
+      ++p;
+    }
+    std::sprintf(output_file_name, "%s_output", p);
+  }
+
+  return;
+}
\ No newline at end of file
diff --git a/cp.cpp b/cp.cpp
index e5a42ad..5dbcc6d 100644
--- a/cp.cpp
+++ b/cp.cpp
@@ -145,7 +145,117 @@ std::vector<double> PredictCP(const struct Problem *train, const struct Model *mode
   return predict_label;
 }

-void CrossValidation(const struct Problem *prob, const struct Parameter *param, std::vector<double> *predict_labels, double *conf, double *cred) {
+void CrossValidation(const struct Problem *prob, const struct Parameter *param,
+    std::vector<double> *predict_labels, double *conf, double *cred) {
+  int num_folds = param->num_folds;
+  int num_ex = prob->num_ex;
+  int num_classes;
+
+  int *fold_start;
+  int *perm = new int[num_ex];
+
+  if (num_folds > num_ex) {
+    num_folds = num_ex;
+    std::cerr << "WARNING: number of folds > number of data. Will use number of folds = number of data instead (i.e., leave-one-out cross validation)" << std::endl;
+  }
+  fold_start = new int[num_folds+1];
+
+  if (num_folds < num_ex) {
+    int *start = NULL;
+    int *label = NULL;
+    int *count = NULL;
+    GroupClasses(prob, &num_classes, &label, &start, &count, perm);
+
+    int *fold_count = new int[num_folds];
+    int *index = new int[num_ex];
+
+    for (int i = 0; i < num_ex; ++i) {
+      index[i] = perm[i];
+    }
+    std::random_device rd;
+    std::mt19937 g(rd());
+    for (int i = 0; i < num_classes; ++i) {
+      std::shuffle(index+start[i], index+start[i]+count[i], g);
+    }
+
+    for (int i = 0; i < num_folds; ++i) {
+      fold_count[i] = 0;
+      for (int c = 0; c < num_classes; ++c) {
+        fold_count[i] += (i+1)*count[c]/num_folds - i*count[c]/num_folds;
+      }
+    }
+
+    fold_start[0] = 0;
+    for (int i = 1; i <= num_folds; ++i) {
+      fold_start[i] = fold_start[i-1] + fold_count[i-1];
+    }
+    for (int c = 0; c < num_classes; ++c) {
+      for (int i = 0; i < num_folds; ++i) {
+        int begin = start[c] + i*count[c]/num_folds;
+        int end = start[c] + (i+1)*count[c]/num_folds;
+        for (int j = begin; j < end; ++j) {
+          perm[fold_start[i]] = index[j];
+          fold_start[i]++;
+        }
+      }
+    }
+    fold_start[0] = 0;
+    for (int i = 1; i <= num_folds; ++i) {
+      fold_start[i] = fold_start[i-1] + fold_count[i-1];
+    }
+    delete[] start;
+    delete[] label;
+    delete[] count;
+    delete[] index;
+    delete[] fold_count;
+
+  } else {
+
+    for (int i = 0; i < num_ex; ++i) {
+      perm[i] = i;
+    }
+    std::random_device rd;
+    std::mt19937 g(rd());
+    std::shuffle(perm, perm+num_ex, g);
+    fold_start[0] = 0;
+    for (int i = 1; i <= num_folds; ++i) {
+      fold_start[i] = fold_start[i-1] + (i+1)*num_ex/num_folds - i*num_ex/num_folds;
+    }
+  }
+
+  for (int i = 0; i < num_folds; ++i) {
+    int begin = fold_start[i];
+    int end = fold_start[i+1];
+    int k = 0;
+    struct Problem subprob;
+
+    subprob.num_ex = num_ex - (end-begin);
+    subprob.x = new Node*[subprob.num_ex];
+    subprob.y = new double[subprob.num_ex];
+
+    for (int j = 0; j < begin; ++j) {
+      subprob.x[k] = prob->x[perm[j]];
+      subprob.y[k] = prob->y[perm[j]];
+      ++k;
+    }
+    for (int j = end; j < num_ex; ++j) {
+      subprob.x[k] = prob->x[perm[j]];
+      subprob.y[k] = prob->y[perm[j]];
+      ++k;
+    }
+
+    Model *submodel = TrainCP(&subprob, param);
+
+    for (int j = begin; j < end; ++j) {
+      predict_labels[perm[j]] = PredictCP(&subprob, submodel, prob->x[perm[j]], conf[perm[j]], cred[perm[j]]);
+    }
+    FreeModel(submodel);
+    delete[] subprob.x;
+    delete[] subprob.y;
+  }
+  delete[] fold_start;
+  delete[] perm;
+
   return;
 }

From ae93f297d316670acb7cc18c9b57824ec9356149 Mon Sep 17 00:00:00 2001
From: fated
Date: Thu, 26 Feb 2015 18:32:49 +0000
Subject: [PATCH 6/8] cp-online.cpp file added
---
 Makefile      |   7 ++-
 cp-online.cpp | 168 ++++++++++++++++++++++++++++++++++++++++++++++++++
 cp.cpp        |   4 ++
 cp.h          |   1 +
 4 files changed, 178 insertions(+), 2 deletions(-)
 create mode 100644 cp-online.cpp

diff --git a/Makefile b/Makefile
index f9b0793..e146829 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@ CFLAGS = -Wall -Wconversion -O3 -fPIC
 SHVER = 2
 OS = $(shell uname)

-all: cp-offline cp-cv
+all: cp-offline cp-cv cp-online

 cp-offline: cp-offline.cpp utilities.o knn.o cp.o
 	$(CXX) $(CFLAGS) cp-offline.cpp utilities.o knn.o cp.o -o cp-offline -lm
@@ -11,6 +11,9 @@ cp-offline: cp-offline.cpp utilities.o knn.o cp.o
 cp-cv: cp-cv.cpp utilities.o knn.o cp.o
 	$(CXX) $(CFLAGS) cp-cv.cpp utilities.o knn.o cp.o -o cp-cv -lm

+cp-online: cp-online.cpp utilities.o knn.o cp.o
+	$(CXX) $(CFLAGS) cp-online.cpp utilities.o knn.o cp.o -o cp-online -lm
+
 utilities.o: utilities.cpp utilities.h
 	$(CXX) $(CFLAGS) -c utilities.cpp

@@ -21,4 +24,4 @@ cp.o: cp.cpp cp.h
 	$(CXX) $(CFLAGS) -c cp.cpp

 clean:
-	rm -f utilities.o knn.o cp.o cp-offline cp-cv
+	rm -f utilities.o knn.o cp.o cp-offline cp-cv cp-online
diff --git a/cp-online.cpp b/cp-online.cpp
new file mode 100644
index 0000000..55f98bd
--- /dev/null
+++ b/cp-online.cpp
@@ -0,0 +1,168 @@
+#include "cp.h"
+#include
+#include
+#include
+
+void ExitWithHelp();
+void ParseCommandLine(int argc, char *argv[], char *data_file_name, char *output_file_name);
+
+struct Parameter param;
+
+int main(int argc, char *argv[]) {
+  char data_file_name[256];
+  char output_file_name[256];
+  struct Problem *prob;
+  int num_correct = 0, num_empty = 0, num_multi = 0, num_incl = 0;
+  int *indices = NULL;
+  double avg_conf = 0, avg_cred = 0;
+  double *conf = NULL, *cred = NULL;
+  std::vector<double> *predict_labels = NULL;
+  const char *error_message;
+
+  ParseCommandLine(argc, argv, data_file_name, output_file_name);
+  error_message = CheckParameter(&param);
+
+  if (error_message != NULL) {
+    std::cerr << error_message << std::endl;
+    exit(EXIT_FAILURE);
+  }
+
+  prob = ReadProblem(data_file_name);
+
+  std::ofstream output_file(output_file_name);
+  if (!output_file.is_open()) {
+    std::cerr << "Unable to open output file: " << output_file_name << std::endl;
+    exit(EXIT_FAILURE);
+  }
+
+  predict_labels = new std::vector<double>[prob->num_ex];
+  conf = new double[prob->num_ex];
+  cred = new double[prob->num_ex];
+  indices = new int[prob->num_ex];
+
+  std::chrono::time_point<std::chrono::high_resolution_clock> start_time = std::chrono::high_resolution_clock::now();
+
+  OnlinePredict(prob, &param, predict_labels, indices, conf, cred);
+
+  std::chrono::time_point<std::chrono::high_resolution_clock> end_time = std::chrono::high_resolution_clock::now();
+
+  output_file << prob->y[indices[0]] << '\n';
+
+  for (int i = 1; i < prob->num_ex; ++i) {
+    avg_conf += conf[i];
+    avg_cred += cred[i];
+
+    output_file << std::resetiosflags(std::ios::fixed) << prob->y[indices[i]] << ' ' << predict_labels[i][0] << ' '
+                << std::setiosflags(std::ios::fixed) << conf[i] << ' ' << cred[i];
+    if (predict_labels[i][0] == prob->y[indices[i]]) {
+      ++num_correct;
+    }
+
+    if (predict_labels[i].size() == 1) {
+      ++num_empty;
+      output_file << " Empty\n";
+    } else {
+      output_file << " set:";
+      for (size_t j = 1; j < predict_labels[i].size(); ++j) {
+        output_file << ' ' << predict_labels[i][j];
+        if (predict_labels[i][j] == prob->y[indices[i]]) {
+          ++num_incl;
+        }
+      }
+      if (predict_labels[i].size() > 2) {
+        ++num_multi;
+        output_file << " Multi\n";
+      } else {
+        output_file << " Single\n";
+      }
+    }
+    std::vector<double>().swap(predict_labels[i]);
+  }
+  avg_conf /= prob->num_ex - 1;
+  avg_cred /= prob->num_ex - 1;
+
+  std::cout << "Online Accuracy: " << 100.0*num_correct/(prob->num_ex-1) << '%'
+            << " (" << num_correct << '/' << prob->num_ex-1 << ") "
+            << "Mean Confidence: " << std::fixed << std::setprecision(4) << 100*avg_conf << "%, "
+            << "Mean Credibility: " << 100*avg_cred << "%\n";
+  std::cout << "Accuracy: " << 100.0*num_incl/prob->num_ex-1 << '%'
+            << " (" << num_incl << '/' << prob->num_ex-1 << ") "
+            << "Multi Prediction: " << std::fixed << std::setprecision(4) << 100.0*num_multi/prob->num_ex-1 << "%, "
+            << "Empty Prediction: " << 100.0*num_empty/prob->num_ex-1 << "%\n";
+  output_file.close();
+
+  std::cout << "Time cost: " << std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time).count()/1000.0 << " s\n";
+
+  FreeProblem(prob);
+  FreeParam(&param);
+  delete[] predict_labels;
+  delete[] conf;
+  delete[] cred;
+  delete[] indices;
+
+  return 0;
+}
+
+void ExitWithHelp() {
+  std::cout << "Usage: vm-online [options] data_file [output_file]\n"
+            << "options:\n"
+            << "  -t non-conformity measure : set type of NCM (default 0)\n"
+            << "    0 -- k-nearest neighbors (KNN)\n"
+            << "  -k num_neighbors : set number of neighbors in kNN (default 1)\n"
+            << "  -e epsilon : set significance level (default 0.05)\n";
+  exit(EXIT_FAILURE);
+}
+
+void ParseCommandLine(int argc, char **argv, char *data_file_name, char *output_file_name) {
+  int i;
+  param.measure_type = KNN;
+  param.save_model = 0;
+  param.load_model = 0;
+  param.epsilon = 0.05;
+  param.knn_param = new KNNParameter;
+  InitKNNParam(param.knn_param);
+
+  for (i = 1; i < argc; ++i) {
+    if (argv[i][0] != '-') break;
+    if ((i+1) >= argc)
+      ExitWithHelp();
+    switch (argv[i][1]) {
+      case 't': {
+        ++i;
+        param.measure_type = std::atoi(argv[i]);
+        break;
+      }
+      case 'k': {
+        ++i;
+        param.knn_param->num_neighbors = std::atoi(argv[i]);
+        break;
+      }
+      case 'e': {
+        ++i;
+        param.epsilon = std::atof(argv[i]);
+        break;
+      }
+      default: {
+        std::cerr << "Unknown option: -" << argv[i][1] << std::endl;
+        ExitWithHelp();
+      }
+    }
+  }
+
+  if (i >= argc)
+    ExitWithHelp();
+  std::strcpy(data_file_name, argv[i]);
+  if ((i+1) < argc) {
+    std::strcpy(output_file_name, argv[i+1]);
+  } else {
+    char *p = std::strrchr(argv[i],'/');
+    if (p == NULL) {
+      p = argv[i];
+    } else {
+      ++p;
+    }
+    std::sprintf(output_file_name, "%s_output", p);
+  }
+
+  return;
}
\ No newline at end of file
diff --git a/cp.cpp b/cp.cpp
index 5dbcc6d..c77ed4a 100644
--- a/cp.cpp
+++ b/cp.cpp
@@ -259,6 +259,10 @@ void CrossValidation(const struct Problem *prob, const struct Parameter *param,
   return;
 }

+void OnlinePredict(const struct Problem *prob, const struct Parameter *param, std::vector<double> *predict_labels, int *indices, double *conf, double *cred) {
+  return;
+}
+
 static const char *kMeasureTypeTable[] = { "knn", NULL };

 int SaveModel(const char *model_file_name, const struct Model *model) {
diff --git a/cp.h b/cp.h
index 4c4bae0..8754c76 100644
--- a/cp.h
+++ b/cp.h
@@ -29,6 +29,7 @@ struct Model {
 Model *TrainCP(const struct Problem *train, const struct Parameter *param);
 std::vector<double> PredictCP(const struct Problem *train, const struct Model *model, const struct Node *x, double &conf, double &cred);
 void CrossValidation(const struct Problem *prob, const struct Parameter *param, std::vector<double> *predict_labels, double *conf, double *cred);
+void OnlinePredict(const struct Problem *prob, const struct Parameter *param, std::vector<double> *predict_labels, int *indices, double *conf, double *cred);

 int SaveModel(const char *model_file_name, const struct Model *model);
 Model *LoadModel(const char *model_file_name);

From edc89187833e31ae4d7f4406835e9aa3e51849cd Mon Sep 17 00:00:00 2001
From: fated
Date: Thu, 26 Feb 2015 19:52:42 +0000
Subject: [PATCH 7/8] online mode for cp added
---
 cp-online.cpp |   6 +-
 cp.cpp        | 163 +++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 165 insertions(+), 4 deletions(-)

diff --git a/cp-online.cpp b/cp-online.cpp
index 55f98bd..2d42beb 100644
--- a/cp-online.cpp
+++ b/cp-online.cpp
@@ -85,10 +85,10 @@ int main(int argc, char *argv[]) {
             << " (" << num_correct << '/' << prob->num_ex-1 << ") "
             << "Mean Confidence: " << std::fixed << std::setprecision(4) << 100*avg_conf << "%, "
             << "Mean Credibility: " << 100*avg_cred << "%\n";
-  std::cout << "Accuracy: " << 100.0*num_incl/prob->num_ex-1 << '%'
+  std::cout << "Accuracy: " << 100.0*num_incl/(prob->num_ex-1) << '%'
             << " (" << num_incl << '/' << prob->num_ex-1 << ") "
-            << "Multi Prediction: " << std::fixed << std::setprecision(4) << 100.0*num_multi/prob->num_ex-1 << "%, "
-            << "Empty Prediction: " << 100.0*num_empty/prob->num_ex-1 << "%\n";
+            << "Multi Prediction: " << std::fixed << std::setprecision(4) << 100.0*num_multi/(prob->num_ex-1) << "%, "
+            << "Empty Prediction: " << 100.0*num_empty/(prob->num_ex-1) << "%\n";
   output_file.close();

   std::cout << "Time cost: " << std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time).count()/1000.0 << " s\n";
diff --git a/cp.cpp b/cp.cpp
index c77ed4a..b057288 100644
--- a/cp.cpp
+++ b/cp.cpp
@@ -259,7 +259,168 @@ void CrossValidation(const struct Problem *prob, const struct Parameter *param,
   return;
 }

-void OnlinePredict(const struct Problem *prob, const struct Parameter *param, std::vector<double> *predict_labels, int *indices, double *conf, double *cred) {
+void OnlinePredict(const struct Problem *prob, const struct Parameter *param,
+    std::vector<double> *predict_labels, int *indices, double *conf, double *cred) {
+  int num_ex = prob->num_ex;
+  int num_classes = 0;
+
+  for (int i = 0; i < num_ex; ++i) {
+    indices[i] = i;
+  }
+  std::random_device rd;
+  std::mt19937 g(rd());
+  std::shuffle(indices, indices+num_ex, g);
+
+  if (param->measure_type == KNN) {
+    int num_neighbors = param->knn_param->num_neighbors;
+    std::vector<int> labels;
+
+    double **min_same = new double*[num_ex];
+    double **min_diff = new double*[num_ex];
+    double *alpha = new double[num_ex];
+
+    for (int i = 0; i < num_ex; ++i) {
+      min_same[i] = new double[num_neighbors];
+      min_diff[i] = new double[num_neighbors];
+      for (int j = 0; j < num_neighbors; ++j) {
+        min_same[i][j] = kInf;
+        min_diff[i][j] = kInf;
+      }
+      alpha[i] = 0;
+    }
+
+    int this_label = static_cast<int>(prob->y[indices[0]]);
+    labels.push_back(this_label);
+    num_classes = 1;
+
+    for (int i = 1; i < num_ex; ++i) {
+      if (num_classes == 1)
+        std::cerr <<
+        "WARNING: training set only has one class. See README for details."
+        << std::endl;
+
+      // p_values[j] counts the examples whose nonconformity score is at least
+      // the new example's, under the hypothesis that its label is labels[j]
+      int *p_values = new int[num_classes];
+      for (int j = 0; j < num_classes; ++j) {
+        p_values[j] = 0;
+        double **min_same_ = new double*[i+1];
+        double **min_diff_ = new double*[i+1];
+        double *alpha_ = new double[i+1];
+
+        for (int j = 0; j < i; ++j) {
+          clone(min_same_[j], min_same[j], num_neighbors);
+          clone(min_diff_[j], min_diff[j], num_neighbors);
+          alpha_[j] = alpha[j];
+        }
+        min_same_[i] = new double[num_neighbors];
+        min_diff_[i] = new double[num_neighbors];
+        alpha_[i] = 0;
+        for (int j = 0; j < num_neighbors; ++j) {
+          min_same_[i][j] = kInf;
+          min_diff_[i][j] = kInf;
+        }
+        for (int k = 0; k < i; ++k) {
+          double dist = CalcDist(prob->x[indices[k]], prob->x[indices[i]]);
+
+          if (prob->y[indices[k]] == labels[static_cast<std::size_t>(j)]) {
+            int index = CompareDist(min_same_[k], dist, num_neighbors);
+            if (index < num_neighbors) {
+              alpha_[k] = CalcAlpha(min_same_[k], min_diff_[k], num_neighbors);
+            }
+            CompareDist(min_same_[i], dist, num_neighbors);
+          } else {
+            int index = CompareDist(min_diff_[k], dist, num_neighbors);
+            if (index < num_neighbors) {
+              alpha_[k] = CalcAlpha(min_same_[k], min_diff_[k], num_neighbors);
+            }
+            CompareDist(min_diff_[i], dist, num_neighbors);
+          }
+        }
+        alpha_[i] = CalcAlpha(min_same_[i], min_diff_[i], num_neighbors);
+
+        for (int k = 0; k < i+1; ++k) {
+          if (alpha_[k] >= alpha_[i]) {
+            ++p_values[j];
+          }
+        }
+
+        for (int j = 0; j < i+1; ++j) {
+          delete[] min_same_[j];
+          delete[] min_diff_[j];
+        }
+        delete[] min_same_;
+        delete[] min_diff_;
+        delete[] alpha_;
+      }
+
+      if (num_classes == 1) {
+        conf[i] = 1;
+        cred[i] = static_cast<double>(p_values[0]) / (i+1);
+        predict_labels[i].push_back(labels[0]);
+      } else {
+        // credibility = largest p-value; confidence = 1 - second-largest p-value
+        int best = 0;
+        cred[i] = p_values[0];
+        conf[i] = p_values[1];
+        for (int j = 1; j < num_classes; ++j) {
+          if (p_values[j] > cred[i]) {
+            conf[i] = cred[i];
+            cred[i] = p_values[j];
+            best = j;
+          } else if (p_values[j] < cred[i] && p_values[j] > conf[i]) {
+            conf[i] = p_values[j];
+          }
+        }
+        cred[i] = cred[i] / (i+1);
+        conf[i] = 1 - conf[i] / (i+1);
+        predict_labels[i].push_back(labels[static_cast<std::size_t>(best)]);
+      }
+
+      for (int j = 0; j < num_classes; ++j) {
+        if (static_cast<double>(p_values[j])/(i+1) > param->epsilon) {
+          predict_labels[i].push_back(labels[static_cast<std::size_t>(j)]);
+        }
+      }
+
+      delete[] p_values;
+
+      this_label = static_cast<int>(prob->y[indices[i]]);
+      std::size_t j;
+      for (j = 0; j < num_classes; ++j) {
+        if (this_label == labels[static_cast<std::size_t>(j)]) break;
+      }
+      if (j == num_classes) {
+        labels.push_back(this_label);
+        ++num_classes;
+      }
+
+      for (int j = 0; j < i; ++j) {
+        double dist = CalcDist(prob->x[indices[j]], prob->x[indices[i]]);
+        if (prob->y[indices[j]] == this_label) {
+          int index = CompareDist(min_same[j], dist, num_neighbors);
+          if (index < num_neighbors) {
+            alpha[j] = CalcAlpha(min_same[j], min_diff[j], num_neighbors);
+          }
+          CompareDist(min_same[i], dist, num_neighbors);
+        } else {
+          int index = CompareDist(min_diff[j], dist, num_neighbors);
+          if (index < num_neighbors) {
+            alpha[j] = CalcAlpha(min_same[j], min_diff[j], num_neighbors);
+          }
+          CompareDist(min_diff[i], dist, num_neighbors);
+        }
+      }
+      alpha[i] = CalcAlpha(min_same[i], min_diff[i], num_neighbors);
+    }
+
+    for (int i = 0; i < num_ex; ++i) {
+      delete[] min_same[i];
+      delete[] min_diff[i];
+    }
+    delete[] min_same;
+    delete[] min_diff;
+    delete[] alpha;
+    std::vector<int>(labels).swap(labels);
+  }
+
   return;
 }

From 03053f560776d326692307da7b22d39df2eb4640 Mon Sep 17 00:00:00 2001
From: fated
Date: Thu, 26 Feb 2015 20:37:58 +0000
Subject: [PATCH 8/8] add readme file, fix a typo in cp-online
---
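[Usage sketch, not part of the commit: illustrative invocations inferred from
the option parsing in the earlier patches; all file names are placeholders.

    ./cp-offline train_file test_file output_file
    ./cp-cv -k 3 -v 10 data_file output_file
    ./cp-online -k 3 -e 0.05 data_file output_file

-k sets the number of nearest neighbors, -v the number of cross-validation
folds, and -e the significance level.]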
 README.md     | 185 +++++++++++++++++++++++++++++++++++++++++++++++++
 cp-online.cpp |   2 +-
 2 files changed, 183 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 37d55ce..5d5d3be 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,183 @@
-libcp-revamped
-==============
+# LIBCP -- A Library for Conformal Prediction

-Revamped LibVM
+LibCP is simple, easy-to-use, and efficient software for conformal prediction on classification problems; it outputs each prediction together with a confidence and a credibility value. It performs conformal prediction in both online and batch mode, with *k*-nearest neighbors as the underlying algorithm. This document explains the use of LibCP.
+
+## Table of Contents
+
+* [Installation and Data Format](#installation-and-data-format)
+* ["cp-offline" Usage](#cp-offline-usage)
+* ["cp-online" Usage](#cp-online-usage)
+* ["cp-cv" Usage](#cp-cv-usage)
+* [Tips on Practical Use](#tips-on-practical-use)
+* [Examples](#examples)
+* [Library Usage](#library-usage)
+* [Additional Information](#additional-information)
+* [Acknowledgments](#acknowledgments)
+
+## Installation and Data Format[↩](#table-of-contents)
+
+On Unix systems, type `make` to build the `cp-offline`, `cp-online` and `cp-cv` programs. Run them without arguments to display their usage messages.
+
+The format of training and testing data files is:
+```
+