# Pastebin odLBYcdO

/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2013 Evangelos Anagnostopoulos
 */
#include <cstdio>
#include <cstdlib>
#include <cstring>

#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/lib/SGSparseMatrix.h>
#include <shogun/lib/SGVector.h>
#include <shogun/lib/Time.h>
#include <shogun/io/LibSVMFile.h>
#include <shogun/features/SparseFeatures.h>
#include <shogun/features/RandomFourierDotFeatures.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/labels/LabelsFactory.h>
#include <shogun/classifier/LPBoost.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/classifier/svm/SVMOcas.h>
#include <shogun/evaluation/PRCEvaluation.h>
#include <shogun/evaluation/ROCEvaluation.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>

using namespace shogun;

/* Command line options, filled in by parse_arguments() */
const char* filepath = 0;
const char* testpath = 0;
int32_t D = 300;
float64_t C = 0.1;
float64_t epsilon = 0.01;
float64_t width = 8;
int32_t correct_dimension = -1;

/* Reads a LibSVM-format file into a sparse matrix and its label vector */
SGSparseMatrix<float64_t> load_data(const char* filepath, float64_t*& label_vec)
{
    FILE* data_file = fopen(filepath, "r");
    SGSparseMatrix<float64_t> sparse_data;

    CLibSVMFile* file_reader = new CLibSVMFile(data_file);
    file_reader->get_sparse_matrix(sparse_data.sparse_matrix, sparse_data.num_features,
            sparse_data.num_vectors, label_vec);
    if (correct_dimension != -1)
        sparse_data.num_features = correct_dimension;
    SG_UNREF(file_reader);
    return sparse_data;
}

void print_help_message()
{
    SG_SPRINT("Usage : ./rf_classify --dataset path_to_data [--testset path_to_test_data] [-D number_of_samples]\n");
    SG_SPRINT("        [-C C_for_SVM] [--epsilon SVM_epsilon] [--width gaussian_kernel_width] [--dimension feature_dimension]\n");
    SG_SPRINT("\nPerforms binary classification on provided data using Random Fourier features with a linear SVM solver,\n");
    SG_SPRINT("namely SVMOcas.\nParameter explanation :\n");
    SG_SPRINT("\ndataset : Path to data in LibSVM format. Required.");
    SG_SPRINT("\ntestset : Path to test data in LibSVM format. Optional.");
    SG_SPRINT("\nD : Number of samples for the Random Fourier features. Default value = 300");
    SG_SPRINT("\nC : SVM parameter C. Default value = 0.1");
    SG_SPRINT("\nepsilon : SVM epsilon. Default value = 0.01");
    SG_SPRINT("\nwidth : Gaussian Kernel width parameter. Default value = 8");
    SG_SPRINT("\ndimension : Correct feature dimension. Optional\n");
}

/* Parses the options listed in print_help_message() into the globals above */
void parse_arguments(int argc, char** argv)
{
    if (argc % 2 != 1)
    {
        print_help_message();
        exit_shogun();
        exit(0);
    }
    for (index_t i=1; i<argc; i+=2)
    {
        if (strcmp(argv[i], "--dataset") == 0)
            filepath = argv[i+1];
        else if (strcmp(argv[i], "--testset") == 0)
            testpath = argv[i+1];
        else if (strcmp(argv[i], "-D") == 0)
            D = atoi(argv[i+1]);
        else if (strcmp(argv[i], "-C") == 0)
            C = atof(argv[i+1]);
        else if (strcmp(argv[i], "--epsilon") == 0)
            epsilon = atof(argv[i+1]);
        else if (strcmp(argv[i], "--width") == 0)
            width = atof(argv[i+1]);
        else if (strcmp(argv[i], "--dimension") == 0)
            correct_dimension = atoi(argv[i+1]);
        else
        {
            print_help_message();
            exit_shogun();
            exit(0);
        }
    }
}

int main(int argc, char** argv)
{
    init_shogun_with_defaults();
    parse_arguments(argc, argv);
    if (filepath == 0)
    {
        print_help_message();
        exit_shogun();
        exit(0);
    }

    /** Reading data */
    float64_t* label_vec = 0;
    SGSparseMatrix<float64_t> sparse_data = load_data(filepath, label_vec);
    SGVector<float64_t> label(label_vec, sparse_data.num_vectors);

    /** Creating features */
    CBinaryLabels* labels = new CBinaryLabels(label);
    SG_REF(labels);

    CSparseFeatures<float64_t>* s_feats = new CSparseFeatures<float64_t>(sparse_data);
    SGVector<float64_t> params(1);
    params[0] = width;
    //CRandomFourierDotFeatures* r_feats = new CRandomFourierDotFeatures(
    //        s_feats, D, KernelName::GAUSSIAN, params);

    /** Training */
    float64_t svm_C1 = 1.0;
    float64_t svm_C2 = 1.0;
    float64_t svm_epsilon = 1e-5;
    float64_t max_train_time = 0;
    bool svm_use_bias = true;

    CLPBoost* svm = new CLPBoost();
    svm->set_features(s_feats);
    svm->set_labels(labels);
    svm->set_C(svm_C1, svm_C2);
    svm->set_epsilon(svm_epsilon);
    svm->set_bias_enabled(svm_use_bias);
    svm->set_max_train_time(max_train_time);
    //CLPBoost* svm = new CLPBoost(C, s_feats, labels);
    //CLibLinear* svm = new CLibLinear(C, s_feats, labels);
    //CSVMOcas* svm = new CSVMOcas(C, r_feats, labels);
    svm->set_epsilon(epsilon);

    SG_SPRINT("Starting training\n");
    CTime* timer = new CTime();
    svm->train();
    //svm->train_machine();
    float64_t secs = timer->cur_runtime_diff_sec();
    timer->stop();
    SG_UNREF(timer);
    SG_SPRINT("Training completed, took %fs\n", secs);

    /** Training completed, evaluating on the training data */
    CBinaryLabels* predicted = CLabelsFactory::to_binary(svm->apply());
    CPRCEvaluation* prc_evaluator = new CPRCEvaluation();
    CROCEvaluation* roc_evaluator = new CROCEvaluation();
    CAccuracyMeasure* accuracy_evaluator = new CAccuracyMeasure();

    float64_t auROC = roc_evaluator->evaluate(predicted, labels);
    float64_t auPRC = prc_evaluator->evaluate(predicted, labels);
    float64_t accuracy = accuracy_evaluator->evaluate(predicted, labels);
    SG_SPRINT("Training auPRC=%f, auROC=%f, accuracy=%f ( Incorrectly predicted=%f%% )\n",
            auPRC, auROC, accuracy, (1-accuracy) * 100);
    SG_UNREF(predicted);

    //SGMatrix<float64_t> w = r_feats->get_random_coefficients();
    svm->set_features(NULL);

    /** Evaluating on the test set, if one was provided */
    if (testpath != 0)
    {
        sparse_data = load_data(testpath, label_vec);
        label = SGVector<float64_t>(label_vec, sparse_data.num_vectors);
        s_feats = new CSparseFeatures<float64_t>(sparse_data);
        CBinaryLabels* test_labels = new CBinaryLabels(label);

        predicted = CLabelsFactory::to_binary(svm->apply(s_feats));
        auROC = roc_evaluator->evaluate(predicted, test_labels);
        auPRC = prc_evaluator->evaluate(predicted, test_labels);
        accuracy = accuracy_evaluator->evaluate(predicted, test_labels);
        SG_SPRINT("Test auPRC=%f, auROC=%f, accuracy=%f ( Incorrectly predicted=%f%% )\n",
                auPRC, auROC, accuracy, (1-accuracy) * 100);
        SG_UNREF(predicted);
        SG_UNREF(test_labels);
    }

    SG_UNREF(prc_evaluator);
    SG_UNREF(roc_evaluator);
    SG_UNREF(accuracy_evaluator);
    SG_UNREF(svm);
    SG_UNREF(labels);
    exit_shogun();
    return 0;
}