Example usage of Classifier.supervised.SVM in project IR_Base (by Linda-sunshine): the main method of class MovieReviewMain.
/**
 ***************************Main function******************************
 * Entry point: loads the movie-review corpus — optionally restricted to a
 * provided controlled vocabulary (CV) and/or a selected feature set — then
 * runs the chosen supervised classifier with 10-fold cross validation.
 *
 * Review notes vs. the original:
 *  - removed unused locals {@code featureSize} and {@code eps} (declared, never read);
 *  - fixed the "Feature Seleciton" typo in the console output;
 *  - collapsed the four duplicated load branches into one load plus an
 *    optional feature-selection pass (the original final "setting fails"
 *    branch was unreachable: the four cases exhausted all combinations).
 */
public static void main(String[] args) throws IOException {
	/**
	 ***Set these parameters before running the classifiers.****
	 */
	int classNumber = 2;       // number of classes (binary sentiment)
	int Ngram = 1;             // the default value is unigram
	int lengthThreshold = 5;   // document length threshold
	// The way of calculating the feature value, which can also be "TFIDF", "BM25".
	String featureValue = "TF";
	int norm = 1;              // feature-value normalization flag
	String classifier = "SVM"; // which classifier to use: "NB", "LR" or "SVM"

	System.out.println("--------------------------------------------------------------------------------------");
	System.out.println("Parameters of this run:" + "\nClassNumber: " + classNumber + "\tNgram: " + Ngram + "\tFeatureValue: " + featureValue + "\tClassifier: " + classifier);

	/**
	 ***The parameters used in loading files.****
	 */
	String folder = "data/txt_sentoken";
	String suffix = ".txt";
	String tokenModel = "./data/Model/en-token.bin"; // token model
	// Destination of the final features with stats, and of the selected features.
	String finalLocation = "/home/lin/Lin'sWorkSpace/IR_Base/FinalFeatureStat.txt";
	String featureLocation = "/home/lin/Lin'sWorkSpace/IR_Base/SelectedFeatures.txt";

	/**
	 ***Parameters in feature selection.****
	 */
	String featureSelection = "";       // e.g. "MI"; empty string means no feature selection
	String providedCV = "Features.txt"; // provided controlled vocabulary; empty string means none
	double startProb = 0.5;             // feature selection: starting point of the features
	double endProb = 1;                 // feature selection: ending point of the features
	int maxDF = -1, minDF = 5;          // filter the features with DFs outside [minDF, maxDF]

	System.out.println("Feature Selection: " + featureSelection + "\tStarting probability: " + startProb + "\tEnding probability:" + endProb);
	System.out.println("--------------------------------------------------------------------------------------");

	boolean hasCV = !providedCV.isEmpty();
	boolean doSelection = !featureSelection.isEmpty();
	// Case numbering matches the original code: 1 = no CV / no FS, 2 = CV / no FS,
	// 3 = no CV / FS, 4 = CV / FS.
	int caseId = 1 + (hasCV ? 1 : 0) + (doSelection ? 2 : 0);
	System.out.println("Case " + caseId + ": "
			+ (hasCV ? "provided CV" : "no provided CV") + ", "
			+ (doSelection ? "feature selection" : "no feature selection")
			+ ". Start loading files" + (doSelection ? " to do feature selection" : "") + ", wait...");

	// Load all the documents as the data set.
	DocAnalyzer analyzer = new DocAnalyzer(tokenModel, classNumber, hasCV ? providedCV : null, Ngram, lengthThreshold);
	analyzer.LoadDirectory(folder, suffix);
	if (doSelection) {
		// Select the features, then reload the corpus restricted to them.
		analyzer.featureSelection(featureLocation, featureSelection, startProb, endProb, maxDF, minDF);
		System.out.println("Start loading files, wait...");
		analyzer = new DocAnalyzer(tokenModel, classNumber, featureLocation, Ngram, lengthThreshold);
		analyzer.LoadDirectory(folder, suffix);
	}
	analyzer.setFeatureValues(featureValue, norm);
	_Corpus corpus = analyzer.returnCorpus(finalLocation);

	// Execute different classifiers; each is evaluated with 10-fold cross validation
	// on the movie reviews.
	if (classifier.equals("NB")) {
		System.out.println("Start naive bayes, wait...");
		NaiveBayes myNB = new NaiveBayes(corpus);
		myNB.crossValidation(10, corpus);
	} else if (classifier.equals("LR")) {
		double lambda = 0; // regularization weight (0 = unregularized)
		System.out.println("Start logistic regression, wait...");
		LogisticRegression myLR = new LogisticRegression(corpus, lambda);
		myLR.crossValidation(10, corpus);
	} else if (classifier.equals("SVM")) {
		// corpus.save2File("data/FVs/fvector.dat");
		double C = 3; // SVM trade-off constant; the library default is 1
		System.out.println("Start SVM, wait...");
		SVM mySVM = new SVM(corpus, C);
		mySVM.crossValidation(10, corpus);
	} else
		System.out.println("Have not developed yet!:(");
}
Example usage of Classifier.supervised.SVM in project IR_Base (by Linda-sunshine): the main method of class VectorReviewMain.
/**
 * Entry point: loads pre-computed document feature vectors, maps the rating
 * labels to a binary problem, and runs the chosen supervised ("SUP") or
 * semi-supervised ("SEMI") classifier with k-fold cross validation.
 *
 * Review notes vs. the original:
 *  - the model-list comment said "PR-LR" but the code checks "PRLR"; the
 *    comment now matches the code;
 *  - {@code simFlag} was declared but a literal {@code false} was passed to the
 *    random-walk constructors; the variable (same value) is now passed instead;
 *  - removed unused local {@code threshold} (declared, never read).
 */
public static void main(String[] args) throws IOException, ParseException {
	/**
	 ***Set these parameters before running the classifiers.****
	 */
	int classNumber = 5;     // number of raw rating classes before binarization
	int lengthThreshold = 5; // document length threshold
	int CVFold = 10;         // k-fold cross validation
	// Supervised classification models: "NB", "KNN", "LR", "PRLR", "SVM", "PR"
	// Semi-supervised classification models: "GF", "GF-RW", "GF-RW-ML"
	String classifier = "GF-RW-ML"; // which classifier to use
	// String modelPath = "./data/Model/";
	double C = 1.0;                 // regularization / trade-off constant shared by the learners
	String style = "SEMI";          // learning paradigm: "SUP" or "SEMI"
	String multipleLearner = "SVM"; // base learner used inside the semi-supervised models

	/**
	 ***The parameters used in loading files.****
	 */
	String featureLocation = "data/Features/fv_2gram_BM25_CHI_small.txt";
	String vctfile = "data/FVs/vct_2gram_BM25_CHI_tablet_small.dat";
	// String featureLocation = "data/Features/fv_fake.txt";
	// String vctfile = "data/Fvs/LinearRegression.dat";

	// Debug output target; null disables debug dumps.
	// String debugOutput = String.format("data/debug/%s.sim.pair", classifier);
	String debugOutput = null;

	/**
	 ***Pre-process the data.****
	 */
	System.out.println("Loading vectors from file, wait...");
	VctAnalyzer analyzer = new VctAnalyzer(classNumber, lengthThreshold, featureLocation);
	analyzer.LoadDoc(vctfile); // load all the documents as the data set
	_Corpus corpus = analyzer.getCorpus();
	corpus.mapLabels(4); // make it binary

	/**
	 ******Choose different classification methods.********
	 */
	if (style.equals("SUP")) {
		if (classifier.equals("NB")) {
			System.out.println("Start naive bayes, wait...");
			NaiveBayes myNB = new NaiveBayes(corpus);
			myNB.crossValidation(CVFold, corpus);
		} else if (classifier.equals("KNN")) {
			System.out.println("Start kNN, wait...");
			KNN myKNN = new KNN(corpus, 10, 1);
			myKNN.crossValidation(CVFold, corpus);
		} else if (classifier.equals("LR")) {
			System.out.println("Start logistic regression, wait...");
			LogisticRegression myLR = new LogisticRegression(corpus, C);
			myLR.setDebugOutput(debugOutput);
			myLR.crossValidation(CVFold, corpus);
			// myLR.saveModel(modelPath + "LR.model");
		} else if (classifier.equals("PRLR")) {
			System.out.println("Start posterior regularized logistic regression, wait...");
			PRLogisticRegression myLR = new PRLogisticRegression(corpus, C);
			myLR.setDebugOutput(debugOutput);
			myLR.crossValidation(CVFold, corpus);
			// myLR.saveModel(modelPath + "LR.model");
		} else if (classifier.equals("SVM")) {
			System.out.println("Start SVM, wait...");
			SVM mySVM = new SVM(corpus, C);
			mySVM.crossValidation(CVFold, corpus);
		} else if (classifier.equals("PR")) {
			System.out.println("Start PageRank, wait...");
			PageRank myPR = new PageRank(corpus, C, 100, 50, 1e-6);
			myPR.train(corpus.getCollection());
		} else
			System.out.println("Classifier has not been developed yet!");
	} else if (style.equals("SEMI")) {
		double learningRatio = 1.0;
		int k = 20, kPrime = 20;          // k nearest labeled, k' nearest unlabeled
		double tAlpha = 1.0, tBeta = 0.1; // labeled data weight, unlabeled data weight
		double tDelta = 1e-4, tEta = 0.5; // convergence of random walk, weight of random walk
		// NOTE(review): the original declared simFlag but passed the literal false;
		// the variable (same value) is passed below -- confirm its intended meaning
		// against the GaussianFieldsByRandomWalk constructor.
		boolean simFlag = false;
		int bound = 0;                    // bound for generating rating constraints (must be zero in binary case)
		boolean metricLearning = true;
		if (classifier.equals("GF")) {
			GaussianFields mySemi = new GaussianFields(corpus, multipleLearner, C);
			mySemi.crossValidation(CVFold, corpus);
		} else if (classifier.equals("GF-RW")) {
			GaussianFields mySemi = new GaussianFieldsByRandomWalk(corpus, multipleLearner, C, learningRatio, k, kPrime, tAlpha, tBeta, tDelta, tEta, simFlag);
			mySemi.setDebugOutput(debugOutput);
			mySemi.crossValidation(CVFold, corpus);
		} else if (classifier.equals("GF-RW-ML")) {
			LinearSVMMetricLearning lMetricLearner = new LinearSVMMetricLearning(corpus, multipleLearner, C, learningRatio, k, kPrime, tAlpha, tBeta, tDelta, tEta, simFlag, bound);
			lMetricLearner.setMetricLearningMethod(metricLearning);
			lMetricLearner.setDebugOutput(debugOutput);
			lMetricLearner.crossValidation(CVFold, corpus);
		} else
			System.out.println("Classifier has not been developed yet!");
	} else
		System.out.println("Learning paradigm has not been developed yet!");
}
Aggregations