Search in sources :

Example 6 with SVM

use of Classifier.supervised.SVM in project IR_Base by Linda-sunshine.

The example below is the main method of the class MovieReviewMain.

/**
 * Builds a corpus from the movie-review text files and runs one classifier
 * over it with cross validation.
 *
 * <p>Every setting is a hard-coded local below; {@code args} is not read.
 * How the corpus is built depends on two settings:
 * <ul>
 *   <li>{@code providedCV} - when non-empty, it is handed to the first
 *       {@code DocAnalyzer}; otherwise {@code null} is passed instead.</li>
 *   <li>{@code featureSelection} - when non-empty, a first pass over the
 *       documents runs {@code featureSelection(...)} writing to
 *       {@code featureLocation}, and a second {@code DocAnalyzer} is then
 *       created from {@code featureLocation} to build the final corpus.</li>
 * </ul>
 * The classifier is then chosen by {@code classifier}: "NB", "LR" or "SVM".
 *
 * @param args command-line arguments (ignored)
 * @throws IOException declared by the original signature; presumably raised
 *         by the analyzer's file access (the analyzer source is not visible
 *         here - confirm)
 */
public static void main(String[] args) throws IOException {
    // Constructed up front as in the original; it is replaced below by the
    // corpus the analyzer returns.
    _Corpus corpus = new _Corpus();

    /* ---- Settings to adjust before running the classifiers. ---- */
    // Number of class labels in the data set.
    int classNumber = 2;
    // N-gram order; 1 means unigram.
    int Ngram = 1;
    // Document length threshold.
    int lengthThreshold = 5;
    // How feature values are computed; "TFIDF" and "BM25" are the other options.
    String featureValue = "TF";
    // Passed to setFeatureValues together with featureValue
    // (presumably selects the normalization - TODO confirm).
    int norm = 1;
    // Which classifier to run: "NB", "LR" or "SVM".
    String classifier = "SVM";

    String separator = "--------------------------------------------------------------------------------------";
    System.out.println(separator);
    System.out.println("Parameters of this run:" + "\nClassNumber: " + classNumber + "\tNgram: " + Ngram + "\tFeatureValue: " + featureValue + "\tClassifier: " + classifier);

    /* ---- Settings used when loading files. ---- */
    String folder = "data/txt_sentoken";
    String suffix = ".txt";
    // Tokenizer model.
    String tokenModel = "./data/Model/en-token.bin";
    // Destination handed to returnCorpus (final features with statistics).
    String finalLocation = "/home/lin/Lin'sWorkSpace/IR_Base/FinalFeatureStat.txt";
    // File the selected features are written to and re-read from.
    String featureLocation = "/home/lin/Lin'sWorkSpace/IR_Base/SelectedFeatures.txt";

    /* ---- Settings for feature selection. ---- */
    // Feature selection method, e.g. "MI"; empty disables feature selection.
    String featureSelection = "";
    // Provided controlled vocabulary file; empty means none is provided.
    String providedCV = "Features.txt";
    // Starting and ending points of the features kept by feature selection.
    double startProb = 0.5;
    double endProb = 1;
    // Document-frequency bounds handed to feature selection.
    int maxDF = -1, minDF = 5;
    System.out.println("Feature Seleciton: " + featureSelection + "\tStarting probability: " + startProb + "\tEnding probability:" + endProb);
    System.out.println(separator);

    boolean hasProvidedCV = !providedCV.isEmpty();
    boolean selectsFeatures = !featureSelection.isEmpty();

    // The two flags give exactly four cases; each one ends with an analyzer
    // that still has to load the documents and produce the corpus.
    DocAnalyzer analyzer;
    if (!selectsFeatures) {
        if (!hasProvidedCV) {
            // Case 1: no provided CV, no feature selection.
            System.out.println("Case 1: no provided CV, no feature selection.  Start loading files, wait...");
            analyzer = new DocAnalyzer(tokenModel, classNumber, null, Ngram, lengthThreshold);
        } else {
            // Case 2: provided CV, no feature selection.
            System.out.println("Case 2: provided CV, no feature selection. Start loading files, wait...");
            analyzer = new DocAnalyzer(tokenModel, classNumber, providedCV, Ngram, lengthThreshold);
        }
    } else {
        DocAnalyzer selector;
        if (!hasProvidedCV) {
            // Case 3: no provided CV, feature selection.
            System.out.println("Case 3: no provided CV, feature selection. Start loading files to do feature selection, wait...");
            selector = new DocAnalyzer(tokenModel, classNumber, null, Ngram, lengthThreshold);
        } else {
            // Case 4: provided CV, feature selection. The analyzer is created
            // before the message is printed, matching the original order.
            selector = new DocAnalyzer(tokenModel, classNumber, providedCV, Ngram, lengthThreshold);
            System.out.println("Case 4: provided CV, feature selection. Start loading files to do feature selection, wait...");
        }
        // First pass: load all documents and select the features.
        selector.LoadDirectory(folder, suffix);
        selector.featureSelection(featureLocation, featureSelection, startProb, endProb, maxDF, minDF);
        System.out.println("Start loading files, wait...");
        // Second pass starts from the features that were just selected.
        analyzer = new DocAnalyzer(tokenModel, classNumber, featureLocation, Ngram, lengthThreshold);
    }
    // Load all the documents as the data set and build the corpus.
    analyzer.LoadDirectory(folder, suffix);
    analyzer.setFeatureValues(featureValue, norm);
    corpus = analyzer.returnCorpus(finalLocation);

    // Run the chosen classifier; each one calls crossValidation(10, corpus)
    // (presumably 10 folds - confirm against the classifier API).
    if (classifier.equals("NB")) {
        System.out.println("Start naive bayes, wait...");
        NaiveBayes myNB = new NaiveBayes(corpus);
        myNB.crossValidation(10, corpus);
    } else if (classifier.equals("LR")) {
        // Second constructor argument of LogisticRegression.
        double lambda = 0;
        System.out.println("Start logistic regression, wait...");
        LogisticRegression myLR = new LogisticRegression(corpus, lambda);
        myLR.crossValidation(10, corpus);
    } else if (classifier.equals("SVM")) {
        // Second constructor argument of SVM; the original note says the default value is 1.
        double C = 3;
        System.out.println("Start SVM, wait...");
        SVM mySVM = new SVM(corpus, C);
        mySVM.crossValidation(10, corpus);
    } else {
        System.out.println("Have not developed yet!:(");
    }
}
Also used : structures._Corpus(structures._Corpus) NaiveBayes(Classifier.supervised.NaiveBayes) DocAnalyzer(Analyzer.DocAnalyzer) SVM(Classifier.supervised.SVM) LogisticRegression(Classifier.supervised.LogisticRegression)

Example 7 with SVM

use of Classifier.supervised.SVM in project IR_Base by Linda-sunshine.

The example below is the main method of the class VectorReviewMain.

/**
 * Loads pre-computed feature vectors into a corpus and runs one classifier
 * over it.
 *
 * <p>Every setting is a hard-coded local below; {@code args} is not read.
 * {@code style} picks the learning paradigm and {@code classifier} the model:
 * <ul>
 *   <li>"SUP": "NB", "KNN", "LR", "PRLR", "SVM" (all cross-validated with
 *       {@code CVFold}) and "PR" (PageRank, which only calls {@code train}).</li>
 *   <li>"SEMI": "GF", "GF-RW", "GF-RW-ML" (all cross-validated with
 *       {@code CVFold}).</li>
 * </ul>
 * Any other value prints a "not developed yet" message and does nothing else.
 *
 * @param args command-line arguments (ignored)
 * @throws IOException declared by the original signature; presumably raised
 *         while reading the vector file - confirm against VctAnalyzer
 * @throws ParseException declared by the original signature; its source is
 *         not visible in this method - confirm against the called APIs
 */
public static void main(String[] args) throws IOException, ParseException {
    /* ---- Settings to adjust before running the classifiers. ---- */
    // Number of class labels in the data set.
    int classNumber = 5;
    // Document length threshold.
    int lengthThreshold = 5;
    // k for k-fold cross validation.
    int CVFold = 10;
    // Which classifier to use; the accepted names depend on style (see above).
    String classifier = "GF-RW-ML";
    // Second constructor argument shared by most of the models below.
    double C = 1.0;
    // Learning paradigm: "SUP" or "SEMI".
    String style = "SEMI";
    // Passed to the semi-supervised models as their second constructor argument.
    String multipleLearner = "SVM";

    /* ---- Settings used when loading files. ---- */
    String featureLocation = "data/Features/fv_2gram_BM25_CHI_small.txt";
    String vctfile = "data/FVs/vct_2gram_BM25_CHI_tablet_small.dat";

    // Debug output target handed to setDebugOutput; null here.
    // A per-classifier path such as String.format("data/debug/%s.sim.pair", classifier)
    // was the commented-out alternative.
    String debugOutput = null;

    /* ---- Load the data. ---- */
    System.out.println("Loading vectors from file, wait...");
    VctAnalyzer analyzer = new VctAnalyzer(classNumber, lengthThreshold, featureLocation);
    // Load all the documents as the data set.
    analyzer.LoadDoc(vctfile);
    _Corpus corpus = analyzer.getCorpus();
    // Per the original note, this makes the labels binary.
    corpus.mapLabels(4);

    /* ---- Choose the classification method. ---- */
    if (style.equals("SUP")) {
        if (classifier.equals("NB")) {
            System.out.println("Start naive bayes, wait...");
            NaiveBayes myNB = new NaiveBayes(corpus);
            myNB.crossValidation(CVFold, corpus);
        } else if (classifier.equals("KNN")) {
            System.out.println("Start kNN, wait...");
            KNN myKNN = new KNN(corpus, 10, 1);
            myKNN.crossValidation(CVFold, corpus);
        } else if (classifier.equals("LR")) {
            System.out.println("Start logistic regression, wait...");
            LogisticRegression myLR = new LogisticRegression(corpus, C);
            myLR.setDebugOutput(debugOutput);
            myLR.crossValidation(CVFold, corpus);
        } else if (classifier.equals("PRLR")) {
            System.out.println("Start posterior regularized logistic regression, wait...");
            PRLogisticRegression myLR = new PRLogisticRegression(corpus, C);
            myLR.setDebugOutput(debugOutput);
            myLR.crossValidation(CVFold, corpus);
        } else if (classifier.equals("SVM")) {
            System.out.println("Start SVM, wait...");
            SVM mySVM = new SVM(corpus, C);
            mySVM.crossValidation(CVFold, corpus);
        } else if (classifier.equals("PR")) {
            // PageRank is trained on the whole collection; no cross validation is run.
            System.out.println("Start PageRank, wait...");
            PageRank myPR = new PageRank(corpus, C, 100, 50, 1e-6);
            myPR.train(corpus.getCollection());
        } else {
            System.out.println("Classifier has not been developed yet!");
        }
    } else if (style.equals("SEMI")) {
        double learningRatio = 1.0;
        // k nearest labeled, k' nearest unlabeled.
        int k = 20, kPrime = 20;
        // Labeled data weight, unlabeled data weight.
        double tAlpha = 1.0, tBeta = 0.1;
        // Convergence of random walk, weight of random walk.
        double tDelta = 1e-4, tEta = 0.5;
        // Bound for generating rating constraints (must be zero in the binary case).
        int bound = 0;
        boolean metricLearning = true;

        if (classifier.equals("GF")) {
            GaussianFields mySemi = new GaussianFields(corpus, multipleLearner, C);
            mySemi.crossValidation(CVFold, corpus);
        } else if (classifier.equals("GF-RW")) {
            GaussianFields mySemi = new GaussianFieldsByRandomWalk(corpus, multipleLearner, C, learningRatio, k, kPrime, tAlpha, tBeta, tDelta, tEta, false);
            mySemi.setDebugOutput(debugOutput);
            mySemi.crossValidation(CVFold, corpus);
        } else if (classifier.equals("GF-RW-ML")) {
            LinearSVMMetricLearning lMetricLearner = new LinearSVMMetricLearning(corpus, multipleLearner, C, learningRatio, k, kPrime, tAlpha, tBeta, tDelta, tEta, false, bound);
            lMetricLearner.setMetricLearningMethod(metricLearning);
            lMetricLearner.setDebugOutput(debugOutput);
            lMetricLearner.crossValidation(CVFold, corpus);
        } else {
            System.out.println("Classifier has not been developed yet!");
        }
    } else {
        System.out.println("Learning paradigm has not been developed yet!");
    }
}
Also used : PRLogisticRegression(Classifier.supervised.PRLogisticRegression) GaussianFieldsByRandomWalk(Classifier.semisupervised.GaussianFieldsByRandomWalk) PageRank(influence.PageRank) VctAnalyzer(Analyzer.VctAnalyzer) KNN(Classifier.supervised.KNN) SVM(Classifier.supervised.SVM) structures._Corpus(structures._Corpus) NaiveBayes(Classifier.supervised.NaiveBayes) LinearSVMMetricLearning(Classifier.metricLearning.LinearSVMMetricLearning) GaussianFields(Classifier.semisupervised.GaussianFields) PRLogisticRegression(Classifier.supervised.PRLogisticRegression) LogisticRegression(Classifier.supervised.LogisticRegression)

Aggregations

SVM (Classifier.supervised.SVM): 7; LogisticRegression (Classifier.supervised.LogisticRegression): 5; NaiveBayes (Classifier.supervised.NaiveBayes): 5; structures._Corpus (structures._Corpus): 5; DocAnalyzer (Analyzer.DocAnalyzer): 3; LinearSVMMetricLearning (Classifier.metricLearning.LinearSVMMetricLearning): 3; GaussianFields (Classifier.semisupervised.GaussianFields): 3; GaussianFieldsByRandomWalk (Classifier.semisupervised.GaussianFieldsByRandomWalk): 3; PageRank (influence.PageRank): 3; VctAnalyzer (Analyzer.VctAnalyzer): 2; PRLogisticRegression (Classifier.supervised.PRLogisticRegression): 2; LDA_Gibbs (topicmodels.LDA.LDA_Gibbs): 2; LDA_Variational_multithread (topicmodels.multithreads.LDA.LDA_Variational_multithread): 2; topicmodels.multithreads.pLSA.pLSA_multithread (topicmodels.multithreads.pLSA.pLSA_multithread): 2; topicmodels.pLSA.pLSA (topicmodels.pLSA.pLSA): 2; Analyzer (Analyzer.Analyzer): 1; AspectAnalyzer (Analyzer.AspectAnalyzer): 1; BaseClassifier (Classifier.BaseClassifier): 1; L2RMetricLearning (Classifier.metricLearning.L2RMetricLearning): 1; NaiveBayesEM (Classifier.semisupervised.NaiveBayesEM): 1