Search in sources :

Example 1 with AspectAnalyzer

Use of Analyzer.AspectAnalyzer in the project IR_Base by Linda-sunshine.

Class TransductiveMain, method main.

/**
 * Entry point of the transductive-learning experiment.
 *
 * <p>The method (1) builds feature vectors for every document found under the
 * configured folder using an {@code AspectAnalyzer}, (2) trains the selected topic
 * model on the resulting corpus with an aspect (or aspect-sentiment) prior, and
 * (3) runs k-fold cross validation with either a random-walk based semi-supervised
 * learner ({@code style = "SEMI"}) or a plain SVM ({@code style = "SUP"}).
 *
 * <p>All settings are hard-coded local variables; {@code args} is not read.
 *
 * @param args command-line arguments (ignored)
 * @throws IOException declared by the original signature; presumably raised while
 *         loading the model/data files - confirm against the analyzer classes
 * @throws ParseException declared by the original signature; the raising call is
 *         not visible in this method
 */
public static void main(String[] args) throws IOException, ParseException {
    // Number of classes handed to the analyzer.
    int classNumber = 5;
    // N-gram order; also embedded in the feature file names below.
    int Ngram = 2;
    // Document length threshold
    int lengthThreshold = 5;
    // each document should have at least 2 sentences (original note: for HTSM, LRSHTM)
    int minimumNumberOfSentences = 2;
    /**
     ***parameters for the two-topic topic model****
     */
    // pLSA, LDA_Gibbs, LDA_Variational
    String topicmodel = "pLSA";
    int number_of_topics = 30;
    // these two parameters (alpha, beta) must be larger than 1!!!
    double alpha = 1.0 + 1e-2, beta = 1.0 + 1e-3, eta = 5.0;
    // presumably a negative converge means there is no need to check likelihood convergence
    double converge = -1, lambda = 0.7;
    int number_of_iteration = 100;
    boolean aspectSentiPrior = true;
    /**
     ***The parameters used in loading files.****
     */
    String folder = "./data/amazon/tablet/topicmodel";
    String suffix = ".json";
    String stopword = "./data/Model/stopwords.dat";
    // Token model.
    String tokenModel = "./data/Model/en-token.bin";
    // Sentence model. Need it for pos tagging.
    String stnModel = "./data/Model/en-sent.bin";
    String tagModel = "./data/Model/en-pos-maxent.bin";
    // The next four paths are only referenced by the disabled loadPriorPosNegWords call below.
    String sentiWordNet = "./data/Model/SentiWordNet_3.0.0_20130122.txt";
    // Added by Mustafizur----------------
    String pathToPosWords = "./data/Model/SentiWordsPos.txt";
    String pathToNegWords = "./data/Model/SentiWordsNeg.txt";
    String pathToNegationWords = "./data/Model/negation_words.txt";
    // String category = "tablets"; //"electronics"
    // String dataSize = "86jsons"; //"50K", "100K"
    // String fvFile = String.format("./data/Features/fv_%dgram_%s_%s.txt", Ngram, category, dataSize);
    // String fvStatFile = String.format("./data/Features/fv_%dgram_stat_%s_%s.txt", Ngram, category, dataSize);
    // String aspectlist = "./data/Model/aspect_output_simple.txt";
    String fvFile = String.format("./data/Features/fv_%dgram_topicmodel.txt", Ngram);
    String fvStatFile = String.format("./data/Features/fv_%dgram_stat_topicmodel.txt", Ngram);
    String aspectSentiList = "./data/Model/aspect_sentiment_tablet.txt";
    String aspectList = "./data/Model/aspect_tablet.txt";
    /**
     ***Parameters in learning style.****
     */
    // "SUP", "SEMI"
    String style = "SEMI";
    // "RW", "RW-ML", "RW-L2R"
    String method = "RW-L2R";
    /**
     ***Parameters in transductive learning.****
     */
    String debugOutput = "data/debug/topical.sim";
    // String debugOutput = null;
    boolean releaseContent = false;
    // k fold-cross validation
    int CVFold = 10;
    // choice of base learner
    String multipleLearner = "SVM";
    // trade-off parameter
    double C = 1.0;
    /**
     ***Parameters in feature selection.****
     */
    // String featureSelection = "DF"; //Feature selection method.
    // double startProb = 0.5; // Used in feature selection, the starting point of the features.
    // double endProb = 0.999; // Used in feature selection, the ending point of the features.
    // int DFthreshold = 30; // Filter the features with DFs smaller than this threshold.
    //
    // System.out.println("Performing feature selection, wait...");
    // jsonAnalyzer analyzer = new jsonAnalyzer(tokenModel, classNumber, null, Ngram, lengthThreshold);
    // analyzer.LoadStopwords(stopwords);
    // analyzer.LoadDirectory(folder, suffix); //Load all the documents as the data set.
    // analyzer.featureSelection(fvFile, featureSelection, startProb, endProb, DFthreshold); //Select the features.
    System.out.println("Creating feature vectors, wait...");
    AspectAnalyzer analyzer = new AspectAnalyzer(tokenModel, stnModel, tagModel, classNumber, fvFile, Ngram, lengthThreshold, aspectList, true);
    // Added by Mustafizur----------------
    analyzer.setMinimumNumberOfSentences(minimumNumberOfSentences);
    // Load the stop word list.
    analyzer.LoadStopwords(stopword);
    // analyzer.loadPriorPosNegWords(sentiWordNet, pathToPosWords, pathToNegWords, pathToNegationWords);
    analyzer.setReleaseContent(releaseContent);
    // Added by Mustafizur----------------
    // Load all the documents as the data set.
    analyzer.LoadDirectory(folder, suffix);
    // Feature values used while training the topic model.
    analyzer.setFeatureValues("TF", 0);
    // Get the collection of all the documents.
    _Corpus c = analyzer.returnCorpus(fvStatFile);
    pLSA tModel = null;
    if (topicmodel.equals("pLSA")) {
        tModel = new pLSA_multithread(number_of_iteration, converge, beta, c, lambda, number_of_topics, alpha);
    } else if (topicmodel.equals("LDA_Gibbs")) {
        tModel = new LDA_Gibbs(number_of_iteration, converge, beta, c, lambda, number_of_topics, alpha, 0.4, 50);
    } else if (topicmodel.equals("LDA_Variational")) {
        tModel = new LDA_Variational_multithread(number_of_iteration, converge, beta, c, lambda, number_of_topics, alpha, 10, -1);
    } else {
        System.out.println("The selected topic model has not developed yet!");
        return;
    }
    tModel.setDisplayLap(0);
    tModel.setSentiAspectPrior(aspectSentiPrior);
    // The prior file has to match the kind of prior that was just selected.
    tModel.LoadPrior(aspectSentiPrior ? aspectSentiList : aspectList, eta);
    tModel.EMonCorpus();
    // construct effective feature values for supervised classifiers
    analyzer.setFeatureValues("BM25", 2);
    // how to set this reasonably
    c.mapLabels(3);
    if (style.equals("SEMI")) {
        // perform transductive learning
        System.out.println("Start Transductive Learning, wait...");
        double learningRatio = 1.0;
        // k nearest labeled, k' nearest unlabeled
        int k = 30, kPrime = 20;
        // labeled data weight, unlabeled data weight
        double tAlpha = 1.0, tBeta = 0.1;
        // convergence of random walk, weight of random walk
        double tDelta = 1e-5, tEta = 0.6;
        boolean simFlag = false, weightedAvg = true;
        // bound for generating rating constraints (must be zero in binary case)
        int bound = 0;
        // top K similar documents for constructing pairwise ranking targets
        int topK = 25;
        double noiseRatio = 1.0;
        boolean metricLearning = true;
        // training LambdaRank with multi-threads
        boolean multithread_LR = true;
        GaussianFieldsByRandomWalk mySemi = null;
        if (method.equals("RW")) {
            mySemi = new GaussianFieldsByRandomWalk(c, multipleLearner, C, learningRatio, k, kPrime, tAlpha, tBeta, tDelta, tEta, weightedAvg);
        } else if (method.equals("RW-ML")) {
            mySemi = new LinearSVMMetricLearning(c, multipleLearner, C, learningRatio, k, kPrime, tAlpha, tBeta, tDelta, tEta, weightedAvg, bound);
            ((LinearSVMMetricLearning) mySemi).setMetricLearningMethod(metricLearning);
        } else if (method.equals("RW-L2R")) {
            mySemi = new L2RMetricLearning(c, multipleLearner, C, learningRatio, k, kPrime, tAlpha, tBeta, tDelta, tEta, weightedAvg, topK, noiseRatio, multithread_LR);
        } else {
            // Without this guard an unknown method leaves mySemi null and the
            // calls below fail with a NullPointerException.
            System.out.println("The selected learning method is not supported: " + method);
            return;
        }
        mySemi.setSimilarity(simFlag);
        mySemi.setDebugOutput(debugOutput);
        mySemi.crossValidation(CVFold, c);
    } else if (style.equals("SUP")) {
        // perform supervised learning
        System.out.println("Start SVM, wait...");
        SVM mySVM = new SVM(c, C);
        mySVM.crossValidation(CVFold, c);
    } else {
        // Make a misconfigured style visible instead of silently finishing.
        System.out.println("The selected learning style is not supported: " + style);
    }
}
Also used : GaussianFieldsByRandomWalk(Classifier.semisupervised.GaussianFieldsByRandomWalk) SVM(Classifier.supervised.SVM) AspectAnalyzer(Analyzer.AspectAnalyzer) L2RMetricLearning(Classifier.metricLearning.L2RMetricLearning) structures._Corpus(structures._Corpus) LinearSVMMetricLearning(Classifier.metricLearning.LinearSVMMetricLearning) LDA_Gibbs(topicmodels.LDA.LDA_Gibbs) LDA_Variational_multithread(topicmodels.multithreads.LDA.LDA_Variational_multithread) topicmodels.multithreads.pLSA.pLSA_multithread(topicmodels.multithreads.pLSA.pLSA_multithread) topicmodels.pLSA.pLSA(topicmodels.pLSA.pLSA)

Aggregations

AspectAnalyzer (Analyzer.AspectAnalyzer)1 L2RMetricLearning (Classifier.metricLearning.L2RMetricLearning)1 LinearSVMMetricLearning (Classifier.metricLearning.LinearSVMMetricLearning)1 GaussianFieldsByRandomWalk (Classifier.semisupervised.GaussianFieldsByRandomWalk)1 SVM (Classifier.supervised.SVM)1 structures._Corpus (structures._Corpus)1 LDA_Gibbs (topicmodels.LDA.LDA_Gibbs)1 LDA_Variational_multithread (topicmodels.multithreads.LDA.LDA_Variational_multithread)1 topicmodels.multithreads.pLSA.pLSA_multithread (topicmodels.multithreads.pLSA.pLSA_multithread)1 topicmodels.pLSA.pLSA (topicmodels.pLSA.pLSA)1