Search in sources :

Example 1 with PriorProbClassifier

use of edu.neu.ccs.pyramid.classification.PriorProbClassifier in project pyramid by cheng-li.

the class SplitterTest method test1.

/**
 * Manual smoke test: loads the IMDB training set, fits a {@link PriorProbClassifier} on it,
 * and prints the feature indices of at most 100 candidate splits, ordered by reduction
 * from largest to smallest.
 *
 * @throws Exception propagated from the calls made in the body
 */
private static void test1() throws Exception {
    ClfDataSet dataSet = TRECFormat.loadClfDataSet(new File(DATASETS, "/imdb/train.trec"), DataSetType.CLF_SPARSE, true);
    PriorProbClassifier priorProbClassifier = new PriorProbClassifier(dataSet.getNumClasses());
    priorProbClassifier.fit(dataSet);
    // NOTE(review): the second argument is presumably the class index the gradient is taken for — confirm.
    double[] gradient = priorProbClassifier.getGradient(dataSet, 1);
    RegTreeConfig regTreeConfig = new RegTreeConfig();
    Comparator<SplitResult> byReduction = Comparator.comparing(SplitResult::getReduction);
    List<Integer> results = Splitter.getAllSplits(regTreeConfig, dataSet, gradient).stream()
            .sorted(byReduction.reversed())
            .map(SplitResult::getFeatureIndex)
            .limit(100)
            .collect(Collectors.toList());
    // Debug aid (disabled): print feature names instead of raw indices.
    //        results.stream().forEach(i-> System.out.println(dataSet.getFeatureSetting(i).getFeatureName()));
    System.out.println(results);
}
Also used : ClfDataSet(edu.neu.ccs.pyramid.dataset.ClfDataSet) PriorProbClassifier(edu.neu.ccs.pyramid.classification.PriorProbClassifier) File(java.io.File)

Example 2 with PriorProbClassifier

use of edu.neu.ccs.pyramid.classification.PriorProbClassifier in project pyramid by cheng-li.

the class LRCBMOptimizer method updateBinaryClassifier.

/**
 * Refits the binary classifier stored for the given component and label by running a
 * {@link RidgeLogisticOptimizer} on the active data, weighted by {@code activeGammas}.
 * A missing classifier, or a {@link PriorProbClassifier} placeholder, is first replaced
 * by a fresh two-class {@link LogisticRegression}.
 *
 * @param component     index of the component whose classifier is updated
 * @param label         index of the label whose classifier is updated
 * @param activeDataset data set the optimizer is run on
 * @param activeGammas  weights handed to the optimizer alongside the binary labels
 */
@Override
protected void updateBinaryClassifier(int component, int label, MultiLabelClfDataSet activeDataset, double[] activeGammas) {
    StopWatch timer = new StopWatch();
    timer.start();

    // Only a real logistic regression can be optimized; anything else is re-created.
    Object current = cbm.binaryClassifiers[component][label];
    boolean needsFreshModel = current == null || current instanceof PriorProbClassifier;
    if (needsFreshModel) {
        cbm.binaryClassifiers[component][label] = new LogisticRegression(2, activeDataset.getNumFeatures());
    }

    int[] binaryLabels = DataSetUtil.toBinaryLabels(activeDataset.getMultiLabels(), label);
    LogisticRegression model = (LogisticRegression) cbm.binaryClassifiers[component][label];
    // no parallelism
    RidgeLogisticOptimizer ridgeOptimizer = new RidgeLogisticOptimizer(model, activeDataset, binaryLabels, activeGammas, priorVarianceBinary, false);

    LBFGS lbfgs = (LBFGS) ridgeOptimizer.getOptimizer();
    lbfgs.getLineSearcher().setInitialStepLength(initialStepSize);
    ridgeOptimizer.getOptimizer().getTerminator().setMaxIteration(binaryUpdatesPerIter);
    ridgeOptimizer.optimize();

    if (!logger.isDebugEnabled()) {
        return;
    }
    logger.debug("time spent on updating component " + component + " label " + label + " = " + timer);
}
Also used : PriorProbClassifier(edu.neu.ccs.pyramid.classification.PriorProbClassifier) LogisticRegression(edu.neu.ccs.pyramid.classification.logistic_regression.LogisticRegression) RidgeLogisticOptimizer(edu.neu.ccs.pyramid.classification.logistic_regression.RidgeLogisticOptimizer) StopWatch(org.apache.commons.lang3.time.StopWatch)

Example 3 with PriorProbClassifier

use of edu.neu.ccs.pyramid.classification.PriorProbClassifier in project pyramid by cheng-li.

the class AbstractRobustCBMOptimizer method skipOrUpdateBinaryClassifier.

/**
 * Either replaces the binary classifier for the given component and label with a constant
 * {@link PriorProbClassifier}, or delegates to {@code updateBinaryClassifier} to train it.
 * <p>
 * The classifier is skipped when the non-smoothed positive fraction
 * ({@code effectivePositives / totalWeight}) is below {@code skipLabelThreshold} or above
 * {@code 1 - skipLabelThreshold}. In that case the prior uses the smoothed positive
 * fraction, capped at 1. The computed statistics are written to the debug log either way.
 *
 * @param component     index of the component
 * @param label         index of the label
 * @param activeIndices data point indices used to look up the per-instance weights
 * @param activeDataSet data set passed on to {@code updateBinaryClassifier}
 * @param totalWeight   denominator of the positive fraction
 */
protected void skipOrUpdateBinaryClassifier(int component, int label, List<Integer> activeIndices, MultiLabelClfDataSet activeDataSet, double totalWeight) {
    StopWatch timer = new StopWatch();
    timer.start();

    double weightedPositives = effectivePositives(component, label);
    double rawPositiveFraction = weightedPositives / totalWeight;
    // Blend the component-wise label fraction with the global label fraction.
    double smoothedPositiveFraction = (weightedPositives + smoothingStrength * positiveCounts[label]) / (totalWeight + smoothingStrength * dataSet.getNumDataPoints());

    StringBuilder message = new StringBuilder();
    message.append("for component ").append(component)
            .append(", label ").append(label)
            .append(", weighted positives = ").append(weightedPositives)
            .append(", non-smoothed positive fraction = ").append(rawPositiveFraction)
            .append(", global positive fraction = ").append((double) positiveCounts[label] / dataSet.getNumDataPoints())
            .append(", smoothed positive fraction = ").append(smoothedPositiveFraction);

    // For numerical reasons the smoothed value can exceed 1, so cap it (the uncapped value is what was logged above).
    double priorPositiveProb = Math.min(smoothedPositiveFraction, 1.0);

    boolean tooFewPositives = rawPositiveFraction < skipLabelThreshold;
    boolean tooManyPositives = rawPositiveFraction > 1 - skipLabelThreshold;
    boolean skip = tooFewPositives || tooManyPositives;

    if (skip) {
        cbm.binaryClassifiers[component][label] = new PriorProbClassifier(new double[] { 1 - priorPositiveProb, priorPositiveProb });
        message.append(", skip, use prior = ").append(priorPositiveProb)
                .append(", time spent = ").append(timer.toString());
    }
    if (logger.isDebugEnabled()) {
        logger.debug(message.toString());
    }
    if (skip) {
        return;
    }

    // Per-instance weight = component membership weight times noise-label weight.
    double[] activeInstanceWeights = new double[activeIndices.size()];
    int position = 0;
    for (int index : activeIndices) {
        activeInstanceWeights[position++] = gammas[index][component] * noiseLabelWeights[index][label];
    }
    updateBinaryClassifier(component, label, activeDataSet, activeInstanceWeights);
}
Also used : IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) List(java.util.List) Logger(org.apache.logging.log4j.Logger) ArgMax(edu.neu.ccs.pyramid.util.ArgMax) edu.neu.ccs.pyramid.dataset(edu.neu.ccs.pyramid.dataset) Vector(org.apache.mahout.math.Vector) PriorProbClassifier(edu.neu.ccs.pyramid.classification.PriorProbClassifier) StopWatch(org.apache.commons.lang3.time.StopWatch) LogManager(org.apache.logging.log4j.LogManager) BMSelector(edu.neu.ccs.pyramid.clustering.bm.BMSelector) ArrayList(java.util.ArrayList)

Example 4 with PriorProbClassifier

use of edu.neu.ccs.pyramid.classification.PriorProbClassifier in project pyramid by cheng-li.

the class ENCBMOptimizer method updateBinaryClassifier.

/**
 * Refits the binary classifier stored for the given component and label with an
 * {@link ElasticNetLogisticTrainer}. A missing classifier, or a {@link PriorProbClassifier}
 * placeholder, is first replaced by a fresh two-class {@link LogisticRegression}.
 * Each training weight is the product of the matching entries of {@code activeGammas}
 * and {@code instanceWeights}.
 *
 * @param component     index of the component whose classifier is updated
 * @param label         index of the label whose classifier is updated
 * @param activeDataset data set the trainer is run on
 * @param activeGammas  weights multiplied into the instance weights
 */
@Override
protected void updateBinaryClassifier(int component, int label, MultiLabelClfDataSet activeDataset, double[] activeGammas) {
    StopWatch timer = new StopWatch();
    timer.start();

    // Only a real logistic regression can be trained; anything else is re-created.
    Object existing = cbm.binaryClassifiers[component][label];
    if (existing == null || existing instanceof PriorProbClassifier) {
        cbm.binaryClassifiers[component][label] = new LogisticRegression(2, activeDataset.getNumFeatures());
    }

    int[] binaryLabels = DataSetUtil.toBinaryLabels(activeDataset.getMultiLabels(), label);
    double[][] targetsDistribution = DataSetUtil.labelsToDistributions(binaryLabels, 2);

    // NOTE(review): instanceWeights is indexed by position in activeGammas; confirm both
    // arrays refer to the same data points in the same order.
    double[] overallWeights = new double[activeGammas.length];
    java.util.Arrays.setAll(overallWeights, i -> activeGammas[i] * instanceWeights[i]);

    LogisticRegression model = (LogisticRegression) cbm.binaryClassifiers[component][label];
    ElasticNetLogisticTrainer trainer = new ElasticNetLogisticTrainer.Builder(model, activeDataset, 2, targetsDistribution, overallWeights)
            .setRegularization(regularizationBinary)
            .setL1Ratio(l1RatioBinary)
            .setLineSearch(lineSearch)
            .setMaxNumLinearRegUpdates(maxNumLinearRegUpdates)
            .build();
    trainer.setActiveSet(activeSet);
    trainer.getTerminator().setMaxIteration(this.binaryUpdatesPerIter);
    trainer.optimize();

    if (!logger.isDebugEnabled()) {
        return;
    }
    logger.debug("time spent on updating component " + component + " label " + label + " = " + timer);
}
Also used : PriorProbClassifier(edu.neu.ccs.pyramid.classification.PriorProbClassifier) ElasticNetLogisticTrainer(edu.neu.ccs.pyramid.classification.logistic_regression.ElasticNetLogisticTrainer) LogisticRegression(edu.neu.ccs.pyramid.classification.logistic_regression.LogisticRegression) StopWatch(org.apache.commons.lang3.time.StopWatch)

Example 5 with PriorProbClassifier

use of edu.neu.ccs.pyramid.classification.PriorProbClassifier in project pyramid by cheng-li.

the class LRRecoverCBMOptimizer method updateBinaryClassifier.

/**
 * Refits the binary classifier stored for the given component and label by running a
 * {@link RidgeLogisticOptimizer} for at most {@code binaryUpdatesPerIter} iterations.
 * If the slot is empty or holds a {@link PriorProbClassifier}, a new two-class
 * {@link LogisticRegression} is installed before optimizing.
 *
 * @param component     index of the component whose classifier is updated
 * @param label         index of the label whose classifier is updated
 * @param activeDataset data set the optimizer is run on
 * @param activeGammas  weights handed to the optimizer alongside the binary labels
 */
@Override
protected void updateBinaryClassifier(int component, int label, MultiLabelClfDataSet activeDataset, double[] activeGammas) {
    StopWatch elapsed = new StopWatch();
    elapsed.start();

    Object slot = cbm.binaryClassifiers[component][label];
    boolean replaceSlot = (slot == null) || (slot instanceof PriorProbClassifier);
    if (replaceSlot) {
        int numFeatures = activeDataset.getNumFeatures();
        cbm.binaryClassifiers[component][label] = new LogisticRegression(2, numFeatures);
    }

    int[] binaryLabels = DataSetUtil.toBinaryLabels(activeDataset.getMultiLabels(), label);
    LogisticRegression logisticRegression = (LogisticRegression) cbm.binaryClassifiers[component][label];
    // no parallelism
    RidgeLogisticOptimizer ridgeLogisticOptimizer = new RidgeLogisticOptimizer(logisticRegression, activeDataset, binaryLabels, activeGammas, priorVarianceBinary, false);
    ridgeLogisticOptimizer.getOptimizer().getTerminator().setMaxIteration(binaryUpdatesPerIter);
    ridgeLogisticOptimizer.optimize();

    if (logger.isDebugEnabled()) {
        String report = "time spent on updating component " + component + " label " + label + " = " + elapsed;
        logger.debug(report);
    }
}
Also used : PriorProbClassifier(edu.neu.ccs.pyramid.classification.PriorProbClassifier) LogisticRegression(edu.neu.ccs.pyramid.classification.logistic_regression.LogisticRegression) RidgeLogisticOptimizer(edu.neu.ccs.pyramid.classification.logistic_regression.RidgeLogisticOptimizer) StopWatch(org.apache.commons.lang3.time.StopWatch)

Aggregations

PriorProbClassifier (edu.neu.ccs.pyramid.classification.PriorProbClassifier): 14; StopWatch (org.apache.commons.lang3.time.StopWatch): 10; LogisticRegression (edu.neu.ccs.pyramid.classification.logistic_regression.LogisticRegression): 8; RidgeLogisticOptimizer (edu.neu.ccs.pyramid.classification.logistic_regression.RidgeLogisticOptimizer): 4; ArrayList (java.util.ArrayList): 3; ElasticNetLogisticTrainer (edu.neu.ccs.pyramid.classification.logistic_regression.ElasticNetLogisticTrainer): 2; Feature (edu.neu.ccs.pyramid.feature.Feature): 2; LKBOutputCalculator (edu.neu.ccs.pyramid.classification.lkboost.LKBOutputCalculator): 1; LKBoost (edu.neu.ccs.pyramid.classification.lkboost.LKBoost): 1; LKBoostOptimizer (edu.neu.ccs.pyramid.classification.lkboost.LKBoostOptimizer): 1; BMSelector (edu.neu.ccs.pyramid.clustering.bm.BMSelector): 1; edu.neu.ccs.pyramid.dataset (edu.neu.ccs.pyramid.dataset): 1; ClfDataSet (edu.neu.ccs.pyramid.dataset.ClfDataSet): 1; ConstantRegressor (edu.neu.ccs.pyramid.regression.ConstantRegressor): 1; Regressor (edu.neu.ccs.pyramid.regression.Regressor): 1; RegTreeConfig (edu.neu.ccs.pyramid.regression.regression_tree.RegTreeConfig): 1; RegTreeFactory (edu.neu.ccs.pyramid.regression.regression_tree.RegTreeFactory): 1; TreeRule (edu.neu.ccs.pyramid.regression.regression_tree.TreeRule): 1; ArgMax (edu.neu.ccs.pyramid.util.ArgMax): 1; File (java.io.File): 1