Search in sources :

Example 21 with MultiLabel

use of edu.neu.ccs.pyramid.dataset.MultiLabel in project pyramid by cheng-li.

the class MultiLabelSynthesizer method crfArgmaxDrop.

/**
 * Generates a synthetic multi-label data set of 1000 points, 10 features and 4 classes.
 * <p>
 * Every feature value is drawn via {@code Sampling.doubleUniform(-1, 1)}. Each point is
 * labelled with a copy of the {@link SubsetAccPredictor} prediction of a hand-weighted
 * {@link CMLCRF}; afterwards each predicted label may be randomly removed.
 * Only the weights on features 0 and 1 are set explicitly here.
 *
 * @return the generated data set, with features and (possibly thinned) labels filled in
 */
public static MultiLabelClfDataSet crfArgmaxDrop() {
    final int numData = 1000;
    final int numClass = 4;
    final int numFeature = 10;

    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder()
            .numFeatures(numFeature)
            .numClasses(numClass)
            .numDataPoints(numData)
            .build();

    List<MultiLabel> support = Enumerator.enumerate(numClass);
    CMLCRF cmlcrf = new CMLCRF(numClass, numFeature, support);

    // Row k lists the weights that class k places on feature 0 and feature 1.
    final double[][] classWeights = {
        { 0, 10 },
        { 10, 10 },
        { 10, 0 },
        { -10, -10 }
    };
    for (int k = 0; k < classWeights.length; k++) {
        for (int f = 0; f < classWeights[k].length; f++) {
            cmlcrf.getWeights().getWeightsWithoutBiasForClass(k).set(f, classWeights[k][f]);
        }
    }

    // Fill the feature matrix with random values.
    for (int row = 0; row < numData; row++) {
        for (int col = 0; col < numFeature; col++) {
            dataSet.setFeatureValue(row, col, Sampling.doubleUniform(-1, 1));
        }
    }

    SubsetAccPredictor predictor = new SubsetAccPredictor(cmlcrf);

    // A predicted label of class k is removed when the draw exceeds keepThreshold[k].
    // Math.random() is always below 1, so class 0 is never dropped.
    final double[] keepThreshold = { 1, 0.9, 0.8, 0.7 };

    for (int row = 0; row < numData; row++) {
        // Work on a copy so the predictor's returned object is not modified.
        MultiLabel label = predictor.predict(dataSet.getRow(row)).copy();
        for (int k = 0; k < numClass; k++) {
            // The random draw happens for every class, before the membership check.
            boolean dropDrawn = Math.random() > keepThreshold[k];
            if (dropDrawn && label.matchClass(k)) {
                label.removeLabel(k);
            }
        }
        dataSet.setLabels(row, label);
    }
    return dataSet;
}
Also used : CMLCRF(edu.neu.ccs.pyramid.multilabel_classification.crf.CMLCRF) MultiLabel(edu.neu.ccs.pyramid.dataset.MultiLabel) SubsetAccPredictor(edu.neu.ccs.pyramid.multilabel_classification.crf.SubsetAccPredictor) MultiLabelClfDataSet(edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet)

Example 22 with MultiLabel

use of edu.neu.ccs.pyramid.dataset.MultiLabel in project pyramid by cheng-li.

the class MultiLabelSynthesizer method flipTwo.

/**
 * Generates a synthetic multi-label data set and then perturbs the labels of every point.
 * <p>
 * One random weight vector is drawn per class and every feature value is drawn via
 * {@code Sampling.doubleUniform(-1, 1)}. A point receives class {@code k} when the dot
 * product of its feature row with the weights of class {@code k} is non-negative.
 * Finally, for each point one class drawn via {@code Sampling.intUniform(0, numClass - 1)}
 * has its membership toggled; if that class is 0, a second class drawn via
 * {@code Sampling.intUniform(1, numClass - 1)} is toggled as well.
 *
 * @param numData    number of data points to generate
 * @param numFeature number of features per data point
 * @param numClass   number of classes
 * @return the generated data set with perturbed labels
 */
public static MultiLabelClfDataSet flipTwo(int numData, int numFeature, int numClass) {
    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder()
            .numFeatures(numFeature)
            .numClasses(numClass)
            .numDataPoints(numData)
            .build();

    // One random weight vector per class.
    Vector[] weights = new Vector[numClass];
    for (int c = 0; c < weights.length; c++) {
        weights[c] = new DenseVector(numFeature);
        for (int f = 0; f < numFeature; f++) {
            weights[c].set(f, Sampling.doubleUniform(-1, 1));
        }
    }

    // Random feature matrix.
    for (int row = 0; row < numData; row++) {
        for (int col = 0; col < numFeature; col++) {
            dataSet.setFeatureValue(row, col, Sampling.doubleUniform(-1, 1));
        }
    }

    // Clean labels: class c is on whenever its score is non-negative.
    for (int row = 0; row < numData; row++) {
        for (int c = 0; c < numClass; c++) {
            double score = weights[c].dot(dataSet.getRow(row));
            if (score >= 0) {
                dataSet.addLabel(row, c);
            }
        }
    }

    // Perturbation: toggle one class per point, and a second one when the first is class 0.
    for (int row = 0; row < numData; row++) {
        int first = Sampling.intUniform(0, numClass - 1);
        // Mutated in place; assumes getMultiLabels() exposes the data set's own label objects.
        MultiLabel label = dataSet.getMultiLabels()[row];
        flipMembership(label, first);
        if (first == 0) {
            int second = Sampling.intUniform(1, numClass - 1);
            flipMembership(label, second);
        }
    }
    return dataSet;
}

/** Removes {@code classIndex} from {@code label} if present, otherwise adds it. */
private static void flipMembership(MultiLabel label, int classIndex) {
    if (!label.matchClass(classIndex)) {
        label.addLabel(classIndex);
        return;
    }
    label.removeLabel(classIndex);
}
Also used : MultiLabel(edu.neu.ccs.pyramid.dataset.MultiLabel) DenseVector(org.apache.mahout.math.DenseVector) Vector(org.apache.mahout.math.Vector) MultiLabelClfDataSet(edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet)

Example 23 with MultiLabel

use of edu.neu.ccs.pyramid.dataset.MultiLabel in project pyramid by cheng-li.

the class MultiLabelSynthesizer method crfSample.

/**
 * Generates a synthetic multi-label data set of 10000 points, 2 features and 4 classes.
 * <p>
 * Every feature value is drawn via {@code Sampling.doubleUniform(-1, 1)}. The label of each
 * point is whatever a {@link SamplingPredictor} built on a hand-weighted {@link CMLCRF}
 * returns for that point's feature row.
 *
 * @return the generated data set, with features and labels filled in
 */
public static MultiLabelClfDataSet crfSample() {
    final int numData = 10000;
    final int numClass = 4;
    final int numFeature = 2;

    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder()
            .numFeatures(numFeature)
            .numClasses(numClass)
            .numDataPoints(numData)
            .build();

    List<MultiLabel> support = Enumerator.enumerate(numClass);
    CMLCRF cmlcrf = new CMLCRF(numClass, numFeature, support);

    // Row k lists the weights that class k places on feature 0 and feature 1.
    final double[][] classWeights = {
        { 0, 10 },
        { 10, 10 },
        { 10, 0 },
        { -10, -10 }
    };
    for (int k = 0; k < classWeights.length; k++) {
        for (int f = 0; f < classWeights[k].length; f++) {
            cmlcrf.getWeights().getWeightsWithoutBiasForClass(k).set(f, classWeights[k][f]);
        }
    }

    // Fill the feature matrix with random values.
    for (int row = 0; row < numData; row++) {
        for (int col = 0; col < numFeature; col++) {
            dataSet.setFeatureValue(row, col, Sampling.doubleUniform(-1, 1));
        }
    }

    SamplingPredictor samplingPredictor = new SamplingPredictor(cmlcrf);

    // Label each point with the predictor's output for its feature row.
    for (int row = 0; row < numData; row++) {
        MultiLabel sampled = samplingPredictor.predict(dataSet.getRow(row));
        dataSet.setLabels(row, sampled);
    }
    return dataSet;
}
Also used : CMLCRF(edu.neu.ccs.pyramid.multilabel_classification.crf.CMLCRF) MultiLabel(edu.neu.ccs.pyramid.dataset.MultiLabel) MultiLabelClfDataSet(edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet) SamplingPredictor(edu.neu.ccs.pyramid.multilabel_classification.crf.SamplingPredictor)

Example 24 with MultiLabel

use of edu.neu.ccs.pyramid.dataset.MultiLabel in project pyramid by cheng-li.

the class MultiLabelSynthesizer method crfArgmaxHide.

/**
 * Generates a synthetic multi-label data set of 10000 points, 2 features and 4 classes,
 * then hides feature 0.
 * <p>
 * Every feature value is drawn via {@code Sampling.doubleUniform(-1, 1)}. Labels come from
 * a {@link SubsetAccPredictor} built on a hand-weighted {@link CMLCRF} and are computed
 * from the full feature rows. Only after the labels are stored is feature 0 of every
 * point overwritten with 0.
 *
 * @return the generated data set, with labels assigned and feature 0 zeroed out
 */
public static MultiLabelClfDataSet crfArgmaxHide() {
    final int numData = 10000;
    final int numClass = 4;
    final int numFeature = 2;

    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder()
            .numFeatures(numFeature)
            .numClasses(numClass)
            .numDataPoints(numData)
            .build();

    List<MultiLabel> support = Enumerator.enumerate(numClass);
    CMLCRF cmlcrf = new CMLCRF(numClass, numFeature, support);

    // Row k lists the weights that class k places on feature 0 and feature 1.
    final double[][] classWeights = {
        { 0, 10 },
        { 10, 10 },
        { 10, 0 },
        { -10, -10 }
    };
    for (int k = 0; k < classWeights.length; k++) {
        for (int f = 0; f < classWeights[k].length; f++) {
            cmlcrf.getWeights().getWeightsWithoutBiasForClass(k).set(f, classWeights[k][f]);
        }
    }

    // Fill the feature matrix with random values.
    for (int row = 0; row < numData; row++) {
        for (int col = 0; col < numFeature; col++) {
            dataSet.setFeatureValue(row, col, Sampling.doubleUniform(-1, 1));
        }
    }

    SubsetAccPredictor predictor = new SubsetAccPredictor(cmlcrf);

    // Labels are computed while both features are still intact.
    for (int row = 0; row < numData; row++) {
        MultiLabel predicted = predictor.predict(dataSet.getRow(row));
        dataSet.setLabels(row, predicted);
    }

    // Hide feature 0 by overwriting it with 0 for every point.
    final int hiddenFeature = 0;
    for (int row = 0; row < numData; row++) {
        dataSet.setFeatureValue(row, hiddenFeature, 0);
    }
    return dataSet;
}
Also used : CMLCRF(edu.neu.ccs.pyramid.multilabel_classification.crf.CMLCRF) MultiLabel(edu.neu.ccs.pyramid.dataset.MultiLabel) SubsetAccPredictor(edu.neu.ccs.pyramid.multilabel_classification.crf.SubsetAccPredictor) MultiLabelClfDataSet(edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet)

Example 25 with MultiLabel

use of edu.neu.ccs.pyramid.dataset.MultiLabel in project pyramid by cheng-li.

the class CMLCRFTest method test6.

/**
 * Runs 50 LBFGS iterations of a {@link CMLCRF} on the "medical" train split and, after
 * each iteration, prints the iteration number, the current loss value, and the accuracy
 * and overlap on both the train and test splits.
 *
 * @throws Exception propagated from data loading or from the calls made while training
 */
private static void test6() throws Exception {
    File trainDir = new File(DATASETS, "medical/train");
    MultiLabelClfDataSet trainSet =
            TRECFormat.loadMultiLabelClfDataSet(trainDir, DataSetType.ML_CLF_SPARSE, true);
    File testDir = new File(DATASETS, "medical/test");
    MultiLabelClfDataSet testSet =
            TRECFormat.loadMultiLabelClfDataSet(testDir, DataSetType.ML_CLF_SPARSE, true);

    CMLCRF cmlcrf = new CMLCRF(trainSet);
    // NOTE(review): the meaning of the third argument (1) is not visible here — confirm against CRFLoss.
    CRFLoss crfLoss = new CRFLoss(cmlcrf, trainSet, 1);
    LBFGS optimizer = new LBFGS(crfLoss);

    final int numIterations = 50;
    for (int iter = 0; iter < numIterations; iter++) {
        System.out.println("iter: " + iter);
        optimizer.iterate();
        System.out.println(crfLoss.getValue());

        // Re-predict both splits with the weights as they stand after this iteration.
        MultiLabel[] trainPredictions = cmlcrf.predict(trainSet);
        MultiLabel[] testPredictions = cmlcrf.predict(testSet);

        // All four metrics go on one tab-separated line; only the last call ends the line.
        System.out.print("\tTrain acc: "
                + Accuracy.accuracy(trainSet.getMultiLabels(), trainPredictions));
        System.out.print("\tTrain overlap "
                + Overlap.overlap(trainSet.getMultiLabels(), trainPredictions));
        System.out.print("\tTest acc: "
                + Accuracy.accuracy(testSet.getMultiLabels(), testPredictions));
        System.out.println("\tTest overlap "
                + Overlap.overlap(testSet.getMultiLabels(), testPredictions));
    }
}
Also used : CMLCRF(edu.neu.ccs.pyramid.multilabel_classification.crf.CMLCRF) LBFGS(edu.neu.ccs.pyramid.optimization.LBFGS) MultiLabel(edu.neu.ccs.pyramid.dataset.MultiLabel) CRFLoss(edu.neu.ccs.pyramid.multilabel_classification.crf.CRFLoss) File(java.io.File) MultiLabelClfDataSet(edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet)

Aggregations

MultiLabel (edu.neu.ccs.pyramid.dataset.MultiLabel)101 Vector (org.apache.mahout.math.Vector)22 MultiLabelClfDataSet (edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet)21 File (java.io.File)14 DenseVector (org.apache.mahout.math.DenseVector)13 CMLCRF (edu.neu.ccs.pyramid.multilabel_classification.crf.CMLCRF)12 Pair (edu.neu.ccs.pyramid.util.Pair)8 LBFGS (edu.neu.ccs.pyramid.optimization.LBFGS)7 ArrayList (java.util.ArrayList)7 MLMeasures (edu.neu.ccs.pyramid.eval.MLMeasures)6 CRFLoss (edu.neu.ccs.pyramid.multilabel_classification.crf.CRFLoss)6 MultiLabelClassifier (edu.neu.ccs.pyramid.multilabel_classification.MultiLabelClassifier)5 GeneralF1Predictor (edu.neu.ccs.pyramid.multilabel_classification.plugin_rule.GeneralF1Predictor)5 Collectors (java.util.stream.Collectors)5 EarlyStopper (edu.neu.ccs.pyramid.optimization.EarlyStopper)4 java.util (java.util)4 StopWatch (org.apache.commons.lang3.time.StopWatch)4 Config (edu.neu.ccs.pyramid.configuration.Config)3 DataSetUtil (edu.neu.ccs.pyramid.dataset.DataSetUtil)3 TRECFormat (edu.neu.ccs.pyramid.dataset.TRECFormat)3