Search in sources :

Example 6 with MultiLabelClfDataSet

use of edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet in project pyramid by cheng-li.

the class MultiLabelSynthesizer method independentNoise.

/**
     * y0: w=(0,1)
     * y1: w=(1,1)
     * y2: w=(1,0)
     * y3: w=(1,-1)
     * @return
     */
public static MultiLabelClfDataSet independentNoise() {
    int numData = 10000;
    int numClass = 4;
    int numFeature = 2;
    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder().numFeatures(numFeature).numClasses(numClass).numDataPoints(numData).build();
    // generate weights
    Vector[] weights = new Vector[numClass];
    for (int k = 0; k < numClass; k++) {
        Vector vector = new DenseVector(numFeature);
        weights[k] = vector;
    }
    weights[0].set(0, 0);
    weights[0].set(1, 1);
    weights[1].set(0, 1);
    weights[1].set(1, 1);
    weights[2].set(0, 1);
    weights[2].set(1, 0);
    weights[3].set(0, 1);
    weights[3].set(1, -1);
    // generate features
    for (int i = 0; i < numData; i++) {
        for (int j = 0; j < numFeature; j++) {
            dataSet.setFeatureValue(i, j, Sampling.doubleUniform(-1, 1));
        }
    }
    NormalDistribution[] noises = new NormalDistribution[4];
    noises[0] = new NormalDistribution(0, 0.1);
    noises[1] = new NormalDistribution(0, 0.1);
    noises[2] = new NormalDistribution(0, 0.1);
    noises[3] = new NormalDistribution(0, 0.1);
    // assign labels
    int numFlipped = 0;
    for (int i = 0; i < numData; i++) {
        for (int k = 0; k < numClass; k++) {
            double dot = weights[k].dot(dataSet.getRow(i));
            double score = dot + noises[k].sample();
            if (score >= 0) {
                dataSet.addLabel(i, k);
            }
            if (dot * score < 0) {
                numFlipped += 1;
            }
        }
    }
    System.out.println("number of flipped = " + numFlipped);
    return dataSet;
}
Also used : NormalDistribution(org.apache.commons.math3.distribution.NormalDistribution) MultivariateNormalDistribution(org.apache.commons.math3.distribution.MultivariateNormalDistribution) DenseVector(org.apache.mahout.math.DenseVector) Vector(org.apache.mahout.math.Vector) MultiLabelClfDataSet(edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet) DenseVector(org.apache.mahout.math.DenseVector)

Example 7 with MultiLabelClfDataSet

use of edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet in project pyramid by cheng-li.

the class MultiLabelSynthesizer method crfArgmaxDrop.

public static MultiLabelClfDataSet crfArgmaxDrop() {
    int numData = 1000;
    int numClass = 4;
    int numFeature = 10;
    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder().numFeatures(numFeature).numClasses(numClass).numDataPoints(numData).build();
    List<MultiLabel> support = Enumerator.enumerate(numClass);
    CMLCRF cmlcrf = new CMLCRF(numClass, numFeature, support);
    cmlcrf.getWeights().getWeightsWithoutBiasForClass(0).set(0, 0);
    cmlcrf.getWeights().getWeightsWithoutBiasForClass(0).set(1, 10);
    cmlcrf.getWeights().getWeightsWithoutBiasForClass(1).set(0, 10);
    cmlcrf.getWeights().getWeightsWithoutBiasForClass(1).set(1, 10);
    cmlcrf.getWeights().getWeightsWithoutBiasForClass(2).set(0, 10);
    cmlcrf.getWeights().getWeightsWithoutBiasForClass(2).set(1, 0);
    cmlcrf.getWeights().getWeightsWithoutBiasForClass(3).set(0, -10);
    cmlcrf.getWeights().getWeightsWithoutBiasForClass(3).set(1, -10);
    // generate features
    for (int i = 0; i < numData; i++) {
        for (int j = 0; j < numFeature; j++) {
            dataSet.setFeatureValue(i, j, Sampling.doubleUniform(-1, 1));
        }
    }
    SubsetAccPredictor predictor = new SubsetAccPredictor(cmlcrf);
    // drop labels
    double[] alphas = { 1, 0.9, 0.8, 0.7 };
    // assign labels
    for (int i = 0; i < numData; i++) {
        //            System.out.println(dataSet.getRow(i));
        MultiLabel label = predictor.predict(dataSet.getRow(i)).copy();
        for (int l = 0; l < numClass; l++) {
            if (Math.random() > alphas[l] && label.matchClass(l)) {
                //                    System.out.println("drop");
                label.removeLabel(l);
            }
        }
        dataSet.setLabels(i, label);
    }
    return dataSet;
}
Also used : CMLCRF(edu.neu.ccs.pyramid.multilabel_classification.crf.CMLCRF) MultiLabel(edu.neu.ccs.pyramid.dataset.MultiLabel) SubsetAccPredictor(edu.neu.ccs.pyramid.multilabel_classification.crf.SubsetAccPredictor) MultiLabelClfDataSet(edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet)

Example 8 with MultiLabelClfDataSet

use of edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet in project pyramid by cheng-li.

the class MultiLabelSynthesizer method flipTwo.

public static MultiLabelClfDataSet flipTwo(int numData, int numFeature, int numClass) {
    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder().numFeatures(numFeature).numClasses(numClass).numDataPoints(numData).build();
    // generate weights
    Vector[] weights = new Vector[numClass];
    for (int k = 0; k < numClass; k++) {
        Vector vector = new DenseVector(numFeature);
        for (int j = 0; j < numFeature; j++) {
            vector.set(j, Sampling.doubleUniform(-1, 1));
        }
        weights[k] = vector;
    }
    // generate features
    for (int i = 0; i < numData; i++) {
        for (int j = 0; j < numFeature; j++) {
            dataSet.setFeatureValue(i, j, Sampling.doubleUniform(-1, 1));
        }
    }
    // assign labels
    for (int i = 0; i < numData; i++) {
        for (int k = 0; k < numClass; k++) {
            double dot = weights[k].dot(dataSet.getRow(i));
            if (dot >= 0) {
                dataSet.addLabel(i, k);
            }
        }
    }
    // flip
    for (int i = 0; i < numData; i++) {
        int toChange = Sampling.intUniform(0, numClass - 1);
        MultiLabel label = dataSet.getMultiLabels()[i];
        if (label.matchClass(toChange)) {
            label.removeLabel(toChange);
        } else {
            label.addLabel(toChange);
        }
        if (toChange == 0) {
            int another = Sampling.intUniform(1, numClass - 1);
            if (label.matchClass(another)) {
                label.removeLabel(another);
            } else {
                label.addLabel(another);
            }
        }
    }
    return dataSet;
}
Also used : MultiLabel(edu.neu.ccs.pyramid.dataset.MultiLabel) DenseVector(org.apache.mahout.math.DenseVector) Vector(org.apache.mahout.math.Vector) MultiLabelClfDataSet(edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet) DenseVector(org.apache.mahout.math.DenseVector)

Example 9 with MultiLabelClfDataSet

use of edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet in project pyramid by cheng-li.

the class MultiLabelSynthesizer method crfSample.

public static MultiLabelClfDataSet crfSample() {
    int numData = 10000;
    int numClass = 4;
    int numFeature = 2;
    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder().numFeatures(numFeature).numClasses(numClass).numDataPoints(numData).build();
    List<MultiLabel> support = Enumerator.enumerate(numClass);
    CMLCRF cmlcrf = new CMLCRF(numClass, numFeature, support);
    cmlcrf.getWeights().getWeightsWithoutBiasForClass(0).set(0, 0);
    cmlcrf.getWeights().getWeightsWithoutBiasForClass(0).set(1, 10);
    cmlcrf.getWeights().getWeightsWithoutBiasForClass(1).set(0, 10);
    cmlcrf.getWeights().getWeightsWithoutBiasForClass(1).set(1, 10);
    cmlcrf.getWeights().getWeightsWithoutBiasForClass(2).set(0, 10);
    cmlcrf.getWeights().getWeightsWithoutBiasForClass(2).set(1, 0);
    cmlcrf.getWeights().getWeightsWithoutBiasForClass(3).set(0, -10);
    cmlcrf.getWeights().getWeightsWithoutBiasForClass(3).set(1, -10);
    // generate features
    for (int i = 0; i < numData; i++) {
        for (int j = 0; j < numFeature; j++) {
            dataSet.setFeatureValue(i, j, Sampling.doubleUniform(-1, 1));
        }
    }
    SamplingPredictor samplingPredictor = new SamplingPredictor(cmlcrf);
    // assign labels
    for (int i = 0; i < numData; i++) {
        MultiLabel label = samplingPredictor.predict(dataSet.getRow(i));
        dataSet.setLabels(i, label);
    }
    return dataSet;
}
Also used : CMLCRF(edu.neu.ccs.pyramid.multilabel_classification.crf.CMLCRF) MultiLabel(edu.neu.ccs.pyramid.dataset.MultiLabel) MultiLabelClfDataSet(edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet) SamplingPredictor(edu.neu.ccs.pyramid.multilabel_classification.crf.SamplingPredictor)

Example 10 with MultiLabelClfDataSet

use of edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet in project pyramid by cheng-li.

the class MultiLabelSynthesizer method crfArgmaxHide.

public static MultiLabelClfDataSet crfArgmaxHide() {
    int numData = 10000;
    int numClass = 4;
    int numFeature = 2;
    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder().numFeatures(numFeature).numClasses(numClass).numDataPoints(numData).build();
    List<MultiLabel> support = Enumerator.enumerate(numClass);
    CMLCRF cmlcrf = new CMLCRF(numClass, numFeature, support);
    cmlcrf.getWeights().getWeightsWithoutBiasForClass(0).set(0, 0);
    cmlcrf.getWeights().getWeightsWithoutBiasForClass(0).set(1, 10);
    cmlcrf.getWeights().getWeightsWithoutBiasForClass(1).set(0, 10);
    cmlcrf.getWeights().getWeightsWithoutBiasForClass(1).set(1, 10);
    cmlcrf.getWeights().getWeightsWithoutBiasForClass(2).set(0, 10);
    cmlcrf.getWeights().getWeightsWithoutBiasForClass(2).set(1, 0);
    cmlcrf.getWeights().getWeightsWithoutBiasForClass(3).set(0, -10);
    cmlcrf.getWeights().getWeightsWithoutBiasForClass(3).set(1, -10);
    // generate features
    for (int i = 0; i < numData; i++) {
        for (int j = 0; j < numFeature; j++) {
            dataSet.setFeatureValue(i, j, Sampling.doubleUniform(-1, 1));
        }
    }
    SubsetAccPredictor predictor = new SubsetAccPredictor(cmlcrf);
    // assign labels
    for (int i = 0; i < numData; i++) {
        MultiLabel label = predictor.predict(dataSet.getRow(i));
        dataSet.setLabels(i, label);
    }
    // hide one feature
    for (int i = 0; i < numData; i++) {
        dataSet.setFeatureValue(i, 0, 0);
    }
    return dataSet;
}
Also used : CMLCRF(edu.neu.ccs.pyramid.multilabel_classification.crf.CMLCRF) MultiLabel(edu.neu.ccs.pyramid.dataset.MultiLabel) SubsetAccPredictor(edu.neu.ccs.pyramid.multilabel_classification.crf.SubsetAccPredictor) MultiLabelClfDataSet(edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet)

Aggregations

MultiLabelClfDataSet (edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet)48 File (java.io.File)24 MultiLabel (edu.neu.ccs.pyramid.dataset.MultiLabel)23 CMLCRF (edu.neu.ccs.pyramid.multilabel_classification.crf.CMLCRF)13 MLMeasures (edu.neu.ccs.pyramid.eval.MLMeasures)12 LBFGS (edu.neu.ccs.pyramid.optimization.LBFGS)9 Vector (org.apache.mahout.math.Vector)9 Config (edu.neu.ccs.pyramid.configuration.Config)7 CRFLoss (edu.neu.ccs.pyramid.multilabel_classification.crf.CRFLoss)7 DenseVector (org.apache.mahout.math.DenseVector)7 MultiLabelClassifier (edu.neu.ccs.pyramid.multilabel_classification.MultiLabelClassifier)5 Pair (edu.neu.ccs.pyramid.util.Pair)5 java.util (java.util)5 Collectors (java.util.stream.Collectors)5 IntStream (java.util.stream.IntStream)5 DataSetUtil (edu.neu.ccs.pyramid.dataset.DataSetUtil)4 TRECFormat (edu.neu.ccs.pyramid.dataset.TRECFormat)4 MLScorer (edu.neu.ccs.pyramid.multilabel_classification.MLScorer)4 StopWatch (org.apache.commons.lang3.time.StopWatch)4 AccScorer (edu.neu.ccs.pyramid.multilabel_classification.AccScorer)3