Search in sources :

Example 1 with IntegerDistribution

use of org.apache.commons.math3.distribution.IntegerDistribution in project RoaringBitmap by RoaringBitmap.

the class BenchmarkDataGenerator method generate.

/**
 * Builds {@code howMany} pairs of benchmark containers, one "small" and one "big" per pair.
 *
 * <p>For each pair a small size is sampled from a uniform integer distribution over
 * {@code [SMALLEST_ARRAY, BIGGEST_ARRAY / param]} (seeded with {@code param + 123}); the big
 * size is that small size multiplied by {@code param}. Values come either from a uniform
 * distribution over the {@code short} range (seeded with {@code param + 17}) or from a
 * {@link ClusteredDataGenerator}, depending on the type flags.
 *
 * @param param     ratio between big and small sizes; also feeds both RNG seeds
 * @param howMany   number of small/big container pairs to produce
 * @param smallType {@code 0} to fill small arrays via {@code generateUniform}, anything else
 *                  to use {@code generateClustered}
 * @param bigType   {@code 0} to fill big arrays via {@code generateUniform}, anything else
 *                  to use {@code generateClustered}
 * @return a {@code BenchmarkData} wrapping the small and big container arrays
 */
static BenchmarkData generate(int param, int howMany, int smallType, int bigType) {
    IntegerDistribution valueDist =
            new UniformIntegerDistribution(new Well19937c(param + 17), Short.MIN_VALUE, Short.MAX_VALUE);
    ClusteredDataGenerator clusterGen = new ClusteredDataGenerator();
    IntegerDistribution sizeDist =
            new UniformIntegerDistribution(new Well19937c(param + 123), SMALLEST_ARRAY, BIGGEST_ARRAY / param);

    final boolean uniformSmall = smallType == 0;
    final boolean uniformBig = bigType == 0;

    BenchmarkContainer[] smallContainers = new BenchmarkContainer[howMany];
    BenchmarkContainer[] bigContainers = new BenchmarkContainer[howMany];

    for (int idx = 0; idx < howMany; idx++) {
        final int smallLength = sizeDist.sample();
        final int bigLength = smallLength * param;

        // The small array is always generated before the big one: both may draw from the
        // same value distribution, so the order determines the produced data.
        final short[] smallValues;
        if (uniformSmall) {
            smallValues = generateUniform(valueDist, smallLength);
        } else {
            smallValues = generateClustered(clusterGen, smallLength);
        }

        final short[] bigValues;
        if (uniformBig) {
            bigValues = generateUniform(valueDist, bigLength);
        } else {
            bigValues = generateClustered(clusterGen, bigLength);
        }

        smallContainers[idx] = new BenchmarkContainer(smallValues);
        bigContainers[idx] = new BenchmarkContainer(bigValues);
    }
    return new BenchmarkData(smallContainers, bigContainers);
}
Also used : IntegerDistribution(org.apache.commons.math3.distribution.IntegerDistribution) UniformIntegerDistribution(org.apache.commons.math3.distribution.UniformIntegerDistribution) Well19937c(org.apache.commons.math3.random.Well19937c) UniformIntegerDistribution(org.apache.commons.math3.distribution.UniformIntegerDistribution) ClusteredDataGenerator(me.lemire.integercompression.synth.ClusteredDataGenerator)

Example 2 with IntegerDistribution

use of org.apache.commons.math3.distribution.IntegerDistribution in project RoaringBitmap by RoaringBitmap.

the class BenchmarkDataGenerator method generate.

/**
 * Generates paired benchmark inputs: {@code howMany} small containers and {@code howMany}
 * big containers, where the i-th big array holds {@code param} times as many values as the
 * i-th small array.
 *
 * <p>Small sizes are sampled uniformly from {@code [SMALLEST_ARRAY, BIGGEST_ARRAY / param]}.
 * Both random generators are seeded from {@code param} ({@code param + 17} for values,
 * {@code param + 123} for sizes).
 *
 * @param param     size multiplier between small and big arrays; also used in the seeds
 * @param howMany   how many small/big pairs to create
 * @param smallType {@code 0} selects {@code generateUniform} for small arrays, otherwise
 *                  {@code generateClustered} is used
 * @param bigType   {@code 0} selects {@code generateUniform} for big arrays, otherwise
 *                  {@code generateClustered} is used
 * @return the generated containers bundled in a {@code BenchmarkData}
 */
static BenchmarkData generate(int param, int howMany, int smallType, int bigType) {
    final Well19937c valueRng = new Well19937c(param + 17);
    final IntegerDistribution shortValues =
            new UniformIntegerDistribution(valueRng, Short.MIN_VALUE, Short.MAX_VALUE);
    final ClusteredDataGenerator clusters = new ClusteredDataGenerator();
    final Well19937c sizeRng = new Well19937c(param + 123);
    final IntegerDistribution smallSizes =
            new UniformIntegerDistribution(sizeRng, SMALLEST_ARRAY, BIGGEST_ARRAY / param);

    final BenchmarkContainer[] smallSide = new BenchmarkContainer[howMany];
    final BenchmarkContainer[] bigSide = new BenchmarkContainer[howMany];

    int filled = 0;
    while (filled < howMany) {
        final int smallCount = smallSizes.sample();
        final int bigCount = smallCount * param;

        // Keep small-then-big generation order; the generators are stateful.
        final short[] smallData = (smallType == 0)
                ? generateUniform(shortValues, smallCount)
                : generateClustered(clusters, smallCount);
        final short[] bigData = (bigType == 0)
                ? generateUniform(shortValues, bigCount)
                : generateClustered(clusters, bigCount);

        smallSide[filled] = new BenchmarkContainer(smallData);
        bigSide[filled] = new BenchmarkContainer(bigData);
        filled++;
    }
    return new BenchmarkData(smallSide, bigSide);
}
Also used : IntegerDistribution(org.apache.commons.math3.distribution.IntegerDistribution) UniformIntegerDistribution(org.apache.commons.math3.distribution.UniformIntegerDistribution) Well19937c(org.apache.commons.math3.random.Well19937c) UniformIntegerDistribution(org.apache.commons.math3.distribution.UniformIntegerDistribution) ClusteredDataGenerator(me.lemire.integercompression.synth.ClusteredDataGenerator)

Example 3 with IntegerDistribution

use of org.apache.commons.math3.distribution.IntegerDistribution in project pyramid by cheng-li.

the class MultiLabelSynthesizer method flipOneNonUniform.

/**
 * Synthesizes a data set with 2 features and 4 labels, then toggles exactly one label on
 * every data point, choosing the label to toggle non-uniformly.
 *
 * <p>Label {@code k} is first attached to a data point when the dot product of the label's
 * weight vector with the point's feature row is non-negative. The weight vectors are:
 * <pre>
 * y0: w=(0,1)
 * y1: w=(1,1)
 * y2: w=(1,0)
 * y3: w=(1,-1)
 * </pre>
 * Each feature value is obtained from {@code Sampling.doubleUniform(-1, 1)}. The label to
 * toggle is then sampled with probabilities 0.4, 0.2, 0.2, 0.2 for labels 0 to 3: it is
 * removed if the point already has it and added otherwise.
 *
 * @param numData number of data points to generate
 * @return the generated data set, with one label toggled per data point
 */
public static MultiLabelClfDataSet flipOneNonUniform(int numData) {
    final int numClass = 4;
    final int numFeature = 2;
    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder()
            .numFeatures(numFeature)
            .numClasses(numClass)
            .numDataPoints(numData)
            .build();

    // One row of weight coordinates per label.
    final double[][] weightTable = {
        { 0, 1 },
        { 1, 1 },
        { 1, 0 },
        { 1, -1 }
    };
    Vector[] weights = new Vector[numClass];
    for (int label = 0; label < numClass; label++) {
        Vector w = new DenseVector(numFeature);
        for (int feature = 0; feature < numFeature; feature++) {
            w.set(feature, weightTable[label][feature]);
        }
        weights[label] = w;
    }

    // Fill in the features.
    for (int row = 0; row < numData; row++) {
        for (int col = 0; col < numFeature; col++) {
            double value = Sampling.doubleUniform(-1, 1);
            dataSet.setFeatureValue(row, col, value);
        }
    }

    // Attach every label whose weight vector has a non-negative dot product with the row.
    for (int row = 0; row < numData; row++) {
        for (int label = 0; label < numClass; label++) {
            double score = weights[label].dot(dataSet.getRow(row));
            if (score < 0) {
                continue;
            }
            dataSet.addLabel(row, label);
        }
    }

    // Toggle one non-uniformly chosen label per data point.
    final int[] candidates = { 0, 1, 2, 3 };
    final double[] candidateProbs = { 0.4, 0.2, 0.2, 0.2 };
    IntegerDistribution flipChooser = new EnumeratedIntegerDistribution(candidates, candidateProbs);
    for (int row = 0; row < numData; row++) {
        int flipped = flipChooser.sample();
        MultiLabel current = dataSet.getMultiLabels()[row];
        if (!current.matchClass(flipped)) {
            current.addLabel(flipped);
        } else {
            current.removeLabel(flipped);
        }
    }
    return dataSet;
}
Also used : EnumeratedIntegerDistribution(org.apache.commons.math3.distribution.EnumeratedIntegerDistribution) MultiLabel(edu.neu.ccs.pyramid.dataset.MultiLabel) IntegerDistribution(org.apache.commons.math3.distribution.IntegerDistribution) EnumeratedIntegerDistribution(org.apache.commons.math3.distribution.EnumeratedIntegerDistribution) DenseVector(org.apache.mahout.math.DenseVector) Vector(org.apache.mahout.math.Vector) MultiLabelClfDataSet(edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet) DenseVector(org.apache.mahout.math.DenseVector)

Example 4 with IntegerDistribution

use of org.apache.commons.math3.distribution.IntegerDistribution in project pyramid by cheng-li.

the class MultiLabelSynthesizer method sampleFromMix.

/**
 * Synthesizes a 10000-point data set with 2 features and 2 labels, drawn from a mixture of
 * two clusters, each with its own pair of label weight vectors.
 *
 * <p>For every data point a cluster is sampled (cluster 0 with proportion 0.4, cluster 1
 * with proportion 0.6); label {@code l} is attached when the dot product of that cluster's
 * weight vector for {@code l} with the point's feature row is non-negative. The weights are:
 * <pre>
 * C0, y0: w=(0,1)
 * C0, y1: w=(1,1)
 * C1, y0: w=(1,0)
 * C1, y1: w=(1,-1)
 * </pre>
 * Each feature value is obtained from {@code Sampling.doubleUniform(-1, 1)}. The sampled
 * cluster, the row, the weight and the dot product are printed to standard output while
 * labels are assigned.
 *
 * @return the generated data set
 */
public static MultiLabelClfDataSet sampleFromMix() {
    final int numData = 10000;
    final int numClass = 2;
    final int numFeature = 2;
    final int numClusters = 2;
    final double[] proportions = { 0.4, 0.6 };
    final int[] indices = { 0, 1 };
    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder()
            .numFeatures(numFeature)
            .numClasses(numClass)
            .numDataPoints(numData)
            .build();

    // weightTable[cluster][label] holds the coordinates of that weight vector.
    final double[][][] weightTable = {
        { { 0, 1 }, { 1, 1 } },
        { { 1, 0 }, { 1, -1 } }
    };
    Vector[][] weights = new Vector[numClusters][numClass];
    for (int cluster = 0; cluster < numClusters; cluster++) {
        for (int label = 0; label < numClass; label++) {
            Vector w = new DenseVector(numFeature);
            for (int feature = 0; feature < numFeature; feature++) {
                w.set(feature, weightTable[cluster][label][feature]);
            }
            weights[cluster][label] = w;
        }
    }

    // Fill in the features.
    for (int row = 0; row < numData; row++) {
        for (int col = 0; col < numFeature; col++) {
            double value = Sampling.doubleUniform(-1, 1);
            dataSet.setFeatureValue(row, col, value);
        }
    }

    IntegerDistribution clusterChooser = new EnumeratedIntegerDistribution(indices, proportions);

    // Pick a cluster per data point and attach labels using that cluster's weights.
    for (int row = 0; row < numData; row++) {
        int cluster = clusterChooser.sample();
        System.out.println("cluster " + cluster);
        for (int label = 0; label < numClass; label++) {
            System.out.println("row = " + dataSet.getRow(row));
            System.out.println("weight = " + weights[cluster][label]);
            double dot = weights[cluster][label].dot(dataSet.getRow(row));
            System.out.println("dot = " + dot);
            if (dot < 0) {
                continue;
            }
            dataSet.addLabel(row, label);
        }
    }
    return dataSet;
}
Also used : EnumeratedIntegerDistribution(org.apache.commons.math3.distribution.EnumeratedIntegerDistribution) IntegerDistribution(org.apache.commons.math3.distribution.IntegerDistribution) EnumeratedIntegerDistribution(org.apache.commons.math3.distribution.EnumeratedIntegerDistribution) DenseVector(org.apache.mahout.math.DenseVector) Vector(org.apache.mahout.math.Vector) MultiLabelClfDataSet(edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet) DenseVector(org.apache.mahout.math.DenseVector)

Aggregations

IntegerDistribution (org.apache.commons.math3.distribution.IntegerDistribution): 4, MultiLabelClfDataSet (edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet): 2, ClusteredDataGenerator (me.lemire.integercompression.synth.ClusteredDataGenerator): 2, EnumeratedIntegerDistribution (org.apache.commons.math3.distribution.EnumeratedIntegerDistribution): 2, UniformIntegerDistribution (org.apache.commons.math3.distribution.UniformIntegerDistribution): 2, Well19937c (org.apache.commons.math3.random.Well19937c): 2, DenseVector (org.apache.mahout.math.DenseVector): 2, Vector (org.apache.mahout.math.Vector): 2, MultiLabel (edu.neu.ccs.pyramid.dataset.MultiLabel): 1