use of org.apache.commons.math3.distribution.IntegerDistribution in project RoaringBitmap by RoaringBitmap.
the class BenchmarkDataGenerator method generate.
/**
 * Builds {@code howMany} pairs of benchmark containers, one "small" and one "big" per pair.
 *
 * <p>For every pair the small size is drawn uniformly from
 * {@code [SMALLEST_ARRAY, BIGGEST_ARRAY / param]} (generator seeded with {@code param + 123}),
 * and the big size is that value multiplied by {@code param}. Uniformly generated contents
 * draw from a distribution over the full {@code short} range seeded with {@code param + 17}.
 *
 * @param param     ratio between the big and the small array size; also offsets both seeds
 * @param howMany   number of small/big pairs to create
 * @param smallType {@code 0} fills the small array via {@code generateUniform}; any other
 *                  value uses {@code generateClustered}
 * @param bigType   {@code 0} fills the big array via {@code generateUniform}; any other
 *                  value uses {@code generateClustered}
 * @return the generated small and big containers wrapped in a {@link BenchmarkData}
 */
static BenchmarkData generate(int param, int howMany, int smallType, int bigType) {
    IntegerDistribution valueDistribution = new UniformIntegerDistribution(
            new Well19937c(param + 17), Short.MIN_VALUE, Short.MAX_VALUE);
    ClusteredDataGenerator clusteredGenerator = new ClusteredDataGenerator();
    IntegerDistribution sizeDistribution = new UniformIntegerDistribution(
            new Well19937c(param + 123), SMALLEST_ARRAY, BIGGEST_ARRAY / param);

    BenchmarkContainer[] smalls = new BenchmarkContainer[howMany];
    BenchmarkContainer[] bigs = new BenchmarkContainer[howMany];

    int index = 0;
    while (index < howMany) {
        final int smallSize = sizeDistribution.sample();
        final int bigSize = smallSize * param;

        // The small array is always generated before the big one so that the shared
        // generators are consumed in a fixed order.
        final short[] smallValues;
        if (smallType == 0) {
            smallValues = generateUniform(valueDistribution, smallSize);
        } else {
            smallValues = generateClustered(clusteredGenerator, smallSize);
        }

        final short[] bigValues;
        if (bigType == 0) {
            bigValues = generateUniform(valueDistribution, bigSize);
        } else {
            bigValues = generateClustered(clusteredGenerator, bigSize);
        }

        smalls[index] = new BenchmarkContainer(smallValues);
        bigs[index] = new BenchmarkContainer(bigValues);
        index++;
    }
    return new BenchmarkData(smalls, bigs);
}
use of org.apache.commons.math3.distribution.IntegerDistribution in project RoaringBitmap by RoaringBitmap.
the class BenchmarkDataGenerator method generate.
/**
 * Builds {@code howMany} pairs of benchmark containers, one "small" and one "big" per pair.
 *
 * <p>For every pair the small size is drawn uniformly from
 * {@code [SMALLEST_ARRAY, BIGGEST_ARRAY / param]} (generator seeded with {@code param + 123}),
 * and the big size is that value multiplied by {@code param}. Uniformly generated contents
 * draw from a distribution over the full {@code short} range seeded with {@code param + 17}.
 *
 * @param param     ratio between the big and the small array size; also offsets both seeds
 * @param howMany   number of small/big pairs to create
 * @param smallType {@code 0} fills the small array via {@code generateUniform}; any other
 *                  value uses {@code generateClustered}
 * @param bigType   {@code 0} fills the big array via {@code generateUniform}; any other
 *                  value uses {@code generateClustered}
 * @return the generated small and big containers wrapped in a {@link BenchmarkData}
 */
static BenchmarkData generate(int param, int howMany, int smallType, int bigType) {
    IntegerDistribution valueDistribution = new UniformIntegerDistribution(
            new Well19937c(param + 17), Short.MIN_VALUE, Short.MAX_VALUE);
    ClusteredDataGenerator clusteredGenerator = new ClusteredDataGenerator();
    IntegerDistribution sizeDistribution = new UniformIntegerDistribution(
            new Well19937c(param + 123), SMALLEST_ARRAY, BIGGEST_ARRAY / param);

    BenchmarkContainer[] smalls = new BenchmarkContainer[howMany];
    BenchmarkContainer[] bigs = new BenchmarkContainer[howMany];

    int index = 0;
    while (index < howMany) {
        final int smallSize = sizeDistribution.sample();
        final int bigSize = smallSize * param;

        // The small array is always generated before the big one so that the shared
        // generators are consumed in a fixed order.
        final short[] smallValues;
        if (smallType == 0) {
            smallValues = generateUniform(valueDistribution, smallSize);
        } else {
            smallValues = generateClustered(clusteredGenerator, smallSize);
        }

        final short[] bigValues;
        if (bigType == 0) {
            bigValues = generateUniform(valueDistribution, bigSize);
        } else {
            bigValues = generateClustered(clusteredGenerator, bigSize);
        }

        smalls[index] = new BenchmarkContainer(smallValues);
        bigs[index] = new BenchmarkContainer(bigValues);
        index++;
    }
    return new BenchmarkData(smalls, bigs);
}
use of org.apache.commons.math3.distribution.IntegerDistribution in project pyramid by cheng-li.
the class MultiLabelSynthesizer method flipOneNonUniform.
/**
 * Synthesizes a 2-feature, 4-class multi-label data set and then toggles one label on
 * every data point.
 *
 * <p>Class weight vectors:
 * <ul>
 *   <li>y0: w=(0,1)</li>
 *   <li>y1: w=(1,1)</li>
 *   <li>y2: w=(1,0)</li>
 *   <li>y3: w=(1,-1)</li>
 * </ul>
 * Each feature is sampled via {@code Sampling.doubleUniform(-1, 1)}. A data point receives
 * class {@code k} when the dot product of its row with the weight vector of {@code k} is
 * non-negative. Afterwards one class per data point is drawn (class 0 with probability 0.4,
 * classes 1-3 with probability 0.2 each) and its membership is inverted.
 *
 * @param numData number of data points to generate
 * @return the generated data set with the flipped labels
 */
public static MultiLabelClfDataSet flipOneNonUniform(int numData) {
    final int numFeature = 2;
    final int numClass = 4;
    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder()
            .numFeatures(numFeature)
            .numClasses(numClass)
            .numDataPoints(numData)
            .build();

    // One row of coefficients per class, in feature order.
    final double[][] coefficients = {
        {0, 1},
        {1, 1},
        {1, 0},
        {1, -1}
    };
    Vector[] weights = new Vector[numClass];
    for (int classIndex = 0; classIndex < numClass; classIndex++) {
        weights[classIndex] = new DenseVector(numFeature);
    }
    for (int classIndex = 0; classIndex < numClass; classIndex++) {
        for (int feature = 0; feature < numFeature; feature++) {
            weights[classIndex].set(feature, coefficients[classIndex][feature]);
        }
    }

    // Fill in the feature values.
    for (int row = 0; row < numData; row++) {
        for (int feature = 0; feature < numFeature; feature++) {
            dataSet.setFeatureValue(row, feature, Sampling.doubleUniform(-1, 1));
        }
    }

    // Label every data point with each class whose weight vector gives a non-negative score.
    for (int row = 0; row < numData; row++) {
        for (int classIndex = 0; classIndex < numClass; classIndex++) {
            if (weights[classIndex].dot(dataSet.getRow(row)) >= 0) {
                dataSet.addLabel(row, classIndex);
            }
        }
    }

    final int[] classIds = { 0, 1, 2, 3 };
    final double[] flipProbabilities = { 0.4, 0.2, 0.2, 0.2 };
    IntegerDistribution flipDistribution =
            new EnumeratedIntegerDistribution(classIds, flipProbabilities);

    // Toggle the sampled class on each data point: add it when absent, remove it when present.
    for (int row = 0; row < numData; row++) {
        final int flipped = flipDistribution.sample();
        MultiLabel labels = dataSet.getMultiLabels()[row];
        if (!labels.matchClass(flipped)) {
            labels.addLabel(flipped);
        } else {
            labels.removeLabel(flipped);
        }
    }
    return dataSet;
}
use of org.apache.commons.math3.distribution.IntegerDistribution in project pyramid by cheng-li.
the class MultiLabelSynthesizer method sampleFromMix.
/**
 * Synthesizes a 10000-point, 2-feature, 2-class multi-label data set from a mixture of
 * two clusters, each with its own pair of class weight vectors.
 *
 * <ul>
 *   <li>C0, y0: w=(0,1)</li>
 *   <li>C0, y1: w=(1,1)</li>
 *   <li>C1, y0: w=(1,0)</li>
 *   <li>C1, y1: w=(1,-1)</li>
 * </ul>
 * Each feature is sampled via {@code Sampling.doubleUniform(-1, 1)}. For every data point a
 * cluster is drawn (cluster 0 with proportion 0.4, cluster 1 with proportion 0.6) and the
 * point receives class {@code l} when the dot product of its row with that cluster's weight
 * vector for {@code l} is non-negative. The sampled cluster, the row, the weight vector and
 * the dot product are printed to standard output along the way.
 *
 * @return the generated data set
 */
public static MultiLabelClfDataSet sampleFromMix() {
    final int numData = 10000;
    final int numFeature = 2;
    final int numClass = 2;
    final int numClusters = 2;
    final int[] clusterIds = { 0, 1 };
    final double[] clusterProportions = { 0.4, 0.6 };

    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder()
            .numFeatures(numFeature)
            .numClasses(numClass)
            .numDataPoints(numData)
            .build();

    // coefficients[cluster][class] lists the weight components in feature order.
    final double[][][] coefficients = {
        { {0, 1}, {1, 1} },
        { {1, 0}, {1, -1} }
    };
    Vector[][] weights = new Vector[numClusters][numClass];
    for (int c = 0; c < numClusters; c++) {
        for (int l = 0; l < numClass; l++) {
            weights[c][l] = new DenseVector(numFeature);
        }
    }
    for (int c = 0; c < numClusters; c++) {
        for (int l = 0; l < numClass; l++) {
            for (int feature = 0; feature < numFeature; feature++) {
                weights[c][l].set(feature, coefficients[c][l][feature]);
            }
        }
    }

    // Fill in the feature values.
    for (int row = 0; row < numData; row++) {
        for (int feature = 0; feature < numFeature; feature++) {
            dataSet.setFeatureValue(row, feature, Sampling.doubleUniform(-1, 1));
        }
    }

    IntegerDistribution clusterDistribution =
            new EnumeratedIntegerDistribution(clusterIds, clusterProportions);

    // Pick a cluster per data point and label it with that cluster's weight vectors.
    for (int row = 0; row < numData; row++) {
        final int cluster = clusterDistribution.sample();
        System.out.println("cluster " + cluster);
        for (int l = 0; l < numClass; l++) {
            Vector weight = weights[cluster][l];
            System.out.println("row = " + dataSet.getRow(row));
            System.out.println("weight = " + weight);
            final double score = weight.dot(dataSet.getRow(row));
            System.out.println("dot = " + score);
            if (score >= 0) {
                dataSet.addLabel(row, l);
            }
        }
    }
    return dataSet;
}
Aggregations