use of org.apache.commons.math3.distribution.EnumeratedDistribution in project druid by druid-io.
the class BenchmarkColumnValueGenerator method generateSingleRowValue.
/**
 * Draws one sample from the configured distribution and converts it to the
 * value type declared by the column schema.
 *
 * <p>When the distribution is not one of the three recognized kinds, no sample
 * is drawn and {@code null} is passed to {@code convertType}.
 *
 * @return whatever {@code convertType} produces for the sampled value
 */
private Object generateSingleRowValue() {
    final ValueType targetType = schema.getType();
    final Object sampled;
    if (distribution instanceof AbstractIntegerDistribution) {
        sampled = ((AbstractIntegerDistribution) distribution).sample();
    } else if (distribution instanceof AbstractRealDistribution) {
        sampled = ((AbstractRealDistribution) distribution).sample();
    } else if (distribution instanceof EnumeratedDistribution) {
        sampled = ((EnumeratedDistribution<?>) distribution).sample();
    } else {
        // Unrecognized distribution kind: nothing to sample.
        sampled = null;
    }
    return convertType(sampled, targetType);
}
use of org.apache.commons.math3.distribution.EnumeratedDistribution in project cassandra by apache.
the class SampledOpDistributionFactory method get.
/**
 * Builds an operation distribution in which every configured ratio is spread
 * evenly across the operations created for its key.
 *
 * @param isWarmup forwarded unchanged to the per-key operation factory
 * @param sink     measurement sink handed to each per-key {@code Timer}
 * @return a {@code SampledOpDistribution} over the weighted operations
 */
public OpDistribution get(boolean isWarmup, MeasurementSink sink) {
    final PartitionGenerator partitions = newGenerator();
    final List<Pair<Operation, Double>> weighted = new ArrayList<>();
    for (Map.Entry<T, Double> entry : ratios.entrySet()) {
        final T key = entry.getKey();
        final Timer timer = new Timer(key.toString(), sink);
        final List<? extends Operation> expanded = get(timer, partitions, key, isWarmup);
        for (Operation operation : expanded) {
            // Each operation receives an equal share of its key's ratio.
            weighted.add(new Pair<>(operation, entry.getValue() / expanded.size()));
        }
    }
    final EnumeratedDistribution<Operation> sampler = new EnumeratedDistribution<>(weighted);
    return new SampledOpDistribution(sampler, clustering.get());
}
use of org.apache.commons.math3.distribution.EnumeratedDistribution in project repseqio by repseqio.
the class MarkovInsertModel method create.
/**
 * Creates an insert generator in which each letter is sampled from a
 * distribution selected by the previous letter.
 *
 * <p>The keys of the configured {@code distribution} map must have the form
 * {@code "X>Y"} with single-character sides; the mapped value is the weight of
 * emitting {@code Y} after {@code X}. Every basic letter of the nucleotide
 * alphabet must appear at least once as a source letter.
 *
 * @param random random generator passed to every sampler created here
 * @param v      forwarded to {@code beginPoint} together with {@code fromLeft}
 * @param vGenes not read by this implementation
 * @param dGenes not read by this implementation
 * @param jGenes not read by this implementation
 * @param cGenes not read by this implementation
 * @return a generator producing one inserted sequence per call
 * @throws IllegalArgumentException if a key is malformed, names a symbol the
 *                                  alphabet does not know, or a basic letter
 *                                  has no outgoing distribution
 */
@Override
public InsertGenerator create(RandomGenerator random, final boolean v, List<VDJCGene> vGenes, List<VDJCGene> dGenes, List<VDJCGene> jGenes, List<VDJCGene> cGenes) {
    // Step 1: group the "X>Y" weights by the code of the source letter X.
    final Map<Byte, List<Pair<Byte, Double>>> weightsBySource = new HashMap<>();
    for (Map.Entry<String, Double> entry : distribution.entrySet()) {
        final String[] sides = entry.getKey().split(">");
        final boolean wellFormed = sides.length == 2 && sides[0].length() == 1 && sides[1].length() == 1;
        if (!wellFormed) {
            throw new IllegalArgumentException("Illegal distribution key: " + entry.getKey() + ". " + "Expected something like \"A>C\"");
        }
        final byte sourceCode = NucleotideSequence.ALPHABET.symbolToCode(sides[0].charAt(0));
        final byte targetCode = NucleotideSequence.ALPHABET.symbolToCode(sides[1].charAt(0));
        if (sourceCode == -1 || targetCode == -1) {
            throw new IllegalArgumentException("Illegal nucleotide in: " + entry.getKey() + ".");
        }
        List<Pair<Byte, Double>> outgoing = weightsBySource.get(sourceCode);
        if (outgoing == null) {
            outgoing = new ArrayList<>();
            weightsBySource.put(sourceCode, outgoing);
        }
        outgoing.add(new Pair<>(targetCode, entry.getValue()));
    }

    // Step 2: one sampler per basic letter; a missing letter is a configuration error.
    final Map<Byte, EnumeratedDistribution<Byte>> samplerBySource = new HashMap<>();
    for (byte code = 0; code < NucleotideSequence.ALPHABET.basicSize(); code++) {
        final List<Pair<Byte, Double>> outgoing = weightsBySource.get(code);
        if (outgoing == null) {
            throw new IllegalArgumentException("No distribution for letter: " + NucleotideSequence.ALPHABET.codeToSymbol(code));
        }
        samplerBySource.put(code, new EnumeratedDistribution<>(random, outgoing));
    }

    final IndependentIntGenerator lengthSampler = lengthDistribution.create(random);
    return new InsertGenerator() {
        @Override
        public NucleotideSequence generate(GGene gene) {
            final ReferencePoint anchor = beginPoint(fromLeft, v);
            final int anchorPosition = gene.getPartitioning().getPosition(anchor);
            if (anchorPosition == -1) {
                throw new RuntimeException("Point " + anchor + " is not available for gene " + gene);
            }
            // Seed the chain with the gene's letter at the anchor position.
            byte previous = gene.getSequence(new Range(anchorPosition, anchorPosition + 1)).codeAt(0);
            final int insertLength = lengthSampler.sample();
            final byte[] codes = new byte[insertLength];
            for (int pos = 0; pos < insertLength; pos++) {
                previous = samplerBySource.get(previous).sample();
                codes[pos] = previous;
            }
            if (!fromLeft) {
                ArraysUtils.reverse(codes);
            }
            return NucleotideSequence.ALPHABET.createBuilder().ensureCapacity(insertLength).append(codes).createAndDestroy();
        }
    };
}
use of org.apache.commons.math3.distribution.EnumeratedDistribution in project druid by druid-io.
the class ColumnValueGenerator method initDistribution.
/**
 * Builds the value distribution selected by the column schema, stores it in
 * {@code distribution}, and reseeds its random generator with {@code seed}.
 *
 * <p>For the enumerated-style distributions (DISCRETE_UNIFORM, ZIPF,
 * ENUMERATED) a list of (value, weight) pairs is assembled first and wrapped
 * in an {@code EnumeratedTreeDistribution}. The remaining types map directly
 * onto a distribution object constructed from the schema's parameters.
 *
 * @throws UnsupportedOperationException if the schema's distribution type is
 *                                       not handled by the switch below
 */
private void initDistribution() {
    GeneratorColumnSchema.ValueDistribution distributionType = schema.getDistributionType();
    ValueType type = schema.getType();
    List<Object> enumeratedValues = schema.getEnumeratedValues();
    List<Double> enumeratedProbabilities = schema.getEnumeratedProbabilities();
    List<Pair<Object, Double>> probabilities = new ArrayList<>();

    switch (distributionType) {
        case SEQUENTIAL:
            // not random, just cycle through numbers from start to end, or cycle through enumerated values if provided
            distribution = new SequentialDistribution(schema.getStartInt(), schema.getEndInt(), schema.getEnumeratedValues());
            break;
        case UNIFORM:
            distribution = new UniformRealDistribution(schema.getStartDouble(), schema.getEndDouble());
            break;
        case DISCRETE_UNIFORM:
            if (enumeratedValues == null) {
                // No explicit values: enumerate the integers in [startInt, endInt), converted to the column type.
                enumeratedValues = new ArrayList<>();
                for (int i = schema.getStartInt(); i < schema.getEndInt(); i++) {
                    Object val = convertType(i, type);
                    enumeratedValues.add(val);
                }
            }
            // give them all equal probability, the library will normalize probabilities to sum to 1.0
            for (Object enumeratedValue : enumeratedValues) {
                probabilities.add(new Pair<>(enumeratedValue, 0.1));
            }
            distribution = new EnumeratedTreeDistribution<>(probabilities);
            break;
        case NORMAL:
            distribution = new NormalDistribution(schema.getMean(), schema.getStandardDeviation());
            break;
        case ROUNDED_NORMAL:
            NormalDistribution normalDist = new NormalDistribution(schema.getMean(), schema.getStandardDeviation());
            distribution = new RealRoundingDistribution(normalDist);
            break;
        case ZIPF:
            int cardinality;
            if (enumeratedValues == null) {
                Integer startInt = schema.getStartInt();
                cardinality = schema.getEndInt() - startInt;
                ZipfDistribution zipf = new ZipfDistribution(cardinality, schema.getZipfExponent());
                for (int i = 0; i < cardinality; i++) {
                    // ZipfDistribution ranks run from 1 to cardinality and probability(0) is 0,
                    // so the i-th value (0-based) must take the weight of rank i + 1.
                    probabilities.add(new Pair<>((Object) (i + startInt), zipf.probability(i + 1)));
                }
            } else {
                cardinality = enumeratedValues.size();
                ZipfDistribution zipf = new ZipfDistribution(cardinality, schema.getZipfExponent());
                for (int i = 0; i < cardinality; i++) {
                    // Same 1-based rank mapping as above.
                    probabilities.add(new Pair<>(enumeratedValues.get(i), zipf.probability(i + 1)));
                }
            }
            distribution = new EnumeratedTreeDistribution<>(probabilities);
            break;
        case LAZY_ZIPF:
            // Samples straight from the Zipf distribution instead of precomputing a weight per value.
            Integer startInt = schema.getStartInt();
            int lazyCardinality = schema.getEndInt() - startInt;
            distribution = new ZipfDistribution(lazyCardinality, schema.getZipfExponent());
            break;
        case LAZY_DISCRETE_UNIFORM:
            distribution = new UniformIntegerDistribution(schema.getStartInt(), schema.getEndInt());
            break;
        case ENUMERATED:
            // Pair each enumerated value with the probability at the same index.
            for (int i = 0; i < enumeratedValues.size(); i++) {
                probabilities.add(new Pair<>(enumeratedValues.get(i), enumeratedProbabilities.get(i)));
            }
            distribution = new EnumeratedTreeDistribution<>(probabilities);
            break;
        default:
            throw new UnsupportedOperationException("Unknown distribution type: " + distributionType);
    }

    // Reseed through whichever base type the chosen distribution has; anything that is
    // neither an integer nor a real distribution is cast to EnumeratedDistribution.
    if (distribution instanceof AbstractIntegerDistribution) {
        ((AbstractIntegerDistribution) distribution).reseedRandomGenerator(seed);
    } else if (distribution instanceof AbstractRealDistribution) {
        ((AbstractRealDistribution) distribution).reseedRandomGenerator(seed);
    } else {
        ((EnumeratedDistribution<?>) distribution).reseedRandomGenerator(seed);
    }
}
use of org.apache.commons.math3.distribution.EnumeratedDistribution in project druid by druid-io.
the class ColumnValueGenerator method generateSingleRowValue.
/**
 * Produces a single row value: one sample from the configured distribution,
 * converted to the schema's value type.
 *
 * <p>A distribution that matches none of the recognized base types yields the
 * conversion of {@code null}.
 *
 * @return the converted sample, as returned by {@code convertType}
 */
private Object generateSingleRowValue() {
    final ValueType valueType = schema.getType();

    if (distribution instanceof AbstractIntegerDistribution) {
        final Object drawn = ((AbstractIntegerDistribution) distribution).sample();
        return convertType(drawn, valueType);
    }
    if (distribution instanceof AbstractRealDistribution) {
        final Object drawn = ((AbstractRealDistribution) distribution).sample();
        return convertType(drawn, valueType);
    }
    if (distribution instanceof EnumeratedDistribution) {
        final Object drawn = ((EnumeratedDistribution<?>) distribution).sample();
        return convertType(drawn, valueType);
    }
    // No recognized distribution: convert null, matching the untouched default.
    final Object nothing = null;
    return convertType(nothing, valueType);
}
Aggregations