Use of org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch in the project sketches-core by DataSketches.
From the class ReadOnlyMemoryTest, method wrapAndTryUpdatingUnionEstimationMode:
/**
 * Serializes a union that holds {@code numUniques} distinct keys, wraps the resulting bytes
 * via {@code Memory.wrap}, and checks two things: the wrapped union still reports an
 * estimation-mode result close to {@code numUniques}, and merging a second sketch of
 * previously unseen keys into the wrapped union raises {@link SketchesReadOnlyException}.
 */
@Test
public void wrapAndTryUpdatingUnionEstimationMode() {
  final int numUniques = 10000;

  // First input: keys 0 .. numUniques-1, each with a single value of 1.
  final ArrayOfDoublesUpdatableSketch firstInput = new ArrayOfDoublesUpdatableSketchBuilder().build();
  for (int key = 0; key < numUniques; key++) {
    firstInput.update(key, new double[] { 1 });
  }

  final ArrayOfDoublesUnion sourceUnion = new ArrayOfDoublesSetOperationBuilder().buildUnion();
  sourceUnion.union(firstInput);

  // Round-trip through a byte array and wrap it instead of heapifying it.
  final byte[] serializedUnion = sourceUnion.toByteArray();
  final ArrayOfDoublesUnion wrappedUnion = ArrayOfDoublesSketches.wrapUnion(Memory.wrap(serializedUnion));

  final ArrayOfDoublesSketch wrappedResult = wrappedUnion.getResult();
  Assert.assertTrue(wrappedResult.isEstimationMode());
  Assert.assertEquals(wrappedResult.getEstimate(), numUniques, numUniques * 0.04);

  // Second input continues with keys numUniques .. 2*numUniques-1, none of which were fed
  // to the first sketch, so that the union update actually needs to modify the union.
  final ArrayOfDoublesUpdatableSketch secondInput = new ArrayOfDoublesUpdatableSketchBuilder().build();
  for (int key = numUniques; key < 2 * numUniques; key++) {
    secondInput.update(key, new double[] { 1 });
  }

  boolean rejected = false;
  try {
    wrappedUnion.union(secondInput);
  } catch (final SketchesReadOnlyException expected) {
    rejected = true;
  }
  Assert.assertTrue(rejected);
}
Use of org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch in the project druid by druid-io.
From the class ArrayOfDoublesSketchAggregatorFactory, method factorizeBuffered:
/**
 * Creates the buffer aggregator for this factory.
 *
 * <p>When {@code metricColumns} is set, the input is treated as raw data (a key dimension
 * plus one double column per metric) and a build aggregator is returned. Otherwise the
 * input column is expected to hold sketches and a merge aggregator is returned. In both
 * cases a no-op aggregator is returned when the relevant selector is a nil selector.
 *
 * @param metricFactory source of the column and dimension selectors
 * @return a build, merge, or no-op buffer aggregator as described above
 */
@Override
public BufferAggregator factorizeBuffered(final ColumnSelectorFactory metricFactory) {
  if (metricColumns != null) {
    // input is raw data (key and array of values), use build aggregator
    final DimensionSelector keys =
        metricFactory.makeDimensionSelector(new DefaultDimensionSpec(fieldName, fieldName));
    if (DimensionSelector.isNilSelector(keys)) {
      return new NoopArrayOfDoublesSketchBufferAggregator(numberOfValues);
    }
    final List<BaseDoubleColumnValueSelector> perColumnValues = new ArrayList<>();
    for (final String metricColumn : metricColumns) {
      perColumnValues.add(metricFactory.makeColumnValueSelector(metricColumn));
    }
    return new ArrayOfDoublesSketchBuildBufferAggregator(
        keys,
        perColumnValues,
        nominalEntries,
        getMaxIntermediateSizeWithNulls()
    );
  }

  // input is sketches, use merge aggregator
  final BaseObjectColumnValueSelector<ArrayOfDoublesSketch> sketches =
      metricFactory.makeColumnValueSelector(fieldName);
  if (sketches instanceof NilColumnValueSelector) {
    return new NoopArrayOfDoublesSketchBufferAggregator(numberOfValues);
  }
  return new ArrayOfDoublesSketchMergeBufferAggregator(
      sketches,
      nominalEntries,
      numberOfValues,
      getMaxIntermediateSizeWithNulls()
  );
}
Use of org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch in the project druid by druid-io.
From the class ArrayOfDoublesSketchAggregatorFactory, method factorize:
/**
 * Creates the on-heap aggregator for this factory.
 *
 * <p>When {@code metricColumns} is set, the input is treated as raw data (a key dimension
 * plus one double column per metric) and a build aggregator is returned. Otherwise the
 * input column is expected to hold sketches and a merge aggregator is returned. In both
 * cases a no-op aggregator is returned when the relevant selector is a nil selector.
 *
 * @param metricFactory source of the column and dimension selectors
 * @return a build, merge, or no-op aggregator as described above
 */
@Override
public Aggregator factorize(final ColumnSelectorFactory metricFactory) {
  if (metricColumns != null) {
    // input is raw data (key and array of values), use build aggregator
    final DimensionSelector keys =
        metricFactory.makeDimensionSelector(new DefaultDimensionSpec(fieldName, fieldName));
    if (DimensionSelector.isNilSelector(keys)) {
      return new NoopArrayOfDoublesSketchAggregator(numberOfValues);
    }
    final List<BaseDoubleColumnValueSelector> perColumnValues = new ArrayList<>();
    for (final String metricColumn : metricColumns) {
      perColumnValues.add(metricFactory.makeColumnValueSelector(metricColumn));
    }
    return new ArrayOfDoublesSketchBuildAggregator(keys, perColumnValues, nominalEntries);
  }

  // input is sketches, use merge aggregator
  final BaseObjectColumnValueSelector<ArrayOfDoublesSketch> sketches =
      metricFactory.makeColumnValueSelector(fieldName);
  if (sketches instanceof NilColumnValueSelector) {
    return new NoopArrayOfDoublesSketchAggregator(numberOfValues);
  }
  return new ArrayOfDoublesSketchMergeAggregator(sketches, nominalEntries, numberOfValues);
}
Use of org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch in the project druid by druid-io.
From the class ArrayOfDoublesSketchToVariancesPostAggregator, method compute:
/**
 * Computes one variance per value column of the sketch produced by this post-aggregator's
 * field.
 *
 * <p>Every entry retained in the sketch contributes its values to a per-column
 * {@code SummaryStatistics} accumulator; the result holds each accumulator's
 * {@code getVariance()}.
 *
 * @param combinedAggregators aggregator results passed through to the field's own
 *                            {@code compute}
 * @return an array of length {@code sketch.getNumValues()} with the variance of each column
 */
@Override
public double[] compute(final Map<String, Object> combinedAggregators) {
  final ArrayOfDoublesSketch sketch = (ArrayOfDoublesSketch) getField().compute(combinedAggregators);

  // One accumulator per value column.
  final SummaryStatistics[] columnStats = new SummaryStatistics[sketch.getNumValues()];
  for (int col = 0; col < columnStats.length; col++) {
    columnStats[col] = new SummaryStatistics();
  }

  // Feed every retained entry's values into the matching column accumulator.
  for (final ArrayOfDoublesSketchIterator entries = sketch.iterator(); entries.next(); ) {
    final double[] entryValues = entries.getValues();
    for (int col = 0; col < entryValues.length; col++) {
      columnStats[col].addValue(entryValues[col]);
    }
  }

  final double[] variances = new double[columnStats.length];
  for (int col = 0; col < variances.length; col++) {
    variances[col] = columnStats[col].getVariance();
  }
  return variances;
}
Use of org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch in the project druid by druid-io.
From the class ArrayOfDoublesSketchTTestPostAggregator, method compute:
/**
 * Runs a t-test per value column between the sketches produced by the first two fields of
 * this post-aggregator.
 *
 * @param combinedAggregators aggregator results passed through to each field's own
 *                            {@code compute}
 * @return one {@code TTest.tTest} result per value column
 * @throws IAE if the two sketches report a different number of values
 */
@Override
public double[] compute(final Map<String, Object> combinedAggregators) {
  final ArrayOfDoublesSketch first = (ArrayOfDoublesSketch) getFields().get(0).compute(combinedAggregators);
  final ArrayOfDoublesSketch second = (ArrayOfDoublesSketch) getFields().get(1).compute(combinedAggregators);

  // Column-wise comparison only makes sense when both sketches have the same width.
  final int firstWidth = first.getNumValues();
  final int secondWidth = second.getNumValues();
  if (firstWidth != secondWidth) {
    throw new IAE("Sketches have different number of values: %d and %d", firstWidth, secondWidth);
  }

  final SummaryStatistics[] firstStats = getStats(first);
  final SummaryStatistics[] secondStats = getStats(second);

  final TTest tTest = new TTest();
  final double[] pValues = new double[firstWidth];
  for (int col = 0; col < pValues.length; col++) {
    pValues[col] = tTest.tTest(firstStats[col], secondStats[col]);
  }
  return pValues;
}
Aggregations