use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
the class AggregatorFactoryTest method testResultArraySignature.
/**
 * Builds a timeseries query containing one aggregator of each listed kind plus a
 * finalizing post-aggregator for every aggregator, then checks the row signature
 * reported by {@code TimeseriesQueryQueryToolChest.resultArraySignature} against the
 * expected column names and types.
 */
@Test
public void testResultArraySignature() {
  final TimeseriesQuery query = Druids
      .newTimeseriesQueryBuilder()
      .dataSource("dummy")
      .intervals("2000/3000")
      .granularity(Granularities.HOUR)
      .aggregators(
          new CountAggregatorFactory("count"),
          new JavaScriptAggregatorFactory(
              "js",
              ImmutableList.of("col"),
              "function(a,b) { return a + b; }",
              "function() { return 0; }",
              "function(a,b) { return a + b }",
              new JavaScriptConfig(true)
          ),
          // aggregators over the long column
          new LongSumAggregatorFactory("longSum", "long-col"),
          new LongMinAggregatorFactory("longMin", "long-col"),
          new LongMaxAggregatorFactory("longMax", "long-col"),
          new LongFirstAggregatorFactory("longFirst", "long-col", null),
          new LongLastAggregatorFactory("longLast", "long-col", null),
          new LongAnyAggregatorFactory("longAny", "long-col"),
          // aggregators over the double column
          new DoubleSumAggregatorFactory("doubleSum", "double-col"),
          new DoubleMinAggregatorFactory("doubleMin", "double-col"),
          new DoubleMaxAggregatorFactory("doubleMax", "double-col"),
          new DoubleFirstAggregatorFactory("doubleFirst", "double-col", null),
          new DoubleLastAggregatorFactory("doubleLast", "double-col", null),
          new DoubleAnyAggregatorFactory("doubleAny", "double-col"),
          new DoubleMeanAggregatorFactory("doubleMean", "double-col"),
          // aggregators over the float column
          new FloatSumAggregatorFactory("floatSum", "float-col"),
          new FloatMinAggregatorFactory("floatMin", "float-col"),
          new FloatMaxAggregatorFactory("floatMax", "float-col"),
          new FloatFirstAggregatorFactory("floatFirst", "float-col", null),
          new FloatLastAggregatorFactory("floatLast", "float-col", null),
          new FloatAnyAggregatorFactory("floatAny", "float-col"),
          // string aggregators
          new StringFirstAggregatorFactory("stringFirst", "col", null, 1024),
          new StringLastAggregatorFactory("stringLast", "col", null, 1024),
          new StringAnyAggregatorFactory("stringAny", "col", 1024),
          // sketch aggregators
          new CardinalityAggregatorFactory(
              "cardinality",
              ImmutableList.of(DefaultDimensionSpec.of("some-col")),
              false
          ),
          new HyperUniquesAggregatorFactory("hyperUnique", "hyperunique"),
          new HistogramAggregatorFactory("histogram", "histogram", ImmutableList.of(0.25f, 0.5f, 0.75f)),
          // aggregators that wrap another aggregator
          new FilteredAggregatorFactory(
              new HyperUniquesAggregatorFactory("filtered", "hyperunique"),
              new SelectorDimFilter("col", "hello", null)
          ),
          new SuppressedAggregatorFactory(new HyperUniquesAggregatorFactory("suppressed", "hyperunique"))
      )
      .postAggregators(
          new FinalizingFieldAccessPostAggregator("count-finalize", "count"),
          new FinalizingFieldAccessPostAggregator("js-finalize", "js"),
          // finalized longs
          new FinalizingFieldAccessPostAggregator("longSum-finalize", "longSum"),
          new FinalizingFieldAccessPostAggregator("longMin-finalize", "longMin"),
          new FinalizingFieldAccessPostAggregator("longMax-finalize", "longMax"),
          new FinalizingFieldAccessPostAggregator("longFirst-finalize", "longFirst"),
          new FinalizingFieldAccessPostAggregator("longLast-finalize", "longLast"),
          new FinalizingFieldAccessPostAggregator("longAny-finalize", "longAny"),
          // finalized doubles
          new FinalizingFieldAccessPostAggregator("doubleSum-finalize", "doubleSum"),
          new FinalizingFieldAccessPostAggregator("doubleMin-finalize", "doubleMin"),
          new FinalizingFieldAccessPostAggregator("doubleMax-finalize", "doubleMax"),
          new FinalizingFieldAccessPostAggregator("doubleFirst-finalize", "doubleFirst"),
          new FinalizingFieldAccessPostAggregator("doubleLast-finalize", "doubleLast"),
          new FinalizingFieldAccessPostAggregator("doubleAny-finalize", "doubleAny"),
          new FinalizingFieldAccessPostAggregator("doubleMean-finalize", "doubleMean"),
          // finalized floats
          new FinalizingFieldAccessPostAggregator("floatSum-finalize", "floatSum"),
          new FinalizingFieldAccessPostAggregator("floatMin-finalize", "floatMin"),
          new FinalizingFieldAccessPostAggregator("floatMax-finalize", "floatMax"),
          new FinalizingFieldAccessPostAggregator("floatFirst-finalize", "floatFirst"),
          new FinalizingFieldAccessPostAggregator("floatLast-finalize", "floatLast"),
          new FinalizingFieldAccessPostAggregator("floatAny-finalize", "floatAny"),
          // finalized strings
          new FinalizingFieldAccessPostAggregator("stringFirst-finalize", "stringFirst"),
          new FinalizingFieldAccessPostAggregator("stringLast-finalize", "stringLast"),
          new FinalizingFieldAccessPostAggregator("stringAny-finalize", "stringAny"),
          // finalized sketches
          new FinalizingFieldAccessPostAggregator("cardinality-finalize", "cardinality"),
          new FinalizingFieldAccessPostAggregator("hyperUnique-finalize", "hyperUnique"),
          new FinalizingFieldAccessPostAggregator("histogram-finalize", "histogram"),
          // finalized wrappers
          new FinalizingFieldAccessPostAggregator("filtered-finalize", "filtered"),
          new FinalizingFieldAccessPostAggregator("suppressed-finalize", "suppressed")
      )
      .build();

  // Expected layout: time column, then every aggregator in declaration order, then every
  // post-aggregator in declaration order. A null type is expected for the non-finalized
  // doubleMean, stringFirst, stringLast, sketch and wrapper columns.
  final RowSignature expectedSignature = RowSignature
      .builder()
      .addTimeColumn()
      .add("count", ColumnType.LONG)
      .add("js", ColumnType.FLOAT)
      .add("longSum", ColumnType.LONG)
      .add("longMin", ColumnType.LONG)
      .add("longMax", ColumnType.LONG)
      .add("longFirst", ColumnType.LONG)
      .add("longLast", ColumnType.LONG)
      .add("longAny", ColumnType.LONG)
      .add("doubleSum", ColumnType.DOUBLE)
      .add("doubleMin", ColumnType.DOUBLE)
      .add("doubleMax", ColumnType.DOUBLE)
      .add("doubleFirst", ColumnType.DOUBLE)
      .add("doubleLast", ColumnType.DOUBLE)
      .add("doubleAny", ColumnType.DOUBLE)
      .add("doubleMean", null)
      .add("floatSum", ColumnType.FLOAT)
      .add("floatMin", ColumnType.FLOAT)
      .add("floatMax", ColumnType.FLOAT)
      .add("floatFirst", ColumnType.FLOAT)
      .add("floatLast", ColumnType.FLOAT)
      .add("floatAny", ColumnType.FLOAT)
      .add("stringFirst", null)
      .add("stringLast", null)
      .add("stringAny", ColumnType.STRING)
      .add("cardinality", null)
      .add("hyperUnique", null)
      .add("histogram", null)
      .add("filtered", null)
      .add("suppressed", null)
      .add("count-finalize", ColumnType.LONG)
      .add("js-finalize", ColumnType.FLOAT)
      .add("longSum-finalize", ColumnType.LONG)
      .add("longMin-finalize", ColumnType.LONG)
      .add("longMax-finalize", ColumnType.LONG)
      .add("longFirst-finalize", ColumnType.LONG)
      .add("longLast-finalize", ColumnType.LONG)
      .add("longAny-finalize", ColumnType.LONG)
      .add("doubleSum-finalize", ColumnType.DOUBLE)
      .add("doubleMin-finalize", ColumnType.DOUBLE)
      .add("doubleMax-finalize", ColumnType.DOUBLE)
      .add("doubleFirst-finalize", ColumnType.DOUBLE)
      .add("doubleLast-finalize", ColumnType.DOUBLE)
      .add("doubleAny-finalize", ColumnType.DOUBLE)
      .add("doubleMean-finalize", ColumnType.DOUBLE)
      .add("floatSum-finalize", ColumnType.FLOAT)
      .add("floatMin-finalize", ColumnType.FLOAT)
      .add("floatMax-finalize", ColumnType.FLOAT)
      .add("floatFirst-finalize", ColumnType.FLOAT)
      .add("floatLast-finalize", ColumnType.FLOAT)
      .add("floatAny-finalize", ColumnType.FLOAT)
      .add("stringFirst-finalize", ColumnType.STRING)
      .add("stringLast-finalize", ColumnType.STRING)
      .add("stringAny-finalize", ColumnType.STRING)
      .add("cardinality-finalize", ColumnType.DOUBLE)
      .add("hyperUnique-finalize", ColumnType.DOUBLE)
      .add("histogram-finalize", HistogramAggregatorFactory.TYPE_VISUAL)
      .add("filtered-finalize", ColumnType.DOUBLE)
      .add("suppressed-finalize", ColumnType.DOUBLE)
      .build();

  Assert.assertEquals(expectedSignature, new TimeseriesQueryQueryToolChest().resultArraySignature(query));
}
use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
the class SystemSchemaTest method setUp.
/**
 * Per-test fixture: creates the EasyMock collaborators, builds three memory-mapped
 * segments in a fresh temporary folder, starts a {@code DruidSchema} over them and
 * finally constructs the {@code SystemSchema} under test.
 *
 * @throws Exception if the temporary folder, the indexes or the schema cannot be set up
 */
@Before
public void setUp() throws Exception {
  // Mocked collaborators and shared helpers.
  serverView = EasyMock.createNiceMock(TimelineServerView.class);
  client = EasyMock.createMock(DruidLeaderClient.class);
  coordinatorClient = EasyMock.createMock(DruidLeaderClient.class);
  overlordClient = EasyMock.createMock(DruidLeaderClient.class);
  mapper = TestHelper.makeJsonMapper();
  responseHolder = EasyMock.createMock(StringFullResponseHolder.class);
  // Partial mock: only handleResponse(...) and getStatus() are mocked.
  responseHandler = EasyMock
      .createMockBuilder(BytesAccumulatingResponseHandler.class)
      .withConstructor()
      .addMockedMethod("handleResponse", HttpResponse.class, HttpResponseHandler.TrafficCop.class)
      .addMockedMethod("getStatus")
      .createMock();
  request = EasyMock.createMock(Request.class);
  authMapper = createAuthMapper();

  // One sub-directory per index under a fresh temporary folder.
  final File segmentsDir = temporaryFolder.newFolder();

  final QueryableIndex firstIndex = IndexBuilder
      .create()
      .tmpDir(new File(segmentsDir, "1"))
      .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
      .schema(
          new IncrementalIndexSchema.Builder()
              .withMetrics(
                  new CountAggregatorFactory("cnt"),
                  new DoubleSumAggregatorFactory("m1", "m1"),
                  new HyperUniquesAggregatorFactory("unique_dim1", "dim1")
              )
              .withRollup(false)
              .build()
      )
      .rows(ROWS1)
      .buildMMappedIndex();

  final QueryableIndex secondIndex = IndexBuilder
      .create()
      .tmpDir(new File(segmentsDir, "2"))
      .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
      .schema(
          new IncrementalIndexSchema.Builder()
              .withMetrics(new LongSumAggregatorFactory("m1", "m1"))
              .withRollup(false)
              .build()
      )
      .rows(ROWS2)
      .buildMMappedIndex();

  final QueryableIndex thirdIndex = IndexBuilder
      .create()
      .tmpDir(new File(segmentsDir, "3"))
      .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
      .schema(
          new IncrementalIndexSchema.Builder()
              .withMetrics(new LongSumAggregatorFactory("m1", "m1"))
              .withRollup(false)
              .build()
      )
      .rows(ROWS3)
      .buildMMappedIndex();

  walker = new SpecificSegmentsQuerySegmentWalker(conglomerate)
      .add(segment1, firstIndex)
      .add(segment2, secondIndex)
      .add(segment3, thirdIndex);

  druidSchema = new DruidSchema(
      CalciteTests.createMockQueryLifecycleFactory(walker, conglomerate),
      new TestServerInventoryView(walker.getSegments(), realtimeSegments),
      new SegmentManager(EasyMock.createMock(SegmentLoader.class)),
      new MapJoinableFactory(ImmutableSet.of(), ImmutableMap.of()),
      PLANNER_CONFIG_DEFAULT,
      new NoopEscalator(),
      new BrokerInternalQueryConfig(),
      null
  );
  druidSchema.start();
  druidSchema.awaitInitialization();

  metadataView = EasyMock.createMock(MetadataSegmentView.class);
  druidNodeDiscoveryProvider = EasyMock.createMock(DruidNodeDiscoveryProvider.class);
  serverInventoryView = EasyMock.createMock(FilteredServerInventoryView.class);

  // Note: `client` is passed for both client arguments, and a fresh strict mock is used
  // for the AuthorizerMapper; coordinatorClient, overlordClient and authMapper are not
  // handed to SystemSchema here.
  schema = new SystemSchema(
      druidSchema,
      metadataView,
      serverView,
      serverInventoryView,
      EasyMock.createStrictMock(AuthorizerMapper.class),
      client,
      client,
      druidNodeDiscoveryProvider,
      mapper
  );
}
use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
the class DruidSchemaTest method setUp.
/**
 * Per-test fixture: builds two memory-mapped indexes, registers them with the segment
 * walker under three data segments, and creates two {@code DruidSchema} instances whose
 * overrides count down {@code buildTableLatch} / {@code markDataSourceLatch}.
 * Only {@code schema} is started here; {@code schema2} is created but not started.
 *
 * @throws Exception if the temporary folder, the indexes or the schema cannot be set up
 */
@Before
public void setUp() throws Exception {
  final File segmentsDir = temporaryFolder.newFolder();

  final QueryableIndex firstIndex = IndexBuilder
      .create()
      .tmpDir(new File(segmentsDir, "1"))
      .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
      .schema(
          new IncrementalIndexSchema.Builder()
              .withMetrics(
                  new CountAggregatorFactory("cnt"),
                  new DoubleSumAggregatorFactory("m1", "m1"),
                  new HyperUniquesAggregatorFactory("unique_dim1", "dim1")
              )
              .withRollup(false)
              .build()
      )
      .rows(ROWS1)
      .buildMMappedIndex();

  final QueryableIndex secondIndex = IndexBuilder
      .create()
      .tmpDir(new File(segmentsDir, "2"))
      .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
      .schema(
          new IncrementalIndexSchema.Builder()
              .withMetrics(new LongSumAggregatorFactory("m1", "m1"))
              .withRollup(false)
              .build()
      )
      .rows(ROWS2)
      .buildMMappedIndex();

  // Two segments for DATASOURCE1 (years 2000 and 2001) and one for DATASOURCE2 that
  // reuses the second index and its data interval.
  walker = new SpecificSegmentsQuerySegmentWalker(conglomerate)
      .add(
          DataSegment.builder()
                     .dataSource(CalciteTests.DATASOURCE1)
                     .interval(Intervals.of("2000/P1Y"))
                     .version("1")
                     .shardSpec(new LinearShardSpec(0))
                     .size(0)
                     .build(),
          firstIndex
      )
      .add(
          DataSegment.builder()
                     .dataSource(CalciteTests.DATASOURCE1)
                     .interval(Intervals.of("2001/P1Y"))
                     .version("1")
                     .shardSpec(new LinearShardSpec(0))
                     .size(0)
                     .build(),
          secondIndex
      )
      .add(
          DataSegment.builder()
                     .dataSource(CalciteTests.DATASOURCE2)
                     .interval(secondIndex.getDataInterval())
                     .version("1")
                     .shardSpec(new LinearShardSpec(0))
                     .size(0)
                     .build(),
          secondIndex
      );

  // A single extra segment handed to the server view as the realtime segment list.
  final DataSegment realtimeSegment = new DataSegment(
      "foo3",
      Intervals.of("2012/2013"),
      "version3",
      null,
      ImmutableList.of("dim1", "dim2"),
      ImmutableList.of("met1", "met2"),
      new NumberedShardSpec(2, 3),
      null,
      1,
      100L,
      PruneSpecsHolder.DEFAULT
  );
  final List<DataSegment> realtimeSegmentList = ImmutableList.of(realtimeSegment);
  serverView = new TestServerInventoryView(walker.getSegments(), realtimeSegmentList);
  druidServers = serverView.getDruidServers();

  // Schema that signals the latches whenever a table is built or a datasource is
  // marked as needing a rebuild.
  schema = new DruidSchema(
      CalciteTests.createMockQueryLifecycleFactory(walker, conglomerate),
      serverView,
      segmentManager,
      new MapJoinableFactory(
          ImmutableSet.of(globalTableJoinable),
          ImmutableMap.of(globalTableJoinable.getClass(), GlobalTableDataSource.class)
      ),
      PLANNER_CONFIG_DEFAULT,
      new NoopEscalator(),
      new BrokerInternalQueryConfig(),
      null
  ) {
    @Override
    protected DruidTable buildDruidTable(String dataSource) {
      final DruidTable built = super.buildDruidTable(dataSource);
      buildTableLatch.countDown();
      return built;
    }

    @Override
    void markDataSourceAsNeedRebuild(String datasource) {
      super.markDataSourceAsNeedRebuild(datasource);
      markDataSourceLatch.countDown();
    }
  };

  // Same as above, except that the first refreshSegments call throws a RuntimeException
  // and every later call delegates to the superclass.
  schema2 = new DruidSchema(
      CalciteTests.createMockQueryLifecycleFactory(walker, conglomerate),
      serverView,
      segmentManager,
      new MapJoinableFactory(
          ImmutableSet.of(globalTableJoinable),
          ImmutableMap.of(globalTableJoinable.getClass(), GlobalTableDataSource.class)
      ),
      PLANNER_CONFIG_DEFAULT,
      new NoopEscalator(),
      new BrokerInternalQueryConfig(),
      null
  ) {
    boolean throwException = true;

    @Override
    protected DruidTable buildDruidTable(String dataSource) {
      final DruidTable built = super.buildDruidTable(dataSource);
      buildTableLatch.countDown();
      return built;
    }

    @Override
    protected Set<SegmentId> refreshSegments(final Set<SegmentId> segments) throws IOException {
      if (!throwException) {
        return super.refreshSegments(segments);
      }
      // Fail once, then clear the flag so that later calls succeed.
      throwException = false;
      throw new RuntimeException("Query[xxxx] url[http://xxxx:8083/druid/v2/] timed out.");
    }

    @Override
    void markDataSourceAsNeedRebuild(String datasource) {
      super.markDataSourceAsNeedRebuild(datasource);
      markDataSourceLatch.countDown();
    }
  };

  schema.start();
  schema.awaitInitialization();
}
use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
the class QueryableIndexColumnCapabilitiesTest method setup.
/**
 * Class-level fixture: parses one fully populated event and one event whose dimension
 * values are null/empty, then builds an incremental index and a memory-mapped index for
 * (a) the populated row alone and (b) the populated row plus the null row.
 *
 * @throws IOException if a temporary folder or an index cannot be created
 */
@BeforeClass
public static void setup() throws IOException {
  // Dimensions: d1/d2 use the default schemas, d3 is double, d4 is float, d5 is long.
  final TimestampSpec timestampSpec = new TimestampSpec("time", "auto", null);
  final List<DimensionSchema> dimensionSchemas = ImmutableList
      .<DimensionSchema>builder()
      .addAll(DimensionsSpec.getDefaultSchemas(ImmutableList.of("d1", "d2")))
      .add(new DoubleDimensionSchema("d3"))
      .add(new FloatDimensionSchema("d4"))
      .add(new LongDimensionSchema("d5"))
      .build();
  MapInputRowParser parser = new MapInputRowParser(
      new TimeAndDimsParseSpec(timestampSpec, new DimensionsSpec(dimensionSchemas))
  );

  // Metrics: a count, one sum per numeric dimension, and a hyperUnique over d1.
  AggregatorFactory[] metricsSpecs = new AggregatorFactory[] {
      new CountAggregatorFactory("cnt"),
      new DoubleSumAggregatorFactory("m1", "d3"),
      new FloatSumAggregatorFactory("m2", "d4"),
      new LongSumAggregatorFactory("m3", "d5"),
      new HyperUniquesAggregatorFactory("m4", "d1")
  };

  // --- indexes over the single populated row ---
  List<InputRow> populatedRows = new ArrayList<>();
  Map<String, Object> populatedEvent = ImmutableMap
      .<String, Object>builder()
      .put("time", DateTimes.nowUtc().getMillis())
      .put("d1", "some string")
      .put("d2", ImmutableList.of("some", "list"))
      .put("d3", 1.234)
      .put("d4", 1.234f)
      .put("d5", 10L)
      .build();
  populatedRows.add(Iterables.getOnlyElement(parser.parseBatch(populatedEvent)));

  IndexBuilder populatedBuilder = IndexBuilder
      .create()
      .rows(populatedRows)
      .schema(
          new IncrementalIndexSchema.Builder()
              .withMetrics(metricsSpecs)
              .withDimensionsSpec(parser)
              .withRollup(false)
              .build()
      )
      .tmpDir(temporaryFolder.newFolder());
  INC_INDEX = populatedBuilder.buildIncrementalIndex();
  MMAP_INDEX = populatedBuilder.buildMMappedIndex();

  // --- indexes over the populated row plus a row of null/empty values ---
  List<InputRow> mixedRows = new ArrayList<>();
  mixedRows.add(Iterables.getOnlyElement(parser.parseBatch(populatedEvent)));

  // HashMap is used here because the event carries null values.
  Map<String, Object> nullEvent = new HashMap<>();
  nullEvent.put("time", DateTimes.nowUtc().getMillis());
  nullEvent.put("d1", null);
  nullEvent.put("d2", ImmutableList.of());
  nullEvent.put("d3", null);
  nullEvent.put("d4", null);
  nullEvent.put("d5", null);
  mixedRows.add(Iterables.getOnlyElement(parser.parseBatch(nullEvent)));

  IndexBuilder mixedBuilder = IndexBuilder
      .create()
      .rows(mixedRows)
      .schema(
          new IncrementalIndexSchema.Builder()
              .withMetrics(metricsSpecs)
              .withDimensionsSpec(parser)
              .withRollup(false)
              .build()
      )
      .tmpDir(temporaryFolder.newFolder());
  INC_INDEX_WITH_NULLS = mixedBuilder.buildIncrementalIndex();
  MMAP_INDEX_WITH_NULLS = mixedBuilder.buildMMappedIndex();
}
use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
the class BuiltinApproxCountDistinctSqlAggregator method toDruidAggregation.
/**
 * Translates the given aggregate call into a Druid {@code Aggregation}.
 *
 * <p>When the single argument is a direct reference to a COMPLEX-typed column, a
 * {@code HyperUniquesAggregatorFactory} over that column is used. Otherwise a
 * {@code CardinalityAggregatorFactory} is built over a dimension spec derived from the
 * argument (a simple extraction, or a virtual column registered for the expression).
 * When {@code finalizeAggregations} is set, the aggregator gets a prefixed name and a
 * {@code HyperUniqueFinalizingPostAggregator} named {@code name} is attached.
 *
 * @return the aggregation, or {@code null} if the argument cannot be converted to a
 *         Druid expression
 * @throws ISE if the argument's SQL type has no corresponding Druid column type
 */
@Nullable
@Override
public Aggregation toDruidAggregation(final PlannerContext plannerContext, final RowSignature rowSignature, final VirtualColumnRegistry virtualColumnRegistry, final RexBuilder rexBuilder, final String name, final AggregateCall aggregateCall, final Project project, final List<Aggregation> existingAggregations, final boolean finalizeAggregations) {
  // Don't use Aggregations.getArgumentsForSimpleAggregator, since it won't let us use direct column access
  // for string columns.
  final RexNode inputRexNode = Expressions.fromFieldAccess(
      rowSignature,
      project,
      Iterables.getOnlyElement(aggregateCall.getArgList())
  );
  final DruidExpression inputExpression = Expressions.toDruidExpression(plannerContext, rowSignature, inputRexNode);
  if (inputExpression == null) {
    return null;
  }

  // With finalization the aggregator is an intermediate column, so it gets a prefixed
  // name and the post-aggregator takes the requested output name.
  final String aggregatorName;
  if (finalizeAggregations) {
    aggregatorName = Calcites.makePrefixedName(name, "a");
  } else {
    aggregatorName = name;
  }

  final boolean isDirectComplexColumn =
      inputExpression.isDirectColumnAccess()
      && rowSignature.getColumnType(inputExpression.getDirectColumn())
                     .map(type -> type.is(ValueType.COMPLEX))
                     .orElse(false);

  final AggregatorFactory aggregatorFactory;
  if (isDirectComplexColumn) {
    aggregatorFactory = new HyperUniquesAggregatorFactory(
        aggregatorName,
        inputExpression.getDirectColumn(),
        false,
        true
    );
  } else {
    final RelDataType sqlType = inputRexNode.getType();
    final ColumnType druidType = Calcites.getColumnTypeForRelDataType(sqlType);
    if (druidType == null) {
      throw new ISE("Cannot translate sqlTypeName[%s] to Druid type for field[%s]", sqlType.getSqlTypeName(), aggregatorName);
    }

    // Use the simple extraction directly when there is one; otherwise register a
    // virtual column for the expression and aggregate over that.
    final DimensionSpec dimensionSpec = inputExpression.isSimpleExtraction()
        ? inputExpression.getSimpleExtraction().toDimensionSpec(null, druidType)
        : new DefaultDimensionSpec(
            virtualColumnRegistry.getOrCreateVirtualColumnForExpression(inputExpression, sqlType),
            null,
            druidType
        );

    aggregatorFactory = new CardinalityAggregatorFactory(
        aggregatorName,
        null,
        ImmutableList.of(dimensionSpec),
        false,
        true
    );
  }

  return Aggregation.create(
      Collections.singletonList(aggregatorFactory),
      finalizeAggregations ? new HyperUniqueFinalizingPostAggregator(name, aggregatorFactory.getName()) : null
  );
}
Aggregations