Use of org.apache.druid.query.aggregation.first.StringFirstAggregatorFactory in project druid by druid-io.
Class CalciteQueryTest, method testGroupByAggregatorDefaultValuesNonVectorized.
@Test
public void testGroupByAggregatorDefaultValuesNonVectorized() throws Exception {
cannotVectorize();
testQuery("SELECT\n" + " dim2,\n" + " ANY_VALUE(dim1, 1024) FILTER(WHERE dim1 = 'nonexistent'),\n" + " ANY_VALUE(l1) FILTER(WHERE dim1 = 'nonexistent'),\n" + " EARLIEST(dim1, 1024) FILTER(WHERE dim1 = 'nonexistent'),\n" + " EARLIEST(l1) FILTER(WHERE dim1 = 'nonexistent'),\n" + " LATEST(dim1, 1024) FILTER(WHERE dim1 = 'nonexistent'),\n" + " LATEST(l1) FILTER(WHERE dim1 = 'nonexistent'),\n" + " ARRAY_AGG(DISTINCT dim3) FILTER(WHERE dim1 = 'nonexistent'),\n" + " STRING_AGG(DISTINCT dim3, '|') FILTER(WHERE dim1 = 'nonexistent'),\n" + " BIT_AND(l1) FILTER(WHERE dim1 = 'nonexistent'),\n" + " BIT_OR(l1) FILTER(WHERE dim1 = 'nonexistent'),\n" + " BIT_XOR(l1) FILTER(WHERE dim1 = 'nonexistent')\n" + "FROM druid.numfoo WHERE dim2 = 'a' GROUP BY dim2", ImmutableList.of(GroupByQuery.builder().setDataSource(CalciteTests.DATASOURCE3).setInterval(querySegmentSpec(Filtration.eternity())).setDimFilter(selector("dim2", "a", null)).setGranularity(Granularities.ALL).setVirtualColumns(expressionVirtualColumn("v0", "'a'", ColumnType.STRING)).setDimensions(new DefaultDimensionSpec("v0", "_d0", ColumnType.STRING)).setAggregatorSpecs(aggregators(new FilteredAggregatorFactory(new StringAnyAggregatorFactory("a0", "dim1", 1024), selector("dim1", "nonexistent", null)), new FilteredAggregatorFactory(new LongAnyAggregatorFactory("a1", "l1"), selector("dim1", "nonexistent", null)), new FilteredAggregatorFactory(new StringFirstAggregatorFactory("a2", "dim1", null, 1024), selector("dim1", "nonexistent", null)), new FilteredAggregatorFactory(new LongFirstAggregatorFactory("a3", "l1", null), selector("dim1", "nonexistent", null)), new FilteredAggregatorFactory(new StringLastAggregatorFactory("a4", "dim1", null, 1024), selector("dim1", "nonexistent", null)), new FilteredAggregatorFactory(new LongLastAggregatorFactory("a5", "l1", null), selector("dim1", "nonexistent", null)), new FilteredAggregatorFactory(new ExpressionLambdaAggregatorFactory("a6", ImmutableSet.of("dim3"), "__acc", "ARRAY<STRING>[]", "ARRAY<STRING>[]", true, true, false, "array_set_add(\"__acc\", \"dim3\")", "array_set_add_all(\"__acc\", \"a6\")", null, null, ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE), selector("dim1", "nonexistent", null)), new FilteredAggregatorFactory(new ExpressionLambdaAggregatorFactory("a7", ImmutableSet.of("dim3"), "__acc", "[]", "[]", true, false, false, "array_set_add(\"__acc\", \"dim3\")", "array_set_add_all(\"__acc\", \"a7\")", null, "if(array_length(o) == 0, null, array_to_string(o, '|'))", ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE), and(not(selector("dim3", null, null)), selector("dim1", "nonexistent", null))), new FilteredAggregatorFactory(new ExpressionLambdaAggregatorFactory("a8", ImmutableSet.of("l1"), "__acc", "0", "0", NullHandling.sqlCompatible(), false, false, "bitwiseAnd(\"__acc\", \"l1\")", "bitwiseAnd(\"__acc\", \"a8\")", null, null, ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE), and(not(selector("l1", null, null)), selector("dim1", "nonexistent", null))), new FilteredAggregatorFactory(new ExpressionLambdaAggregatorFactory("a9", ImmutableSet.of("l1"), "__acc", "0", "0", NullHandling.sqlCompatible(), false, false, "bitwiseOr(\"__acc\", \"l1\")", "bitwiseOr(\"__acc\", \"a9\")", null, null, ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE), and(not(selector("l1", null, null)), selector("dim1", "nonexistent", null))), new FilteredAggregatorFactory(new 
ExpressionLambdaAggregatorFactory("a10", ImmutableSet.of("l1"), "__acc", "0", "0", NullHandling.sqlCompatible(), false, false, "bitwiseXor(\"__acc\", \"l1\")", "bitwiseXor(\"__acc\", \"a10\")", null, null, ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE), and(not(selector("l1", null, null)), selector("dim1", "nonexistent", null))))).setContext(QUERY_CONTEXT_DEFAULT).build()), ImmutableList.of(useDefault ? new Object[] { "a", "", 0L, "", 0L, "", 0L, null, "", 0L, 0L, 0L } : new Object[] { "a", null, null, null, null, null, null, null, null, null, null, null }));
}
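For reference, each filtered EARLIEST call above reduces to a FilteredAggregatorFactory wrapping a StringFirstAggregatorFactory. A minimal sketch of that pairing built directly against the native classes (the SelectorDimFilter mirrors the test's selector() helper; the aggregator name is illustrative):
// Sketch: standalone construction of the filtered EARLIEST(dim1, 1024) aggregator.
// StringFirstAggregatorFactory takes (name, fieldName, timeColumn, maxStringBytes); a null timeColumn falls back to __time.
AggregatorFactory filteredEarliest = new FilteredAggregatorFactory(
    new StringFirstAggregatorFactory("a2", "dim1", null, 1024),
    // only rows matching the FILTER(WHERE dim1 = 'nonexistent') clause feed the inner aggregator
    new SelectorDimFilter("dim1", "nonexistent", null));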
Use of org.apache.druid.query.aggregation.first.StringFirstAggregatorFactory in project druid by druid-io.
Class CalciteQueryTest, method testFirstLatestAggregatorsSkipNulls.
@Test
public void testFirstLatestAggregatorsSkipNulls() throws Exception {
// Cannot vectorize LATEST aggregator.
skipVectorize();
final DimFilter filter;
if (useDefault) {
  filter = not(selector("dim1", null, null));
} else {
  filter = and(
      not(selector("dim1", null, null)),
      not(selector("l1", null, null)),
      not(selector("d1", null, null)),
      not(selector("f1", null, null)));
}
testQuery("SELECT EARLIEST(dim1, 32), LATEST(l1), LATEST(d1), LATEST(f1) " + "FROM druid.numfoo " + "WHERE dim1 IS NOT NULL AND l1 IS NOT NULL AND d1 IS NOT NULL AND f1 is NOT NULL", ImmutableList.of(Druids.newTimeseriesQueryBuilder().dataSource(CalciteTests.DATASOURCE3).intervals(querySegmentSpec(Filtration.eternity())).granularity(Granularities.ALL).filters(filter).aggregators(aggregators(new StringFirstAggregatorFactory("a0", "dim1", null, 32), new LongLastAggregatorFactory("a1", "l1", null), new DoubleLastAggregatorFactory("a2", "d1", null), new FloatLastAggregatorFactory("a3", "f1", null))).context(QUERY_CONTEXT_DEFAULT).build()), ImmutableList.of(// first row of dim1 is empty string, which is null in default mode, last non-null numeric rows are zeros
new Object[] { useDefault ? "10.1" : "", 0L, 0.0, 0.0f }));
}
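Under SQL-compatible null handling, the composite filter built above can also be expressed with the native DimFilter classes rather than the test-base helpers. A rough, assumption-labelled sketch (it assumes the NotDimFilter and AndDimFilter constructors shown; a SelectorDimFilter with a null value matches null rows):
// Sketch: the SQL-compatible branch of the skip-null filter, using native filter classes
// instead of the BaseCalciteQueryTest helpers not()/and()/selector().
DimFilter skipNulls = new AndDimFilter(ImmutableList.of(
    new NotDimFilter(new SelectorDimFilter("dim1", null, null)),
    new NotDimFilter(new SelectorDimFilter("l1", null, null)),
    new NotDimFilter(new SelectorDimFilter("d1", null, null)),
    new NotDimFilter(new SelectorDimFilter("f1", null, null))));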
Use of org.apache.druid.query.aggregation.first.StringFirstAggregatorFactory in project druid by druid-io.
Class CalciteQueryTest, method testTimeseriesEmptyResultsAggregatorDefaultValuesNonVectorized.
@Test
public void testTimeseriesEmptyResultsAggregatorDefaultValuesNonVectorized() throws Exception {
cannotVectorize();
// a timeseries query with ALL granularity has a single group, so it should return default results for the given aggregators
testQuery("SELECT\n" + " ANY_VALUE(dim1, 1024),\n" + " ANY_VALUE(l1),\n" + " EARLIEST(dim1, 1024),\n" + " EARLIEST(l1),\n" + " LATEST(dim1, 1024),\n" + " LATEST(l1),\n" + " ARRAY_AGG(DISTINCT dim3),\n" + " STRING_AGG(DISTINCT dim3, '|'),\n" + " BIT_AND(l1),\n" + " BIT_OR(l1),\n" + " BIT_XOR(l1)\n" + "FROM druid.numfoo WHERE dim2 = 0", ImmutableList.of(Druids.newTimeseriesQueryBuilder().dataSource(CalciteTests.DATASOURCE3).intervals(querySegmentSpec(Filtration.eternity())).filters(bound("dim2", "0", "0", false, false, null, StringComparators.NUMERIC)).granularity(Granularities.ALL).aggregators(aggregators(new StringAnyAggregatorFactory("a0", "dim1", 1024), new LongAnyAggregatorFactory("a1", "l1"), new StringFirstAggregatorFactory("a2", "dim1", null, 1024), new LongFirstAggregatorFactory("a3", "l1", null), new StringLastAggregatorFactory("a4", "dim1", null, 1024), new LongLastAggregatorFactory("a5", "l1", null), new ExpressionLambdaAggregatorFactory("a6", ImmutableSet.of("dim3"), "__acc", "ARRAY<STRING>[]", "ARRAY<STRING>[]", true, true, false, "array_set_add(\"__acc\", \"dim3\")", "array_set_add_all(\"__acc\", \"a6\")", null, null, ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE), new FilteredAggregatorFactory(new ExpressionLambdaAggregatorFactory("a7", ImmutableSet.of("dim3"), "__acc", "[]", "[]", true, false, false, "array_set_add(\"__acc\", \"dim3\")", "array_set_add_all(\"__acc\", \"a7\")", null, "if(array_length(o) == 0, null, array_to_string(o, '|'))", ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE), not(selector("dim3", null, null))), new FilteredAggregatorFactory(new ExpressionLambdaAggregatorFactory("a8", ImmutableSet.of("l1"), "__acc", "0", "0", NullHandling.sqlCompatible(), false, false, "bitwiseAnd(\"__acc\", \"l1\")", "bitwiseAnd(\"__acc\", \"a8\")", null, null, ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE), not(selector("l1", null, null))), new FilteredAggregatorFactory(new ExpressionLambdaAggregatorFactory("a9", ImmutableSet.of("l1"), "__acc", "0", "0", NullHandling.sqlCompatible(), false, false, "bitwiseOr(\"__acc\", \"l1\")", "bitwiseOr(\"__acc\", \"a9\")", null, null, ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE), not(selector("l1", null, null))), new FilteredAggregatorFactory(new ExpressionLambdaAggregatorFactory("a10", ImmutableSet.of("l1"), "__acc", "0", "0", NullHandling.sqlCompatible(), false, false, "bitwiseXor(\"__acc\", \"l1\")", "bitwiseXor(\"__acc\", \"a10\")", null, null, ExpressionLambdaAggregatorFactory.DEFAULT_MAX_SIZE_BYTES, TestExprMacroTable.INSTANCE), not(selector("l1", null, null))))).context(QUERY_CONTEXT_DEFAULT).build()), ImmutableList.of(useDefault ? new Object[] { "", 0L, "", 0L, "", 0L, null, "", 0L, 0L, 0L } : new Object[] { null, null, null, null, null, null, null, null, null, null, null }));
}
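The same Druids.newTimeseriesQueryBuilder() chain can be trimmed to a single stringFirst aggregator to observe the default-value behavior in isolation. A minimal sketch under assumed inputs (the data source name and interval below are placeholders, not the test fixtures):
// Sketch: a timeseries query carrying only the EARLIEST(dim1, 1024) equivalent.
TimeseriesQuery earliestOnly = Druids.newTimeseriesQueryBuilder()
    .dataSource("numfoo")           // placeholder data source
    .intervals("2000/3000")         // placeholder interval
    .granularity(Granularities.ALL) // one group over the whole interval
    .aggregators(new StringFirstAggregatorFactory("a2", "dim1", null, 1024))
    .build();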
Use of org.apache.druid.query.aggregation.first.StringFirstAggregatorFactory in project druid by druid-io.
Class CalciteQueryTest, method testStringEarliestInSubquery.
// This tests the off-heap (buffer) version of the EarliestAggregator (String).
@Test
public void testStringEarliestInSubquery() throws Exception {
// Cannot vectorize EARLIEST aggregator.
skipVectorize();
testQuery("SELECT SUM(val) FROM (SELECT dim2, EARLIEST(dim1, 10) AS val FROM foo GROUP BY dim2)", ImmutableList.of(GroupByQuery.builder().setDataSource(GroupByQuery.builder().setDataSource(CalciteTests.DATASOURCE1).setInterval(querySegmentSpec(Filtration.eternity())).setGranularity(Granularities.ALL).setDimensions(dimensions(new DefaultDimensionSpec("dim2", "d0"))).setAggregatorSpecs(aggregators(new StringFirstAggregatorFactory("a0:a", "dim1", null, 10))).setPostAggregatorSpecs(ImmutableList.of(new FinalizingFieldAccessPostAggregator("a0", "a0:a"))).setContext(QUERY_CONTEXT_DEFAULT).build()).setInterval(querySegmentSpec(Filtration.eternity())).setGranularity(Granularities.ALL).setAggregatorSpecs(aggregators(new DoubleSumAggregatorFactory("_a0", null, "CAST(\"a0\", 'DOUBLE')", ExprMacroTable.nil()))).setContext(QUERY_CONTEXT_DEFAULT).build()), ImmutableList.of(// [abc, def]
new Object[] { NullHandling.sqlCompatible() ? 12.1 : 10.1 }));
}
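Note the split between the intermediate aggregator name "a0:a" and the post-aggregator that exposes "a0": the string EARLIEST keeps a (timestamp, value) pair as its intermediate state, and only the finalizing step yields the plain string. A minimal sketch of that pattern on its own, reusing the names from the test above:
// Sketch: the buffer-backed string EARLIEST accumulates its pair state under "a0:a";
// FinalizingFieldAccessPostAggregator exposes the finalized string value as "a0".
AggregatorFactory earliest = new StringFirstAggregatorFactory("a0:a", "dim1", null, 10);
PostAggregator finalized = new FinalizingFieldAccessPostAggregator("a0", "a0:a");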
Use of org.apache.druid.query.aggregation.first.StringFirstAggregatorFactory in project druid by druid-io.
Class AggregatorFactoryTest, method testResultArraySignature.
@Test
public void testResultArraySignature() {
final TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
    .dataSource("dummy")
    .intervals("2000/3000")
    .granularity(Granularities.HOUR)
    .aggregators(
        new CountAggregatorFactory("count"),
        new JavaScriptAggregatorFactory("js", ImmutableList.of("col"), "function(a,b) { return a + b; }", "function() { return 0; }", "function(a,b) { return a + b }", new JavaScriptConfig(true)),
        // long aggs
        new LongSumAggregatorFactory("longSum", "long-col"),
        new LongMinAggregatorFactory("longMin", "long-col"),
        new LongMaxAggregatorFactory("longMax", "long-col"),
        new LongFirstAggregatorFactory("longFirst", "long-col", null),
        new LongLastAggregatorFactory("longLast", "long-col", null),
        new LongAnyAggregatorFactory("longAny", "long-col"),
        // double aggs
        new DoubleSumAggregatorFactory("doubleSum", "double-col"),
        new DoubleMinAggregatorFactory("doubleMin", "double-col"),
        new DoubleMaxAggregatorFactory("doubleMax", "double-col"),
        new DoubleFirstAggregatorFactory("doubleFirst", "double-col", null),
        new DoubleLastAggregatorFactory("doubleLast", "double-col", null),
        new DoubleAnyAggregatorFactory("doubleAny", "double-col"),
        new DoubleMeanAggregatorFactory("doubleMean", "double-col"),
        // float aggs
        new FloatSumAggregatorFactory("floatSum", "float-col"),
        new FloatMinAggregatorFactory("floatMin", "float-col"),
        new FloatMaxAggregatorFactory("floatMax", "float-col"),
        new FloatFirstAggregatorFactory("floatFirst", "float-col", null),
        new FloatLastAggregatorFactory("floatLast", "float-col", null),
        new FloatAnyAggregatorFactory("floatAny", "float-col"),
        // string aggregators
        new StringFirstAggregatorFactory("stringFirst", "col", null, 1024),
        new StringLastAggregatorFactory("stringLast", "col", null, 1024),
        new StringAnyAggregatorFactory("stringAny", "col", 1024),
        // sketch aggs
        new CardinalityAggregatorFactory("cardinality", ImmutableList.of(DefaultDimensionSpec.of("some-col")), false),
        new HyperUniquesAggregatorFactory("hyperUnique", "hyperunique"),
        new HistogramAggregatorFactory("histogram", "histogram", ImmutableList.of(0.25f, 0.5f, 0.75f)),
        // delegate aggs
        new FilteredAggregatorFactory(new HyperUniquesAggregatorFactory("filtered", "hyperunique"), new SelectorDimFilter("col", "hello", null)),
        new SuppressedAggregatorFactory(new HyperUniquesAggregatorFactory("suppressed", "hyperunique")))
    .postAggregators(
        new FinalizingFieldAccessPostAggregator("count-finalize", "count"),
        new FinalizingFieldAccessPostAggregator("js-finalize", "js"),
        // long aggs
        new FinalizingFieldAccessPostAggregator("longSum-finalize", "longSum"),
        new FinalizingFieldAccessPostAggregator("longMin-finalize", "longMin"),
        new FinalizingFieldAccessPostAggregator("longMax-finalize", "longMax"),
        new FinalizingFieldAccessPostAggregator("longFirst-finalize", "longFirst"),
        new FinalizingFieldAccessPostAggregator("longLast-finalize", "longLast"),
        new FinalizingFieldAccessPostAggregator("longAny-finalize", "longAny"),
        // double
        new FinalizingFieldAccessPostAggregator("doubleSum-finalize", "doubleSum"),
        new FinalizingFieldAccessPostAggregator("doubleMin-finalize", "doubleMin"),
        new FinalizingFieldAccessPostAggregator("doubleMax-finalize", "doubleMax"),
        new FinalizingFieldAccessPostAggregator("doubleFirst-finalize", "doubleFirst"),
        new FinalizingFieldAccessPostAggregator("doubleLast-finalize", "doubleLast"),
        new FinalizingFieldAccessPostAggregator("doubleAny-finalize", "doubleAny"),
        new FinalizingFieldAccessPostAggregator("doubleMean-finalize", "doubleMean"),
        // finalized floats
        new FinalizingFieldAccessPostAggregator("floatSum-finalize", "floatSum"),
        new FinalizingFieldAccessPostAggregator("floatMin-finalize", "floatMin"),
        new FinalizingFieldAccessPostAggregator("floatMax-finalize", "floatMax"),
        new FinalizingFieldAccessPostAggregator("floatFirst-finalize", "floatFirst"),
        new FinalizingFieldAccessPostAggregator("floatLast-finalize", "floatLast"),
        new FinalizingFieldAccessPostAggregator("floatAny-finalize", "floatAny"),
        // finalized strings
        new FinalizingFieldAccessPostAggregator("stringFirst-finalize", "stringFirst"),
        new FinalizingFieldAccessPostAggregator("stringLast-finalize", "stringLast"),
        new FinalizingFieldAccessPostAggregator("stringAny-finalize", "stringAny"),
        // finalized sketch
        new FinalizingFieldAccessPostAggregator("cardinality-finalize", "cardinality"),
        new FinalizingFieldAccessPostAggregator("hyperUnique-finalize", "hyperUnique"),
        new FinalizingFieldAccessPostAggregator("histogram-finalize", "histogram"),
        // finalized delegate
        new FinalizingFieldAccessPostAggregator("filtered-finalize", "filtered"),
        new FinalizingFieldAccessPostAggregator("suppressed-finalize", "suppressed"))
    .build();
Assert.assertEquals(
    RowSignature.builder()
        .addTimeColumn()
        .add("count", ColumnType.LONG).add("js", ColumnType.FLOAT)
        .add("longSum", ColumnType.LONG).add("longMin", ColumnType.LONG).add("longMax", ColumnType.LONG)
        .add("longFirst", ColumnType.LONG).add("longLast", ColumnType.LONG).add("longAny", ColumnType.LONG)
        .add("doubleSum", ColumnType.DOUBLE).add("doubleMin", ColumnType.DOUBLE).add("doubleMax", ColumnType.DOUBLE)
        .add("doubleFirst", ColumnType.DOUBLE).add("doubleLast", ColumnType.DOUBLE).add("doubleAny", ColumnType.DOUBLE)
        .add("doubleMean", null)
        .add("floatSum", ColumnType.FLOAT).add("floatMin", ColumnType.FLOAT).add("floatMax", ColumnType.FLOAT)
        .add("floatFirst", ColumnType.FLOAT).add("floatLast", ColumnType.FLOAT).add("floatAny", ColumnType.FLOAT)
        .add("stringFirst", null).add("stringLast", null).add("stringAny", ColumnType.STRING)
        .add("cardinality", null).add("hyperUnique", null).add("histogram", null)
        .add("filtered", null).add("suppressed", null)
        .add("count-finalize", ColumnType.LONG).add("js-finalize", ColumnType.FLOAT)
        .add("longSum-finalize", ColumnType.LONG).add("longMin-finalize", ColumnType.LONG).add("longMax-finalize", ColumnType.LONG)
        .add("longFirst-finalize", ColumnType.LONG).add("longLast-finalize", ColumnType.LONG).add("longAny-finalize", ColumnType.LONG)
        .add("doubleSum-finalize", ColumnType.DOUBLE).add("doubleMin-finalize", ColumnType.DOUBLE).add("doubleMax-finalize", ColumnType.DOUBLE)
        .add("doubleFirst-finalize", ColumnType.DOUBLE).add("doubleLast-finalize", ColumnType.DOUBLE).add("doubleAny-finalize", ColumnType.DOUBLE)
        .add("doubleMean-finalize", ColumnType.DOUBLE)
        .add("floatSum-finalize", ColumnType.FLOAT).add("floatMin-finalize", ColumnType.FLOAT).add("floatMax-finalize", ColumnType.FLOAT)
        .add("floatFirst-finalize", ColumnType.FLOAT).add("floatLast-finalize", ColumnType.FLOAT).add("floatAny-finalize", ColumnType.FLOAT)
        .add("stringFirst-finalize", ColumnType.STRING).add("stringLast-finalize", ColumnType.STRING).add("stringAny-finalize", ColumnType.STRING)
        .add("cardinality-finalize", ColumnType.DOUBLE).add("hyperUnique-finalize", ColumnType.DOUBLE)
        .add("histogram-finalize", HistogramAggregatorFactory.TYPE_VISUAL)
        .add("filtered-finalize", ColumnType.DOUBLE).add("suppressed-finalize", ColumnType.DOUBLE)
        .build(),
    new TimeseriesQueryQueryToolChest().resultArraySignature(query));
}
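The signature above makes the stringFirst point explicit: the intermediate (non-finalized) column reports no concrete type, while the finalized column is STRING. A trimmed-down sketch of the same check, assuming only the toolchest call already used in the test:
// Sketch: just the stringFirst aggregator and its finalizer; the intermediate column has a null
// (complex pair) type, the finalized one is STRING. Data source and interval are placeholders.
TimeseriesQuery q = Druids.newTimeseriesQueryBuilder()
    .dataSource("dummy")
    .intervals("2000/3000")
    .granularity(Granularities.HOUR)
    .aggregators(new StringFirstAggregatorFactory("stringFirst", "col", null, 1024))
    .postAggregators(new FinalizingFieldAccessPostAggregator("stringFirst-finalize", "stringFirst"))
    .build();
Assert.assertEquals(
    RowSignature.builder()
        .addTimeColumn()
        .add("stringFirst", null)
        .add("stringFirst-finalize", ColumnType.STRING)
        .build(),
    new TimeseriesQueryQueryToolChest().resultArraySignature(q));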