use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
the class InputSourceSamplerTest method testWithMoreRollup.
/**
 * Samples the shared test rows using a schema whose only declared dimension is {@code dim1} and whose only
 * aggregator is a long sum over {@code met1}.
 *
 * Expects 6 rows read, 5 rows indexed and 3 response rows: two parsed rows ({@code foo} with {@code met1 = 11}
 * and {@code foo2} with {@code met1 = 4}) followed by one row flagged as unparseable.
 */
@Test
public void testWithMoreRollup() throws IOException {
  final DataSchema schema = createDataSchema(
      new TimestampSpec("t", null, null),
      new DimensionsSpec(ImmutableList.of(StringDimensionSchema.create("dim1"))),
      new AggregatorFactory[]{new LongSumAggregatorFactory("met1", "met1")},
      new UniformGranularitySpec(Granularities.DAY, Granularities.HOUR, true, null),
      null
  );

  final SamplerResponse sampled = inputSourceSampler.sample(
      createInputSource(getTestRows(), schema),
      createInputFormat(),
      schema,
      null
  );

  Assert.assertEquals(6, sampled.getNumRowsRead());
  Assert.assertEquals(5, sampled.getNumRowsIndexed());

  final List<SamplerResponseRow> rows = sampled.getData();
  Assert.assertEquals(3, rows.size());

  final List<Map<String, Object>> rawColumns = getRawColumns();
  // Every parsed row in this fixture carries the same __time value.
  final long expectedTime = 1555934400000L;

  final SamplerResponseRow expectedFoo = new SamplerResponseRow(
      rawColumns.get(0),
      new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
          .put("__time", expectedTime)
          .put("dim1", "foo")
          .put("met1", 11L)
          .build(),
      null,
      null
  );
  assertEqualsSamplerResponseRow(expectedFoo, rows.get(0));

  final SamplerResponseRow expectedFoo2 = new SamplerResponseRow(
      rawColumns.get(3),
      new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
          .put("__time", expectedTime)
          .put("dim1", "foo2")
          .put("met1", 4L)
          .build(),
      null,
      null
  );
  assertEqualsSamplerResponseRow(expectedFoo2, rows.get(1));

  // The last raw row has no parsed output and is reported as unparseable.
  final SamplerResponseRow expectedUnparseable =
      new SamplerResponseRow(rawColumns.get(5), null, true, getUnparseableTimestampString());
  assertEqualsSamplerResponseRow(expectedUnparseable, rows.get(2));
}
use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
the class InputSourceSamplerTest method testMultipleJsonStringInOneBlock.
/**
 * This case tests sampling for multiple json lines in one text block.
 * Currently only RecordSupplierInputSource supports this kind of input, see
 * https://github.com/apache/druid/pull/10383 for more information.
 *
 * This test combines an illegal json block and a legal json block to verify:
 * 1. no line in the illegal json block is parsed
 * 2. the illegal json block does not affect the processing of the 2nd record
 * 3. all lines in the legal json block are parsed successfully
 *
 * The test body only runs when {@code parserType} is {@code STR_JSON} and {@code useInputFormatApi} is true;
 * for every other combination it returns immediately without asserting anything.
 */
@Test
public void testMultipleJsonStringInOneBlock() throws IOException {
  if (!ParserType.STR_JSON.equals(parserType) || !useInputFormatApi) {
    return;
  }

  final TimestampSpec timestampSpec = new TimestampSpec("t", null, null);
  final DimensionsSpec dimensionsSpec = new DimensionsSpec(
      ImmutableList.of(StringDimensionSchema.create("dim1PlusBar"))
  );
  final TransformSpec transformSpec = new TransformSpec(
      null,
      ImmutableList.of(new ExpressionTransform("dim1PlusBar", "concat(dim1 + 'bar')", TestExprMacroTable.INSTANCE))
  );
  final AggregatorFactory[] aggregatorFactories = {new LongSumAggregatorFactory("met1", "met1")};
  final GranularitySpec granularitySpec = new UniformGranularitySpec(
      Granularities.DAY,
      Granularities.HOUR,
      true,
      null
  );
  final DataSchema dataSchema = createDataSchema(
      timestampSpec,
      dimensionsSpec,
      aggregatorFactories,
      granularitySpec,
      transformSpec
  );

  List<String> jsonBlockList = ImmutableList.of(
      // include the line which can't be parsed into a JSON object to form an illegal json block
      String.join("", STR_JSON_ROWS),
      // exclude the last line to form a legal json block
      STR_JSON_ROWS.stream().limit(STR_JSON_ROWS.size() - 1).collect(Collectors.joining())
  );

  SamplerResponse response = inputSourceSampler.sample(
      new RecordSupplierInputSource("topicName", new TestRecordSupplier(jsonBlockList), true),
      createInputFormat(),
      dataSchema,
      new SamplerConfig(200, 3000)
  );

  //
  // the 1st json block contains STR_JSON_ROWS.size() lines, and the 2nd json block contains
  // STR_JSON_ROWS.size() - 1 lines; together there should be STR_JSON_ROWS.size() * 2 - 1 lines
  //
  int illegalRows = STR_JSON_ROWS.size();
  int legalRows = STR_JSON_ROWS.size() - 1;
  Assert.assertEquals(illegalRows + legalRows, response.getNumRowsRead());
  Assert.assertEquals(legalRows, response.getNumRowsIndexed());
  // one response row per line of the illegal block, plus the two parsed rows asserted at the end
  Assert.assertEquals(illegalRows + 2, response.getData().size());

  List<SamplerResponseRow> data = response.getData();
  List<Map<String, Object>> rawColumnList = this.getRawColumns();
  int index = 0;

  //
  // first n rows are related to the first json block which fails to parse
  //
  // The guard at the top of this method guarantees useInputFormatApi is true here, so a single message
  // suffices (the former if/else on useInputFormatApi had two identical branches).
  final String parseExceptionMessage =
      "Timestamp[bad_timestamp] is unparseable! Event: {t=bad_timestamp, dim1=foo, met1=6}";
  for (; index < illegalRows; index++) {
    assertEqualsSamplerResponseRow(
        new SamplerResponseRow(rawColumnList.get(index), null, true, parseExceptionMessage),
        data.get(index)
    );
  }

  //
  // following are parsed rows for the legal json block
  //
  assertEqualsSamplerResponseRow(
      new SamplerResponseRow(
          rawColumnList.get(0),
          new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
              .put("__time", 1555934400000L)
              .put("dim1PlusBar", "foobar")
              .put("met1", 11L)
              .build(),
          null,
          null
      ),
      data.get(index++)
  );
  assertEqualsSamplerResponseRow(
      new SamplerResponseRow(
          rawColumnList.get(3),
          new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
              .put("__time", 1555934400000L)
              .put("dim1PlusBar", "foo2bar")
              .put("met1", 4L)
              .build(),
          null,
          null
      ),
      data.get(index)
  );
}
use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
the class InputSourceSamplerTest method testWithTransformsAutoDimensions.
/**
 * Samples the shared test rows with a {@code DimensionsSpec} built from {@code null} (no declared dimensions)
 * and a transform spec that defines {@code dim1PlusBar} as {@code concat(dim1, 'bar')}.
 *
 * Expects 6 rows read, 5 rows indexed and 4 response rows: three parsed rows carrying {@code dim1},
 * {@code dim2} and {@code met1}, followed by one row flagged as unparseable.
 */
@Test
public void testWithTransformsAutoDimensions() throws IOException {
  final DataSchema schema = createDataSchema(
      new TimestampSpec("t", null, null),
      new DimensionsSpec(null),
      new AggregatorFactory[]{new LongSumAggregatorFactory("met1", "met1")},
      new UniformGranularitySpec(Granularities.DAY, Granularities.HOUR, true, null),
      new TransformSpec(
          null,
          ImmutableList.of(new ExpressionTransform("dim1PlusBar", "concat(dim1, 'bar')", TestExprMacroTable.INSTANCE))
      )
  );

  final SamplerResponse sampled = inputSourceSampler.sample(
      createInputSource(getTestRows(), schema),
      createInputFormat(),
      schema,
      null
  );

  Assert.assertEquals(6, sampled.getNumRowsRead());
  Assert.assertEquals(5, sampled.getNumRowsIndexed());

  final List<SamplerResponseRow> rows = sampled.getData();
  Assert.assertEquals(4, rows.size());

  final List<Map<String, Object>> rawColumns = getRawColumns();
  // Every parsed row in this fixture carries the same __time value.
  final long expectedTime = 1555934400000L;

  final SamplerResponseRow expectedFooNoDim2 = new SamplerResponseRow(
      rawColumns.get(0),
      new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
          .put("__time", expectedTime)
          .put("dim1", "foo")
          .put("dim2", null)
          .put("met1", 6L)
          .build(),
      null,
      null
  );
  assertEqualsSamplerResponseRow(expectedFooNoDim2, rows.get(0));

  final SamplerResponseRow expectedFoo2 = new SamplerResponseRow(
      rawColumns.get(3),
      new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
          .put("__time", expectedTime)
          .put("dim1", "foo2")
          .put("dim2", null)
          .put("met1", 4L)
          .build(),
      null,
      null
  );
  assertEqualsSamplerResponseRow(expectedFoo2, rows.get(1));

  final SamplerResponseRow expectedFooBar = new SamplerResponseRow(
      rawColumns.get(4),
      new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
          .put("__time", expectedTime)
          .put("dim1", "foo")
          .put("dim2", "bar")
          .put("met1", 5L)
          .build(),
      null,
      null
  );
  assertEqualsSamplerResponseRow(expectedFooBar, rows.get(2));

  // The last raw row has no parsed output and is reported as unparseable.
  final SamplerResponseRow expectedUnparseable =
      new SamplerResponseRow(rawColumns.get(5), null, true, getUnparseableTimestampString());
  assertEqualsSamplerResponseRow(expectedUnparseable, rows.get(3));
}
use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
the class InputSourceSamplerTest method testWithNoRollup.
/**
 * Samples the shared test rows with a {@code DimensionsSpec} built from {@code null} and the granularity spec's
 * boolean flag set to {@code false} (rollup disabled, going by the test name).
 *
 * Expects 6 rows read, 5 rows indexed and 6 response rows: the first five raw rows each produce their own
 * parsed row with {@code met1} equal to 1 through 5, and the sixth raw row is flagged as unparseable.
 */
@Test
public void testWithNoRollup() throws IOException {
  final DataSchema schema = createDataSchema(
      new TimestampSpec("t", null, null),
      new DimensionsSpec(null),
      new AggregatorFactory[]{new LongSumAggregatorFactory("met1", "met1")},
      new UniformGranularitySpec(Granularities.DAY, Granularities.HOUR, false, null),
      null
  );

  final SamplerResponse sampled = inputSourceSampler.sample(
      createInputSource(getTestRows(), schema),
      createInputFormat(),
      schema,
      null
  );

  Assert.assertEquals(6, sampled.getNumRowsRead());
  Assert.assertEquals(5, sampled.getNumRowsIndexed());

  final List<SamplerResponseRow> rows = sampled.getData();
  Assert.assertEquals(6, rows.size());

  final List<Map<String, Object>> rawColumns = getRawColumns();

  // Expected parsed values for raw rows 0..4, one entry per row; position i describes response row i.
  final String[] expectedDim1 = {"foo", "foo", "foo", "foo2", "foo"};
  final String[] expectedDim2 = {null, null, null, null, "bar"};
  final long[] expectedMet1 = {1L, 2L, 3L, 4L, 5L};

  for (int i = 0; i < expectedMet1.length; i++) {
    final SamplerResponseRow expected = new SamplerResponseRow(
        rawColumns.get(i),
        new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
            .put("__time", 1555934400000L)
            .put("dim1", expectedDim1[i])
            .put("dim2", expectedDim2[i])
            .put("met1", expectedMet1[i])
            .build(),
        null,
        null
    );
    assertEqualsSamplerResponseRow(expected, rows.get(i));
  }

  // The last raw row has no parsed output and is reported as unparseable.
  final SamplerResponseRow expectedUnparseable =
      new SamplerResponseRow(rawColumns.get(5), null, true, getUnparseableTimestampString());
  assertEqualsSamplerResponseRow(expectedUnparseable, rows.get(5));
}
use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
the class Queries method computeRequiredColumns.
/**
 * Helper for implementations of {@link Query#getRequiredColumns()}. Returns the set of columns that will be read
 * out of a datasource by a query that uses the provided objects in the usual way.
 *
 * The returned set always contains {@code __time}, no matter what.
 *
 * If the virtual columns, filter, dimensions, aggregators, or additional columns refer to a virtual column, then the
 * inputs of the virtual column will be returned instead of the name of the virtual column itself. Therefore, the
 * returned set will never contain the names of any virtual columns.
 *
 * @param virtualColumns    virtual columns whose inputs should be included.
 * @param filter            optional filter whose inputs should be included.
 * @param dimensions        dimension specs whose inputs should be included.
 * @param aggregators       aggregators whose inputs should be included.
 * @param additionalColumns additional columns to include. Each of these will be added to the returned set, unless it
 *                          refers to a virtual column, in which case the virtual column inputs will be added instead.
 *
 * @return a new mutable set holding {@code __time} plus every referenced column for which
 * {@code virtualColumns.exists(column)} is false.
 */
public static Set<String> computeRequiredColumns(
    final VirtualColumns virtualColumns,
    @Nullable final DimFilter filter,
    final List<DimensionSpec> dimensions,
    final List<AggregatorFactory> aggregators,
    final List<String> additionalColumns
)
{
  final Set<String> requiredColumns = new HashSet<>();

  // Everyone needs __time (it's used by intervals filters).
  requiredColumns.add(ColumnHolder.TIME_COLUMN_NAME);

  for (VirtualColumn virtualColumn : virtualColumns.getVirtualColumns()) {
    for (String column : virtualColumn.requiredColumns()) {
      // Add only the input that passed the check. Adding all of the virtual column's inputs here would also
      // pull in inputs that are themselves virtual columns, which the contract above forbids.
      if (!virtualColumns.exists(column)) {
        requiredColumns.add(column);
      }
    }
  }

  if (filter != null) {
    for (String column : filter.getRequiredColumns()) {
      if (!virtualColumns.exists(column)) {
        requiredColumns.add(column);
      }
    }
  }

  for (DimensionSpec dimensionSpec : dimensions) {
    final String dimension = dimensionSpec.getDimension();
    if (!virtualColumns.exists(dimension)) {
      requiredColumns.add(dimension);
    }
  }

  for (AggregatorFactory aggregator : aggregators) {
    for (String column : aggregator.requiredFields()) {
      if (!virtualColumns.exists(column)) {
        requiredColumns.add(column);
      }
    }
  }

  for (String column : additionalColumns) {
    if (!virtualColumns.exists(column)) {
      requiredColumns.add(column);
    }
  }

  return requiredColumns;
}
Aggregations