Use of org.apache.druid.segment.transform.ExpressionTransform in project druid by druid-io.
The class InputRowSchemasTest, method test_createColumnsFilter_schemaless.
@Test
public void test_createColumnsFilter_schemaless() {
  final ColumnsFilter columnsFilter = InputRowSchemas.createColumnsFilter(
      new TimestampSpec("ts", "auto", null),
      DimensionsSpec.builder().setDimensionExclusions(ImmutableList.of("ts", "foo", "bar", "qux", "bob")).build(),
      new TransformSpec(
          new SelectorDimFilter("bar", "x", null),
          ImmutableList.of(new ExpressionTransform("baz", "qux + 3", ExprMacroTable.nil()))
      ),
      new AggregatorFactory[] { new LongSumAggregatorFactory("billy", "bob") }
  );
  // Of the excluded columns, "ts" (timestamp), "bar" (filter), "qux" (transform input), and "bob" (aggregator input)
  // are still required by the schema, so only "foo" remains in the exclusion-based filter.
  Assert.assertEquals(ColumnsFilter.exclusionBased(ImmutableSet.of("foo")), columnsFilter);
}
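For orientation, every example on this page assembles the same two pieces: an ExpressionTransform, which names an output column and the Druid expression that computes it, and a TransformSpec, which pairs an optional row filter with the list of transforms. A minimal sketch of that construction, reusing values that appear in the tests below (shown here only for illustration):

import com.google.common.collect.ImmutableList;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.query.filter.SelectorDimFilter;
import org.apache.druid.segment.transform.ExpressionTransform;
import org.apache.druid.segment.transform.TransformSpec;

// Keep only rows where dim1 = "b" and derive dim1t = concat(dim1, dim1).
// Passing null instead of the SelectorDimFilter keeps all rows.
final TransformSpec transformSpec = new TransformSpec(
    new SelectorDimFilter("dim1", "b", null),
    ImmutableList.of(new ExpressionTransform("dim1t", "concat(dim1,dim1)", ExprMacroTable.nil()))
);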
Use of org.apache.druid.segment.transform.ExpressionTransform in project druid by druid-io.
The class KinesisIndexTaskTest, method testRunWithTransformSpec.
@Test(timeout = 120_000L)
public void testRunWithTransformSpec() throws Exception {
  recordSupplier.assign(EasyMock.anyObject());
  EasyMock.expectLastCall().anyTimes();
  EasyMock.expect(recordSupplier.getEarliestSequenceNumber(EasyMock.anyObject())).andReturn("0").anyTimes();
  recordSupplier.seek(EasyMock.anyObject(), EasyMock.anyString());
  EasyMock.expectLastCall().anyTimes();
  EasyMock.expect(recordSupplier.poll(EasyMock.anyLong())).andReturn(generateRecords(0, 13)).once();
  recordSupplier.close();
  EasyMock.expectLastCall().once();
  replayAll();
  final KinesisIndexTask task = createTask(
      null,
      NEW_DATA_SCHEMA.withTransformSpec(new TransformSpec(
          new SelectorDimFilter("dim1", "b", null),
          ImmutableList.of(new ExpressionTransform("dim1t", "concat(dim1,dim1)", ExprMacroTable.nil()))
      )),
      new KinesisIndexTaskIOConfig(
          0,
          "sequence0",
          new SeekableStreamStartSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID1, "0"), ImmutableSet.of()),
          new SeekableStreamEndSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID1, "4")),
          true,
          null,
          null,
          INPUT_FORMAT,
          "awsEndpoint",
          null, null, null, null,
          false
      )
  );
  final ListenableFuture<TaskStatus> future = runTask(task);
  // Wait for the task to start reading
  while (task.getRunner().getStatus() != SeekableStreamIndexTaskRunner.Status.READING) {
    Thread.sleep(10);
  }
  // Wait for task to exit
  Assert.assertEquals(TaskState.SUCCESS, future.get().getStatusCode());
  verifyAll();
  // Check metrics
  Assert.assertEquals(1, task.getRunner().getRowIngestionMeters().getProcessed());
  Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getUnparseable());
  Assert.assertEquals(4, task.getRunner().getRowIngestionMeters().getThrownAway());
  // Check published metadata
  assertEqualsExceptVersion(ImmutableList.of(sdd("2009/P1D", 0)), publishedDescriptors());
  Assert.assertEquals(
      new KinesisDataSourceMetadata(new SeekableStreamEndSequenceNumbers<>(STREAM, ImmutableMap.of(SHARD_ID1, "4"))),
      newDataSchemaMetadata()
  );
  // Check segments in deep storage
  final List<SegmentDescriptor> publishedDescriptors = publishedDescriptors();
  Assert.assertEquals(ImmutableList.of("b"), readSegmentColumn("dim1", publishedDescriptors.get(0)));
  Assert.assertEquals(ImmutableList.of("bb"), readSegmentColumn("dim1t", publishedDescriptors.get(0)));
}
Use of org.apache.druid.segment.transform.ExpressionTransform in project druid by druid-io.
The class InputSourceSamplerTest, method testWithTransformsDimensionsSpec.
@Test
public void testWithTransformsDimensionsSpec() throws IOException {
  final TimestampSpec timestampSpec = new TimestampSpec("t", null, null);
  final DimensionsSpec dimensionsSpec = new DimensionsSpec(ImmutableList.of(StringDimensionSchema.create("dim1PlusBar")));
  final TransformSpec transformSpec = new TransformSpec(
      null,
      ImmutableList.of(new ExpressionTransform("dim1PlusBar", "concat(dim1 + 'bar')", TestExprMacroTable.INSTANCE))
  );
  final AggregatorFactory[] aggregatorFactories = { new LongSumAggregatorFactory("met1", "met1") };
  final GranularitySpec granularitySpec = new UniformGranularitySpec(Granularities.DAY, Granularities.HOUR, true, null);
  final DataSchema dataSchema = createDataSchema(timestampSpec, dimensionsSpec, aggregatorFactories, granularitySpec, transformSpec);
  final InputSource inputSource = createInputSource(getTestRows(), dataSchema);
  final InputFormat inputFormat = createInputFormat();
  SamplerResponse response = inputSourceSampler.sample(inputSource, inputFormat, dataSchema, null);
  Assert.assertEquals(6, response.getNumRowsRead());
  Assert.assertEquals(5, response.getNumRowsIndexed());
  Assert.assertEquals(3, response.getData().size());
  List<SamplerResponseRow> data = response.getData();
  assertEqualsSamplerResponseRow(
      new SamplerResponseRow(
          getRawColumns().get(0),
          new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
              .put("__time", 1555934400000L).put("dim1PlusBar", "foobar").put("met1", 11L).build(),
          null, null),
      data.get(0)
  );
  assertEqualsSamplerResponseRow(
      new SamplerResponseRow(
          getRawColumns().get(3),
          new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
              .put("__time", 1555934400000L).put("dim1PlusBar", "foo2bar").put("met1", 4L).build(),
          null, null),
      data.get(1)
  );
  assertEqualsSamplerResponseRow(
      new SamplerResponseRow(getRawColumns().get(5), null, true, getUnparseableTimestampString()),
      data.get(2)
  );
}
Use of org.apache.druid.segment.transform.ExpressionTransform in project druid by druid-io.
The class InputSourceSamplerTest, method testIndexParseException.
@Test
public void testIndexParseException() throws IOException {
  final TimestampSpec timestampSpec = new TimestampSpec("t", null, null);
  final DimensionsSpec dimensionsSpec = new DimensionsSpec(ImmutableList.of(StringDimensionSchema.create("dim1PlusBar")));
  final TransformSpec transformSpec = new TransformSpec(
      null,
      ImmutableList.of(new ExpressionTransform("dim1PlusBar", "concat(dim1 + 'bar')", TestExprMacroTable.INSTANCE))
  );
  final AggregatorFactory[] aggregatorFactories = { new LongSumAggregatorFactory("met1", "met1") };
  final GranularitySpec granularitySpec = new UniformGranularitySpec(Granularities.DAY, Granularities.HOUR, true, null);
  final DataSchema dataSchema = createDataSchema(timestampSpec, dimensionsSpec, aggregatorFactories, granularitySpec, transformSpec);
  // Add an invalid row to cause a parse exception when indexing
  Map<String, Object> rawColumns4ParseExceptionRow = ImmutableMap.of("t", "2019-04-22T12:00", "dim1", "foo2", "met1", "invalidNumber");
  final List<String> inputTestRows = Lists.newArrayList(getTestRows());
  inputTestRows.add(
      ParserType.STR_CSV.equals(parserType)
          ? "2019-04-22T12:00,foo2,,invalidNumber"
          : OBJECT_MAPPER.writeValueAsString(rawColumns4ParseExceptionRow)
  );
  final InputSource inputSource = createInputSource(inputTestRows, dataSchema);
  final InputFormat inputFormat = createInputFormat();
  SamplerResponse response = inputSourceSampler.sample(inputSource, inputFormat, dataSchema, null);
  Assert.assertEquals(7, response.getNumRowsRead());
  Assert.assertEquals(5, response.getNumRowsIndexed());
  Assert.assertEquals(4, response.getData().size());
  List<SamplerResponseRow> data = response.getData();
  assertEqualsSamplerResponseRow(
      new SamplerResponseRow(
          getRawColumns().get(0),
          new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
              .put("__time", 1555934400000L).put("dim1PlusBar", "foobar").put("met1", 11L).build(),
          null, null),
      data.get(0)
  );
  assertEqualsSamplerResponseRow(
      new SamplerResponseRow(
          getRawColumns().get(3),
          new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
              .put("__time", 1555934400000L).put("dim1PlusBar", "foo2bar").put("met1", 4L).build(),
          null, null),
      data.get(1)
  );
  assertEqualsSamplerResponseRow(
      new SamplerResponseRow(getRawColumns().get(5), null, true, getUnparseableTimestampString()),
      data.get(2)
  );
  // The last row hits a parse exception when indexing; check that rawColumns and the exception message match the expected values
  String indexParseExceptionMessage = ParserType.STR_CSV.equals(parserType)
      ? "Found unparseable columns in row: [SamplerInputRow{row=TransformedInputRow{row=MapBasedInputRow{timestamp=2019-04-22T12:00:00.000Z, event={t=2019-04-22T12:00, dim1=foo2, dim2=null, met1=invalidNumber}, dimensions=[dim1PlusBar]}}}], exceptions: [Unable to parse value[invalidNumber] for field[met1]]"
      : "Found unparseable columns in row: [SamplerInputRow{row=TransformedInputRow{row=MapBasedInputRow{timestamp=2019-04-22T12:00:00.000Z, event={t=2019-04-22T12:00, dim1=foo2, met1=invalidNumber}, dimensions=[dim1PlusBar]}}}], exceptions: [Unable to parse value[invalidNumber] for field[met1]]";
  assertEqualsSamplerResponseRow(new SamplerResponseRow(rawColumns4ParseExceptionRow, null, true, indexParseExceptionMessage), data.get(3));
}
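The TransformedInputRow wrapper that appears in the expected error message above reflects how the transform is applied: the TransformSpec produces a Transformer that wraps each incoming row. A minimal, hedged sketch of applying the same expression to a single in-memory row (the row contents are illustrative; ExprMacroTable.nil() is used here because concat needs no macros, whereas the tests use TestExprMacroTable.INSTANCE):

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.MapBasedInputRow;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.segment.transform.ExpressionTransform;
import org.apache.druid.segment.transform.TransformSpec;
import org.apache.druid.segment.transform.Transformer;

final TransformSpec transformSpec = new TransformSpec(
    null,  // no filter: transform every row
    ImmutableList.of(new ExpressionTransform("dim1PlusBar", "concat(dim1 + 'bar')", ExprMacroTable.nil()))
);
final Transformer transformer = transformSpec.toTransformer();

final InputRow raw = new MapBasedInputRow(
    DateTimes.of("2019-04-22T12:00"),
    ImmutableList.of("dim1PlusBar"),
    ImmutableMap.of("t", "2019-04-22T12:00", "dim1", "foo")
);
// Expected: the transformed row reports dim1PlusBar = "foobar", matching the sampler output above.
final InputRow transformed = transformer.transform(raw);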
Use of org.apache.druid.segment.transform.ExpressionTransform in project druid by druid-io.
The class HadoopDruidIndexerMapperTest, method testHadoopyStringParserWithTransformSpec.
@Test
public void testHadoopyStringParserWithTransformSpec() throws Exception {
  final HadoopDruidIndexerConfig config = new HadoopDruidIndexerConfig(
      new HadoopIngestionSpec(
          DATA_SCHEMA.withTransformSpec(new TransformSpec(
              new SelectorDimFilter("dim1", "foo", null),
              ImmutableList.of(new ExpressionTransform("dim1t", "concat(dim1,dim1)", ExprMacroTable.nil()))
          )),
          IO_CONFIG,
          TUNING_CONFIG
      )
  );
  final MyMapper mapper = new MyMapper();
  final Configuration hadoopConfig = new Configuration();
  hadoopConfig.set(HadoopDruidIndexerConfig.CONFIG_PROPERTY, HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(config));
  final Mapper.Context mapContext = EasyMock.mock(Mapper.Context.class);
  EasyMock.expect(mapContext.getConfiguration()).andReturn(hadoopConfig).once();
  EasyMock.expect(mapContext.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_THROWN_AWAY_COUNTER)).andReturn(getTestCounter());
  EasyMock.replay(mapContext);
  mapper.setup(mapContext);
  final List<Map<String, Object>> rows = ImmutableList.of(
      ImmutableMap.of("t", "2000-01-01T00:00:00.000Z", "dim1", "foo", "dim2", "x", "m1", 1.0),
      ImmutableMap.of("t", "2000-01-01T00:00:00.000Z", "dim1", "bar", "dim2", "y", "m1", 1.0),
      ImmutableMap.of("t", "2000-01-01T00:00:00.000Z", "dim1", "foo", "dim2", "z", "m1", 1.0)
  );
  for (Map<String, Object> row : rows) {
    mapper.map(NullWritable.get(), new Text(JSON_MAPPER.writeValueAsString(row)), mapContext);
  }
  // Only rows matching the filter (dim1 = "foo") survive; each gains dim1t = concat(dim1,dim1) = "foofoo".
  assertRowListEquals(
      ImmutableList.of(
          ImmutableMap.of("t", "2000-01-01T00:00:00.000Z", "dim1", "foo", "dim1t", "foofoo", "dim2", "x", "m1", 1.0),
          ImmutableMap.of("t", "2000-01-01T00:00:00.000Z", "dim1", "foo", "dim1t", "foofoo", "dim2", "z", "m1", 1.0)
      ),
      mapper.getRows()
  );
}