Example 21 with TransformSpec

Use of org.apache.druid.segment.transform.TransformSpec in project druid by druid-io.

From the class InputSourceSamplerTest, the method testIndexParseException:

@Test
public void testIndexParseException() throws IOException {
    final TimestampSpec timestampSpec = new TimestampSpec("t", null, null);
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(ImmutableList.of(StringDimensionSchema.create("dim1PlusBar")));
    final TransformSpec transformSpec = new TransformSpec(
        null,
        ImmutableList.of(new ExpressionTransform("dim1PlusBar", "concat(dim1 + 'bar')", TestExprMacroTable.INSTANCE))
    );
    final AggregatorFactory[] aggregatorFactories = { new LongSumAggregatorFactory("met1", "met1") };
    final GranularitySpec granularitySpec = new UniformGranularitySpec(Granularities.DAY, Granularities.HOUR, true, null);
    final DataSchema dataSchema = createDataSchema(timestampSpec, dimensionsSpec, aggregatorFactories, granularitySpec, transformSpec);
    //
    // add an invalid row to cause a parse exception when indexing
    //
    Map<String, Object> rawColumns4ParseExceptionRow = ImmutableMap.of("t", "2019-04-22T12:00", "dim1", "foo2", "met1", "invalidNumber");
    final List<String> inputTestRows = Lists.newArrayList(getTestRows());
    inputTestRows.add(
        ParserType.STR_CSV.equals(parserType)
            ? "2019-04-22T12:00,foo2,,invalidNumber"
            : OBJECT_MAPPER.writeValueAsString(rawColumns4ParseExceptionRow)
    );
    final InputSource inputSource = createInputSource(inputTestRows, dataSchema);
    final InputFormat inputFormat = createInputFormat();
    SamplerResponse response = inputSourceSampler.sample(inputSource, inputFormat, dataSchema, null);
    Assert.assertEquals(7, response.getNumRowsRead());
    Assert.assertEquals(5, response.getNumRowsIndexed());
    Assert.assertEquals(4, response.getData().size());
    List<SamplerResponseRow> data = response.getData();
    assertEqualsSamplerResponseRow(
        new SamplerResponseRow(
            getRawColumns().get(0),
            new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
                .put("__time", 1555934400000L).put("dim1PlusBar", "foobar").put("met1", 11L).build(),
            null,
            null),
        data.get(0));
    assertEqualsSamplerResponseRow(
        new SamplerResponseRow(
            getRawColumns().get(3),
            new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
                .put("__time", 1555934400000L).put("dim1PlusBar", "foo2bar").put("met1", 4L).build(),
            null,
            null),
        data.get(1));
    assertEqualsSamplerResponseRow(
        new SamplerResponseRow(getRawColumns().get(5), null, true, getUnparseableTimestampString()),
        data.get(2));
    //
    // the last row throws a parse exception when indexing; check that rawColumns and the
    // exception message match the expected values
    //
    String indexParseExceptionMessage = ParserType.STR_CSV.equals(parserType)
        ? "Found unparseable columns in row: [SamplerInputRow{row=TransformedInputRow{row=MapBasedInputRow{timestamp=2019-04-22T12:00:00.000Z, event={t=2019-04-22T12:00, dim1=foo2, dim2=null, met1=invalidNumber}, dimensions=[dim1PlusBar]}}}], exceptions: [Unable to parse value[invalidNumber] for field[met1]]"
        : "Found unparseable columns in row: [SamplerInputRow{row=TransformedInputRow{row=MapBasedInputRow{timestamp=2019-04-22T12:00:00.000Z, event={t=2019-04-22T12:00, dim1=foo2, met1=invalidNumber}, dimensions=[dim1PlusBar]}}}], exceptions: [Unable to parse value[invalidNumber] for field[met1]]";
    assertEqualsSamplerResponseRow(new SamplerResponseRow(rawColumns4ParseExceptionRow, null, true, indexParseExceptionMessage), data.get(3));
}
Also used : RecordSupplierInputSource(org.apache.druid.indexing.seekablestream.RecordSupplierInputSource) InlineInputSource(org.apache.druid.data.input.impl.InlineInputSource) InputSource(org.apache.druid.data.input.InputSource) SamplerResponse(org.apache.druid.client.indexing.SamplerResponse) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) TransformSpec(org.apache.druid.segment.transform.TransformSpec) DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) InputFormat(org.apache.druid.data.input.InputFormat) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) SamplerResponseRow(org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
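
The raw-vs-indexed counts above hinge on how the TransformSpec rewrites each row before indexing. Below is a minimal standalone sketch, not taken from the Druid test suite, of the same kind of expression transform applied to a single row. The class name and main() harness are illustrative, the two-argument concat plus ExprMacroTable.nil() stand in for the test's expression and TestExprMacroTable, and it assumes TransformSpec.toTransformer() and Transformer.transform() from druid-processing behave as in recent Druid versions.

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.MapBasedInputRow;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.segment.transform.ExpressionTransform;
import org.apache.druid.segment.transform.TransformSpec;
import org.apache.druid.segment.transform.Transformer;

public class TransformSpecSketch {
    public static void main(String[] args) {
        // derive "dim1PlusBar" from "dim1", as the sampler test above does
        final TransformSpec transformSpec = new TransformSpec(
            null, // no filter, so every row passes through
            ImmutableList.of(new ExpressionTransform("dim1PlusBar", "concat(dim1, 'bar')", ExprMacroTable.nil()))
        );
        final InputRow row = new MapBasedInputRow(
            0L, // epoch millis; the timestamp does not affect the transform
            ImmutableList.of("dim1"),
            ImmutableMap.of("dim1", "foo")
        );
        // toTransformer() compiles the transforms; transform() returns the decorated row
        final Transformer transformer = transformSpec.toTransformer();
        System.out.println(transformer.transform(row).getDimension("dim1PlusBar")); // prints [foobar]
    }
}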

Example 22 with TransformSpec

Use of org.apache.druid.segment.transform.TransformSpec in project druid by druid-io.

From the class HadoopDruidIndexerMapperTest, the method testHadoopyStringParserWithTransformSpec:

@Test
public void testHadoopyStringParserWithTransformSpec() throws Exception {
    final HadoopDruidIndexerConfig config = new HadoopDruidIndexerConfig(
        new HadoopIngestionSpec(
            DATA_SCHEMA.withTransformSpec(
                new TransformSpec(
                    new SelectorDimFilter("dim1", "foo", null),
                    ImmutableList.of(new ExpressionTransform("dim1t", "concat(dim1,dim1)", ExprMacroTable.nil()))
                )
            ),
            IO_CONFIG,
            TUNING_CONFIG
        )
    );
    final MyMapper mapper = new MyMapper();
    final Configuration hadoopConfig = new Configuration();
    hadoopConfig.set(HadoopDruidIndexerConfig.CONFIG_PROPERTY, HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(config));
    final Mapper.Context mapContext = EasyMock.mock(Mapper.Context.class);
    EasyMock.expect(mapContext.getConfiguration()).andReturn(hadoopConfig).once();
    EasyMock.expect(mapContext.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_THROWN_AWAY_COUNTER)).andReturn(getTestCounter());
    EasyMock.replay(mapContext);
    mapper.setup(mapContext);
    final List<Map<String, Object>> rows = ImmutableList.of(
        ImmutableMap.of("t", "2000-01-01T00:00:00.000Z", "dim1", "foo", "dim2", "x", "m1", 1.0),
        ImmutableMap.of("t", "2000-01-01T00:00:00.000Z", "dim1", "bar", "dim2", "y", "m1", 1.0),
        ImmutableMap.of("t", "2000-01-01T00:00:00.000Z", "dim1", "foo", "dim2", "z", "m1", 1.0)
    );
    for (Map<String, Object> row : rows) {
        mapper.map(NullWritable.get(), new Text(JSON_MAPPER.writeValueAsString(row)), mapContext);
    }
    assertRowListEquals(
        ImmutableList.of(
            ImmutableMap.of("t", "2000-01-01T00:00:00.000Z", "dim1", "foo", "dim1t", "foofoo", "dim2", "x", "m1", 1.0),
            ImmutableMap.of("t", "2000-01-01T00:00:00.000Z", "dim1", "foo", "dim1t", "foofoo", "dim2", "z", "m1", 1.0)
        ),
        mapper.getRows()
    );
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) Text(org.apache.hadoop.io.Text) TransformSpec(org.apache.druid.segment.transform.TransformSpec) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Mapper(org.apache.hadoop.mapreduce.Mapper) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Test(org.junit.Test)
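
The assertion above keeps only the two dim1=foo rows because the TransformSpec's SelectorDimFilter drops non-matching input during transformation. A minimal standalone sketch of that filtering behavior follows; it is illustrative rather than part of the Hadoop test, and it assumes Transformer.transform() returns null for rows the filter rejects, as in recent Druid versions.

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.MapBasedInputRow;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.query.filter.SelectorDimFilter;
import org.apache.druid.segment.transform.ExpressionTransform;
import org.apache.druid.segment.transform.TransformSpec;
import org.apache.druid.segment.transform.Transformer;

public class TransformFilterSketch {
    public static void main(String[] args) {
        final Transformer transformer = new TransformSpec(
            new SelectorDimFilter("dim1", "foo", null), // only dim1=foo rows survive
            ImmutableList.of(new ExpressionTransform("dim1t", "concat(dim1,dim1)", ExprMacroTable.nil()))
        ).toTransformer();
        final InputRow foo = new MapBasedInputRow(0L, ImmutableList.of("dim1"), ImmutableMap.of("dim1", "foo"));
        final InputRow bar = new MapBasedInputRow(0L, ImmutableList.of("dim1"), ImmutableMap.of("dim1", "bar"));
        System.out.println(transformer.transform(foo).getDimension("dim1t")); // prints [foofoo]
        System.out.println(transformer.transform(bar)); // prints null: the row is filtered out
    }
}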

Example 23 with TransformSpec

Use of org.apache.druid.segment.transform.TransformSpec in project druid by druid-io.

From the class SqlFirehoseTest, the method testFirehoseTransformingParser:

@Test
public void testFirehoseTransformingParser() throws Exception {
    final TestCloseable closeable = new TestCloseable();
    List<Object> expectedResults = new ArrayList<>();
    for (Map<String, Object> map : inputs) {
        expectedResults.add(map.get("x") + "foo");
    }
    final List<JsonIterator<Map<String, Object>>> lineIterators = fileList.stream()
        .map(s -> new JsonIterator<Map<String, Object>>(TYPE_REF, s, closeable, objectMapper))
        .collect(Collectors.toList());
    final InputRowParser stringParser = new TransformingStringInputRowParser(
        new TimeAndDimsParseSpec(
            new TimestampSpec("timestamp", "auto", null),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("x")))
        ),
        Charset.defaultCharset().name(),
        new TransformSpec(
            null,
            ImmutableList.of(new ExpressionTransform("xfoo", "concat(x,'foo')", ExprMacroTable.nil()))
        )
    );
    try (final SqlFirehose firehose = new SqlFirehose(lineIterators.iterator(), stringParser, closeable)) {
        final List<Object> results = new ArrayList<>();
        while (firehose.hasMore()) {
            final InputRow inputRow = firehose.nextRow();
            if (inputRow == null) {
                results.add(null);
            } else {
                results.add(inputRow.getDimension("xfoo").get(0));
            }
        }
        Assert.assertEquals(expectedResults, results);
    }
}
Also used : JsonGenerator(com.fasterxml.jackson.core.JsonGenerator) TimeAndDimsParseSpec(org.apache.druid.data.input.impl.TimeAndDimsParseSpec) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) Charset(java.nio.charset.Charset) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) After(org.junit.After) Map(java.util.Map) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) TypeReference(com.fasterxml.jackson.core.type.TypeReference) FileUtils(org.apache.druid.java.util.common.FileUtils) Before(org.junit.Before) JsonIterator(org.apache.druid.data.input.impl.prefetch.JsonIterator) ImmutableMap(com.google.common.collect.ImmutableMap) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) FileOutputStream(java.io.FileOutputStream) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Test(org.junit.Test) IOException(java.io.IOException) InputRowParser(org.apache.druid.data.input.impl.InputRowParser) FileInputStream(java.io.FileInputStream) Collectors(java.util.stream.Collectors) SmileFactory(com.fasterxml.jackson.dataformat.smile.SmileFactory) File(java.io.File) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) InputRow(org.apache.druid.data.input.InputRow) List(java.util.List) TransformingStringInputRowParser(org.apache.druid.segment.transform.TransformingStringInputRowParser) Closeable(java.io.Closeable) Assert(org.junit.Assert) TransformSpec(org.apache.druid.segment.transform.TransformSpec)
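
A TransformingStringInputRowParser does not have to be constructed by hand as in the test above: a TransformSpec can also decorate an existing parser. The sketch below is illustrative rather than taken from the test suite; the class name and build() helper are hypothetical, and it assumes TransformSpec.decorate(InputRowParser), which wraps a StringInputRowParser so that every parsed row carries the derived dimension.

import java.nio.charset.Charset;
import com.google.common.collect.ImmutableList;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.InputRowParser;
import org.apache.druid.data.input.impl.StringInputRowParser;
import org.apache.druid.data.input.impl.TimeAndDimsParseSpec;
import org.apache.druid.data.input.impl.TimestampSpec;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.segment.transform.ExpressionTransform;
import org.apache.druid.segment.transform.TransformSpec;

public class DecorateParserSketch {
    public static InputRowParser<?> build() {
        // the same parse spec as the firehose test above, minus any transform
        final StringInputRowParser plainParser = new StringInputRowParser(
            new TimeAndDimsParseSpec(
                new TimestampSpec("timestamp", "auto", null),
                new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("x")))
            ),
            Charset.defaultCharset().name()
        );
        final TransformSpec transformSpec = new TransformSpec(
            null,
            ImmutableList.of(new ExpressionTransform("xfoo", "concat(x,'foo')", ExprMacroTable.nil()))
        );
        // decorate() wraps the parser; parsed rows then expose the derived "xfoo" dimension
        return transformSpec.decorate(plainParser);
    }
}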

Aggregations

TransformSpec (org.apache.druid.segment.transform.TransformSpec): 23 uses
Test (org.junit.Test): 19 uses
ExpressionTransform (org.apache.druid.segment.transform.ExpressionTransform): 16 uses
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 13 uses
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 13 uses
SelectorDimFilter (org.apache.druid.query.filter.SelectorDimFilter): 12 uses
GranularitySpec (org.apache.druid.segment.indexing.granularity.GranularitySpec): 10 uses
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 9 uses
DataSchema (org.apache.druid.segment.indexing.DataSchema): 9 uses
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 9 uses
Map (java.util.Map): 8 uses
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 8 uses
UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec): 8 uses
TaskStatus (org.apache.druid.indexer.TaskStatus): 7 uses
DataSegment (org.apache.druid.timeline.DataSegment): 7 uses
ImmutableMap (com.google.common.collect.ImmutableMap): 6 uses
InputFormat (org.apache.druid.data.input.InputFormat): 6 uses
ArrayList (java.util.ArrayList): 5 uses
SamplerResponse (org.apache.druid.client.indexing.SamplerResponse): 5 uses
SamplerResponseRow (org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow): 5 uses