Search in sources :

Example 56 with TimestampSpec

use of org.apache.druid.data.input.impl.TimestampSpec in project druid by druid-io.

the class DruidSegmentReaderTest method testReaderWithInclusiveColumnsFilter.

@Test
public void testReaderWithInclusiveColumnsFilter() throws IOException {
    final DruidSegmentReader reader = new DruidSegmentReader(makeInputEntity(Intervals.of("2000/P1D")), indexIO, new TimestampSpec("__time", "millis", DateTimes.of("1971")), new DimensionsSpec(ImmutableList.of(StringDimensionSchema.create("s"), new DoubleDimensionSchema("d"))), ColumnsFilter.inclusionBased(ImmutableSet.of("__time", "s", "d")), null, temporaryFolder.newFolder());
    Assert.assertEquals(ImmutableList.of(new MapBasedInputRow(DateTimes.of("2000"), ImmutableList.of("s", "d"), ImmutableMap.<String, Object>builder().put("__time", DateTimes.of("2000T").getMillis()).put("s", "foo").put("d", 1.23d).build()), new MapBasedInputRow(DateTimes.of("2000T01"), ImmutableList.of("s", "d"), ImmutableMap.<String, Object>builder().put("__time", DateTimes.of("2000T01").getMillis()).put("s", "bar").put("d", 4.56d).build())), readRows(reader));
}
Also used : DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) NullHandlingTest(org.apache.druid.common.config.NullHandlingTest) Test(org.junit.Test)

Example 57 with TimestampSpec

use of org.apache.druid.data.input.impl.TimestampSpec in project druid by druid-io.

the class DruidSegmentReaderTest method testReader.

@Test
public void testReader() throws IOException {
    final DruidSegmentReader reader = new DruidSegmentReader(makeInputEntity(Intervals.of("2000/P1D")), indexIO, new TimestampSpec("__time", "millis", DateTimes.of("1971")), new DimensionsSpec(ImmutableList.of(StringDimensionSchema.create("s"), new DoubleDimensionSchema("d"))), ColumnsFilter.all(), null, temporaryFolder.newFolder());
    Assert.assertEquals(ImmutableList.of(new MapBasedInputRow(DateTimes.of("2000"), ImmutableList.of("s", "d"), ImmutableMap.<String, Object>builder().put("__time", DateTimes.of("2000T").getMillis()).put("s", "foo").put("d", 1.23d).put("cnt", 1L).put("met_s", makeHLLC("foo")).build()), new MapBasedInputRow(DateTimes.of("2000T01"), ImmutableList.of("s", "d"), ImmutableMap.<String, Object>builder().put("__time", DateTimes.of("2000T01").getMillis()).put("s", "bar").put("d", 4.56d).put("cnt", 1L).put("met_s", makeHLLC("bar")).build())), readRows(reader));
}
Also used : DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) NullHandlingTest(org.apache.druid.common.config.NullHandlingTest) Test(org.junit.Test)

Example 58 with TimestampSpec

use of org.apache.druid.data.input.impl.TimestampSpec in project druid by druid-io.

the class CsvInputSourceSamplerTest method testCSVColumnAllNull.

@Test
public void testCSVColumnAllNull() {
    final TimestampSpec timestampSpec = new TimestampSpec(null, null, DateTimes.of("1970"));
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(null);
    final DataSchema dataSchema = new DataSchema("sampler", timestampSpec, dimensionsSpec, null, null, null);
    final List<String> strCsvRows = ImmutableList.of("FirstName,LastName,Number,Gender", "J,G,,Male", "Kobe,Bryant,,Male", "Lisa, Krystal,,Female", "Michael,Jackson,,Male");
    final InputSource inputSource = new InlineInputSource(String.join("\n", strCsvRows));
    final InputFormat inputFormat = new CsvInputFormat(null, null, null, true, 0);
    final InputSourceSampler inputSourceSampler = new InputSourceSampler();
    final SamplerResponse response = inputSourceSampler.sample(inputSource, inputFormat, dataSchema, null);
    Assert.assertEquals(4, response.getNumRowsRead());
    Assert.assertEquals(4, response.getNumRowsIndexed());
    Assert.assertEquals(4, response.getData().size());
    List<SamplerResponseRow> data = response.getData();
    Assert.assertEquals(new SamplerResponseRow(new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("Number", null).put("FirstName", "J").put("LastName", "G").put("Gender", "Male").build(), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 0L).put("Number", null).put("FirstName", "J").put("LastName", "G").put("Gender", "Male").build(), null, null), data.get(0));
    Assert.assertEquals(new SamplerResponseRow(new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("Number", null).put("FirstName", "Kobe").put("LastName", "Bryant").put("Gender", "Male").build(), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("Number", null).put("__time", 0L).put("FirstName", "Kobe").put("LastName", "Bryant").put("Gender", "Male").build(), null, null), data.get(1));
    Assert.assertEquals(new SamplerResponseRow(new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("Number", null).put("FirstName", "Lisa").put("LastName", " Krystal").put("Gender", "Female").build(), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("Number", null).put("__time", 0L).put("FirstName", "Lisa").put("LastName", " Krystal").put("Gender", "Female").build(), null, null), data.get(2));
    Assert.assertEquals(new SamplerResponseRow(new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("Number", null).put("FirstName", "Michael").put("LastName", "Jackson").put("Gender", "Male").build(), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 0L).put("Number", null).put("FirstName", "Michael").put("LastName", "Jackson").put("Gender", "Male").build(), null, null), data.get(3));
}
Also used : InlineInputSource(org.apache.druid.data.input.impl.InlineInputSource) InputSource(org.apache.druid.data.input.InputSource) SamplerResponse(org.apache.druid.client.indexing.SamplerResponse) DataSchema(org.apache.druid.segment.indexing.DataSchema) InputFormat(org.apache.druid.data.input.InputFormat) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) SamplerResponseRow(org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow) InlineInputSource(org.apache.druid.data.input.impl.InlineInputSource) Test(org.junit.Test)

Example 59 with TimestampSpec

use of org.apache.druid.data.input.impl.TimestampSpec in project druid by druid-io.

the class InputSourceSamplerTest method testWithMoreRollup.

@Test
public void testWithMoreRollup() throws IOException {
    final TimestampSpec timestampSpec = new TimestampSpec("t", null, null);
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(ImmutableList.of(StringDimensionSchema.create("dim1")));
    final AggregatorFactory[] aggregatorFactories = { new LongSumAggregatorFactory("met1", "met1") };
    final GranularitySpec granularitySpec = new UniformGranularitySpec(Granularities.DAY, Granularities.HOUR, true, null);
    final DataSchema dataSchema = createDataSchema(timestampSpec, dimensionsSpec, aggregatorFactories, granularitySpec, null);
    final InputSource inputSource = createInputSource(getTestRows(), dataSchema);
    final InputFormat inputFormat = createInputFormat();
    SamplerResponse response = inputSourceSampler.sample(inputSource, inputFormat, dataSchema, null);
    Assert.assertEquals(6, response.getNumRowsRead());
    Assert.assertEquals(5, response.getNumRowsIndexed());
    Assert.assertEquals(3, response.getData().size());
    List<SamplerResponseRow> data = response.getData();
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(0), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("met1", 11L).build(), null, null), data.get(0));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(3), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo2").put("met1", 4L).build(), null, null), data.get(1));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(5), null, true, getUnparseableTimestampString()), data.get(2));
}
Also used : RecordSupplierInputSource(org.apache.druid.indexing.seekablestream.RecordSupplierInputSource) InlineInputSource(org.apache.druid.data.input.impl.InlineInputSource) InputSource(org.apache.druid.data.input.InputSource) SamplerResponse(org.apache.druid.client.indexing.SamplerResponse) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) InputFormat(org.apache.druid.data.input.InputFormat) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) SamplerResponseRow(org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 60 with TimestampSpec

use of org.apache.druid.data.input.impl.TimestampSpec in project druid by druid-io.

the class InputSourceSamplerTest method testMultipleJsonStringInOneBlock.

/**
 * This case tests sampling for multiple json lines in one text block
 * Currently only RecordSupplierInputSource supports this kind of input, see https://github.com/apache/druid/pull/10383 for more information
 *
 * This test combines illegal json block and legal json block together to verify:
 * 1. all lines in the illegal json block should not be parsed
 * 2. the illegal json block should not affect the processing of the 2nd record
 * 3. all lines in legal json block should be parsed successfully
 */
@Test
public void testMultipleJsonStringInOneBlock() throws IOException {
    if (!ParserType.STR_JSON.equals(parserType) || !useInputFormatApi) {
        return;
    }
    final TimestampSpec timestampSpec = new TimestampSpec("t", null, null);
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(ImmutableList.of(StringDimensionSchema.create("dim1PlusBar")));
    final TransformSpec transformSpec = new TransformSpec(null, ImmutableList.of(new ExpressionTransform("dim1PlusBar", "concat(dim1 + 'bar')", TestExprMacroTable.INSTANCE)));
    final AggregatorFactory[] aggregatorFactories = { new LongSumAggregatorFactory("met1", "met1") };
    final GranularitySpec granularitySpec = new UniformGranularitySpec(Granularities.DAY, Granularities.HOUR, true, null);
    final DataSchema dataSchema = createDataSchema(timestampSpec, dimensionsSpec, aggregatorFactories, granularitySpec, transformSpec);
    List<String> jsonBlockList = ImmutableList.of(// include the line which can't be parsed into JSON object to form a illegal json block
    String.join("", STR_JSON_ROWS), // exclude the last line to form a legal json block
    STR_JSON_ROWS.stream().limit(STR_JSON_ROWS.size() - 1).collect(Collectors.joining()));
    SamplerResponse response = inputSourceSampler.sample(new RecordSupplierInputSource("topicName", new TestRecordSupplier(jsonBlockList), true), createInputFormat(), dataSchema, new SamplerConfig(200, 3000));
    // 
    // the 1st json block contains STR_JSON_ROWS.size() lines, and 2nd json block contains STR_JSON_ROWS.size()-1 lines
    // together there should STR_JSON_ROWS.size() * 2 - 1 lines
    // 
    int illegalRows = STR_JSON_ROWS.size();
    int legalRows = STR_JSON_ROWS.size() - 1;
    Assert.assertEquals(illegalRows + legalRows, response.getNumRowsRead());
    Assert.assertEquals(legalRows, response.getNumRowsIndexed());
    Assert.assertEquals(illegalRows + 2, response.getData().size());
    List<SamplerResponseRow> data = response.getData();
    List<Map<String, Object>> rawColumnList = this.getRawColumns();
    int index = 0;
    // 
    // first n rows are related to the first json block which fails to parse
    // 
    String parseExceptionMessage;
    if (useInputFormatApi) {
        parseExceptionMessage = "Timestamp[bad_timestamp] is unparseable! Event: {t=bad_timestamp, dim1=foo, met1=6}";
    } else {
        parseExceptionMessage = "Timestamp[bad_timestamp] is unparseable! Event: {t=bad_timestamp, dim1=foo, met1=6}";
    }
    for (; index < illegalRows; index++) {
        assertEqualsSamplerResponseRow(new SamplerResponseRow(rawColumnList.get(index), null, true, parseExceptionMessage), data.get(index));
    }
    // 
    // following are parsed rows for legal json block
    // 
    assertEqualsSamplerResponseRow(new SamplerResponseRow(rawColumnList.get(0), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1PlusBar", "foobar").put("met1", 11L).build(), null, null), data.get(index++));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(rawColumnList.get(3), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1PlusBar", "foo2bar").put("met1", 4L).build(), null, null), data.get(index));
}
Also used : SamplerResponse(org.apache.druid.client.indexing.SamplerResponse) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) TransformSpec(org.apache.druid.segment.transform.TransformSpec) DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) RecordSupplierInputSource(org.apache.druid.indexing.seekablestream.RecordSupplierInputSource) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) SamplerResponseRow(org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Aggregations

TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec)154 DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec)113 Test (org.junit.Test)110 DataSchema (org.apache.druid.segment.indexing.DataSchema)49 InputRow (org.apache.druid.data.input.InputRow)47 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)41 UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec)39 InputRowSchema (org.apache.druid.data.input.InputRowSchema)37 InputEntityReader (org.apache.druid.data.input.InputEntityReader)33 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)32 JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec)30 JSONParseSpec (org.apache.druid.data.input.impl.JSONParseSpec)29 ArrayList (java.util.ArrayList)28 CsvInputFormat (org.apache.druid.data.input.impl.CsvInputFormat)28 StringInputRowParser (org.apache.druid.data.input.impl.StringInputRowParser)27 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)27 JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec)25 InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues)21 DoubleSumAggregatorFactory (org.apache.druid.query.aggregation.DoubleSumAggregatorFactory)21 ArbitraryGranularitySpec (org.apache.druid.segment.indexing.granularity.ArbitraryGranularitySpec)20