Search in sources:

Example 11 with SamplerResponse

Use of org.apache.druid.client.indexing.SamplerResponse in project druid by druid-io.

In class KinesisSamplerSpecTest, method testSample:

/**
 * Runs a Kinesis sampler spec against a mocked record supplier and verifies the sampled rows.
 *
 * <p>The mock is primed to be asked for the partition ids of the stream, to be assigned and
 * seeked to the earliest offset of the single shard, to be polled, and to be closed, each
 * exactly once. With a sampler config built with {@code 5}, the response is expected to
 * report 5 rows read, 3 rows indexed, and 5 response rows: three parsed rows, one row whose
 * timestamp cannot be represented as a long, and one row that cannot be parsed as JSON.
 */
@Test(timeout = 10_000L)
public void testSample() throws Exception {
    // Record phase: the order of these expectations mirrors the calls the sampler must make.
    EasyMock.expect(recordSupplier.getPartitionIds(STREAM))
            .andReturn(ImmutableSet.of(SHARD_ID))
            .once();

    recordSupplier.assign(ImmutableSet.of(StreamPartition.of(STREAM, SHARD_ID)));
    EasyMock.expectLastCall().once();

    recordSupplier.seekToEarliest(ImmutableSet.of(StreamPartition.of(STREAM, SHARD_ID)));
    EasyMock.expectLastCall().once();

    EasyMock.expect(recordSupplier.poll(EasyMock.anyLong()))
            .andReturn(generateRecords(STREAM))
            .once();

    recordSupplier.close();
    EasyMock.expectLastCall().once();

    replayAll();

    // Build the supervisor spec piece by piece; every argument not needed by this test stays null.
    JsonInputFormat jsonFormat = new JsonInputFormat(
        new JSONPathSpec(true, ImmutableList.of()),
        ImmutableMap.of(),
        false
    );
    KinesisSupervisorIOConfig ioConfig = new KinesisSupervisorIOConfig(
        STREAM,
        jsonFormat,
        null, null, null, null, null, null, null,
        true,
        null, null, null, null, null, null, null, null, null,
        false
    );
    KinesisSupervisorSpec supervisorSpec = new KinesisSupervisorSpec(
        null,
        DATA_SCHEMA,
        null,
        ioConfig,
        null, null, null, null, null, null, null, null, null, null, null, null
    );
    KinesisSamplerSpec samplerSpec = new TestableKinesisSamplerSpec(
        supervisorSpec,
        new SamplerConfig(5, null),
        new InputSourceSampler(),
        null
    );

    SamplerResponse sampled = samplerSpec.sample();

    verifyAll();

    Assert.assertEquals(5, sampled.getNumRowsRead());
    Assert.assertEquals(3, sampled.getNumRowsIndexed());
    Assert.assertEquals(5, sampled.getData().size());

    Iterator<SamplerResponse.SamplerResponseRow> rows = sampled.getData().iterator();

    // Row 1: timestamp "2008", parsed successfully.
    Assert.assertEquals(
        new SamplerResponse.SamplerResponseRow(
            ImmutableMap.<String, Object>builder()
                .put("timestamp", "2008")
                .put("dim1", "a")
                .put("dim2", "y")
                .put("dimLong", "10")
                .put("dimFloat", "20.0")
                .put("met1", "1.0")
                .build(),
            new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
                .put("__time", 1199145600000L)
                .put("dim1", "a")
                .put("dim1t", null)
                .put("dim2", "y")
                .put("dimLong", 10L)
                .put("dimFloat", 20.0F)
                .put("rows", 1L)
                .put("met1sum", 1.0)
                .build(),
            null,
            null
        ),
        rows.next()
    );

    // Row 2: timestamp "2009", parsed successfully.
    Assert.assertEquals(
        new SamplerResponse.SamplerResponseRow(
            ImmutableMap.<String, Object>builder()
                .put("timestamp", "2009")
                .put("dim1", "b")
                .put("dim2", "y")
                .put("dimLong", "10")
                .put("dimFloat", "20.0")
                .put("met1", "1.0")
                .build(),
            new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
                .put("__time", 1230768000000L)
                .put("dim1", "b")
                .put("dim1t", null)
                .put("dim2", "y")
                .put("dimLong", 10L)
                .put("dimFloat", 20.0F)
                .put("rows", 1L)
                .put("met1sum", 1.0)
                .build(),
            null,
            null
        ),
        rows.next()
    );

    // Row 3: timestamp "2010", parsed successfully.
    Assert.assertEquals(
        new SamplerResponse.SamplerResponseRow(
            ImmutableMap.<String, Object>builder()
                .put("timestamp", "2010")
                .put("dim1", "c")
                .put("dim2", "y")
                .put("dimLong", "10")
                .put("dimFloat", "20.0")
                .put("met1", "1.0")
                .build(),
            new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
                .put("__time", 1262304000000L)
                .put("dim1", "c")
                .put("dim1t", null)
                .put("dim2", "y")
                .put("dimLong", 10L)
                .put("dimFloat", 20.0F)
                .put("rows", 1L)
                .put("met1sum", 1.0)
                .build(),
            null,
            null
        ),
        rows.next()
    );

    // Row 4: raw columns are present, but the timestamp is out of range, so the row is flagged with an error.
    Assert.assertEquals(
        new SamplerResponse.SamplerResponseRow(
            ImmutableMap.<String, Object>builder()
                .put("timestamp", "246140482-04-24T15:36:27.903Z")
                .put("dim1", "x")
                .put("dim2", "z")
                .put("dimLong", "10")
                .put("dimFloat", "20.0")
                .put("met1", "1.0")
                .build(),
            null,
            true,
            "Encountered row with timestamp[246140482-04-24T15:36:27.903Z] that cannot be represented as a long: [{timestamp=246140482-04-24T15:36:27.903Z, dim1=x, dim2=z, dimLong=10, dimFloat=20.0, met1=1.0}]"
        ),
        rows.next()
    );

    // Row 5: the record could not be parsed as JSON at all, so no raw or parsed columns are expected.
    Assert.assertEquals(
        new SamplerResponse.SamplerResponseRow(null, null, true, "Unable to parse row [unparseable] into JSON"),
        rows.next()
    );

    Assert.assertFalse(rows.hasNext());
}
Also used : SamplerConfig(org.apache.druid.indexing.overlord.sampler.SamplerConfig) SamplerResponse(org.apache.druid.client.indexing.SamplerResponse) SamplerTestUtils(org.apache.druid.indexing.overlord.sampler.SamplerTestUtils) KinesisSupervisorSpec(org.apache.druid.indexing.kinesis.supervisor.KinesisSupervisorSpec) KinesisSupervisorIOConfig(org.apache.druid.indexing.kinesis.supervisor.KinesisSupervisorIOConfig) JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) InputSourceSampler(org.apache.druid.indexing.overlord.sampler.InputSourceSampler) Test(org.junit.Test)

Example 12 with SamplerResponse

Use of org.apache.druid.client.indexing.SamplerResponse in project druid by druid-io.

In class InputSourceSamplerTest, method testWithRollup:

/**
 * Samples the test rows with a long-sum aggregator on {@code met1} and a uniform granularity
 * spec built with DAY / HOUR granularities and a {@code true} flag (presumably the rollup
 * switch, going by the test name).
 *
 * <p>Expects 6 rows read and 5 indexed, but only 4 response rows: three parsed rows carrying
 * summed {@code met1} values (6, 4 and 5) plus one row with an unparseable timestamp.
 */
@Test
public void testWithRollup() throws IOException {
    final TimestampSpec tsSpec = new TimestampSpec("t", null, null);
    final DimensionsSpec dimSpec = new DimensionsSpec(null);
    final AggregatorFactory[] aggregators = new AggregatorFactory[] {
        new LongSumAggregatorFactory("met1", "met1")
    };
    final GranularitySpec granSpec = new UniformGranularitySpec(
        Granularities.DAY,
        Granularities.HOUR,
        true,
        null
    );
    final DataSchema schema = createDataSchema(tsSpec, dimSpec, aggregators, granSpec, null);
    final InputSource source = createInputSource(getTestRows(), schema);
    final InputFormat format = createInputFormat();

    SamplerResponse result = inputSourceSampler.sample(source, format, schema, null);

    Assert.assertEquals(6, result.getNumRowsRead());
    Assert.assertEquals(5, result.getNumRowsIndexed());

    List<SamplerResponseRow> rows = result.getData();
    Assert.assertEquals(4, rows.size());

    // Every parsed row in this test is expected to carry the same __time value.
    final long expectedTime = 1555934400000L;

    assertEqualsSamplerResponseRow(
        new SamplerResponseRow(
            getRawColumns().get(0),
            new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
                .put("__time", expectedTime)
                .put("dim1", "foo")
                .put("dim2", null)
                .put("met1", 6L)
                .build(),
            null,
            null
        ),
        rows.get(0)
    );
    assertEqualsSamplerResponseRow(
        new SamplerResponseRow(
            getRawColumns().get(3),
            new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
                .put("__time", expectedTime)
                .put("dim1", "foo2")
                .put("dim2", null)
                .put("met1", 4L)
                .build(),
            null,
            null
        ),
        rows.get(1)
    );
    assertEqualsSamplerResponseRow(
        new SamplerResponseRow(
            getRawColumns().get(4),
            new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
                .put("__time", expectedTime)
                .put("dim1", "foo")
                .put("dim2", "bar")
                .put("met1", 5L)
                .build(),
            null,
            null
        ),
        rows.get(2)
    );

    // The last response row is the one whose timestamp cannot be parsed.
    assertEqualsSamplerResponseRow(
        new SamplerResponseRow(getRawColumns().get(5), null, true, getUnparseableTimestampString()),
        rows.get(3)
    );
}
Also used : RecordSupplierInputSource(org.apache.druid.indexing.seekablestream.RecordSupplierInputSource) InlineInputSource(org.apache.druid.data.input.impl.InlineInputSource) InputSource(org.apache.druid.data.input.InputSource) SamplerResponse(org.apache.druid.client.indexing.SamplerResponse) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) InputFormat(org.apache.druid.data.input.InputFormat) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) SamplerResponseRow(org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 13 with SamplerResponse

Use of org.apache.druid.client.indexing.SamplerResponse in project druid by druid-io.

In class InputSourceSamplerTest, method testWithDimensionSpec:

/**
 * Samples the test rows with an explicit dimensions spec that declares {@code dim1} and
 * {@code met1} as string dimensions, and no aggregators, granularity spec or transform spec.
 *
 * <p>Expects 6 rows read, 5 indexed and 6 response rows: five parsed rows containing only
 * {@code __time}, {@code dim1} and {@code met1} (as a string), followed by one row with an
 * unparseable timestamp.
 */
@Test
public void testWithDimensionSpec() throws IOException {
    final TimestampSpec tsSpec = new TimestampSpec("t", null, null);
    final DimensionsSpec dimSpec = new DimensionsSpec(
        ImmutableList.of(
            StringDimensionSchema.create("dim1"),
            StringDimensionSchema.create("met1")
        )
    );
    final DataSchema schema = createDataSchema(tsSpec, dimSpec, null, null, null);
    final InputSource source = createInputSource(getTestRows(), schema);
    final InputFormat format = createInputFormat();

    SamplerResponse result = inputSourceSampler.sample(source, format, schema, null);

    Assert.assertEquals(6, result.getNumRowsRead());
    Assert.assertEquals(5, result.getNumRowsIndexed());

    List<SamplerResponseRow> rows = result.getData();
    Assert.assertEquals(6, rows.size());

    assertEqualsSamplerResponseRow(
        new SamplerResponseRow(
            getRawColumns().get(0),
            new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
                .put("__time", 1555934400000L)
                .put("dim1", "foo")
                .put("met1", "1")
                .build(),
            null,
            null
        ),
        rows.get(0)
    );
    assertEqualsSamplerResponseRow(
        new SamplerResponseRow(
            getRawColumns().get(1),
            new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
                .put("__time", 1555934400000L)
                .put("dim1", "foo")
                .put("met1", "2")
                .build(),
            null,
            null
        ),
        rows.get(1)
    );
    // Third row is the only one expected with a different __time value.
    assertEqualsSamplerResponseRow(
        new SamplerResponseRow(
            getRawColumns().get(2),
            new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
                .put("__time", 1555934460000L)
                .put("dim1", "foo")
                .put("met1", "3")
                .build(),
            null,
            null
        ),
        rows.get(2)
    );
    assertEqualsSamplerResponseRow(
        new SamplerResponseRow(
            getRawColumns().get(3),
            new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
                .put("__time", 1555934400000L)
                .put("dim1", "foo2")
                .put("met1", "4")
                .build(),
            null,
            null
        ),
        rows.get(3)
    );
    assertEqualsSamplerResponseRow(
        new SamplerResponseRow(
            getRawColumns().get(4),
            new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
                .put("__time", 1555934400000L)
                .put("dim1", "foo")
                .put("met1", "5")
                .build(),
            null,
            null
        ),
        rows.get(4)
    );

    // The last response row is the one whose timestamp cannot be parsed.
    assertEqualsSamplerResponseRow(
        new SamplerResponseRow(getRawColumns().get(5), null, true, getUnparseableTimestampString()),
        rows.get(5)
    );
}
Also used : DataSchema(org.apache.druid.segment.indexing.DataSchema) RecordSupplierInputSource(org.apache.druid.indexing.seekablestream.RecordSupplierInputSource) InlineInputSource(org.apache.druid.data.input.impl.InlineInputSource) InputSource(org.apache.druid.data.input.InputSource) JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) InputFormat(org.apache.druid.data.input.InputFormat) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) SamplerResponse(org.apache.druid.client.indexing.SamplerResponse) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) SamplerResponseRow(org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 14 with SamplerResponse

Use of org.apache.druid.client.indexing.SamplerResponse in project druid by druid-io.

In class InputSourceSamplerTest, method testWithFilter:

/**
 * Samples the test rows with a transform spec whose filter selects {@code dim1 = "foo"},
 * combined with a long-sum aggregator on {@code met1} and a uniform DAY / HOUR granularity
 * spec.
 *
 * <p>Expects 5 rows read, 4 indexed and 3 response rows: two parsed rows with summed
 * {@code met1} values (6 and 5) and one row with an unparseable timestamp.
 */
@Test
public void testWithFilter() throws IOException {
    final TimestampSpec tsSpec = new TimestampSpec("t", null, null);
    final DimensionsSpec dimSpec = new DimensionsSpec(null);
    final TransformSpec onlyFoo = new TransformSpec(
        new SelectorDimFilter("dim1", "foo", null),
        null
    );
    final AggregatorFactory[] aggregators = new AggregatorFactory[] {
        new LongSumAggregatorFactory("met1", "met1")
    };
    final GranularitySpec granSpec = new UniformGranularitySpec(
        Granularities.DAY,
        Granularities.HOUR,
        true,
        null
    );
    final DataSchema schema = createDataSchema(tsSpec, dimSpec, aggregators, granSpec, onlyFoo);
    final InputSource source = createInputSource(getTestRows(), schema);
    final InputFormat format = createInputFormat();

    SamplerResponse result = inputSourceSampler.sample(source, format, schema, null);

    Assert.assertEquals(5, result.getNumRowsRead());
    Assert.assertEquals(4, result.getNumRowsIndexed());

    List<SamplerResponseRow> rows = result.getData();
    Assert.assertEquals(3, rows.size());

    // Both parsed rows are expected to carry the same __time value.
    final long expectedTime = 1555934400000L;

    assertEqualsSamplerResponseRow(
        new SamplerResponseRow(
            getRawColumns().get(0),
            new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
                .put("__time", expectedTime)
                .put("dim1", "foo")
                .put("dim2", null)
                .put("met1", 6L)
                .build(),
            null,
            null
        ),
        rows.get(0)
    );
    assertEqualsSamplerResponseRow(
        new SamplerResponseRow(
            getRawColumns().get(4),
            new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
                .put("__time", expectedTime)
                .put("dim1", "foo")
                .put("dim2", "bar")
                .put("met1", 5L)
                .build(),
            null,
            null
        ),
        rows.get(1)
    );

    // The last response row is the one whose timestamp cannot be parsed.
    assertEqualsSamplerResponseRow(
        new SamplerResponseRow(getRawColumns().get(5), null, true, getUnparseableTimestampString()),
        rows.get(2)
    );
}
Also used : RecordSupplierInputSource(org.apache.druid.indexing.seekablestream.RecordSupplierInputSource) InlineInputSource(org.apache.druid.data.input.impl.InlineInputSource) InputSource(org.apache.druid.data.input.InputSource) SamplerResponse(org.apache.druid.client.indexing.SamplerResponse) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) TransformSpec(org.apache.druid.segment.transform.TransformSpec) DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) InputFormat(org.apache.druid.data.input.InputFormat) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) SamplerResponseRow(org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 15 with SamplerResponse

Use of org.apache.druid.client.indexing.SamplerResponse in project druid by druid-io.

In class InputSourceSamplerTest, method testWithTimestampSpec:

/**
 * Samples the test rows with a timestamp spec on column {@code "t"} and a dimensions spec
 * constructed from {@code null}, with no aggregators, granularity spec or transform spec.
 *
 * <p>Expects 6 rows read, 5 indexed and 6 response rows: five parsed rows containing
 * {@code __time}, {@code dim2}, {@code dim1} and {@code met1} (as a string), followed by one
 * row with an unparseable timestamp.
 */
@Test
public void testWithTimestampSpec() throws IOException {
    final TimestampSpec tsSpec = new TimestampSpec("t", null, null);
    final DimensionsSpec dimSpec = new DimensionsSpec(null);
    final DataSchema schema = createDataSchema(tsSpec, dimSpec, null, null, null);
    final InputSource source = createInputSource(getTestRows(), schema);
    final InputFormat format = createInputFormat();

    SamplerResponse result = inputSourceSampler.sample(source, format, schema, null);

    Assert.assertEquals(6, result.getNumRowsRead());
    Assert.assertEquals(5, result.getNumRowsIndexed());

    List<SamplerResponseRow> rows = result.getData();
    Assert.assertEquals(6, rows.size());

    assertEqualsSamplerResponseRow(
        new SamplerResponseRow(
            getRawColumns().get(0),
            new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
                .put("__time", 1555934400000L)
                .put("dim2", null)
                .put("dim1", "foo")
                .put("met1", "1")
                .build(),
            null,
            null
        ),
        rows.get(0)
    );
    assertEqualsSamplerResponseRow(
        new SamplerResponseRow(
            getRawColumns().get(1),
            new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
                .put("__time", 1555934400000L)
                .put("dim2", null)
                .put("dim1", "foo")
                .put("met1", "2")
                .build(),
            null,
            null
        ),
        rows.get(1)
    );
    // Third row is the only one expected with a different __time value.
    assertEqualsSamplerResponseRow(
        new SamplerResponseRow(
            getRawColumns().get(2),
            new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
                .put("__time", 1555934460000L)
                .put("dim2", null)
                .put("dim1", "foo")
                .put("met1", "3")
                .build(),
            null,
            null
        ),
        rows.get(2)
    );
    assertEqualsSamplerResponseRow(
        new SamplerResponseRow(
            getRawColumns().get(3),
            new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
                .put("__time", 1555934400000L)
                .put("dim2", null)
                .put("dim1", "foo2")
                .put("met1", "4")
                .build(),
            null,
            null
        ),
        rows.get(3)
    );
    assertEqualsSamplerResponseRow(
        new SamplerResponseRow(
            getRawColumns().get(4),
            new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
                .put("__time", 1555934400000L)
                .put("dim2", "bar")
                .put("dim1", "foo")
                .put("met1", "5")
                .build(),
            null,
            null
        ),
        rows.get(4)
    );

    // The last response row is the one whose timestamp cannot be parsed.
    assertEqualsSamplerResponseRow(
        new SamplerResponseRow(getRawColumns().get(5), null, true, getUnparseableTimestampString()),
        rows.get(5)
    );
}
Also used : DataSchema(org.apache.druid.segment.indexing.DataSchema) RecordSupplierInputSource(org.apache.druid.indexing.seekablestream.RecordSupplierInputSource) InlineInputSource(org.apache.druid.data.input.impl.InlineInputSource) InputSource(org.apache.druid.data.input.InputSource) JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) InputFormat(org.apache.druid.data.input.InputFormat) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) SamplerResponse(org.apache.druid.client.indexing.SamplerResponse) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) SamplerResponseRow(org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Aggregations

SamplerResponse (org.apache.druid.client.indexing.SamplerResponse)19 Test (org.junit.Test)18 SamplerResponseRow (org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow)15 InputSource (org.apache.druid.data.input.InputSource)14 DataSchema (org.apache.druid.segment.indexing.DataSchema)14 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)14 InlineInputSource (org.apache.druid.data.input.impl.InlineInputSource)13 JsonInputFormat (org.apache.druid.data.input.impl.JsonInputFormat)13 RecordSupplierInputSource (org.apache.druid.indexing.seekablestream.RecordSupplierInputSource)13 InputFormat (org.apache.druid.data.input.InputFormat)12 DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec)12 TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec)12 CsvInputFormat (org.apache.druid.data.input.impl.CsvInputFormat)11 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)8 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)8 GranularitySpec (org.apache.druid.segment.indexing.granularity.GranularitySpec)8 UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec)8 TransformSpec (org.apache.druid.segment.transform.TransformSpec)5 ExpressionTransform (org.apache.druid.segment.transform.ExpressionTransform)4 File (java.io.File)2