Search in sources :

Example 6 with SamplerResponse

use of org.apache.druid.client.indexing.SamplerResponse in project druid by druid-io.

the class InputSourceSamplerTest method testNoDataSchema.

@Test
public void testNoDataSchema() {
    final InputSource inputSource = createInputSource(getTestRows(), null);
    final SamplerResponse response = inputSourceSampler.sample(inputSource, createInputFormat(), null, null);
    Assert.assertEquals(6, response.getNumRowsRead());
    Assert.assertEquals(0, response.getNumRowsIndexed());
    Assert.assertEquals(6, response.getData().size());
    List<SamplerResponseRow> data = response.getData();
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(0), null, true, unparseableTimestampErrorString(data.get(0).getInput(), 1)), data.get(0));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(1), null, true, unparseableTimestampErrorString(data.get(1).getInput(), 2)), data.get(1));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(2), null, true, unparseableTimestampErrorString(data.get(2).getInput(), 3)), data.get(2));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(3), null, true, unparseableTimestampErrorString(data.get(3).getInput(), 4)), data.get(3));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(4), null, true, unparseableTimestampErrorString(data.get(4).getInput(), 5)), data.get(4));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(5), null, true, unparseableTimestampErrorString(data.get(5).getInput(), 6)), data.get(5));
}
Also used : RecordSupplierInputSource(org.apache.druid.indexing.seekablestream.RecordSupplierInputSource) InlineInputSource(org.apache.druid.data.input.impl.InlineInputSource) InputSource(org.apache.druid.data.input.InputSource) SamplerResponse(org.apache.druid.client.indexing.SamplerResponse) SamplerResponseRow(org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 7 with SamplerResponse

use of org.apache.druid.client.indexing.SamplerResponse in project druid by druid-io.

the class IndexTaskSamplerSpecTest method testSerde.

@Test
public void testSerde() throws IOException {
    String json = "{\n" + "  \"type\": \"index\",\n" + "  \"samplerConfig\": {\n" + "    \"numRows\": 123,\n" + "    \"timeoutMs\": 2345\n" + "  },\n" + "  \"spec\": {\n" + "    \"dataSchema\": {\n" + "      \"dataSource\": \"sampler\",\n" + "      \"dimensionsSpec\": {},\n" + "      \"timestampSpec\": {\n" + "        \"missingValue\": \"1970\"\n" + "      }\n" + "    },\n" + "    \"ioConfig\": {\n" + "      \"type\": \"index\",\n" + "      \"inputSource\": {\n" + "        \"type\": \"local\",\n" + "        \"baseDir\": \"/tmp\",\n" + "        \"filter\": \"wikiticker-2015-09-12-sampled.json\"\n" + "      },\n" + "      \"inputFormat\": {\n" + "        \"type\": \"json\"\n" + "      }\n" + "    }\n" + "  }\n" + "}";
    Capture<InputSource> capturedInputSource = EasyMock.newCapture();
    Capture<InputFormat> capturedInputFormat = EasyMock.newCapture();
    Capture<DataSchema> capturedDataSchema = EasyMock.newCapture();
    Capture<SamplerConfig> capturedSamplerConfig = EasyMock.newCapture();
    IndexTaskSamplerSpec spec = MAPPER.readValue(json, IndexTaskSamplerSpec.class);
    EasyMock.expect(inputSourceSampler.sample(EasyMock.capture(capturedInputSource), EasyMock.capture(capturedInputFormat), EasyMock.capture(capturedDataSchema), EasyMock.capture(capturedSamplerConfig))).andReturn(new SamplerResponse(0, 0, null));
    replayAll();
    spec.sample();
    verifyAll();
    InputSource inputSource = capturedInputSource.getValue();
    Assert.assertEquals(new File("/tmp"), ((LocalInputSource) inputSource).getBaseDir());
    Assert.assertEquals("wikiticker-2015-09-12-sampled.json", ((LocalInputSource) inputSource).getFilter());
    DataSchema dataSchema = capturedDataSchema.getValue();
    Assert.assertEquals("sampler", dataSchema.getDataSource());
    Assert.assertEquals(JsonInputFormat.class, capturedInputFormat.getValue().getClass());
    SamplerConfig samplerConfig = capturedSamplerConfig.getValue();
    Assert.assertEquals(123, samplerConfig.getNumRows());
    Assert.assertEquals(2345, samplerConfig.getTimeoutMs());
}
Also used : DataSchema(org.apache.druid.segment.indexing.DataSchema) LocalInputSource(org.apache.druid.data.input.impl.LocalInputSource) InputSource(org.apache.druid.data.input.InputSource) InputFormat(org.apache.druid.data.input.InputFormat) JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) SamplerResponse(org.apache.druid.client.indexing.SamplerResponse) File(java.io.File) Test(org.junit.Test)

Example 8 with SamplerResponse

use of org.apache.druid.client.indexing.SamplerResponse in project druid by druid-io.

the class InputSourceSamplerTest method testWithTransformsAutoDimensions.

@Test
public void testWithTransformsAutoDimensions() throws IOException {
    final TimestampSpec timestampSpec = new TimestampSpec("t", null, null);
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(null);
    final TransformSpec transformSpec = new TransformSpec(null, ImmutableList.of(new ExpressionTransform("dim1PlusBar", "concat(dim1, 'bar')", TestExprMacroTable.INSTANCE)));
    final AggregatorFactory[] aggregatorFactories = { new LongSumAggregatorFactory("met1", "met1") };
    final GranularitySpec granularitySpec = new UniformGranularitySpec(Granularities.DAY, Granularities.HOUR, true, null);
    final DataSchema dataSchema = createDataSchema(timestampSpec, dimensionsSpec, aggregatorFactories, granularitySpec, transformSpec);
    final InputSource inputSource = createInputSource(getTestRows(), dataSchema);
    final InputFormat inputFormat = createInputFormat();
    SamplerResponse response = inputSourceSampler.sample(inputSource, inputFormat, dataSchema, null);
    Assert.assertEquals(6, response.getNumRowsRead());
    Assert.assertEquals(5, response.getNumRowsIndexed());
    Assert.assertEquals(4, response.getData().size());
    List<SamplerResponseRow> data = response.getData();
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(0), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", null).put("met1", 6L).build(), null, null), data.get(0));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(3), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo2").put("dim2", null).put("met1", 4L).build(), null, null), data.get(1));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(4), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", "bar").put("met1", 5L).build(), null, null), data.get(2));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(5), null, true, getUnparseableTimestampString()), data.get(3));
}
Also used : RecordSupplierInputSource(org.apache.druid.indexing.seekablestream.RecordSupplierInputSource) InlineInputSource(org.apache.druid.data.input.impl.InlineInputSource) InputSource(org.apache.druid.data.input.InputSource) SamplerResponse(org.apache.druid.client.indexing.SamplerResponse) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) TransformSpec(org.apache.druid.segment.transform.TransformSpec) DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) InputFormat(org.apache.druid.data.input.InputFormat) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) SamplerResponseRow(org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 9 with SamplerResponse

use of org.apache.druid.client.indexing.SamplerResponse in project druid by druid-io.

the class InputSourceSamplerTest method testWithNoRollup.

@Test
public void testWithNoRollup() throws IOException {
    final TimestampSpec timestampSpec = new TimestampSpec("t", null, null);
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(null);
    final AggregatorFactory[] aggregatorFactories = { new LongSumAggregatorFactory("met1", "met1") };
    final GranularitySpec granularitySpec = new UniformGranularitySpec(Granularities.DAY, Granularities.HOUR, false, null);
    final DataSchema dataSchema = createDataSchema(timestampSpec, dimensionsSpec, aggregatorFactories, granularitySpec, null);
    final InputSource inputSource = createInputSource(getTestRows(), dataSchema);
    final InputFormat inputFormat = createInputFormat();
    SamplerResponse response = inputSourceSampler.sample(inputSource, inputFormat, dataSchema, null);
    Assert.assertEquals(6, response.getNumRowsRead());
    Assert.assertEquals(5, response.getNumRowsIndexed());
    Assert.assertEquals(6, response.getData().size());
    List<SamplerResponseRow> data = response.getData();
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(0), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", null).put("met1", 1L).build(), null, null), data.get(0));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(1), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", null).put("met1", 2L).build(), null, null), data.get(1));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(2), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", null).put("met1", 3L).build(), null, null), data.get(2));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(3), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo2").put("dim2", null).put("met1", 4L).build(), null, null), data.get(3));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(4), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", "bar").put("met1", 5L).build(), null, null), data.get(4));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(5), null, true, getUnparseableTimestampString()), data.get(5));
}
Also used : RecordSupplierInputSource(org.apache.druid.indexing.seekablestream.RecordSupplierInputSource) InlineInputSource(org.apache.druid.data.input.impl.InlineInputSource) InputSource(org.apache.druid.data.input.InputSource) SamplerResponse(org.apache.druid.client.indexing.SamplerResponse) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) InputFormat(org.apache.druid.data.input.InputFormat) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) SamplerResponseRow(org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 10 with SamplerResponse

use of org.apache.druid.client.indexing.SamplerResponse in project druid by druid-io.

the class KafkaSamplerSpecTest method testSample.

@Test(timeout = 30_000L)
public void testSample() {
    insertData(generateRecords(TOPIC));
    KafkaSupervisorSpec supervisorSpec = new KafkaSupervisorSpec(null, DATA_SCHEMA, null, new KafkaSupervisorIOConfig(TOPIC, new JsonInputFormat(JSONPathSpec.DEFAULT, null, null), null, null, null, kafkaServer.consumerProperties(), null, null, null, null, true, null, null, null, null), null, null, null, null, null, null, null, null, null, null, null);
    KafkaSamplerSpec samplerSpec = new KafkaSamplerSpec(supervisorSpec, new SamplerConfig(5, null), new InputSourceSampler(), OBJECT_MAPPER);
    SamplerResponse response = samplerSpec.sample();
    Assert.assertEquals(5, response.getNumRowsRead());
    Assert.assertEquals(3, response.getNumRowsIndexed());
    Assert.assertEquals(5, response.getData().size());
    Iterator<SamplerResponse.SamplerResponseRow> it = response.getData().iterator();
    Assert.assertEquals(new SamplerResponse.SamplerResponseRow(new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("timestamp", "2008").put("dim1", "a").put("dim2", "y").put("dimLong", "10").put("dimFloat", "20.0").put("met1", "1.0").build(), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1199145600000L).put("dim1", "a").put("dim1t", null).put("dim2", "y").put("dimLong", 10L).put("dimFloat", 20.0F).put("rows", 1L).put("met1sum", 1.0).build(), null, null), it.next());
    Assert.assertEquals(new SamplerResponse.SamplerResponseRow(new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("timestamp", "2009").put("dim1", "b").put("dim2", "y").put("dimLong", "10").put("dimFloat", "20.0").put("met1", "1.0").build(), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1230768000000L).put("dim1", "b").put("dim1t", null).put("dim2", "y").put("dimLong", 10L).put("dimFloat", 20.0F).put("rows", 1L).put("met1sum", 1.0).build(), null, null), it.next());
    Assert.assertEquals(new SamplerResponse.SamplerResponseRow(new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("timestamp", "2010").put("dim1", "c").put("dim2", "y").put("dimLong", "10").put("dimFloat", "20.0").put("met1", "1.0").build(), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1262304000000L).put("dim1", "c").put("dim1t", null).put("dim2", "y").put("dimLong", 10L).put("dimFloat", 20.0F).put("rows", 1L).put("met1sum", 1.0).build(), null, null), it.next());
    Assert.assertEquals(new SamplerResponse.SamplerResponseRow(new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("timestamp", "246140482-04-24T15:36:27.903Z").put("dim1", "x").put("dim2", "z").put("dimLong", "10").put("dimFloat", "20.0").put("met1", "1.0").build(), null, true, "Encountered row with timestamp[246140482-04-24T15:36:27.903Z] that cannot be represented as a long: [{timestamp=246140482-04-24T15:36:27.903Z, dim1=x, dim2=z, dimLong=10, dimFloat=20.0, met1=1.0}]"), it.next());
    Assert.assertEquals(new SamplerResponse.SamplerResponseRow(null, null, true, "Unable to parse row [unparseable] into JSON"), it.next());
    Assert.assertFalse(it.hasNext());
}
Also used : SamplerConfig(org.apache.druid.indexing.overlord.sampler.SamplerConfig) SamplerResponse(org.apache.druid.client.indexing.SamplerResponse) SamplerTestUtils(org.apache.druid.indexing.overlord.sampler.SamplerTestUtils) JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) KafkaSupervisorIOConfig(org.apache.druid.indexing.kafka.supervisor.KafkaSupervisorIOConfig) InputSourceSampler(org.apache.druid.indexing.overlord.sampler.InputSourceSampler) KafkaSupervisorSpec(org.apache.druid.indexing.kafka.supervisor.KafkaSupervisorSpec) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Aggregations

SamplerResponse (org.apache.druid.client.indexing.SamplerResponse)19 Test (org.junit.Test)18 SamplerResponseRow (org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow)15 InputSource (org.apache.druid.data.input.InputSource)14 DataSchema (org.apache.druid.segment.indexing.DataSchema)14 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)14 InlineInputSource (org.apache.druid.data.input.impl.InlineInputSource)13 JsonInputFormat (org.apache.druid.data.input.impl.JsonInputFormat)13 RecordSupplierInputSource (org.apache.druid.indexing.seekablestream.RecordSupplierInputSource)13 InputFormat (org.apache.druid.data.input.InputFormat)12 DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec)12 TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec)12 CsvInputFormat (org.apache.druid.data.input.impl.CsvInputFormat)11 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)8 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)8 GranularitySpec (org.apache.druid.segment.indexing.granularity.GranularitySpec)8 UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec)8 TransformSpec (org.apache.druid.segment.transform.TransformSpec)5 ExpressionTransform (org.apache.druid.segment.transform.ExpressionTransform)4 File (java.io.File)2