use of org.apache.druid.client.indexing.SamplerResponse in project druid by druid-io.
the class InputSourceSamplerTest method testNoDataSchema.
@Test
public void testNoDataSchema() {
final InputSource inputSource = createInputSource(getTestRows(), null);
final SamplerResponse response = inputSourceSampler.sample(inputSource, createInputFormat(), null, null);
Assert.assertEquals(6, response.getNumRowsRead());
Assert.assertEquals(0, response.getNumRowsIndexed());
Assert.assertEquals(6, response.getData().size());
List<SamplerResponseRow> data = response.getData();
assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(0), null, true, unparseableTimestampErrorString(data.get(0).getInput(), 1)), data.get(0));
assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(1), null, true, unparseableTimestampErrorString(data.get(1).getInput(), 2)), data.get(1));
assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(2), null, true, unparseableTimestampErrorString(data.get(2).getInput(), 3)), data.get(2));
assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(3), null, true, unparseableTimestampErrorString(data.get(3).getInput(), 4)), data.get(3));
assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(4), null, true, unparseableTimestampErrorString(data.get(4).getInput(), 5)), data.get(4));
assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(5), null, true, unparseableTimestampErrorString(data.get(5).getInput(), 6)), data.get(5));
}
use of org.apache.druid.client.indexing.SamplerResponse in project druid by druid-io.
the class IndexTaskSamplerSpecTest method testSerde.
@Test
public void testSerde() throws IOException {
String json = "{\n" + " \"type\": \"index\",\n" + " \"samplerConfig\": {\n" + " \"numRows\": 123,\n" + " \"timeoutMs\": 2345\n" + " },\n" + " \"spec\": {\n" + " \"dataSchema\": {\n" + " \"dataSource\": \"sampler\",\n" + " \"dimensionsSpec\": {},\n" + " \"timestampSpec\": {\n" + " \"missingValue\": \"1970\"\n" + " }\n" + " },\n" + " \"ioConfig\": {\n" + " \"type\": \"index\",\n" + " \"inputSource\": {\n" + " \"type\": \"local\",\n" + " \"baseDir\": \"/tmp\",\n" + " \"filter\": \"wikiticker-2015-09-12-sampled.json\"\n" + " },\n" + " \"inputFormat\": {\n" + " \"type\": \"json\"\n" + " }\n" + " }\n" + " }\n" + "}";
Capture<InputSource> capturedInputSource = EasyMock.newCapture();
Capture<InputFormat> capturedInputFormat = EasyMock.newCapture();
Capture<DataSchema> capturedDataSchema = EasyMock.newCapture();
Capture<SamplerConfig> capturedSamplerConfig = EasyMock.newCapture();
IndexTaskSamplerSpec spec = MAPPER.readValue(json, IndexTaskSamplerSpec.class);
EasyMock.expect(inputSourceSampler.sample(EasyMock.capture(capturedInputSource), EasyMock.capture(capturedInputFormat), EasyMock.capture(capturedDataSchema), EasyMock.capture(capturedSamplerConfig))).andReturn(new SamplerResponse(0, 0, null));
replayAll();
spec.sample();
verifyAll();
InputSource inputSource = capturedInputSource.getValue();
Assert.assertEquals(new File("/tmp"), ((LocalInputSource) inputSource).getBaseDir());
Assert.assertEquals("wikiticker-2015-09-12-sampled.json", ((LocalInputSource) inputSource).getFilter());
DataSchema dataSchema = capturedDataSchema.getValue();
Assert.assertEquals("sampler", dataSchema.getDataSource());
Assert.assertEquals(JsonInputFormat.class, capturedInputFormat.getValue().getClass());
SamplerConfig samplerConfig = capturedSamplerConfig.getValue();
Assert.assertEquals(123, samplerConfig.getNumRows());
Assert.assertEquals(2345, samplerConfig.getTimeoutMs());
}
use of org.apache.druid.client.indexing.SamplerResponse in project druid by druid-io.
the class InputSourceSamplerTest method testWithTransformsAutoDimensions.
@Test
public void testWithTransformsAutoDimensions() throws IOException {
final TimestampSpec timestampSpec = new TimestampSpec("t", null, null);
final DimensionsSpec dimensionsSpec = new DimensionsSpec(null);
final TransformSpec transformSpec = new TransformSpec(null, ImmutableList.of(new ExpressionTransform("dim1PlusBar", "concat(dim1, 'bar')", TestExprMacroTable.INSTANCE)));
final AggregatorFactory[] aggregatorFactories = { new LongSumAggregatorFactory("met1", "met1") };
final GranularitySpec granularitySpec = new UniformGranularitySpec(Granularities.DAY, Granularities.HOUR, true, null);
final DataSchema dataSchema = createDataSchema(timestampSpec, dimensionsSpec, aggregatorFactories, granularitySpec, transformSpec);
final InputSource inputSource = createInputSource(getTestRows(), dataSchema);
final InputFormat inputFormat = createInputFormat();
SamplerResponse response = inputSourceSampler.sample(inputSource, inputFormat, dataSchema, null);
Assert.assertEquals(6, response.getNumRowsRead());
Assert.assertEquals(5, response.getNumRowsIndexed());
Assert.assertEquals(4, response.getData().size());
List<SamplerResponseRow> data = response.getData();
assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(0), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", null).put("met1", 6L).build(), null, null), data.get(0));
assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(3), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo2").put("dim2", null).put("met1", 4L).build(), null, null), data.get(1));
assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(4), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", "bar").put("met1", 5L).build(), null, null), data.get(2));
assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(5), null, true, getUnparseableTimestampString()), data.get(3));
}
use of org.apache.druid.client.indexing.SamplerResponse in project druid by druid-io.
the class InputSourceSamplerTest method testWithNoRollup.
@Test
public void testWithNoRollup() throws IOException {
final TimestampSpec timestampSpec = new TimestampSpec("t", null, null);
final DimensionsSpec dimensionsSpec = new DimensionsSpec(null);
final AggregatorFactory[] aggregatorFactories = { new LongSumAggregatorFactory("met1", "met1") };
final GranularitySpec granularitySpec = new UniformGranularitySpec(Granularities.DAY, Granularities.HOUR, false, null);
final DataSchema dataSchema = createDataSchema(timestampSpec, dimensionsSpec, aggregatorFactories, granularitySpec, null);
final InputSource inputSource = createInputSource(getTestRows(), dataSchema);
final InputFormat inputFormat = createInputFormat();
SamplerResponse response = inputSourceSampler.sample(inputSource, inputFormat, dataSchema, null);
Assert.assertEquals(6, response.getNumRowsRead());
Assert.assertEquals(5, response.getNumRowsIndexed());
Assert.assertEquals(6, response.getData().size());
List<SamplerResponseRow> data = response.getData();
assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(0), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", null).put("met1", 1L).build(), null, null), data.get(0));
assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(1), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", null).put("met1", 2L).build(), null, null), data.get(1));
assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(2), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", null).put("met1", 3L).build(), null, null), data.get(2));
assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(3), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo2").put("dim2", null).put("met1", 4L).build(), null, null), data.get(3));
assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(4), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", "bar").put("met1", 5L).build(), null, null), data.get(4));
assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(5), null, true, getUnparseableTimestampString()), data.get(5));
}
use of org.apache.druid.client.indexing.SamplerResponse in project druid by druid-io.
the class KafkaSamplerSpecTest method testSample.
@Test(timeout = 30_000L)
public void testSample() {
insertData(generateRecords(TOPIC));
KafkaSupervisorSpec supervisorSpec = new KafkaSupervisorSpec(null, DATA_SCHEMA, null, new KafkaSupervisorIOConfig(TOPIC, new JsonInputFormat(JSONPathSpec.DEFAULT, null, null), null, null, null, kafkaServer.consumerProperties(), null, null, null, null, true, null, null, null, null), null, null, null, null, null, null, null, null, null, null, null);
KafkaSamplerSpec samplerSpec = new KafkaSamplerSpec(supervisorSpec, new SamplerConfig(5, null), new InputSourceSampler(), OBJECT_MAPPER);
SamplerResponse response = samplerSpec.sample();
Assert.assertEquals(5, response.getNumRowsRead());
Assert.assertEquals(3, response.getNumRowsIndexed());
Assert.assertEquals(5, response.getData().size());
Iterator<SamplerResponse.SamplerResponseRow> it = response.getData().iterator();
Assert.assertEquals(new SamplerResponse.SamplerResponseRow(new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("timestamp", "2008").put("dim1", "a").put("dim2", "y").put("dimLong", "10").put("dimFloat", "20.0").put("met1", "1.0").build(), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1199145600000L).put("dim1", "a").put("dim1t", null).put("dim2", "y").put("dimLong", 10L).put("dimFloat", 20.0F).put("rows", 1L).put("met1sum", 1.0).build(), null, null), it.next());
Assert.assertEquals(new SamplerResponse.SamplerResponseRow(new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("timestamp", "2009").put("dim1", "b").put("dim2", "y").put("dimLong", "10").put("dimFloat", "20.0").put("met1", "1.0").build(), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1230768000000L).put("dim1", "b").put("dim1t", null).put("dim2", "y").put("dimLong", 10L).put("dimFloat", 20.0F).put("rows", 1L).put("met1sum", 1.0).build(), null, null), it.next());
Assert.assertEquals(new SamplerResponse.SamplerResponseRow(new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("timestamp", "2010").put("dim1", "c").put("dim2", "y").put("dimLong", "10").put("dimFloat", "20.0").put("met1", "1.0").build(), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1262304000000L).put("dim1", "c").put("dim1t", null).put("dim2", "y").put("dimLong", 10L).put("dimFloat", 20.0F).put("rows", 1L).put("met1sum", 1.0).build(), null, null), it.next());
Assert.assertEquals(new SamplerResponse.SamplerResponseRow(new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("timestamp", "246140482-04-24T15:36:27.903Z").put("dim1", "x").put("dim2", "z").put("dimLong", "10").put("dimFloat", "20.0").put("met1", "1.0").build(), null, true, "Encountered row with timestamp[246140482-04-24T15:36:27.903Z] that cannot be represented as a long: [{timestamp=246140482-04-24T15:36:27.903Z, dim1=x, dim2=z, dimLong=10, dimFloat=20.0, met1=1.0}]"), it.next());
Assert.assertEquals(new SamplerResponse.SamplerResponseRow(null, null, true, "Unable to parse row [unparseable] into JSON"), it.next());
Assert.assertFalse(it.hasNext());
}
Aggregations