Example use of org.apache.druid.data.input.InputSource in the druid project by druid-io.
From the class InputSourceSamplerTest, method testMissingValueTimestampSpec.
/**
 * Samples the test rows using a {@code TimestampSpec} constructed with two {@code null}
 * arguments and {@code DateTimes.of("1970")}, then checks that all six rows are read and
 * indexed and that every parsed row carries {@code __time == 0L} (including the row whose
 * {@code t} value is {@code "bad_timestamp"}).
 *
 * @throws IOException declared by the test signature
 */
@Test
public void testMissingValueTimestampSpec() throws IOException {
  final DataSchema schema = createDataSchema(
      new TimestampSpec(null, null, DateTimes.of("1970")),
      new DimensionsSpec(null),
      null,
      null,
      null
  );

  final SamplerResponse response = inputSourceSampler.sample(
      createInputSource(getTestRows(), schema),
      createInputFormat(),
      schema,
      null
  );

  Assert.assertEquals(6, response.getNumRowsRead());
  Assert.assertEquals(6, response.getNumRowsIndexed());
  Assert.assertEquals(6, response.getData().size());

  // Expected parsed values per row, in the column order: t, dim2, dim1, met1.
  final String[][] expectedValues = {
      {"2019-04-22T12:00", null, "foo", "1"},
      {"2019-04-22T12:00", null, "foo", "2"},
      {"2019-04-22T12:01", null, "foo", "3"},
      {"2019-04-22T12:00", null, "foo2", "4"},
      {"2019-04-22T12:00", "bar", "foo", "5"},
      {"bad_timestamp", null, "foo", "6"}
  };

  final List<SamplerResponseRow> rows = response.getData();
  for (int i = 0; i < expectedValues.length; i++) {
    final String[] expected = expectedValues[i];
    assertEqualsSamplerResponseRow(
        new SamplerResponseRow(
            getRawColumns().get(i),
            new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
                .put("__time", 0L)
                .put("t", expected[0])
                .put("dim2", expected[1])
                .put("dim1", expected[2])
                .put("met1", expected[3])
                .build(),
            null,
            null
        ),
        rows.get(i)
    );
  }
}
Example use of org.apache.druid.data.input.InputSource in the druid project by druid-io.
From the class InputSourceSamplerTest, method testReaderCreationException.
/**
 * Passes the sampler an {@code InputSource} whose {@code reader(...)} always throws a
 * {@code RuntimeException}; the test expects {@code sample} to end with a
 * {@code SamplerException}.
 */
@Test(expected = SamplerException.class)
public void testReaderCreationException() {
  inputSourceSampler.sample(
      new InputSource() {
        @Override
        public InputSourceReader reader(
            InputRowSchema inputRowSchema,
            @Nullable InputFormat inputFormat,
            File temporaryDirectory
        ) {
          // Reader creation itself is the failure under test.
          throw new RuntimeException();
        }

        @Override
        public boolean needsFormat() {
          return false;
        }

        @Override
        public boolean isSplittable() {
          return false;
        }
      },
      null,
      null,
      null
  );
}
Example use of org.apache.druid.data.input.InputSource in the druid project by druid-io.
From the class InputSourceSamplerTest, method testNoDataSchema.
/**
 * Samples the test rows with a {@code null} data schema and checks that six rows are read,
 * none are indexed, and each returned row has no parsed values, is flagged unparseable, and
 * carries the unparseable-timestamp error text built for its 1-based position.
 */
@Test
public void testNoDataSchema() {
  final SamplerResponse response = inputSourceSampler.sample(
      createInputSource(getTestRows(), null),
      createInputFormat(),
      null,
      null
  );

  Assert.assertEquals(6, response.getNumRowsRead());
  Assert.assertEquals(0, response.getNumRowsIndexed());
  Assert.assertEquals(6, response.getData().size());

  final List<SamplerResponseRow> rows = response.getData();
  for (int idx = 0; idx < 6; idx++) {
    final SamplerResponseRow actual = rows.get(idx);
    assertEqualsSamplerResponseRow(
        new SamplerResponseRow(
            getRawColumns().get(idx),
            null,
            true,
            // The error string helper takes a 1-based number, hence idx + 1.
            unparseableTimestampErrorString(actual.getInput(), idx + 1)
        ),
        actual
    );
  }
}
Example use of org.apache.druid.data.input.InputSource in the druid project by druid-io.
From the class IndexTaskSamplerSpecTest, method testSerde.
/**
 * Deserializes an {@code IndexTaskSamplerSpec} from JSON, invokes {@code sample()} against a
 * mocked sampler, and checks that the arguments handed to the sampler (input source, input
 * format, data schema, sampler config) reflect the values written in the JSON.
 *
 * @throws IOException if the JSON cannot be read by the mapper
 */
@Test
public void testSerde() throws IOException {
  final String specJson =
      "{\n"
      + " \"type\": \"index\",\n"
      + " \"samplerConfig\": {\n"
      + " \"numRows\": 123,\n"
      + " \"timeoutMs\": 2345\n"
      + " },\n"
      + " \"spec\": {\n"
      + " \"dataSchema\": {\n"
      + " \"dataSource\": \"sampler\",\n"
      + " \"dimensionsSpec\": {},\n"
      + " \"timestampSpec\": {\n"
      + " \"missingValue\": \"1970\"\n"
      + " }\n"
      + " },\n"
      + " \"ioConfig\": {\n"
      + " \"type\": \"index\",\n"
      + " \"inputSource\": {\n"
      + " \"type\": \"local\",\n"
      + " \"baseDir\": \"/tmp\",\n"
      + " \"filter\": \"wikiticker-2015-09-12-sampled.json\"\n"
      + " },\n"
      + " \"inputFormat\": {\n"
      + " \"type\": \"json\"\n"
      + " }\n"
      + " }\n"
      + " }\n"
      + "}";
  final IndexTaskSamplerSpec samplerSpec = MAPPER.readValue(specJson, IndexTaskSamplerSpec.class);

  // One capture per argument of the mocked sample(...) call.
  final Capture<InputSource> sourceCapture = EasyMock.newCapture();
  final Capture<InputFormat> formatCapture = EasyMock.newCapture();
  final Capture<DataSchema> schemaCapture = EasyMock.newCapture();
  final Capture<SamplerConfig> configCapture = EasyMock.newCapture();

  EasyMock.expect(
      inputSourceSampler.sample(
          EasyMock.capture(sourceCapture),
          EasyMock.capture(formatCapture),
          EasyMock.capture(schemaCapture),
          EasyMock.capture(configCapture)
      )
  ).andReturn(new SamplerResponse(0, 0, null));

  replayAll();
  samplerSpec.sample();
  verifyAll();

  // Input source must be the local source described in the JSON.
  final LocalInputSource localSource = (LocalInputSource) sourceCapture.getValue();
  Assert.assertEquals(new File("/tmp"), localSource.getBaseDir());
  Assert.assertEquals("wikiticker-2015-09-12-sampled.json", localSource.getFilter());

  Assert.assertEquals("sampler", schemaCapture.getValue().getDataSource());
  Assert.assertEquals(JsonInputFormat.class, formatCapture.getValue().getClass());

  final SamplerConfig config = configCapture.getValue();
  Assert.assertEquals(123, config.getNumRows());
  Assert.assertEquals(2345, config.getTimeoutMs());
}
Example use of org.apache.druid.data.input.InputSource in the druid project by druid-io.
From the class RecordSupplierInputSourceTest, method testRead.
/**
 * Reads up to {@code NUM_ROWS} rows through a {@code RecordSupplierInputSource} backed by a
 * {@code RandomCsvSupplier} and checks each row's timestamp and dimension count. After the
 * iterator has been closed, the supplier must report that it is closed.
 *
 * @throws IOException if creating the temporary folder or reading/closing the iterator fails
 */
@Test
public void testRead() throws IOException {
  final RandomCsvSupplier csvSupplier = new RandomCsvSupplier();
  final InputSource source = new RecordSupplierInputSource<>("topic", csvSupplier, false);

  // Column names col_0 .. col_{NUM_COLS-1}.
  final List<String> columnNames = IntStream
      .range(0, NUM_COLS)
      .mapToObj(i -> StringUtils.format("col_%d", i))
      .collect(Collectors.toList());
  final InputFormat csvFormat = new CsvInputFormat(columnNames, null, null, false, 0);

  // col_0 is used for the timestamp; the remaining columns become dimensions.
  final TimestampSpec timestampSpec = new TimestampSpec("col_0", "auto", null);
  final DimensionsSpec dimensionsSpec = new DimensionsSpec(
      DimensionsSpec.getDefaultSchemas(columnNames.subList(1, columnNames.size()))
  );
  final InputRowSchema rowSchema = new InputRowSchema(timestampSpec, dimensionsSpec, ColumnsFilter.all());

  final InputSourceReader reader = source.reader(rowSchema, csvFormat, temporaryFolder.newFolder());

  int rowsRead = 0;
  try (CloseableIterator<InputRow> rows = reader.read()) {
    while (rowsRead < NUM_ROWS && rows.hasNext()) {
      final InputRow row = rows.next();
      Assert.assertEquals(DateTimes.of(TIMESTAMP_STRING), row.getTimestamp());
      Assert.assertEquals(NUM_COLS - 1, row.getDimensions().size());
      rowsRead++;
    }
  }

  Assert.assertEquals(NUM_ROWS, rowsRead);
  Assert.assertTrue(csvSupplier.isClosed());
}
Aggregations