Use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.
The class TimestampsParquetReaderTest, method testDateHandling.
/**
 * Reads the same Parquet file with two schemas, one taking the row timestamp from the
 * {@code date_as_string} column and one from the {@code date_as_date} column, and checks that
 * both yield identical row timestamps and identical raw values for the first sampled row.
 */
@Test
public void testDateHandling() throws IOException {
  final String file = "example/timestamps/test_date_data.snappy.parquet";

  // Schema whose timestamp is parsed from the string column with an explicit format.
  final TimestampSpec stringTimestampSpec = new TimestampSpec("date_as_string", "Y-M-d", null);
  final InputRowSchema schemaAsString = new InputRowSchema(
      stringTimestampSpec,
      new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of())),
      ColumnsFilter.all()
  );

  // Schema whose timestamp comes from the date-typed column; no format is supplied.
  final TimestampSpec dateTimestampSpec = new TimestampSpec("date_as_date", null, null);
  final InputRowSchema schemaAsDate = new InputRowSchema(
      dateTimestampSpec,
      new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of())),
      ColumnsFilter.all()
  );

  // First pass: fully parsed rows must agree on their timestamps, row by row.
  final InputEntityReader stringRowReader = createReader(file, schemaAsString, JSONPathSpec.DEFAULT);
  final InputEntityReader dateRowReader = createReader(file, schemaAsDate, JSONPathSpec.DEFAULT);
  final List<InputRow> rowsWithString = readAllRows(stringRowReader);
  final List<InputRow> rowsWithDate = readAllRows(dateRowReader);

  Assert.assertEquals(rowsWithDate.size(), rowsWithString.size());
  for (int rowIdx = 0; rowIdx < rowsWithDate.size(); rowIdx++) {
    Assert.assertEquals(
        rowsWithString.get(rowIdx).getTimestamp(),
        rowsWithDate.get(rowIdx).getTimestamp()
    );
  }

  // Second pass: fresh readers are created for sampling.
  final InputEntityReader stringSampleReader = createReader(file, schemaAsString, JSONPathSpec.DEFAULT);
  final InputEntityReader dateSampleReader = createReader(file, schemaAsDate, JSONPathSpec.DEFAULT);
  final List<InputRowListPlusRawValues> sampledAsString = sampleAllRows(stringSampleReader);
  final List<InputRowListPlusRawValues> sampledAsDate = sampleAllRows(dateSampleReader);

  final String expectedJson = String.join(
      "\n",
      "{",
      " \"date_as_string\" : \"2017-06-18\",",
      " \"timestamp_as_timestamp\" : 1497702471815,",
      " \"timestamp_as_string\" : \"2017-06-17 14:27:51.815\",",
      " \"idx\" : 1,",
      " \"date_as_date\" : 1497744000000",
      "}"
  );

  // The raw values of the first sampled row are compared against the same JSON for both schemas.
  final String firstRawAsString = DEFAULT_JSON_WRITER.writeValueAsString(sampledAsString.get(0).getRawValues());
  Assert.assertEquals(expectedJson, firstRawAsString);
  final String firstRawAsDate = DEFAULT_JSON_WRITER.writeValueAsString(sampledAsDate.get(0).getRawValues());
  Assert.assertEquals(expectedJson, firstRawAsDate);
}
Use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.
The class S3InputSourceTest, method testReader.
/**
 * Builds an {@link S3InputSource} from {@code PREFIXES.get(0)} and {@code EXPECTED_URIS.get(1)},
 * reads it through a CSV input format, and checks the timestamp and the {@code dim1} /
 * {@code dim2} values of every row returned. Finally verifies the expectations recorded on the
 * mocked S3 client.
 *
 * @throws IOException if creating the temporary folder, reading, or closing the row iterator fails
 */
@Test
public void testReader() throws IOException {
  EasyMock.reset(S3_CLIENT);
  expectListObjects(PREFIXES.get(0), ImmutableList.of(EXPECTED_URIS.get(0)), CONTENT);
  expectListObjects(EXPECTED_URIS.get(1), ImmutableList.of(EXPECTED_URIS.get(1)), CONTENT);
  expectGetObject(EXPECTED_URIS.get(0));
  expectGetObject(EXPECTED_URIS.get(1));
  EasyMock.replay(S3_CLIENT);

  S3InputSource inputSource = new S3InputSource(
      SERVICE,
      SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER,
      INPUT_DATA_CONFIG,
      null,
      ImmutableList.of(PREFIXES.get(0), EXPECTED_URIS.get(1)),
      null,
      null
  );

  InputRowSchema someSchema = new InputRowSchema(
      new TimestampSpec("time", "auto", null),
      new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2"))),
      ColumnsFilter.all()
  );

  InputSourceReader reader = inputSource.reader(
      someSchema,
      new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0),
      temporaryFolder.newFolder()
  );

  // The iterator is Closeable; try-with-resources releases it even if an assertion fails mid-loop.
  try (CloseableIterator<InputRow> iterator = reader.read()) {
    while (iterator.hasNext()) {
      InputRow nextRow = iterator.next();
      Assert.assertEquals(NOW, nextRow.getTimestamp());
      Assert.assertEquals("hello", nextRow.getDimension("dim1").get(0));
      Assert.assertEquals("world", nextRow.getDimension("dim2").get(0));
    }
  }

  EasyMock.verify(S3_CLIENT);
}
Use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.
The class S3InputSourceTest, method testReaderRetriesOnSdkClientExceptionButNeverSucceedsThenThrows.
/**
 * Sets the mocked S3 client up to raise an SDK client exception for {@code EXPECTED_URIS.get(0)}
 * and expects reading from the resulting {@link S3InputSource} to end in a
 * {@link SdkClientException}.
 *
 * @throws Exception the expected {@link SdkClientException}, or any other failure during setup
 */
@Test(expected = SdkClientException.class)
public void testReaderRetriesOnSdkClientExceptionButNeverSucceedsThenThrows() throws Exception {
  EasyMock.reset(S3_CLIENT);
  expectListObjects(PREFIXES.get(0), ImmutableList.of(EXPECTED_URIS.get(0)), CONTENT);
  expectSdkClientException(EXPECTED_URIS.get(0));
  EasyMock.replay(S3_CLIENT);

  S3InputSource inputSource = new S3InputSource(
      SERVICE,
      SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER,
      INPUT_DATA_CONFIG,
      null,
      ImmutableList.of(PREFIXES.get(0)),
      null,
      null,
      3 // only have three retries since they are slow
  );

  InputRowSchema someSchema = new InputRowSchema(
      new TimestampSpec("time", "auto", null),
      new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2"))),
      ColumnsFilter.all()
  );

  InputSourceReader reader = inputSource.reader(
      someSchema,
      new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0),
      temporaryFolder.newFolder()
  );

  // try-with-resources closes the iterator on the exceptional path too; the original exception
  // still propagates, and any failure from close() is attached to it as a suppressed exception.
  try (CloseableIterator<InputRow> iterator = reader.read()) {
    while (iterator.hasNext()) {
      InputRow nextRow = iterator.next();
      Assert.assertEquals(NOW, nextRow.getTimestamp());
      Assert.assertEquals("hello", nextRow.getDimension("dim1").get(0));
      Assert.assertEquals("world", nextRow.getDimension("dim2").get(0));
    }
  }

  // Not reached when the expected SdkClientException propagates out of the read above.
  EasyMock.verify(S3_CLIENT);
}
Use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.
The class InputSourceSamplerTest, method testReaderCreationException.
/**
 * Samples from an {@link InputSource} whose {@code reader(...)} always throws a
 * {@link RuntimeException}, and expects the sampler call to end in a {@link SamplerException}.
 */
@Test(expected = SamplerException.class)
public void testReaderCreationException() {
  final InputSource throwingSource = new InputSource() {
    @Override
    public InputSourceReader reader(
        InputRowSchema inputRowSchema,
        @Nullable InputFormat inputFormat,
        File temporaryDirectory
    ) {
      // Reader creation itself is the failure under test.
      throw new RuntimeException();
    }

    @Override
    public boolean needsFormat() {
      return false;
    }

    @Override
    public boolean isSplittable() {
      return false;
    }
  };

  // Only the input source is supplied; the remaining three arguments are null.
  inputSourceSampler.sample(throwingSource, null, null, null);
}
Use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.
The class StreamChunkParserTest, method testWithNullParserAndInputformatParseProperly.
/**
 * Builds a {@link StreamChunkParser} with a {@code null} first argument (the parser, per the test
 * name) and a {@link JsonInputFormat}, then hands it to {@code parseAndAssertResult}.
 */
@Test
public void testWithNullParserAndInputformatParseProperly() throws IOException {
  final JsonInputFormat inputFormat = new JsonInputFormat(
      JSONPathSpec.DEFAULT,
      Collections.emptyMap(),
      null
  );

  // Row schema: the shared timestamp spec, no dimensions, and no column filtering.
  final InputRowSchema rowSchema = new InputRowSchema(
      TIMESTAMP_SPEC,
      DimensionsSpec.EMPTY,
      ColumnsFilter.all()
  );

  final StreamChunkParser<ByteEntity> chunkParser = new StreamChunkParser<>(
      null,
      inputFormat,
      rowSchema,
      TransformSpec.NONE,
      temporaryFolder.newFolder(),
      row -> true, // predicate that accepts every row
      rowIngestionMeters,
      parseExceptionHandler
  );

  parseAndAssertResult(chunkParser);
}
Aggregations