use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.
the class DelimitedReaderTest method testCustomizeSeparator.
/**
 * Parses pipe-separated rows whose {@code score} column carries two tab-separated values
 * and checks that each row yields the expected timestamp, a single {@code name} value and
 * a two-element {@code score} dimension.
 *
 * <p>The format is built with {@code "\t"} and {@code "|"} as its two delimiter arguments;
 * presumably these are the list delimiter and the column delimiter respectively — confirm
 * against the {@code DelimitedInputFormat} constructor.
 *
 * @throws IOException if reading from or closing the row iterator fails
 */
@Test
public void testCustomizeSeparator() throws IOException {
  final ByteEntity entity = writeData(
      ImmutableList.of(
          "ts|name|score",
          "2019-01-01T00:00:10Z|name_1|5\t1",
          "2019-01-01T00:00:20Z|name_2|10\t2",
          "2019-01-01T00:00:30Z|name_3|15\t3"
      )
  );
  final DelimitedInputFormat inputFormat = new DelimitedInputFormat(ImmutableList.of(), "\t", "|", null, true, 0);
  final InputEntityReader entityReader = inputFormat.createReader(INPUT_ROW_SCHEMA, entity, null);

  int rowsSeen = 0;
  try (CloseableIterator<InputRow> rows = entityReader.read()) {
    for (; rows.hasNext(); rowsSeen++) {
      final InputRow current = rows.next();
      // 1-based position of the row; every expected value is derived from it.
      final int ordinal = rowsSeen + 1;

      Assert.assertEquals(
          DateTimes.of(StringUtils.format("2019-01-01T00:00:%02dZ", ordinal * 10)),
          current.getTimestamp()
      );
      Assert.assertEquals(
          StringUtils.format("name_%d", ordinal),
          Iterables.getOnlyElement(current.getDimension("name"))
      );
      // The score cell "<5k>\t<k>" is expected to surface as two separate values.
      Assert.assertEquals(
          ImmutableList.of(Integer.toString(ordinal * 5), Integer.toString(ordinal)),
          current.getDimension("score")
      );
    }
    Assert.assertEquals(3, rowsSeen);
  }
}
use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.
the class DelimitedReaderTest method assertResult.
/**
 * Reads every row of {@code source} through {@code format} and asserts the fixed
 * three-row expectation: the k-th row (1-based) must have timestamp
 * {@code 2019-01-01T00:00:(10k)Z}, the single name value {@code name_k} and the single
 * score value {@code 5k}.
 *
 * @param source entity holding the delimited data to parse
 * @param format input format used to create the reader
 * @throws IOException if reading from or closing the row iterator fails
 */
private void assertResult(ByteEntity source, DelimitedInputFormat format) throws IOException {
  final InputEntityReader entityReader = format.createReader(INPUT_ROW_SCHEMA, source, null);
  try (CloseableIterator<InputRow> rows = entityReader.read()) {
    int rowsSeen = 0;
    while (rows.hasNext()) {
      final InputRow current = rows.next();
      // Count first so the counter doubles as the 1-based ordinal of the current row.
      rowsSeen++;

      Assert.assertEquals(
          DateTimes.of(StringUtils.format("2019-01-01T00:00:%02dZ", rowsSeen * 10)),
          current.getTimestamp()
      );
      Assert.assertEquals(
          StringUtils.format("name_%d", rowsSeen),
          Iterables.getOnlyElement(current.getDimension("name"))
      );
      Assert.assertEquals(
          Integer.toString(rowsSeen * 5),
          Iterables.getOnlyElement(current.getDimension("score"))
      );
    }
    Assert.assertEquals(3, rowsSeen);
  }
}
use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.
the class AvroOCFReaderTest method testSampleSerdeRaw.
/**
 * Samples the reader, expects exactly one sampled entry containing exactly one input row,
 * validates that row via {@code assertInputRow}, and finally checks that the entry's raw
 * values can be written out as JSON without throwing.
 *
 * @throws Exception if creating the reader, sampling, or JSON serialisation fails
 */
@Test
public void testSampleSerdeRaw() throws Exception {
  final ObjectMapper jsonMapper = new DefaultObjectMapper();
  // Register the mapper as an injectable value of itself.
  final InjectableValues injectables = new InjectableValues.Std().addValue(ObjectMapper.class, jsonMapper);
  jsonMapper.setInjectableValues(injectables);

  final InputEntityReader entityReader = createReader(jsonMapper, null);
  try (CloseableIterator<InputRowListPlusRawValues> samples = entityReader.sample()) {
    Assert.assertTrue(samples.hasNext());
    final InputRowListPlusRawValues sampled = samples.next();
    Assert.assertFalse(samples.hasNext());

    final List<InputRow> parsedRows = sampled.getInputRows();
    Assert.assertNotNull(parsedRows);
    assertInputRow(Iterables.getOnlyElement(parsedRows));

    // Ensure the raw values can be serialised into JSON
    jsonMapper.writeValueAsString(sampled.getRawValues());
  }
}
use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.
the class KafkaInputFormatTest method testTimestampFromHeader.
/**
 * Verifies that a row's timestamp can be sourced from a Kafka record header.
 *
 * <p>A {@code headerTs} header with value {@code 2021-06-24} is appended to the shared
 * sample headers, and the row schema's timestamp column is set to
 * {@code kafka.newheader.headerTs}. The single resulting row is then checked for its
 * timestamp, payload fields, header-derived fields and key-derived field.
 *
 * <p>NOTE(review): the {@code kafka.newheader.}, {@code kafka.newts.} and
 * {@code kafka.newkey.} prefixes, as well as the {@code root_*}, {@code path_*} and
 * {@code jq_*} fields, are presumably configured on the shared {@code format} fixture
 * outside this method — confirm against the test set-up.
 *
 * @throws IOException if reading from or closing the row iterator fails
 */
@Test
public void testTimestampFromHeader() throws IOException {
  // Shared sample headers plus one extra header that carries the timestamp.
  final Iterable<Header> sampleHeadersWithTs = Iterables.unmodifiableIterable(
      Iterables.concat(
          SAMPLE_HEADERS,
          ImmutableList.of(
              new Header() {
                @Override
                public String key() {
                  return "headerTs";
                }

                @Override
                public byte[] value() {
                  return "2021-06-24".getBytes(StandardCharsets.UTF_8);
                }
              }
          )
      )
  );
  final byte[] key = StringUtils.toUtf8("{\n" + " \"key\": \"sampleKey\"\n" + "}");
  final byte[] payload = StringUtils.toUtf8("{\n" + " \"timestamp\": \"2021-06-24\",\n" + " \"bar\": null,\n" + " \"foo\": \"x\",\n" + " \"baz\": 4,\n" + " \"o\": {\n" + " \"mg\": 1\n" + " }\n" + "}");
  final Headers headers = new RecordHeaders(sampleHeadersWithTs);
  inputEntity = new KafkaRecordEntity(
      new ConsumerRecord<byte[], byte[]>("sample", 0, 0, timestamp, null, null, 0, 0, key, payload, headers)
  );

  final InputEntityReader reader = format.createReader(
      new InputRowSchema(
          // The timestamp is read from the header-derived column, not from the payload.
          new TimestampSpec("kafka.newheader.headerTs", "iso", null),
          new DimensionsSpec(
              DimensionsSpec.getDefaultSchemas(
                  ImmutableList.of("bar", "foo", "kafka.newheader.encoding", "kafka.newheader.kafkapkc")
              )
          ),
          ColumnsFilter.all()
      ),
      inputEntity,
      null
  );

  final int numExpectedIterations = 1;
  try (CloseableIterator<InputRow> iterator = reader.read()) {
    int numActualIterations = 0;
    while (iterator.hasNext()) {
      final InputRow row = iterator.next();
      // The original cast to MapBasedInputRow was never used; keep the type guarantee
      // as an explicit assertion instead of an unused local.
      Assert.assertTrue(row instanceof MapBasedInputRow);

      // Payload verifications
      Assert.assertEquals(DateTimes.of("2021-06-24"), row.getTimestamp());
      Assert.assertEquals("x", Iterables.getOnlyElement(row.getDimension("foo")));
      Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("baz")));
      Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("root_baz")));
      Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("path_omg")));
      Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("jq_omg")));

      // Header verification
      Assert.assertEquals("application/json", Iterables.getOnlyElement(row.getDimension("kafka.newheader.encoding")));
      Assert.assertEquals("pkc-bar", Iterables.getOnlyElement(row.getDimension("kafka.newheader.kafkapkc")));
      Assert.assertEquals(
          String.valueOf(DateTimes.of("2021-06-24").getMillis()),
          Iterables.getOnlyElement(row.getDimension("kafka.newts.timestamp"))
      );
      Assert.assertEquals("2021-06-24", Iterables.getOnlyElement(row.getDimension("kafka.newheader.headerTs")));
      Assert.assertEquals("2021-06-24", Iterables.getOnlyElement(row.getDimension("timestamp")));

      // Key verification
      Assert.assertEquals("sampleKey", Iterables.getOnlyElement(row.getDimension("kafka.newkey.key")));

      // Fields that must not produce any value (each checked once).
      Assert.assertTrue(row.getDimension("root_baz2").isEmpty());
      Assert.assertTrue(row.getDimension("path_omg2").isEmpty());
      Assert.assertTrue(row.getDimension("jq_omg2").isEmpty());

      numActualIterations++;
    }
    Assert.assertEquals(numExpectedIterations, numActualIterations);
  }
}
use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.
the class OrcReaderTest method testOrcFile11Format.
/**
 * Reads {@code example/orc-file-11-format.orc} with five JSONPath flatten fields and checks
 * the contents of the first and the last row as well as the total row count (7500).
 *
 * <p>This test is migrated from OrcHadoopInputRowParserTest.
 *
 * @throws IOException if reading from or closing the row iterator fails
 */
@Test
public void testOrcFile11Format() throws IOException {
  final JSONPathSpec flattenSpec = new JSONPathSpec(
      true,
      ImmutableList.of(
          new JSONPathFieldSpec(JSONPathFieldType.PATH, "struct_list_struct_int", "$.middle.list[1].int1"),
          new JSONPathFieldSpec(JSONPathFieldType.PATH, "struct_list_struct_intlist", "$.middle.list[*].int1"),
          new JSONPathFieldSpec(JSONPathFieldType.PATH, "struct_list_struct_middleListLength", "$.middle.list.length()"),
          new JSONPathFieldSpec(JSONPathFieldType.PATH, "list_struct_string", "$.list[0].string1"),
          new JSONPathFieldSpec(JSONPathFieldType.PATH, "map_struct_int", "$.map.chani.int1")
      )
  );
  final OrcInputFormat orcFormat = new OrcInputFormat(flattenSpec, null, new Configuration());
  final InputEntityReader entityReader = createReader(
      new TimestampSpec("ts", "millis", null),
      new DimensionsSpec(null),
      orcFormat,
      "example/orc-file-11-format.orc"
  );

  try (CloseableIterator<InputRow> rows = entityReader.read()) {
    // Check the first row
    Assert.assertTrue(rows.hasNext());
    final InputRow firstRow = rows.next();
    int rowsRead = 1;

    Assert.assertEquals("false", Iterables.getOnlyElement(firstRow.getDimension("boolean1")));
    Assert.assertEquals("1", Iterables.getOnlyElement(firstRow.getDimension("byte1")));
    Assert.assertEquals("1024", Iterables.getOnlyElement(firstRow.getDimension("short1")));
    Assert.assertEquals("65536", Iterables.getOnlyElement(firstRow.getDimension("int1")));
    Assert.assertEquals("9223372036854775807", Iterables.getOnlyElement(firstRow.getDimension("long1")));
    Assert.assertEquals("1.0", Iterables.getOnlyElement(firstRow.getDimension("float1")));
    Assert.assertEquals("-15.0", Iterables.getOnlyElement(firstRow.getDimension("double1")));
    Assert.assertEquals("AAECAwQAAA==", Iterables.getOnlyElement(firstRow.getDimension("bytes1")));
    Assert.assertEquals("hi", Iterables.getOnlyElement(firstRow.getDimension("string1")));
    Assert.assertEquals("1.23456786547456E7", Iterables.getOnlyElement(firstRow.getDimension("decimal1")));
    Assert.assertEquals("2", Iterables.getOnlyElement(firstRow.getDimension("struct_list_struct_int")));
    Assert.assertEquals(ImmutableList.of("1", "2"), firstRow.getDimension("struct_list_struct_intlist"));
    Assert.assertEquals("good", Iterables.getOnlyElement(firstRow.getDimension("list_struct_string")));
    Assert.assertEquals("2", Iterables.getOnlyElement(firstRow.getDimension("struct_list_struct_middleListLength")));
    Assert.assertEquals(DateTimes.of("2000-03-12T15:00:00.0Z"), firstRow.getTimestamp());

    // Drain the iterator, remembering the most recent row and counting as we go.
    InputRow lastRow = firstRow;
    for (; rows.hasNext(); rowsRead++) {
      lastRow = rows.next();
    }

    // Check the last row
    Assert.assertEquals("true", Iterables.getOnlyElement(lastRow.getDimension("boolean1")));
    Assert.assertEquals("100", Iterables.getOnlyElement(lastRow.getDimension("byte1")));
    Assert.assertEquals("2048", Iterables.getOnlyElement(lastRow.getDimension("short1")));
    Assert.assertEquals("65536", Iterables.getOnlyElement(lastRow.getDimension("int1")));
    Assert.assertEquals("9223372036854775807", Iterables.getOnlyElement(lastRow.getDimension("long1")));
    Assert.assertEquals("2.0", Iterables.getOnlyElement(lastRow.getDimension("float1")));
    Assert.assertEquals("-5.0", Iterables.getOnlyElement(lastRow.getDimension("double1")));
    Assert.assertEquals("", Iterables.getOnlyElement(lastRow.getDimension("bytes1")));
    Assert.assertEquals("bye", Iterables.getOnlyElement(lastRow.getDimension("string1")));
    Assert.assertEquals("1.23456786547457E7", Iterables.getOnlyElement(lastRow.getDimension("decimal1")));
    Assert.assertEquals("2", Iterables.getOnlyElement(lastRow.getDimension("struct_list_struct_int")));
    Assert.assertEquals(ImmutableList.of("1", "2"), lastRow.getDimension("struct_list_struct_intlist"));
    Assert.assertEquals("cat", Iterables.getOnlyElement(lastRow.getDimension("list_struct_string")));
    Assert.assertEquals("5", Iterables.getOnlyElement(lastRow.getDimension("map_struct_int")));
    Assert.assertEquals(DateTimes.of("2000-03-12T15:00:01.000Z"), lastRow.getTimestamp());

    Assert.assertEquals(7500, rowsRead);
  }
}
Aggregations