Example 21 with InputEntityReader

Use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.

Class DelimitedReaderTest, method testCustomizeSeparator.

@Test
public void testCustomizeSeparator() throws IOException {
    final ByteEntity source = writeData(ImmutableList.of(
        "ts|name|score",
        "2019-01-01T00:00:10Z|name_1|5\t1",
        "2019-01-01T00:00:20Z|name_2|10\t2",
        "2019-01-01T00:00:30Z|name_3|15\t3"
    ));
    // Arguments: columns (empty, found from the header), listDelimiter "\t",
    // field delimiter "|", hasHeaderRow, findColumnsFromHeader, skipHeaderRows.
    final DelimitedInputFormat format = new DelimitedInputFormat(ImmutableList.of(), "\t", "|", null, true, 0);
    final InputEntityReader reader = format.createReader(INPUT_ROW_SCHEMA, source, null);
    int numResults = 0;
    try (CloseableIterator<InputRow> iterator = reader.read()) {
        while (iterator.hasNext()) {
            final InputRow row = iterator.next();
            Assert.assertEquals(DateTimes.of(StringUtils.format("2019-01-01T00:00:%02dZ", (numResults + 1) * 10)), row.getTimestamp());
            Assert.assertEquals(StringUtils.format("name_%d", numResults + 1), Iterables.getOnlyElement(row.getDimension("name")));
            Assert.assertEquals(ImmutableList.of(Integer.toString((numResults + 1) * 5), Integer.toString(numResults + 1)), row.getDimension("score"));
            numResults++;
        }
        Assert.assertEquals(3, numResults);
    }
}
Also used: InputRow(org.apache.druid.data.input.InputRow) InputEntityReader(org.apache.druid.data.input.InputEntityReader) Test(org.junit.Test)
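
For orientation, here is a self-contained sketch of the same read path outside the test harness. The schema below is an illustrative assumption, since writeData and INPUT_ROW_SCHEMA belong to the test class and are not shown here.

final ByteEntity source = new ByteEntity(StringUtils.toUtf8(
    "ts|name|score\n" + "2019-01-01T00:00:10Z|name_1|5\t1\n"
));
final InputRowSchema schema = new InputRowSchema(
    new TimestampSpec("ts", "iso", null),
    new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("name", "score"))),
    ColumnsFilter.all()
);
final DelimitedInputFormat format = new DelimitedInputFormat(ImmutableList.of(), "\t", "|", null, true, 0);
try (CloseableIterator<InputRow> iterator = format.createReader(schema, source, null).read()) {
    while (iterator.hasNext()) {
        // "5\t1" parses into the multi-value dimension ["5", "1"] via the "\t" list delimiter.
        System.out.println(iterator.next());
    }
}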

Example 22 with InputEntityReader

Use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.

Class DelimitedReaderTest, method assertResult.

private void assertResult(ByteEntity source, DelimitedInputFormat format) throws IOException {
    final InputEntityReader reader = format.createReader(INPUT_ROW_SCHEMA, source, null);
    int numResults = 0;
    try (CloseableIterator<InputRow> iterator = reader.read()) {
        while (iterator.hasNext()) {
            final InputRow row = iterator.next();
            Assert.assertEquals(DateTimes.of(StringUtils.format("2019-01-01T00:00:%02dZ", (numResults + 1) * 10)), row.getTimestamp());
            Assert.assertEquals(StringUtils.format("name_%d", numResults + 1), Iterables.getOnlyElement(row.getDimension("name")));
            Assert.assertEquals(Integer.toString((numResults + 1) * 5), Iterables.getOnlyElement(row.getDimension("score")));
            numResults++;
        }
        Assert.assertEquals(3, numResults);
    }
}
Also used: InputRow(org.apache.druid.data.input.InputRow) InputEntityReader(org.apache.druid.data.input.InputEntityReader)
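
A hypothetical caller for this helper, assuming the writeData helper from Example 21; the tab-separated input is illustrative, and this relies on DelimitedInputFormat falling back to its tab default when the delimiter is null.

@Test
public void testDefaultDelimiter() throws IOException {
    final ByteEntity source = writeData(ImmutableList.of(
        "ts\tname\tscore",
        "2019-01-01T00:00:10Z\tname_1\t5",
        "2019-01-01T00:00:20Z\tname_2\t10",
        "2019-01-01T00:00:30Z\tname_3\t15"
    ));
    // Column names come from the header row; the null delimiter is assumed to default to "\t".
    assertResult(source, new DelimitedInputFormat(ImmutableList.of(), null, null, null, true, 0));
}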

Example 23 with InputEntityReader

Use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.

Class AvroOCFReaderTest, method testSampleSerdeRaw.

@Test
public void testSampleSerdeRaw() throws Exception {
    final ObjectMapper mapper = new DefaultObjectMapper();
    mapper.setInjectableValues(new InjectableValues.Std().addValue(ObjectMapper.class, mapper));
    final InputEntityReader reader = createReader(mapper, null);
    try (CloseableIterator<InputRowListPlusRawValues> iterator = reader.sample()) {
        Assert.assertTrue(iterator.hasNext());
        final InputRowListPlusRawValues row = iterator.next();
        Assert.assertFalse(iterator.hasNext());
        final List<InputRow> inputRows = row.getInputRows();
        Assert.assertNotNull(inputRows);
        final InputRow inputRow = Iterables.getOnlyElement(inputRows);
        assertInputRow(inputRow);
        // Ensure the raw values can be serialised into JSON
        mapper.writeValueAsString(row.getRawValues());
    }
}
Also used: InputRowListPlusRawValues(org.apache.druid.data.input.InputRowListPlusRawValues) InputRow(org.apache.druid.data.input.InputRow) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) InputEntityReader(org.apache.druid.data.input.InputEntityReader) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) AvroStreamInputRowParserTest(org.apache.druid.data.input.AvroStreamInputRowParserTest) Test(org.junit.Test) AvroHadoopInputRowParserTest(org.apache.druid.data.input.AvroHadoopInputRowParserTest)
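
Unlike read(), sample() keeps the pre-parse record alongside the parsed rows. A short hedged sketch of how calling code typically inspects one sampled entry (accessor names are from InputRowListPlusRawValues, which this test already imports):

try (CloseableIterator<InputRowListPlusRawValues> iterator = reader.sample()) {
    while (iterator.hasNext()) {
        final InputRowListPlusRawValues entry = iterator.next();
        if (entry.getParseException() != null) {
            // Raw values remain available for display even when parsing failed.
            System.err.println(entry.getParseException().getMessage());
        } else {
            for (InputRow parsed : entry.getInputRows()) {
                System.out.println(parsed);
            }
        }
    }
}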

Example 24 with InputEntityReader

Use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.

Class KafkaInputFormatTest, method testTimestampFromHeader.

@Test
public void testTimestampFromHeader() throws IOException {
    Iterable<Header> sample_header_with_ts = Iterables.unmodifiableIterable(Iterables.concat(SAMPLE_HEADERS, ImmutableList.of(new Header() {

        @Override
        public String key() {
            return "headerTs";
        }

        @Override
        public byte[] value() {
            return "2021-06-24".getBytes(StandardCharsets.UTF_8);
        }
    })));
    final byte[] key = StringUtils.toUtf8("{\n" + "    \"key\": \"sampleKey\"\n" + "}");
    final byte[] payload = StringUtils.toUtf8(
        "{\n"
        + "    \"timestamp\": \"2021-06-24\",\n"
        + "    \"bar\": null,\n"
        + "    \"foo\": \"x\",\n"
        + "    \"baz\": 4,\n"
        + "    \"o\": {\n"
        + "        \"mg\": 1\n"
        + "    }\n"
        + "}"
    );
    Headers headers = new RecordHeaders(sample_header_with_ts);
    inputEntity = new KafkaRecordEntity(
        new ConsumerRecord<byte[], byte[]>("sample", 0, 0, timestamp, null, null, 0, 0, key, payload, headers)
    );
    // The row timestamp is read from the "headerTs" Kafka header, which the header
    // column prefix surfaces as "kafka.newheader.headerTs".
    final InputEntityReader reader = format.createReader(
        new InputRowSchema(
            new TimestampSpec("kafka.newheader.headerTs", "iso", null),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(
                ImmutableList.of("bar", "foo", "kafka.newheader.encoding", "kafka.newheader.kafkapkc")
            )),
            ColumnsFilter.all()
        ),
        inputEntity,
        null
    );
    final int numExpectedIterations = 1;
    try (CloseableIterator<InputRow> iterator = reader.read()) {
        int numActualIterations = 0;
        while (iterator.hasNext()) {
            final InputRow row = iterator.next();
            final MapBasedInputRow mrow = (MapBasedInputRow) row;
            // Payload verifications
            Assert.assertEquals(DateTimes.of("2021-06-24"), row.getTimestamp());
            Assert.assertEquals("x", Iterables.getOnlyElement(row.getDimension("foo")));
            Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("baz")));
            Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("root_baz")));
            Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("path_omg")));
            Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("jq_omg")));
            // Header verification
            Assert.assertEquals("application/json", Iterables.getOnlyElement(row.getDimension("kafka.newheader.encoding")));
            Assert.assertEquals("pkc-bar", Iterables.getOnlyElement(row.getDimension("kafka.newheader.kafkapkc")));
            Assert.assertEquals(String.valueOf(DateTimes.of("2021-06-24").getMillis()), Iterables.getOnlyElement(row.getDimension("kafka.newts.timestamp")));
            Assert.assertEquals("2021-06-24", Iterables.getOnlyElement(row.getDimension("kafka.newheader.headerTs")));
            Assert.assertEquals("2021-06-24", Iterables.getOnlyElement(row.getDimension("timestamp")));
            // Key verification
            Assert.assertEquals("sampleKey", Iterables.getOnlyElement(row.getDimension("kafka.newkey.key")));
            Assert.assertTrue(row.getDimension("root_baz2").isEmpty());
            Assert.assertTrue(row.getDimension("path_omg2").isEmpty());
            Assert.assertTrue(row.getDimension("jq_omg2").isEmpty());
            Assert.assertTrue(row.getDimension("jq_omg2").isEmpty());
            numActualIterations++;
        }
        Assert.assertEquals(numExpectedIterations, numActualIterations);
    }
}
Also used: KafkaRecordEntity(org.apache.druid.data.input.kafka.KafkaRecordEntity) Headers(org.apache.kafka.common.header.Headers) RecordHeaders(org.apache.kafka.common.header.internals.RecordHeaders) InputEntityReader(org.apache.druid.data.input.InputEntityReader) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) Header(org.apache.kafka.common.header.Header) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) Test(org.junit.Test)
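
The format field is created in the test's setUp, which is not shown above. A plausible reconstruction, with the header prefix, key column, timestamp column, and flattenSpec all inferred from the dimension names asserted in the test (treat every value here as an assumption):

// Assumed reconstruction; the flattenSpec is inferred from the root_baz/path_omg/jq_omg
// dimensions asserted above, including the "2" variants expected to be empty.
final JsonInputFormat payloadFormat = new JsonInputFormat(
    new JSONPathSpec(
        true,
        ImmutableList.of(
            new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz", "baz"),
            new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz2", "baz2"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"),
            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"),
            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2")
        )
    ),
    null,
    null
);
format = new KafkaInputFormat(
    new KafkaStringHeaderFormat(null),  // decode header values as UTF-8 strings
    payloadFormat,                      // key format
    payloadFormat,                      // value format
    "kafka.newheader.",                 // header prefix, e.g. kafka.newheader.headerTs
    "kafka.newkey.key",                 // key column name
    "kafka.newts.timestamp"             // record-timestamp column name
);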

Example 25 with InputEntityReader

Use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.

Class OrcReaderTest, method testOrcFile11Format.

// This test is migrated from OrcHadoopInputRowParserTest
@Test
public void testOrcFile11Format() throws IOException {
    // JSONPath expressions flatten the nested ORC structs into top-level dimensions.
    final OrcInputFormat inputFormat = new OrcInputFormat(
        new JSONPathSpec(
            true,
            ImmutableList.of(
                new JSONPathFieldSpec(JSONPathFieldType.PATH, "struct_list_struct_int", "$.middle.list[1].int1"),
                new JSONPathFieldSpec(JSONPathFieldType.PATH, "struct_list_struct_intlist", "$.middle.list[*].int1"),
                new JSONPathFieldSpec(JSONPathFieldType.PATH, "struct_list_struct_middleListLength", "$.middle.list.length()"),
                new JSONPathFieldSpec(JSONPathFieldType.PATH, "list_struct_string", "$.list[0].string1"),
                new JSONPathFieldSpec(JSONPathFieldType.PATH, "map_struct_int", "$.map.chani.int1")
            )
        ),
        null,
        new Configuration()
    );
    final InputEntityReader reader = createReader(new TimestampSpec("ts", "millis", null), new DimensionsSpec(null), inputFormat, "example/orc-file-11-format.orc");
    try (CloseableIterator<InputRow> iterator = reader.read()) {
        int actualRowCount = 0;
        // Check the first row
        Assert.assertTrue(iterator.hasNext());
        InputRow row = iterator.next();
        actualRowCount++;
        Assert.assertEquals("false", Iterables.getOnlyElement(row.getDimension("boolean1")));
        Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("byte1")));
        Assert.assertEquals("1024", Iterables.getOnlyElement(row.getDimension("short1")));
        Assert.assertEquals("65536", Iterables.getOnlyElement(row.getDimension("int1")));
        Assert.assertEquals("9223372036854775807", Iterables.getOnlyElement(row.getDimension("long1")));
        Assert.assertEquals("1.0", Iterables.getOnlyElement(row.getDimension("float1")));
        Assert.assertEquals("-15.0", Iterables.getOnlyElement(row.getDimension("double1")));
        Assert.assertEquals("AAECAwQAAA==", Iterables.getOnlyElement(row.getDimension("bytes1")));
        Assert.assertEquals("hi", Iterables.getOnlyElement(row.getDimension("string1")));
        Assert.assertEquals("1.23456786547456E7", Iterables.getOnlyElement(row.getDimension("decimal1")));
        Assert.assertEquals("2", Iterables.getOnlyElement(row.getDimension("struct_list_struct_int")));
        Assert.assertEquals(ImmutableList.of("1", "2"), row.getDimension("struct_list_struct_intlist"));
        Assert.assertEquals("good", Iterables.getOnlyElement(row.getDimension("list_struct_string")));
        Assert.assertEquals("2", Iterables.getOnlyElement(row.getDimension("struct_list_struct_middleListLength")));
        Assert.assertEquals(DateTimes.of("2000-03-12T15:00:00.0Z"), row.getTimestamp());
        while (iterator.hasNext()) {
            actualRowCount++;
            row = iterator.next();
        }
        // Check the last row
        Assert.assertEquals("true", Iterables.getOnlyElement(row.getDimension("boolean1")));
        Assert.assertEquals("100", Iterables.getOnlyElement(row.getDimension("byte1")));
        Assert.assertEquals("2048", Iterables.getOnlyElement(row.getDimension("short1")));
        Assert.assertEquals("65536", Iterables.getOnlyElement(row.getDimension("int1")));
        Assert.assertEquals("9223372036854775807", Iterables.getOnlyElement(row.getDimension("long1")));
        Assert.assertEquals("2.0", Iterables.getOnlyElement(row.getDimension("float1")));
        Assert.assertEquals("-5.0", Iterables.getOnlyElement(row.getDimension("double1")));
        Assert.assertEquals("", Iterables.getOnlyElement(row.getDimension("bytes1")));
        Assert.assertEquals("bye", Iterables.getOnlyElement(row.getDimension("string1")));
        Assert.assertEquals("1.23456786547457E7", Iterables.getOnlyElement(row.getDimension("decimal1")));
        Assert.assertEquals("2", Iterables.getOnlyElement(row.getDimension("struct_list_struct_int")));
        Assert.assertEquals(ImmutableList.of("1", "2"), row.getDimension("struct_list_struct_intlist"));
        Assert.assertEquals("cat", Iterables.getOnlyElement(row.getDimension("list_struct_string")));
        Assert.assertEquals("5", Iterables.getOnlyElement(row.getDimension("map_struct_int")));
        Assert.assertEquals(DateTimes.of("2000-03-12T15:00:01.000Z"), row.getTimestamp());
        Assert.assertEquals(7500, actualRowCount);
    }
}
Also used: Configuration(org.apache.hadoop.conf.Configuration) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) Test(org.junit.Test)
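
createReader here is a test helper whose body is not shown. A hedged sketch of what such a helper typically does, assuming a FileEntity over the local ORC file and a JUnit TemporaryFolder rule named temporaryFolder:

private InputEntityReader createReader(
    TimestampSpec timestampSpec,
    DimensionsSpec dimensionsSpec,
    InputFormat inputFormat,
    String dataFile
) throws IOException {
    final InputRowSchema schema = new InputRowSchema(timestampSpec, dimensionsSpec, ColumnsFilter.all());
    // The temporary directory is scratch space the reader may use while decoding.
    return inputFormat.createReader(schema, new FileEntity(new File(dataFile)), temporaryFolder.newFolder());
}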

Aggregations

InputEntityReader (org.apache.druid.data.input.InputEntityReader): 58
Test (org.junit.Test): 56
InputRow (org.apache.druid.data.input.InputRow): 54
InputRowSchema (org.apache.druid.data.input.InputRowSchema): 39
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 33
JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec): 33
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 31
JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec): 28
InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues): 26
MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow): 8
Configuration (org.apache.hadoop.conf.Configuration): 8
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 5
AvroHadoopInputRowParserTest (org.apache.druid.data.input.AvroHadoopInputRowParserTest): 5
AvroStreamInputRowParserTest (org.apache.druid.data.input.AvroStreamInputRowParserTest): 5
DefaultObjectMapper (org.apache.druid.jackson.DefaultObjectMapper): 5
KafkaRecordEntity (org.apache.druid.data.input.kafka.KafkaRecordEntity): 4
ConsumerRecord (org.apache.kafka.clients.consumer.ConsumerRecord): 4
Headers (org.apache.kafka.common.header.Headers): 4
RecordHeaders (org.apache.kafka.common.header.internals.RecordHeaders): 4
BigDecimal (java.math.BigDecimal): 3