
Example 56 with InputEntityReader

Use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.

From the class KafkaInputFormatTest, the method testWithHeaderKeyAndValue:

@Test
public void testWithHeaderKeyAndValue() throws IOException {
    final byte[] key = StringUtils.toUtf8("{\n" + "    \"key\": \"sampleKey\"\n" + "}");
    final byte[] payload = StringUtils.toUtf8("{\n" + "    \"timestamp\": \"2021-06-25\",\n" + "    \"bar\": null,\n" + "    \"foo\": \"x\",\n" + "    \"baz\": 4,\n" + "    \"o\": {\n" + "        \"mg\": 1\n" + "    }\n" + "}");
    Headers headers = new RecordHeaders(SAMPLE_HEADERS);
    inputEntity = new KafkaRecordEntity(new ConsumerRecord<byte[], byte[]>("sample", 0, 0, timestamp, null, null, 0, 0, key, payload, headers));
    final InputEntityReader reader = format.createReader(
        new InputRowSchema(
            new TimestampSpec("timestamp", "iso", null),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of(
                "bar", "foo", "kafka.newheader.encoding", "kafka.newheader.kafkapkc", "kafka.newts.timestamp"))),
            ColumnsFilter.all()),
        inputEntity,
        null);
    final int numExpectedIterations = 1;
    try (CloseableIterator<InputRow> iterator = reader.read()) {
        int numActualIterations = 0;
        while (iterator.hasNext()) {
            final InputRow row = iterator.next();
            // Payload verifications
            Assert.assertEquals(DateTimes.of("2021-06-25"), row.getTimestamp());
            Assert.assertEquals("x", Iterables.getOnlyElement(row.getDimension("foo")));
            Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("baz")));
            Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("root_baz")));
            Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("path_omg")));
            Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("jq_omg")));
            // Header verification
            Assert.assertEquals("application/json", Iterables.getOnlyElement(row.getDimension("kafka.newheader.encoding")));
            Assert.assertEquals("pkc-bar", Iterables.getOnlyElement(row.getDimension("kafka.newheader.kafkapkc")));
            Assert.assertEquals(String.valueOf(DateTimes.of("2021-06-24").getMillis()), Iterables.getOnlyElement(row.getDimension("kafka.newts.timestamp")));
            Assert.assertEquals("2021-06-25", Iterables.getOnlyElement(row.getDimension("timestamp")));
            // Key verification
            Assert.assertEquals("sampleKey", Iterables.getOnlyElement(row.getDimension("kafka.newkey.key")));
            Assert.assertTrue(row.getDimension("root_baz2").isEmpty());
            Assert.assertTrue(row.getDimension("path_omg2").isEmpty());
            Assert.assertTrue(row.getDimension("jq_omg2").isEmpty());
            numActualIterations++;
        }
        Assert.assertEquals(numExpectedIterations, numActualIterations);
    }
}
Also used: KafkaRecordEntity(org.apache.druid.data.input.kafka.KafkaRecordEntity) RecordHeaders(org.apache.kafka.common.header.internals.RecordHeaders) Headers(org.apache.kafka.common.header.Headers) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) InputRowSchema(org.apache.druid.data.input.InputRowSchema) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) Test(org.junit.Test)
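
This snippet (and the two that follow) relies on fixture fields that the listing does not include: SAMPLE_HEADERS, timestamp and inputEntity. Below is a plausible reconstruction, inferred from the assertions above (the header values "application/json" and "pkc-bar", and the record timestamp 2021-06-24); the actual declarations in KafkaInputFormatTest may differ.

import com.google.common.collect.ImmutableList;
import org.apache.druid.data.input.kafka.KafkaRecordEntity;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.kafka.common.header.Header;
import org.apache.kafka.common.header.internals.RecordHeader;

// Hypothetical fixture declarations; values inferred from the assertions above.
private static final Iterable<Header> SAMPLE_HEADERS = ImmutableList.of(
    new RecordHeader("encoding", StringUtils.toUtf8("application/json")),
    new RecordHeader("kafkapkc", StringUtils.toUtf8("pkc-bar")));

// Kafka record timestamp, surfaced by the format as "kafka.newts.timestamp"
private static final long timestamp = DateTimes.of("2021-06-24").getMillis();

private KafkaRecordEntity inputEntity;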

Example 57 with InputEntityReader

Use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.

From the class KafkaInputFormatTest, the method testWithOutKeyAndHeaderSpecs:

@Test
public void testWithOutKeyAndHeaderSpecs() throws IOException {
    final byte[] payload = StringUtils.toUtf8("{\n" + "    \"timestamp\": \"2021-06-24\",\n" + "    \"bar\": null,\n" + "    \"foo\": \"x\",\n" + "    \"baz\": 4,\n" + "    \"o\": {\n" + "        \"mg\": 1\n" + "    }\n" + "}");
    Headers headers = new RecordHeaders(SAMPLE_HEADERS);
    inputEntity = new KafkaRecordEntity(new ConsumerRecord<byte[], byte[]>("sample", 0, 0, timestamp, null, null, 0, 0, null, payload, headers));
    KafkaInputFormat localFormat = new KafkaInputFormat(
        null,
        null,
        // Value Format
        new JsonInputFormat(
            new JSONPathSpec(true, ImmutableList.of(
                new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz", "baz"),
                new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz2", "baz2"),
                new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"),
                new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"),
                new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"),
                new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2"))),
            null,
            null,
            // make sure JsonReader is used
            false),
        "kafka.newheader.", "kafka.newkey.", "kafka.newts.");
    final InputEntityReader reader = localFormat.createReader(
        new InputRowSchema(
            new TimestampSpec("timestamp", "iso", null),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of(
                "bar", "foo", "kafka.newts.timestamp"))),
            ColumnsFilter.all()),
        inputEntity,
        null);
    final int numExpectedIterations = 1;
    try (CloseableIterator<InputRow> iterator = reader.read()) {
        int numActualIterations = 0;
        while (iterator.hasNext()) {
            final InputRow row = iterator.next();
            // Key verification
            Assert.assertTrue(row.getDimension("kafka.newkey.key").isEmpty());
            Assert.assertEquals("x", Iterables.getOnlyElement(row.getDimension("foo")));
            Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("baz")));
            Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("root_baz")));
            Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("path_omg")));
            Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("jq_omg")));
            numActualIterations++;
        }
        Assert.assertEquals(numExpectedIterations, numActualIterations);
    }
}
Also used: KafkaRecordEntity(org.apache.druid.data.input.kafka.KafkaRecordEntity) Headers(org.apache.kafka.common.header.Headers) RecordHeaders(org.apache.kafka.common.header.internals.RecordHeaders) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) Test(org.junit.Test)
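
Since testWithOutKeyAndHeaderSpecs exercises only the value format, the same payload can be read without any Kafka machinery. The following is a minimal sketch of the same InputEntityReader API driven over a plain ByteEntity (org.apache.druid.data.input.impl.ByteEntity); the schema and sample JSON here are illustrative, not taken from the test.

final InputRowSchema schema = new InputRowSchema(
    new TimestampSpec("timestamp", "iso", null),
    new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo"))),
    ColumnsFilter.all());
final JsonInputFormat json = new JsonInputFormat(
    new JSONPathSpec(true, ImmutableList.of()),
    null,
    null,
    // make sure JsonReader is used, as in the tests above
    false);
final InputEntityReader reader = json.createReader(
    schema,
    new ByteEntity(StringUtils.toUtf8("{\"timestamp\": \"2021-06-24\", \"bar\": null, \"foo\": \"x\"}")),
    null);
try (CloseableIterator<InputRow> iterator = reader.read()) {
    while (iterator.hasNext()) {
        // One InputRow per parsed JSON object; dimensions missing from the input come back as empty lists.
        System.out.println(iterator.next());
    }
}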

Example 58 with InputEntityReader

Use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.

From the class KafkaInputFormatTest, the method testWithOutKey:

// Headers cannot be null, so testing only the no-key use case!
@Test
public void testWithOutKey() throws IOException {
    final byte[] payload = StringUtils.toUtf8("{\n" + "    \"timestamp\": \"2021-06-24\",\n" + "    \"bar\": null,\n" + "    \"foo\": \"x\",\n" + "    \"baz\": 4,\n" + "    \"o\": {\n" + "        \"mg\": 1\n" + "    }\n" + "}");
    Headers headers = new RecordHeaders(SAMPLE_HEADERS);
    inputEntity = new KafkaRecordEntity(new ConsumerRecord<byte[], byte[]>("sample", 0, 0, timestamp, null, null, 0, 0, null, payload, headers));
    final InputEntityReader reader = format.createReader(
        new InputRowSchema(
            new TimestampSpec("timestamp", "iso", null),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of(
                "bar", "foo", "kafka.newheader.encoding", "kafka.newheader.kafkapkc", "kafka.newts.timestamp"))),
            ColumnsFilter.all()),
        inputEntity,
        null);
    final int numExpectedIterations = 1;
    try (CloseableIterator<InputRow> iterator = reader.read()) {
        int numActualIterations = 0;
        while (iterator.hasNext()) {
            final InputRow row = iterator.next();
            // Key verification
            Assert.assertTrue(row.getDimension("kafka.newkey.key").isEmpty());
            numActualIterations++;
        }
        Assert.assertEquals(numExpectedIterations, numActualIterations);
    }
}
Also used: KafkaRecordEntity(org.apache.druid.data.input.kafka.KafkaRecordEntity) RecordHeaders(org.apache.kafka.common.header.internals.RecordHeaders) Headers(org.apache.kafka.common.header.Headers) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) InputRowSchema(org.apache.druid.data.input.InputRowSchema) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) Test(org.junit.Test)
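
Examples 56 and 58 also depend on a shared format field, presumably built in the test's @Before method. Its value format can be read off the localFormat constructed in example 57, and the header and key assertions imply non-null header and key formats plus the three column prefixes. The sketch below is hedged accordingly: the header format class (KafkaStringHeaderFormat) and its constructor argument are assumptions, not code from this listing.

// Plausible reconstruction of the shared "format" fixture; the real setUp() may differ.
private KafkaInputFormat format;

@Before
public void setUp() {
    format = new KafkaInputFormat(
        // Header Format (assumed: the string header format from the same kafka-input extension)
        new KafkaStringHeaderFormat(null),
        // Key Format: plain JSON; the parsed key surfaces as "kafka.newkey.key"
        new JsonInputFormat(new JSONPathSpec(true, ImmutableList.of()), null, null, false),
        // Value Format: same flattening spec as the localFormat in example 57
        new JsonInputFormat(
            new JSONPathSpec(true, ImmutableList.of(
                new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz", "baz"),
                new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz2", "baz2"),
                new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"),
                new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"),
                new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"),
                new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2"))),
            null, null, false),
        "kafka.newheader.", "kafka.newkey.", "kafka.newts.");
}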

Aggregations

InputEntityReader (org.apache.druid.data.input.InputEntityReader): 58
Test (org.junit.Test): 56
InputRow (org.apache.druid.data.input.InputRow): 54
InputRowSchema (org.apache.druid.data.input.InputRowSchema): 39
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 33
JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec): 33
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 31
JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec): 28
InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues): 26
MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow): 8
Configuration (org.apache.hadoop.conf.Configuration): 8
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 5
AvroHadoopInputRowParserTest (org.apache.druid.data.input.AvroHadoopInputRowParserTest): 5
AvroStreamInputRowParserTest (org.apache.druid.data.input.AvroStreamInputRowParserTest): 5
DefaultObjectMapper (org.apache.druid.jackson.DefaultObjectMapper): 5
KafkaRecordEntity (org.apache.druid.data.input.kafka.KafkaRecordEntity): 4
ConsumerRecord (org.apache.kafka.clients.consumer.ConsumerRecord): 4
Headers (org.apache.kafka.common.header.Headers): 4
RecordHeaders (org.apache.kafka.common.header.internals.RecordHeaders): 4
BigDecimal (java.math.BigDecimal): 3