Search in sources :

Example 1 with KafkaRecordEntity

use of org.apache.druid.data.input.kafka.KafkaRecordEntity in project druid by druid-io.

the class KafkaInputFormatTest method testTimestampFromHeader.

/**
 * Reads a single Kafka record whose timestamp spec points at a header-derived column
 * ({@code kafka.newheader.headerTs}) and verifies the resulting row.
 *
 * <p>The record is built from {@code SAMPLE_HEADERS} plus one extra {@code headerTs} header, a JSON key
 * and a JSON payload. Exactly one row is expected; payload, header, Kafka-timestamp and key columns are
 * all checked on it.
 *
 * <p>NOTE(review): the {@code headerTs} header and the payload's {@code timestamp} field both carry
 * {@code 2021-06-24}, so the {@code row.getTimestamp()} assertion alone cannot distinguish which of the
 * two was used. Consider giving them different dates.
 *
 * @throws IOException declared because the row iterator is read and closed inside this test
 */
@Test
public void testTimestampFromHeader() throws IOException {
    // SAMPLE_HEADERS plus one additional "headerTs" header, which the timestamp spec below refers to.
    final Iterable<Header> headersWithTimestamp = Iterables.unmodifiableIterable(Iterables.concat(SAMPLE_HEADERS, ImmutableList.of(new Header() {

        @Override
        public String key() {
            return "headerTs";
        }

        @Override
        public byte[] value() {
            return "2021-06-24".getBytes(StandardCharsets.UTF_8);
        }
    })));
    final byte[] key = StringUtils.toUtf8("{\n" + "    \"key\": \"sampleKey\"\n" + "}");
    final byte[] payload = StringUtils.toUtf8("{\n" + "    \"timestamp\": \"2021-06-24\",\n" + "    \"bar\": null,\n" + "    \"foo\": \"x\",\n" + "    \"baz\": 4,\n" + "    \"o\": {\n" + "        \"mg\": 1\n" + "    }\n" + "}");
    final Headers headers = new RecordHeaders(headersWithTimestamp);
    inputEntity = new KafkaRecordEntity(new ConsumerRecord<byte[], byte[]>("sample", 0, 0, timestamp, null, null, 0, 0, key, payload, headers));
    final InputEntityReader reader = format.createReader(new InputRowSchema(new TimestampSpec("kafka.newheader.headerTs", "iso", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo", "kafka.newheader.encoding", "kafka.newheader.kafkapkc"))), ColumnsFilter.all()), inputEntity, null);
    final int numExpectedIterations = 1;
    try (CloseableIterator<InputRow> iterator = reader.read()) {
        int numActualIterations = 0;
        while (iterator.hasNext()) {
            final InputRow row = iterator.next();
            // Previously enforced implicitly by an unused cast; kept as an explicit check.
            Assert.assertTrue("row should be a MapBasedInputRow", row instanceof MapBasedInputRow);
            // Payload verifications
            Assert.assertEquals(DateTimes.of("2021-06-24"), row.getTimestamp());
            Assert.assertEquals("x", Iterables.getOnlyElement(row.getDimension("foo")));
            Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("baz")));
            Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("root_baz")));
            Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("path_omg")));
            Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("jq_omg")));
            // Header verification
            Assert.assertEquals("application/json", Iterables.getOnlyElement(row.getDimension("kafka.newheader.encoding")));
            Assert.assertEquals("pkc-bar", Iterables.getOnlyElement(row.getDimension("kafka.newheader.kafkapkc")));
            Assert.assertEquals(String.valueOf(DateTimes.of("2021-06-24").getMillis()), Iterables.getOnlyElement(row.getDimension("kafka.newts.timestamp")));
            Assert.assertEquals("2021-06-24", Iterables.getOnlyElement(row.getDimension("kafka.newheader.headerTs")));
            Assert.assertEquals("2021-06-24", Iterables.getOnlyElement(row.getDimension("timestamp")));
            // Key verification
            Assert.assertEquals("sampleKey", Iterables.getOnlyElement(row.getDimension("kafka.newkey.key")));
            // Fields that are absent from the payload must come back empty.
            Assert.assertTrue(row.getDimension("root_baz2").isEmpty());
            Assert.assertTrue(row.getDimension("path_omg2").isEmpty());
            Assert.assertTrue(row.getDimension("jq_omg2").isEmpty());
            numActualIterations++;
        }
        Assert.assertEquals(numExpectedIterations, numActualIterations);
    }
}
Also used : KafkaRecordEntity(org.apache.druid.data.input.kafka.KafkaRecordEntity) Headers(org.apache.kafka.common.header.Headers) RecordHeaders(org.apache.kafka.common.header.internals.RecordHeaders) InputEntityReader(org.apache.druid.data.input.InputEntityReader) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) Header(org.apache.kafka.common.header.Header) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) Test(org.junit.Test)

Example 2 with KafkaRecordEntity

use of org.apache.druid.data.input.kafka.KafkaRecordEntity in project druid by druid-io.

the class KafkaInputFormat method createReader.

/**
 * Builds a {@code KafkaInputReader} for one Kafka record, wiring in optional header, key and value readers.
 *
 * <p>The key and value readers are created with a schema that keeps the caller's dimensions spec and
 * columns filter but substitutes {@code dummyTimestampSpec} for the timestamp spec; the returned
 * {@code KafkaInputReader} itself receives the caller's original schema.
 *
 * <ul>
 *   <li>header reader: {@code null} when no header format is configured</li>
 *   <li>key reader: {@code null} when no key format is configured or the record has no key</li>
 *   <li>value reader: {@code null} when the record has no value</li>
 * </ul>
 *
 * @param inputRowSchema schema requested by the caller
 * @param source the entity to read; it is cast to {@code KafkaRecordEntity}
 * @param temporaryDirectory passed through to the key and value format readers
 * @return a reader combining the header, key and value readers for this record
 * @throws ClassCastException if {@code source} is not a {@code KafkaRecordEntity}
 */
@Override
public InputEntityReader createReader(InputRowSchema inputRowSchema, InputEntity source, File temporaryDirectory) {
    final KafkaRecordEntity kafkaEntity = (KafkaRecordEntity) source;
    // Same dimensions and column filter as the caller's schema, but with the placeholder timestamp spec.
    final InputRowSchema delegateSchema = new InputRowSchema(
        dummyTimestampSpec,
        inputRowSchema.getDimensionsSpec(),
        inputRowSchema.getColumnsFilter()
    );
    return new KafkaInputReader(
        inputRowSchema,
        kafkaEntity,
        (headerFormat != null)
            ? headerFormat.createReader(kafkaEntity.getRecord().headers(), headerColumnPrefix)
            : null,
        (keyFormat != null && kafkaEntity.getRecord().key() != null)
            ? keyFormat.createReader(delegateSchema, new ByteEntity(kafkaEntity.getRecord().key()), temporaryDirectory)
            : null,
        (kafkaEntity.getRecord().value() != null)
            ? valueFormat.createReader(delegateSchema, source, temporaryDirectory)
            : null,
        keyColumnName,
        timestampColumnName
    );
}
Also used : KafkaRecordEntity(org.apache.druid.data.input.kafka.KafkaRecordEntity) ByteEntity(org.apache.druid.data.input.impl.ByteEntity) InputRowSchema(org.apache.druid.data.input.InputRowSchema)

Example 3 with KafkaRecordEntity

use of org.apache.druid.data.input.kafka.KafkaRecordEntity in project druid by druid-io.

the class KafkaStringHeaderFormatTest method testIllegalHeaderCharacter.

/**
 * Verifies what a {@code KafkaStringHeaderFormat} created with {@code "US-ASCII"} reports for a header
 * whose source text contained characters that US-ASCII cannot represent.
 *
 * <p>Two headers are placed on the record ({@code encoding} and {@code kafkapkc}); the header reader is
 * expected to return them, prefixed with {@code test.kafka.header.}, with each unmappable character of the
 * {@code encoding} value showing up as {@code ?}.
 */
@Test
public void testIllegalHeaderCharacter() {
    // String.getBytes(US_ASCII) substitutes the charset's replacement byte for every unmappable
    // character, so the bytes stored in this header already contain '?' in place of each '€'.
    final Header encodingHeader = new Header() {

        @Override
        public String key() {
            return "encoding";
        }

        @Override
        public byte[] value() {
            return "€pplic€tion/json".getBytes(StandardCharsets.US_ASCII);
        }
    };
    final Header pkcHeader = new Header() {

        @Override
        public String key() {
            return "kafkapkc";
        }

        @Override
        public byte[] value() {
            return "pkc-bar".getBytes(StandardCharsets.US_ASCII);
        }
    };
    final Iterable<Header> rawHeaders = ImmutableList.of(encodingHeader, pkcHeader);
    final String prefix = "test.kafka.header.";
    final Headers recordHeaders = new RecordHeaders(rawHeaders);
    inputEntity = new KafkaRecordEntity(
        new ConsumerRecord<byte[], byte[]>(
            "sample", 0, 0, timestamp, null, null, 0, 0,
            null,
            "sampleValue".getBytes(StandardCharsets.UTF_8),
            recordHeaders
        )
    );

    final KafkaHeaderFormat asciiFormat = new KafkaStringHeaderFormat("US-ASCII");
    final KafkaHeaderReader headerReader = asciiFormat.createReader(inputEntity.getRecord().headers(), prefix);
    final List<Pair<String, Object>> actual = headerReader.read();

    final List<Pair<String, Object>> expected = Arrays.asList(
        Pair.of("test.kafka.header.encoding", "?pplic?tion/json"),
        Pair.of("test.kafka.header.kafkapkc", "pkc-bar")
    );
    Assert.assertEquals(expected, actual);
}
Also used : KafkaRecordEntity(org.apache.druid.data.input.kafka.KafkaRecordEntity) Headers(org.apache.kafka.common.header.Headers) RecordHeaders(org.apache.kafka.common.header.internals.RecordHeaders) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) Header(org.apache.kafka.common.header.Header) Pair(org.apache.druid.java.util.common.Pair) Test(org.junit.Test)

Example 4 with KafkaRecordEntity

use of org.apache.druid.data.input.kafka.KafkaRecordEntity in project druid by druid-io.

the class KafkaInputFormatTest method testWithHeaderKeyAndValue.

/**
 * Reads one Kafka record that carries {@code SAMPLE_HEADERS}, a JSON key and a JSON payload, using the
 * payload's {@code timestamp} field as the row timestamp, and verifies the single resulting row.
 *
 * <p>Checked on the row: payload fields (including the flattened {@code root_}/{@code path_}/{@code jq_}
 * columns), header-derived columns, the Kafka record timestamp column and the key column. Columns for
 * fields missing from the payload must be empty.
 *
 * @throws IOException declared because the row iterator is read and closed inside this test
 */
@Test
public void testWithHeaderKeyAndValue() throws IOException {
    final byte[] keyBytes = StringUtils.toUtf8(
        "{\n"
        + "    \"key\": \"sampleKey\"\n"
        + "}"
    );
    final byte[] valueBytes = StringUtils.toUtf8(
        "{\n"
        + "    \"timestamp\": \"2021-06-25\",\n"
        + "    \"bar\": null,\n"
        + "    \"foo\": \"x\",\n"
        + "    \"baz\": 4,\n"
        + "    \"o\": {\n"
        + "        \"mg\": 1\n"
        + "    }\n"
        + "}"
    );
    final Headers recordHeaders = new RecordHeaders(SAMPLE_HEADERS);
    inputEntity = new KafkaRecordEntity(
        new ConsumerRecord<byte[], byte[]>("sample", 0, 0, timestamp, null, null, 0, 0, keyBytes, valueBytes, recordHeaders)
    );

    final InputRowSchema schema = new InputRowSchema(
        new TimestampSpec("timestamp", "iso", null),
        new DimensionsSpec(
            DimensionsSpec.getDefaultSchemas(
                ImmutableList.of("bar", "foo", "kafka.newheader.encoding", "kafka.newheader.kafkapkc", "kafka.newts.timestamp")
            )
        ),
        ColumnsFilter.all()
    );
    final InputEntityReader reader = format.createReader(schema, inputEntity, null);

    try (CloseableIterator<InputRow> rows = reader.read()) {
        int rowsSeen = 0;
        while (rows.hasNext()) {
            final InputRow current = rows.next();

            // Payload columns
            Assert.assertEquals(DateTimes.of("2021-06-25"), current.getTimestamp());
            Assert.assertEquals("x", Iterables.getOnlyElement(current.getDimension("foo")));
            Assert.assertEquals("4", Iterables.getOnlyElement(current.getDimension("baz")));
            Assert.assertEquals("4", Iterables.getOnlyElement(current.getDimension("root_baz")));
            Assert.assertEquals("1", Iterables.getOnlyElement(current.getDimension("path_omg")));
            Assert.assertEquals("1", Iterables.getOnlyElement(current.getDimension("jq_omg")));

            // Header-derived columns and the Kafka record timestamp column
            Assert.assertEquals("application/json", Iterables.getOnlyElement(current.getDimension("kafka.newheader.encoding")));
            Assert.assertEquals("pkc-bar", Iterables.getOnlyElement(current.getDimension("kafka.newheader.kafkapkc")));
            Assert.assertEquals(
                String.valueOf(DateTimes.of("2021-06-24").getMillis()),
                Iterables.getOnlyElement(current.getDimension("kafka.newts.timestamp"))
            );
            Assert.assertEquals("2021-06-25", Iterables.getOnlyElement(current.getDimension("timestamp")));

            // Key column
            Assert.assertEquals("sampleKey", Iterables.getOnlyElement(current.getDimension("kafka.newkey.key")));

            // Fields that are absent from the payload
            Assert.assertTrue(current.getDimension("root_baz2").isEmpty());
            Assert.assertTrue(current.getDimension("path_omg2").isEmpty());
            Assert.assertTrue(current.getDimension("jq_omg2").isEmpty());

            rowsSeen++;
        }
        // Exactly one row is expected from the single record.
        Assert.assertEquals(1, rowsSeen);
    }
}
Also used : KafkaRecordEntity(org.apache.druid.data.input.kafka.KafkaRecordEntity) RecordHeaders(org.apache.kafka.common.header.internals.RecordHeaders) Headers(org.apache.kafka.common.header.Headers) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) InputRowSchema(org.apache.druid.data.input.InputRowSchema) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) Test(org.junit.Test)

Example 5 with KafkaRecordEntity

use of org.apache.druid.data.input.kafka.KafkaRecordEntity in project druid by druid-io.

the class KafkaInputFormatTest method testWithOutKeyAndHeaderSpecs.

/**
 * Reads one Kafka record through a {@code KafkaInputFormat} built with {@code null} for its first two
 * constructor arguments (presumably the header and key formats, going by the test name — confirm against
 * the constructor) and only a JSON value format.
 *
 * <p>The record has headers and a payload but no key. The test expects exactly one row, with an empty
 * {@code kafka.newkey.key} column and the payload columns (including flattened ones) populated.
 *
 * @throws IOException declared because the row iterator is read and closed inside this test
 */
@Test
public void testWithOutKeyAndHeaderSpecs() throws IOException {
    final byte[] valueBytes = StringUtils.toUtf8(
        "{\n"
        + "    \"timestamp\": \"2021-06-24\",\n"
        + "    \"bar\": null,\n"
        + "    \"foo\": \"x\",\n"
        + "    \"baz\": 4,\n"
        + "    \"o\": {\n"
        + "        \"mg\": 1\n"
        + "    }\n"
        + "}"
    );
    final Headers recordHeaders = new RecordHeaders(SAMPLE_HEADERS);
    inputEntity = new KafkaRecordEntity(
        new ConsumerRecord<byte[], byte[]>("sample", 0, 0, timestamp, null, null, 0, 0, null, valueBytes, recordHeaders)
    );

    // Flattening rules: each existing field is paired with a "...2" variant that is not in the payload.
    final JSONPathSpec flattenSpec = new JSONPathSpec(
        true,
        ImmutableList.of(
            new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz", "baz"),
            new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz2", "baz2"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"),
            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"),
            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2")
        )
    );
    // Value format. Per the original comment, the trailing (null, null, false) arguments are chosen
    // to make sure JsonReader is used.
    final JsonInputFormat jsonValueFormat = new JsonInputFormat(flattenSpec, null, null, false);
    final KafkaInputFormat valueOnlyFormat = new KafkaInputFormat(
        null,
        null,
        jsonValueFormat,
        "kafka.newheader.",
        "kafka.newkey.",
        "kafka.newts."
    );

    final InputRowSchema schema = new InputRowSchema(
        new TimestampSpec("timestamp", "iso", null),
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo", "kafka.newts.timestamp"))),
        ColumnsFilter.all()
    );
    final InputEntityReader reader = valueOnlyFormat.createReader(schema, inputEntity, null);

    try (CloseableIterator<InputRow> rows = reader.read()) {
        int rowsSeen = 0;
        while (rows.hasNext()) {
            final InputRow current = rows.next();

            // The record was built without a key, so the key column must be empty.
            Assert.assertTrue(current.getDimension("kafka.newkey.key").isEmpty());

            // Payload columns
            Assert.assertEquals("x", Iterables.getOnlyElement(current.getDimension("foo")));
            Assert.assertEquals("4", Iterables.getOnlyElement(current.getDimension("baz")));
            Assert.assertEquals("4", Iterables.getOnlyElement(current.getDimension("root_baz")));
            Assert.assertEquals("1", Iterables.getOnlyElement(current.getDimension("path_omg")));
            Assert.assertEquals("1", Iterables.getOnlyElement(current.getDimension("jq_omg")));

            rowsSeen++;
        }
        // Exactly one row is expected from the single record.
        Assert.assertEquals(1, rowsSeen);
    }
}
Also used : KafkaRecordEntity(org.apache.druid.data.input.kafka.KafkaRecordEntity) Headers(org.apache.kafka.common.header.Headers) RecordHeaders(org.apache.kafka.common.header.internals.RecordHeaders) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) Test(org.junit.Test)

Aggregations

KafkaRecordEntity (org.apache.druid.data.input.kafka.KafkaRecordEntity): 9, ConsumerRecord (org.apache.kafka.clients.consumer.ConsumerRecord): 7, Headers (org.apache.kafka.common.header.Headers): 7, RecordHeaders (org.apache.kafka.common.header.internals.RecordHeaders): 7, Test (org.junit.Test): 7, InputRowSchema (org.apache.druid.data.input.InputRowSchema): 5, InputEntityReader (org.apache.druid.data.input.InputEntityReader): 4, InputRow (org.apache.druid.data.input.InputRow): 4, MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow): 4, DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 4, TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 4, Pair (org.apache.druid.java.util.common.Pair): 3, Header (org.apache.kafka.common.header.Header): 2, TypeReference (com.fasterxml.jackson.core.type.TypeReference): 1, ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 1, IOException (java.io.IOException): 1, ByteBuffer (java.nio.ByteBuffer): 1, Collections (java.util.Collections): 1, HashMap (java.util.HashMap): 1, List (java.util.List): 1