use of org.apache.druid.data.input.kafka.KafkaRecordEntity in project druid by druid-io.
the class KafkaInputFormatTest method testTimestampFromHeader.
@Test
public void testTimestampFromHeader() throws IOException {
Iterable<Header> sample_header_with_ts = Iterables.unmodifiableIterable(Iterables.concat(SAMPLE_HEADERS, ImmutableList.of(new Header() {
@Override
public String key() {
return "headerTs";
}
@Override
public byte[] value() {
return "2021-06-24".getBytes(StandardCharsets.UTF_8);
}
})));
final byte[] key = StringUtils.toUtf8("{\n" + " \"key\": \"sampleKey\"\n" + "}");
final byte[] payload = StringUtils.toUtf8("{\n" + " \"timestamp\": \"2021-06-24\",\n" + " \"bar\": null,\n" + " \"foo\": \"x\",\n" + " \"baz\": 4,\n" + " \"o\": {\n" + " \"mg\": 1\n" + " }\n" + "}");
Headers headers = new RecordHeaders(sample_header_with_ts);
inputEntity = new KafkaRecordEntity(new ConsumerRecord<byte[], byte[]>("sample", 0, 0, timestamp, null, null, 0, 0, key, payload, headers));
final InputEntityReader reader = format.createReader(new InputRowSchema(new TimestampSpec("kafka.newheader.headerTs", "iso", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo", "kafka.newheader.encoding", "kafka.newheader.kafkapkc"))), ColumnsFilter.all()), inputEntity, null);
final int numExpectedIterations = 1;
try (CloseableIterator<InputRow> iterator = reader.read()) {
int numActualIterations = 0;
while (iterator.hasNext()) {
final InputRow row = iterator.next();
final MapBasedInputRow mrow = (MapBasedInputRow) row;
// Payload verifications
Assert.assertEquals(DateTimes.of("2021-06-24"), row.getTimestamp());
Assert.assertEquals("x", Iterables.getOnlyElement(row.getDimension("foo")));
Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("baz")));
Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("root_baz")));
Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("path_omg")));
Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("jq_omg")));
// Header verification
Assert.assertEquals("application/json", Iterables.getOnlyElement(row.getDimension("kafka.newheader.encoding")));
Assert.assertEquals("pkc-bar", Iterables.getOnlyElement(row.getDimension("kafka.newheader.kafkapkc")));
Assert.assertEquals(String.valueOf(DateTimes.of("2021-06-24").getMillis()), Iterables.getOnlyElement(row.getDimension("kafka.newts.timestamp")));
Assert.assertEquals("2021-06-24", Iterables.getOnlyElement(row.getDimension("kafka.newheader.headerTs")));
Assert.assertEquals("2021-06-24", Iterables.getOnlyElement(row.getDimension("timestamp")));
// Key verification
Assert.assertEquals("sampleKey", Iterables.getOnlyElement(row.getDimension("kafka.newkey.key")));
Assert.assertTrue(row.getDimension("root_baz2").isEmpty());
Assert.assertTrue(row.getDimension("path_omg2").isEmpty());
Assert.assertTrue(row.getDimension("jq_omg2").isEmpty());
Assert.assertTrue(row.getDimension("jq_omg2").isEmpty());
numActualIterations++;
}
Assert.assertEquals(numExpectedIterations, numActualIterations);
}
}
use of org.apache.druid.data.input.kafka.KafkaRecordEntity in project druid by druid-io.
the class KafkaInputFormat method createReader.
@Override
public InputEntityReader createReader(InputRowSchema inputRowSchema, InputEntity source, File temporaryDirectory) {
KafkaRecordEntity record = (KafkaRecordEntity) source;
InputRowSchema newInputRowSchema = new InputRowSchema(dummyTimestampSpec, inputRowSchema.getDimensionsSpec(), inputRowSchema.getColumnsFilter());
return new KafkaInputReader(inputRowSchema, record, (headerFormat == null) ? null : headerFormat.createReader(record.getRecord().headers(), headerColumnPrefix), (keyFormat == null || record.getRecord().key() == null) ? null : keyFormat.createReader(newInputRowSchema, new ByteEntity(record.getRecord().key()), temporaryDirectory), (record.getRecord().value() == null) ? null : valueFormat.createReader(newInputRowSchema, source, temporaryDirectory), keyColumnName, timestampColumnName);
}
use of org.apache.druid.data.input.kafka.KafkaRecordEntity in project druid by druid-io.
the class KafkaStringHeaderFormatTest method testIllegalHeaderCharacter.
@Test
public void testIllegalHeaderCharacter() {
Iterable<Header> header = ImmutableList.of(new Header() {
@Override
public String key() {
return "encoding";
}
@Override
public byte[] value() {
return "€pplic€tion/json".getBytes(StandardCharsets.US_ASCII);
}
}, new Header() {
@Override
public String key() {
return "kafkapkc";
}
@Override
public byte[] value() {
return "pkc-bar".getBytes(StandardCharsets.US_ASCII);
}
});
String headerLabelPrefix = "test.kafka.header.";
Headers headers = new RecordHeaders(header);
inputEntity = new KafkaRecordEntity(new ConsumerRecord<byte[], byte[]>("sample", 0, 0, timestamp, null, null, 0, 0, null, "sampleValue".getBytes(StandardCharsets.UTF_8), headers));
List<Pair<String, Object>> expectedResults = Arrays.asList(Pair.of("test.kafka.header.encoding", "?pplic?tion/json"), Pair.of("test.kafka.header.kafkapkc", "pkc-bar"));
KafkaHeaderFormat headerInput = new KafkaStringHeaderFormat("US-ASCII");
KafkaHeaderReader headerParser = headerInput.createReader(inputEntity.getRecord().headers(), headerLabelPrefix);
List<Pair<String, Object>> rows = headerParser.read();
Assert.assertEquals(expectedResults, rows);
}
use of org.apache.druid.data.input.kafka.KafkaRecordEntity in project druid by druid-io.
the class KafkaInputFormatTest method testWithHeaderKeyAndValue.
@Test
public void testWithHeaderKeyAndValue() throws IOException {
final byte[] key = StringUtils.toUtf8("{\n" + " \"key\": \"sampleKey\"\n" + "}");
final byte[] payload = StringUtils.toUtf8("{\n" + " \"timestamp\": \"2021-06-25\",\n" + " \"bar\": null,\n" + " \"foo\": \"x\",\n" + " \"baz\": 4,\n" + " \"o\": {\n" + " \"mg\": 1\n" + " }\n" + "}");
Headers headers = new RecordHeaders(SAMPLE_HEADERS);
inputEntity = new KafkaRecordEntity(new ConsumerRecord<byte[], byte[]>("sample", 0, 0, timestamp, null, null, 0, 0, key, payload, headers));
final InputEntityReader reader = format.createReader(new InputRowSchema(new TimestampSpec("timestamp", "iso", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo", "kafka.newheader.encoding", "kafka.newheader.kafkapkc", "kafka.newts.timestamp"))), ColumnsFilter.all()), inputEntity, null);
final int numExpectedIterations = 1;
try (CloseableIterator<InputRow> iterator = reader.read()) {
int numActualIterations = 0;
while (iterator.hasNext()) {
final InputRow row = iterator.next();
// Payload verifications
Assert.assertEquals(DateTimes.of("2021-06-25"), row.getTimestamp());
Assert.assertEquals("x", Iterables.getOnlyElement(row.getDimension("foo")));
Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("baz")));
Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("root_baz")));
Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("path_omg")));
Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("jq_omg")));
// Header verification
Assert.assertEquals("application/json", Iterables.getOnlyElement(row.getDimension("kafka.newheader.encoding")));
Assert.assertEquals("pkc-bar", Iterables.getOnlyElement(row.getDimension("kafka.newheader.kafkapkc")));
Assert.assertEquals(String.valueOf(DateTimes.of("2021-06-24").getMillis()), Iterables.getOnlyElement(row.getDimension("kafka.newts.timestamp")));
Assert.assertEquals("2021-06-25", Iterables.getOnlyElement(row.getDimension("timestamp")));
// Key verification
Assert.assertEquals("sampleKey", Iterables.getOnlyElement(row.getDimension("kafka.newkey.key")));
Assert.assertTrue(row.getDimension("root_baz2").isEmpty());
Assert.assertTrue(row.getDimension("path_omg2").isEmpty());
Assert.assertTrue(row.getDimension("jq_omg2").isEmpty());
numActualIterations++;
}
Assert.assertEquals(numExpectedIterations, numActualIterations);
}
}
use of org.apache.druid.data.input.kafka.KafkaRecordEntity in project druid by druid-io.
the class KafkaInputFormatTest method testWithOutKeyAndHeaderSpecs.
@Test
public void testWithOutKeyAndHeaderSpecs() throws IOException {
final byte[] payload = StringUtils.toUtf8("{\n" + " \"timestamp\": \"2021-06-24\",\n" + " \"bar\": null,\n" + " \"foo\": \"x\",\n" + " \"baz\": 4,\n" + " \"o\": {\n" + " \"mg\": 1\n" + " }\n" + "}");
Headers headers = new RecordHeaders(SAMPLE_HEADERS);
inputEntity = new KafkaRecordEntity(new ConsumerRecord<byte[], byte[]>("sample", 0, 0, timestamp, null, null, 0, 0, null, payload, headers));
KafkaInputFormat localFormat = new KafkaInputFormat(null, null, // Value Format
new JsonInputFormat(new JSONPathSpec(true, ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz", "baz"), new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz2", "baz2"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"), new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"), new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2"))), // make sure JsonReader is used
null, // make sure JsonReader is used
null, // make sure JsonReader is used
false), "kafka.newheader.", "kafka.newkey.", "kafka.newts.");
final InputEntityReader reader = localFormat.createReader(new InputRowSchema(new TimestampSpec("timestamp", "iso", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo", "kafka.newts.timestamp"))), ColumnsFilter.all()), inputEntity, null);
final int numExpectedIterations = 1;
try (CloseableIterator<InputRow> iterator = reader.read()) {
int numActualIterations = 0;
while (iterator.hasNext()) {
final InputRow row = iterator.next();
// Key verification
Assert.assertTrue(row.getDimension("kafka.newkey.key").isEmpty());
Assert.assertEquals("x", Iterables.getOnlyElement(row.getDimension("foo")));
Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("baz")));
Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("root_baz")));
Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("path_omg")));
Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("jq_omg")));
numActualIterations++;
}
Assert.assertEquals(numExpectedIterations, numActualIterations);
}
}
Aggregations