Use of org.apache.gobblin.kafka.client.ByteArrayBasedKafkaRecord in project incubator-gobblin by apache.
From the class KafkaDeserializerExtractorTest, method testConfluentAvroDeserializerForSchemaEvolution.
@Test
public void testConfluentAvroDeserializerForSchemaEvolution() throws IOException, RestClientException, SchemaRegistryException {
  WorkUnitState mockWorkUnitState = getMockWorkUnitState();
  mockWorkUnitState.setProp("schema.registry.url", TEST_URL);

  Schema schemaV1 = SchemaBuilder.record(TEST_RECORD_NAME).namespace(TEST_NAMESPACE).fields()
      .name(TEST_FIELD_NAME).type().stringType().noDefault()
      .endRecord();
  Schema schemaV2 = SchemaBuilder.record(TEST_RECORD_NAME).namespace(TEST_NAMESPACE).fields()
      .name(TEST_FIELD_NAME).type().stringType().noDefault()
      .optionalString(TEST_FIELD_NAME2)
      .endRecord();

  GenericRecord testGenericRecord = new GenericRecordBuilder(schemaV1).set(TEST_FIELD_NAME, "testValue").build();

  SchemaRegistryClient mockSchemaRegistryClient = mock(SchemaRegistryClient.class);
  when(mockSchemaRegistryClient.getByID(any(Integer.class))).thenReturn(schemaV1);

  Serializer<Object> kafkaEncoder = new KafkaAvroSerializer(mockSchemaRegistryClient);
  Deserializer<Object> kafkaDecoder = new KafkaAvroDeserializer(mockSchemaRegistryClient);

  ByteBuffer testGenericRecordByteBuffer = ByteBuffer.wrap(kafkaEncoder.serialize(TEST_TOPIC_NAME, testGenericRecord));

  KafkaSchemaRegistry<Integer, Schema> mockKafkaSchemaRegistry = mock(KafkaSchemaRegistry.class);
  when(mockKafkaSchemaRegistry.getLatestSchemaByTopic(TEST_TOPIC_NAME)).thenReturn(schemaV2);

  // getSchema() must be stubbed on a spy: Mockito's when(...) cannot stub a plain object.
  KafkaDeserializerExtractor kafkaDecoderExtractor = spy(new KafkaDeserializerExtractor(mockWorkUnitState,
      Optional.fromNullable(Deserializers.CONFLUENT_AVRO), kafkaDecoder, mockKafkaSchemaRegistry));
  doReturn(schemaV2).when(kafkaDecoderExtractor).getSchema();

  ByteArrayBasedKafkaRecord mockMessageAndOffset = getMockMessageAndOffset(testGenericRecordByteBuffer);

  GenericRecord received = (GenericRecord) kafkaDecoderExtractor.decodeRecord(mockMessageAndOffset);
  Assert.assertEquals(received.toString(), "{\"testField\": \"testValue\", \"testField2\": null}");
}
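This test and the ones below rely on a getMockMessageAndOffset helper that the page does not show. A minimal sketch of what it might look like, assuming Mockito and the ByteArrayBasedKafkaRecord.getMessageBytes() accessor used elsewhere on this page; the real helper in KafkaDeserializerExtractorTest may stub additional methods:

private ByteArrayBasedKafkaRecord getMockMessageAndOffset(ByteBuffer payload) {
  // Stub only what decodeRecord(...) reads: the raw message bytes.
  ByteArrayBasedKafkaRecord mockMessageAndOffset = mock(ByteArrayBasedKafkaRecord.class);
  when(mockMessageAndOffset.getMessageBytes()).thenReturn(payload.array());
  return mockMessageAndOffset;
}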
Use of org.apache.gobblin.kafka.client.ByteArrayBasedKafkaRecord in project incubator-gobblin by apache.
From the class KafkaDeserializerExtractorTest, method testConfluentJsonDeserializer.
@Test
public void testConfluentJsonDeserializer() throws IOException {
  WorkUnitState mockWorkUnitState = getMockWorkUnitState();
  mockWorkUnitState.setProp("json.value.type", KafkaRecord.class.getName());

  KafkaRecord testKafkaRecord = new KafkaRecord("Hello World");

  Serializer<KafkaRecord> kafkaEncoder = new KafkaJsonSerializer<>();
  kafkaEncoder.configure(PropertiesUtils.propsToStringKeyMap(mockWorkUnitState.getProperties()), false);

  Deserializer<KafkaRecord> kafkaDecoder = new KafkaJsonDeserializer<>();
  kafkaDecoder.configure(PropertiesUtils.propsToStringKeyMap(mockWorkUnitState.getProperties()), false);

  ByteBuffer testKafkaRecordByteBuffer = ByteBuffer.wrap(kafkaEncoder.serialize(TEST_TOPIC_NAME, testKafkaRecord));

  KafkaSchemaRegistry<?, ?> mockKafkaSchemaRegistry = mock(KafkaSchemaRegistry.class);
  KafkaDeserializerExtractor kafkaDecoderExtractor = new KafkaDeserializerExtractor(mockWorkUnitState,
      Optional.fromNullable(Deserializers.CONFLUENT_JSON), kafkaDecoder, mockKafkaSchemaRegistry);

  ByteArrayBasedKafkaRecord mockMessageAndOffset = getMockMessageAndOffset(testKafkaRecordByteBuffer);

  Assert.assertEquals(kafkaDecoderExtractor.decodeRecord(mockMessageAndOffset), testKafkaRecord);
}
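The KafkaRecord value type serialized here is also not shown. For the JSON round trip and the final assertEquals to succeed, it needs a no-arg constructor and setter for deserialization plus a value-based equals; a hypothetical sketch:

private static class KafkaRecord {
  private String value;

  // no-arg constructor so the JSON deserializer can instantiate the class
  public KafkaRecord() {
  }

  public KafkaRecord(String value) {
    this.value = value;
  }

  public String getValue() {
    return this.value;
  }

  public void setValue(String value) {
    this.value = value;
  }

  @Override
  public boolean equals(Object other) {
    return other instanceof KafkaRecord && java.util.Objects.equals(this.value, ((KafkaRecord) other).value);
  }

  @Override
  public int hashCode() {
    return java.util.Objects.hashCode(this.value);
  }
}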
Use of org.apache.gobblin.kafka.client.ByteArrayBasedKafkaRecord in project incubator-gobblin by apache.
From the class KafkaExtractor, method readRecordImpl.
/**
* Return the next decodable record from the current partition. If the current partition has no more
* decodable records, move on to the next partition. If all partitions have been processed, return null.
*/
@SuppressWarnings("unchecked")
@Override
public D readRecordImpl(D reuse) throws DataRecordException, IOException {
  long readStartTime = System.nanoTime();

  while (!allPartitionsFinished()) {
    if (currentPartitionFinished()) {
      moveToNextPartition();
      continue;
    }
    if (this.messageIterator == null || !this.messageIterator.hasNext()) {
      try {
        long fetchStartTime = System.nanoTime();
        this.messageIterator = fetchNextMessageBuffer();
        this.currentPartitionFetchMessageBufferTime += System.nanoTime() - fetchStartTime;
      } catch (Exception e) {
        LOG.error(String.format("Failed to fetch next message buffer for partition %s. Will skip this partition.",
            getCurrentPartition()), e);
        moveToNextPartition();
        continue;
      }
      if (this.messageIterator == null || !this.messageIterator.hasNext()) {
        moveToNextPartition();
        continue;
      }
    }
    while (!currentPartitionFinished()) {
      if (!this.messageIterator.hasNext()) {
        break;
      }

      KafkaConsumerRecord nextValidMessage = this.messageIterator.next();

      // Even though we ask Kafka for a message buffer starting from offset x, it may
      // return a buffer that starts from an offset smaller than x, so we need to skip messages
      // until we get to x.
      if (nextValidMessage.getOffset() < this.nextWatermark.get(this.currentPartitionIdx)) {
        continue;
      }

      this.nextWatermark.set(this.currentPartitionIdx, nextValidMessage.getNextOffset());
      try {
        D record = null;
        // track time for decode/convert depending on the record type
        long decodeStartTime = System.nanoTime();

        if (nextValidMessage instanceof ByteArrayBasedKafkaRecord) {
          record = decodeRecord((ByteArrayBasedKafkaRecord) nextValidMessage);
        } else if (nextValidMessage instanceof DecodeableKafkaRecord) {
          // if value is null then this is a bad record that is returned for further error handling, so raise an error
          if (((DecodeableKafkaRecord) nextValidMessage).getValue() == null) {
            throw new DataRecordException("Could not decode Kafka record");
          }
          // get the value from the decodeable record and convert it to the output schema if necessary
          record = convertRecord(((DecodeableKafkaRecord<?, D>) nextValidMessage).getValue());
        } else {
          throw new IllegalStateException(
              "Unsupported KafkaConsumerRecord type. The returned record can either be ByteArrayBasedKafkaRecord"
                  + " or DecodeableKafkaRecord");
        }

        this.currentPartitionDecodeRecordTime += System.nanoTime() - decodeStartTime;
        this.currentPartitionRecordCount++;
        this.currentPartitionTotalSize += nextValidMessage.getValueSizeInBytes();
        this.currentPartitionReadRecordTime += System.nanoTime() - readStartTime;
        return record;
      } catch (Throwable t) {
        this.errorPartitions.add(this.currentPartitionIdx);
        this.undecodableMessageCount++;
        if (shouldLogError()) {
          LOG.error(String.format("A record from partition %s cannot be decoded.", getCurrentPartition()), t);
          incrementErrorCount();
        }
      }
    }
  }

  LOG.info("Finished pulling topic " + this.topicName);
  this.currentPartitionReadRecordTime += System.nanoTime() - readStartTime;
  return null;
}
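A short sketch of the calling side, assuming the usual Gobblin arrangement in which the instrumented public readRecord(...) entry point delegates to readRecordImpl(...). The extractor variable and process(...) handler are illustrative names, not Gobblin API; D is the extractor's record type:

// Drain one topic: a null return signals that every partition has been processed.
D record;
while ((record = extractor.readRecord(null)) != null) {
  process(record); // placeholder for downstream conversion/writing
}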
Use of org.apache.gobblin.kafka.client.ByteArrayBasedKafkaRecord in project incubator-gobblin by apache.
From the class KafkaSimpleJsonExtractor, method decodeRecord.
@Override
protected byte[] decodeRecord(ByteArrayBasedKafkaRecord messageAndOffset) throws IOException {
  long offset = messageAndOffset.getOffset();

  byte[] keyBytes = messageAndOffset.getKeyBytes();
  String key = (keyBytes == null) ? "" : new String(keyBytes, CHARSET);

  byte[] payloadBytes = messageAndOffset.getMessageBytes();
  String payload = (payloadBytes == null) ? "" : new String(payloadBytes, CHARSET);

  KafkaRecord record = new KafkaRecord(offset, key, payload);

  byte[] decodedRecord = gson.toJson(record).getBytes(CHARSET);
  return decodedRecord;
}
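The KafkaRecord constructed here is a different class from the single-value test type sketched earlier: a wrapper pairing the offset with the key and payload strings. Its definition is not shown on this page; a hypothetical sketch of its shape, which Gson (serializing fields directly) would render along the lines of {"offset":5,"key":"k","payload":"v"}:

public class KafkaRecord {
  private final long offset;
  private final String key;
  private final String payload;

  public KafkaRecord(long offset, String key, String payload) {
    this.offset = offset;
    this.key = key;
    this.payload = payload;
  }
}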
Use of org.apache.gobblin.kafka.client.ByteArrayBasedKafkaRecord in project incubator-gobblin by apache.
From the class KafkaDeserializerExtractorTest, method testDeserializeRecord.
@Test
public void testDeserializeRecord() throws IOException {
  WorkUnitState mockWorkUnitState = getMockWorkUnitState();

  String testString = "Hello World";
  ByteBuffer testStringByteBuffer = ByteBuffer.wrap(testString.getBytes(StandardCharsets.UTF_8));

  Deserializer<Object> mockKafkaDecoder = mock(Deserializer.class);
  KafkaSchemaRegistry<?, ?> mockKafkaSchemaRegistry = mock(KafkaSchemaRegistry.class);
  when(mockKafkaDecoder.deserialize(TEST_TOPIC_NAME, testStringByteBuffer.array())).thenReturn(testString);

  KafkaDeserializerExtractor kafkaDecoderExtractor = new KafkaDeserializerExtractor(mockWorkUnitState,
      Optional.fromNullable(Deserializers.BYTE_ARRAY), mockKafkaDecoder, mockKafkaSchemaRegistry);

  ByteArrayBasedKafkaRecord mockMessageAndOffset = getMockMessageAndOffset(testStringByteBuffer);

  Assert.assertEquals(kafkaDecoderExtractor.decodeRecord(mockMessageAndOffset), testString);
}
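Finally, each test starts from a getMockWorkUnitState helper that is likewise not shown. A hypothetical reconstruction, assuming a minimal WorkUnitState carrying the topic name and placeholder watermarks; the real helper may set more properties:

private WorkUnitState getMockWorkUnitState() {
  WorkUnit mockWorkUnit = WorkUnit.createEmpty();
  // A single partition with a 0..10 offset range is enough for these decode-only tests.
  mockWorkUnit.setWatermarkInterval(new WatermarkInterval(new MultiLongWatermark(ImmutableList.of(0L)),
      new MultiLongWatermark(ImmutableList.of(10L))));

  WorkUnitState mockWorkUnitState = new WorkUnitState(mockWorkUnit, new State());
  mockWorkUnitState.setProp(KafkaSource.TOPIC_NAME, TEST_TOPIC_NAME);
  mockWorkUnitState.setProp(KafkaSource.PARTITION_ID, "1");
  return mockWorkUnitState;
}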