Use of org.apache.gobblin.stream.MetadataUpdateControlMessage in the Apache incubator-gobblin project.
Class Converter, method processStream.
/**
 * Converts an incoming {@link RecordStreamWithMetadata} into a stream of converted entities paired with the
 * converted global metadata.
 *
 * <p>Each entity of the input stream is handled as follows:
 * <ul>
 *   <li>A {@link ControlMessage} is first passed to the handler returned by {@code getMessageHandler()} and is
 *       then forwarded downstream. If it is a {@link MetadataUpdateControlMessage}, the output metadata is
 *       rebuilt from the schema it carries, and a new {@link MetadataUpdateControlMessage} wrapping that output
 *       metadata is forwarded in its place.</li>
 *   <li>A {@link RecordEnvelope} is run through {@code convertRecord} against the current output schema.
 *       No results: the input entity is acked and nothing is emitted. Exactly one result: the envelope is
 *       reused via {@code withRecord}. Several results: each one becomes a child record of a fork record
 *       builder, which is closed once all children have been emitted.</li>
 *   <li>Any other entity type is rejected with an {@link UnsupportedOperationException}.</li>
 * </ul>
 *
 * <p>{@code close()} is registered to run when the converted stream completes.
 *
 * @param inputStream the record stream and global metadata to convert
 * @param workUnitState state handed to {@code init}, {@code convertSchema} and {@code convertRecord}
 * @return the input stream re-wrapped around the converted entities and the output metadata
 * @throws SchemaConversionException as declared; presumably raised when the initial schema cannot be converted
 */
@Override
public RecordStreamWithMetadata<DO, SO> processStream(RecordStreamWithMetadata<DI, SI> inputStream, WorkUnitState workUnitState) throws SchemaConversionException {
  init(workUnitState);

  // Derive the initial output metadata from the schema the stream starts with.
  Optional<SO> initialSchema = Optional.fromNullable(convertSchema(inputStream.getGlobalMetadata().getSchema(), workUnitState));
  this.outputGlobalMetadata = GlobalMetadata.<SI, SO>builderWithInput(inputStream.getGlobalMetadata(), initialSchema).build();

  // The second flatMap argument (1) is passed as RxJava's maxConcurrency for the inner publishers.
  Flowable<StreamEntity<DO>> convertedEntities = inputStream.getRecordStream().flatMap(entity -> {
    if (entity instanceof ControlMessage) {
      ControlMessage forwarded = (ControlMessage) entity;
      getMessageHandler().handleMessage(forwarded);

      // A metadata update carries a new input schema: re-convert it and forward the resulting output metadata.
      if (entity instanceof MetadataUpdateControlMessage) {
        MetadataUpdateControlMessage update = (MetadataUpdateControlMessage) entity;
        Optional<SO> updatedSchema = Optional.fromNullable(convertSchema((SI) update.getGlobalMetadata().getSchema(), workUnitState));
        this.outputGlobalMetadata = GlobalMetadata.<SI, SO>builderWithInput(update.getGlobalMetadata(), updatedSchema).build();
        forwarded = new MetadataUpdateControlMessage<SO, DO>(this.outputGlobalMetadata);
      }

      return Flowable.just(((ControlMessage<DO>) forwarded));
    }

    if (!(entity instanceof RecordEnvelope)) {
      throw new UnsupportedOperationException();
    }

    RecordEnvelope<DI> envelope = (RecordEnvelope<DI>) entity;
    Iterator<DO> converted = convertRecord(this.outputGlobalMetadata.getSchema(), envelope.getRecord(), workUnitState).iterator();

    if (!converted.hasNext()) {
      // Nothing was produced for this record: ack it and emit nothing downstream.
      entity.ack();
      return Flowable.empty();
    }

    DO first = converted.next();
    if (converted.hasNext()) {
      // More than one output record: derive each child from the original envelope through a fork record builder.
      RecordEnvelope<DI>.ForkRecordBuilder<DO> forkBuilder = envelope.forkRecordBuilder();
      return Flowable.just(first).concatWith(Flowable.fromIterable(() -> converted)).map(forkBuilder::childRecord).doOnComplete(forkBuilder::close);
    }

    // Exactly one output record: reuse the envelope directly, which avoids the fork machinery.
    return Flowable.just(envelope.withRecord(first));
  }, 1);

  Flowable<StreamEntity<DO>> outputStream = convertedEntities.doOnComplete(this::close);
  return inputStream.withRecordStream(outputStream, this.outputGlobalMetadata);
}
Use of org.apache.gobblin.stream.MetadataUpdateControlMessage in the Apache incubator-gobblin project.
Class TestRecordStream, method testMetadataUpdateControlMessages.
/**
 * Verifies that metadata update control messages make the converter switch schemas mid-stream.
 *
 * <p>The input interleaves two records with two schema updates. The expectations show record "a" tagged with
 * "schema" (presumably the schema the extractor starts with), record "b" tagged with "Schema1", and both
 * update messages observed by the converter and by the writer.
 *
 * @throws Exception if running or committing the task fails
 */
@Test
public void testMetadataUpdateControlMessages() throws Exception {
  // Stream layout: record, schema update, record, schema update.
  StreamEntity[] input = new StreamEntity[] {
      new RecordEnvelope<>("a"),
      new MetadataUpdateControlMessage<>(GlobalMetadata.<String>builder().schema("Schema1").build()),
      new RecordEnvelope<>("b"),
      new MetadataUpdateControlMessage<>(GlobalMetadata.<String>builder().schema("Schema2").build())
  };
  MyExtractor extractor = new MyExtractor(input);
  SchemaAppendConverter converter = new SchemaAppendConverter();
  MyDataWriter writer = new MyDataWriter();

  Task task = setupTask(extractor, writer, converter);
  task.run();
  task.commit();

  Assert.assertEquals(task.getTaskState().getWorkingState(), WorkUnitState.WorkingState.SUCCESSFUL);

  // Metadata used only to build the expected control messages; kept separate from the input instances.
  GlobalMetadata<String> expectedSchema1 = GlobalMetadata.<String>builder().schema("Schema1").build();
  GlobalMetadata<String> expectedSchema2 = GlobalMetadata.<String>builder().schema("Schema2").build();

  // What the converter saw.
  Assert.assertEquals(converter.records, Lists.newArrayList("a:schema", "b:Schema1"));
  Assert.assertEquals(converter.messages,
      Lists.newArrayList(new MetadataUpdateControlMessage<>(expectedSchema1), new MetadataUpdateControlMessage<>(expectedSchema2)));

  // What reached the writer.
  Assert.assertEquals(writer.records, Lists.newArrayList("a:schema", "b:Schema1"));
  Assert.assertEquals(writer.messages,
      Lists.newArrayList(new MetadataUpdateControlMessage<>(expectedSchema1), new MetadataUpdateControlMessage<>(expectedSchema2)));
}
Use of org.apache.gobblin.stream.MetadataUpdateControlMessage in the Apache incubator-gobblin project.
Class TestRecordStream, method testMetadataUpdateWithStreamProcessors.
/**
 * Repeats the metadata update scenario with the converter supplied through the list of
 * {@link RecordStreamProcessor}s (and an empty converter list) when setting up the task.
 *
 * <p>The expectations are the same as when the converter is passed directly: record "a" tagged with "schema",
 * record "b" tagged with "Schema1", and both update messages observed by the converter and by the writer.
 *
 * @throws Exception if running or committing the task fails
 */
@Test
public void testMetadataUpdateWithStreamProcessors() throws Exception {
  // Stream layout: record, schema update, record, schema update.
  StreamEntity[] input = new StreamEntity[] {
      new RecordEnvelope<>("a"),
      new MetadataUpdateControlMessage<>(GlobalMetadata.<String>builder().schema("Schema1").build()),
      new RecordEnvelope<>("b"),
      new MetadataUpdateControlMessage<>(GlobalMetadata.<String>builder().schema("Schema2").build())
  };
  MyExtractor extractor = new MyExtractor(input);
  SchemaAppendConverter converter = new SchemaAppendConverter();
  MyDataWriter writer = new MyDataWriter();

  // Third argument is the (empty) converter list; the converter goes in as a stream processor instead.
  Task task = setupTask(extractor, writer, Collections.EMPTY_LIST, Lists.newArrayList(converter));
  task.run();
  task.commit();

  Assert.assertEquals(task.getTaskState().getWorkingState(), WorkUnitState.WorkingState.SUCCESSFUL);

  // Metadata used only to build the expected control messages; kept separate from the input instances.
  GlobalMetadata<String> expectedSchema1 = GlobalMetadata.<String>builder().schema("Schema1").build();
  GlobalMetadata<String> expectedSchema2 = GlobalMetadata.<String>builder().schema("Schema2").build();

  // What the converter saw.
  Assert.assertEquals(converter.records, Lists.newArrayList("a:schema", "b:Schema1"));
  Assert.assertEquals(converter.messages,
      Lists.newArrayList(new MetadataUpdateControlMessage<>(expectedSchema1), new MetadataUpdateControlMessage<>(expectedSchema2)));

  // What reached the writer.
  Assert.assertEquals(writer.records, Lists.newArrayList("a:schema", "b:Schema1"));
  Assert.assertEquals(writer.messages,
      Lists.newArrayList(new MetadataUpdateControlMessage<>(expectedSchema1), new MetadataUpdateControlMessage<>(expectedSchema2)));
}
Aggregations