Search in sources :

Example 1 with MetadataUpdateControlMessage

use of org.apache.gobblin.stream.MetadataUpdateControlMessage in project incubator-gobblin by apache.

the class Converter method processStream.

/**
 * Converts every entity of the given {@link RecordStreamWithMetadata} and returns the resulting stream.
 *
 * <p>The output metadata is first built from the input stream's global metadata and the result of
 * {@code convertSchema}. Each stream entity is then handled according to its type:
 * <ul>
 *   <li>a {@link ControlMessage} is passed to the message handler and forwarded downstream; when it is a
 *       {@link MetadataUpdateControlMessage}, the output metadata is rebuilt from the schema it carries and a
 *       new {@link MetadataUpdateControlMessage} holding that output metadata is forwarded instead;</li>
 *   <li>a {@link RecordEnvelope} is run through {@code convertRecord}: no result acks the input entity and
 *       emits nothing, one result reuses the envelope, several results go through a fork record builder;</li>
 *   <li>any other entity type raises {@link UnsupportedOperationException}.</li>
 * </ul>
 * {@code close} is registered to run when the output stream completes.
 *
 * @param inputStream the stream of records and control messages to convert, together with its global metadata
 * @param workUnitState state handed to {@code init}, {@code convertSchema} and {@code convertRecord}
 * @return the result of {@code inputStream.withRecordStream} for the converted stream and the output metadata
 * @throws SchemaConversionException declared by this method; presumably raised by {@code convertSchema}
 */
@Override
public RecordStreamWithMetadata<DO, SO> processStream(RecordStreamWithMetadata<DI, SI> inputStream, WorkUnitState workUnitState) throws SchemaConversionException {
    init(workUnitState);
    this.outputGlobalMetadata = GlobalMetadata.<SI, SO>builderWithInput(
            inputStream.getGlobalMetadata(),
            Optional.fromNullable(convertSchema(inputStream.getGlobalMetadata().getSchema(), workUnitState)))
        .build();

    // The second flatMap argument (1) is kept exactly as in the original call.
    Flowable<StreamEntity<DO>> convertedStream = inputStream.getRecordStream().flatMap(entity -> {
        if (entity instanceof ControlMessage) {
            ControlMessage forwarded = (ControlMessage) entity;
            getMessageHandler().handleMessage(forwarded);
            if (entity instanceof MetadataUpdateControlMessage) {
                // A metadata update carries a new input schema: re-derive the output metadata from it
                // and forward a message that describes the output side instead of the input side.
                MetadataUpdateControlMessage metadataUpdate = (MetadataUpdateControlMessage) entity;
                this.outputGlobalMetadata = GlobalMetadata.<SI, SO>builderWithInput(
                        metadataUpdate.getGlobalMetadata(),
                        Optional.fromNullable(convertSchema((SI) metadataUpdate.getGlobalMetadata().getSchema(), workUnitState)))
                    .build();
                forwarded = new MetadataUpdateControlMessage<SO, DO>(this.outputGlobalMetadata);
            }
            return Flowable.just((ControlMessage<DO>) forwarded);
        }

        if (entity instanceof RecordEnvelope) {
            RecordEnvelope<DI> envelope = (RecordEnvelope<DI>) entity;
            Iterator<DO> remaining = convertRecord(this.outputGlobalMetadata.getSchema(), envelope.getRecord(), workUnitState).iterator();
            if (!remaining.hasNext()) {
                // Nothing was produced for this record: ack it here and emit nothing.
                entity.ack();
                return Flowable.empty();
            }

            DO head = remaining.next();
            if (remaining.hasNext()) {
                // Several outputs for one input: emit each one through a fork record builder and
                // close the builder once all of them have been emitted.
                RecordEnvelope<DI>.ForkRecordBuilder<DO> forks = envelope.forkRecordBuilder();
                Flowable<DO> allConverted = Flowable.just(head).concatWith(Flowable.fromIterable(() -> remaining));
                return allConverted.map(forks::childRecord).doOnComplete(forks::close);
            }

            // Exactly one output: RecordEnvelope.withRecord is the more efficient path.
            return Flowable.just(envelope.withRecord(head));
        }

        throw new UnsupportedOperationException();
    }, 1);

    return inputStream.withRecordStream(convertedStream.doOnComplete(this::close), this.outputGlobalMetadata);
}
Also used : RecordEnvelope(org.apache.gobblin.stream.RecordEnvelope) StreamEntity(org.apache.gobblin.stream.StreamEntity) MetadataUpdateControlMessage(org.apache.gobblin.stream.MetadataUpdateControlMessage) Iterator(java.util.Iterator) ControlMessage(org.apache.gobblin.stream.ControlMessage)

Example 2 with MetadataUpdateControlMessage

use of org.apache.gobblin.stream.MetadataUpdateControlMessage in project incubator-gobblin by apache.

the class TestRecordStream method testMetadataUpdateControlMessages.

/**
 * Tests metadata update control messages that signal the converters to change schemas.
 *
 * <p>The extractor emits record "a", a metadata update to "Schema1", record "b" and a metadata update to
 * "Schema2". The assertions expect "b" to be converted with "Schema1", and both the converter and the
 * writer to have seen the two metadata update messages in order.
 *
 * @throws Exception if setting up, running or committing the task fails
 */
@Test
public void testMetadataUpdateControlMessages() throws Exception {
    // All control messages are built with the diamond operator; the last one used to be a raw type.
    MyExtractor extractor = new MyExtractor(new StreamEntity[] { new RecordEnvelope<>("a"), new MetadataUpdateControlMessage<>(GlobalMetadata.<String>builder().schema("Schema1").build()), new RecordEnvelope<>("b"), new MetadataUpdateControlMessage<>(GlobalMetadata.<String>builder().schema("Schema2").build()) });
    SchemaAppendConverter converter = new SchemaAppendConverter();
    MyDataWriter writer = new MyDataWriter();
    Task task = setupTask(extractor, writer, converter);
    task.run();
    task.commit();
    Assert.assertEquals(task.getTaskState().getWorkingState(), WorkUnitState.WorkingState.SUCCESSFUL);
    // "a" is converted before any update arrives (presumably with the extractor's initial schema, "schema");
    // "b" follows the first update and therefore carries "Schema1".
    Assert.assertEquals(converter.records, Lists.newArrayList("a:schema", "b:Schema1"));
    Assert.assertEquals(converter.messages, Lists.newArrayList(new MetadataUpdateControlMessage<>(GlobalMetadata.<String>builder().schema("Schema1").build()), new MetadataUpdateControlMessage<>(GlobalMetadata.<String>builder().schema("Schema2").build())));
    Assert.assertEquals(writer.records, Lists.newArrayList("a:schema", "b:Schema1"));
    Assert.assertEquals(writer.messages, Lists.newArrayList(new MetadataUpdateControlMessage<>(GlobalMetadata.<String>builder().schema("Schema1").build()), new MetadataUpdateControlMessage<>(GlobalMetadata.<String>builder().schema("Schema2").build())));
}
Also used : MetadataUpdateControlMessage(org.apache.gobblin.stream.MetadataUpdateControlMessage) Test(org.testng.annotations.Test)

Example 3 with MetadataUpdateControlMessage

use of org.apache.gobblin.stream.MetadataUpdateControlMessage in project incubator-gobblin by apache.

the class TestRecordStream method testMetadataUpdateWithStreamProcessors.

/**
 * Tests metadata update control messages with the converter configured in the list of
 * {@link RecordStreamProcessor}s instead of the converter list.
 *
 * <p>The extractor emits record "a", a metadata update to "Schema1", record "b" and a metadata update to
 * "Schema2". The assertions expect "b" to be converted with "Schema1", and both the converter and the
 * writer to have seen the two metadata update messages in order.
 *
 * @throws Exception if setting up, running or committing the task fails
 */
@Test
public void testMetadataUpdateWithStreamProcessors() throws Exception {
    // All control messages are built with the diamond operator; the last one used to be a raw type.
    MyExtractor extractor = new MyExtractor(new StreamEntity[] { new RecordEnvelope<>("a"), new MetadataUpdateControlMessage<>(GlobalMetadata.<String>builder().schema("Schema1").build()), new RecordEnvelope<>("b"), new MetadataUpdateControlMessage<>(GlobalMetadata.<String>builder().schema("Schema2").build()) });
    SchemaAppendConverter converter = new SchemaAppendConverter();
    MyDataWriter writer = new MyDataWriter();
    // Empty third argument, converter passed in the fourth; emptyList() avoids the raw EMPTY_LIST constant.
    Task task = setupTask(extractor, writer, Collections.emptyList(), Lists.newArrayList(converter));
    task.run();
    task.commit();
    Assert.assertEquals(task.getTaskState().getWorkingState(), WorkUnitState.WorkingState.SUCCESSFUL);
    // "a" is converted before any update arrives (presumably with the extractor's initial schema, "schema");
    // "b" follows the first update and therefore carries "Schema1".
    Assert.assertEquals(converter.records, Lists.newArrayList("a:schema", "b:Schema1"));
    Assert.assertEquals(converter.messages, Lists.newArrayList(new MetadataUpdateControlMessage<>(GlobalMetadata.<String>builder().schema("Schema1").build()), new MetadataUpdateControlMessage<>(GlobalMetadata.<String>builder().schema("Schema2").build())));
    Assert.assertEquals(writer.records, Lists.newArrayList("a:schema", "b:Schema1"));
    Assert.assertEquals(writer.messages, Lists.newArrayList(new MetadataUpdateControlMessage<>(GlobalMetadata.<String>builder().schema("Schema1").build()), new MetadataUpdateControlMessage<>(GlobalMetadata.<String>builder().schema("Schema2").build())));
}
Also used : MetadataUpdateControlMessage(org.apache.gobblin.stream.MetadataUpdateControlMessage) Test(org.testng.annotations.Test)

Aggregations

MetadataUpdateControlMessage (org.apache.gobblin.stream.MetadataUpdateControlMessage): 3, Test (org.testng.annotations.Test): 2, Iterator (java.util.Iterator): 1, ControlMessage (org.apache.gobblin.stream.ControlMessage): 1, RecordEnvelope (org.apache.gobblin.stream.RecordEnvelope): 1, StreamEntity (org.apache.gobblin.stream.StreamEntity): 1