Search in sources :

Example 1 with ControlMessage

use of org.apache.gobblin.stream.ControlMessage in project incubator-gobblin by apache.

The method processStream of the class Converter.

/**
 * Runs this converter over every entity of the given {@link RecordStreamWithMetadata}.
 *
 * <p>The converter is first initialized with {@code workUnitState}, and the output global metadata is
 * built from the input metadata plus the converted input schema. Each stream entity is then handled
 * according to its type:
 * <ul>
 *   <li>{@link ControlMessage}: passed to the message handler and forwarded downstream. A
 *       {@link MetadataUpdateControlMessage} additionally causes the output metadata to be rebuilt
 *       from the new input metadata, and a new update message carrying it is forwarded instead.</li>
 *   <li>{@link RecordEnvelope}: the record is converted against the current output schema. Zero
 *       results ack the input and emit nothing, one result reuses the envelope, and several results
 *       are emitted as child records of a fork record builder.</li>
 *   <li>Anything else: rejected with an {@link UnsupportedOperationException}.</li>
 * </ul>
 * {@code close()} is registered to run when the output stream completes.
 *
 * @param inputStream the incoming records together with their global metadata
 * @param workUnitState state handed to {@code init}, {@code convertSchema} and {@code convertRecord}
 * @return the input stream re-wrapped around the converted records and the output metadata
 * @throws SchemaConversionException declared for schema conversion failures
 */
@Override
public RecordStreamWithMetadata<DO, SO> processStream(RecordStreamWithMetadata<DI, SI> inputStream, WorkUnitState workUnitState) throws SchemaConversionException {
    init(workUnitState);

    this.outputGlobalMetadata = GlobalMetadata.<SI, SO>builderWithInput(
            inputStream.getGlobalMetadata(),
            Optional.fromNullable(convertSchema(inputStream.getGlobalMetadata().getSchema(), workUnitState)))
        .build();

    // NOTE(review): the trailing 1 is flatMap's second argument — presumably max concurrency, so that
    // entities are handled one at a time; confirm against the RxJava version in use.
    Flowable<StreamEntity<DO>> outputStream = inputStream.getRecordStream().flatMap(entity -> {
        if (entity instanceof ControlMessage) {
            ControlMessage forwarded = (ControlMessage) entity;
            getMessageHandler().handleMessage((ControlMessage) entity);

            if (entity instanceof MetadataUpdateControlMessage) {
                // A metadata update carries a new input schema: rebuild the output metadata from it
                // and forward a fresh update message holding the rebuilt metadata.
                this.outputGlobalMetadata = GlobalMetadata.<SI, SO>builderWithInput(
                        ((MetadataUpdateControlMessage) entity).getGlobalMetadata(),
                        Optional.fromNullable(convertSchema(
                            (SI) ((MetadataUpdateControlMessage) entity).getGlobalMetadata().getSchema(),
                            workUnitState)))
                    .build();
                forwarded = new MetadataUpdateControlMessage<SO, DO>(this.outputGlobalMetadata);
            }
            return Flowable.just(((ControlMessage<DO>) forwarded));
        }

        if (!(entity instanceof RecordEnvelope)) {
            // Neither a control message nor a record envelope.
            throw new UnsupportedOperationException();
        }

        RecordEnvelope<DI> envelope = (RecordEnvelope<DI>) entity;
        Iterator<DO> converted = convertRecord(this.outputGlobalMetadata.getSchema(), envelope.getRecord(), workUnitState).iterator();

        if (!converted.hasNext()) {
            // Nothing was produced: acknowledge the input here and emit an empty flowable.
            entity.ack();
            return Flowable.empty();
        }

        DO head = converted.next();
        if (converted.hasNext()) {
            // Several output records: emit each as a child of a fork record builder, which is
            // closed once all of them have been emitted.
            RecordEnvelope<DI>.ForkRecordBuilder<DO> forkBuilder = envelope.forkRecordBuilder();
            return Flowable.just(head)
                .concatWith(Flowable.fromIterable(() -> converted))
                .map(forkBuilder::childRecord)
                .doOnComplete(forkBuilder::close);
        }

        // Exactly one output record: RecordEnvelope.withRecord is the more efficient path.
        return Flowable.just(envelope.withRecord(head));
    }, 1);

    outputStream = outputStream.doOnComplete(this::close);
    return inputStream.withRecordStream(outputStream, this.outputGlobalMetadata);
}
Also used : RecordEnvelope(org.apache.gobblin.stream.RecordEnvelope) StreamEntity(org.apache.gobblin.stream.StreamEntity) MetadataUpdateControlMessage(org.apache.gobblin.stream.MetadataUpdateControlMessage) Iterator(java.util.Iterator) MetadataUpdateControlMessage(org.apache.gobblin.stream.MetadataUpdateControlMessage) ControlMessage(org.apache.gobblin.stream.ControlMessage)

Example 2 with ControlMessage

use of org.apache.gobblin.stream.ControlMessage in project incubator-gobblin by apache.

The method consumeRecordStream of the class Fork.

/**
 * Wires the given record stream through this fork and subscribes to it.
 *
 * <p>The stream is passed through the converter (each member converter in turn when the converter
 * is a {@link MultiConverter}) and then through the row-level policy checker. Callbacks are then
 * attached, in order, for: each record ({@code onEachRecord}), subscription ({@code onStart}),
 * completion and cancellation (both move the fork state from RUNNING to SUCCEEDED), errors (move
 * the fork state from RUNNING to FAILED and log), and a final {@code cleanup}.
 *
 * <p>The subscriber writes {@link RecordEnvelope}s to the writer, and hands {@link ControlMessage}s
 * to the writer's message handler before acking them. It logs stream errors and, on completion,
 * closes the writer if one is present.
 *
 * @param stream the record stream, with metadata, that this fork should consume
 * @throws RecordStreamProcessor.StreamProcessingException declared for failures while building the
 *         processing chain
 */
@SuppressWarnings(value = "RV_RETURN_VALUE_IGNORED", justification = "We actually don't care about the return value of subscribe.")
public void consumeRecordStream(RecordStreamWithMetadata<D, S> stream) throws RecordStreamProcessor.StreamProcessingException {
    if (!(this.converter instanceof MultiConverter)) {
        stream = this.converter.processStream(stream, this.taskState);
    } else {
        // A multi-converter is unpacked so that each member converter processes the stream in turn.
        for (Converter member : ((MultiConverter) this.converter).getConverters()) {
            stream = member.processStream(stream, this.taskState);
        }
    }

    stream = this.rowLevelPolicyChecker.processStream(stream, this.taskState);

    // Per-record hook; the entity itself is passed through unchanged.
    stream = stream.mapStream(flow -> flow.map(entity -> {
        onEachRecord();
        return entity;
    }));

    // Lifecycle hooks.
    stream = stream.mapStream(flow -> flow.doOnSubscribe(subscription -> onStart()));
    stream = stream.mapStream(flow -> flow.doOnComplete(
        () -> verifyAndSetForkState(ForkState.RUNNING, ForkState.SUCCEEDED)));
    stream = stream.mapStream(flow -> flow.doOnCancel(
        () -> verifyAndSetForkState(ForkState.RUNNING, ForkState.SUCCEEDED)));
    stream = stream.mapStream(flow -> flow.doOnError(failure -> {
        verifyAndSetForkState(ForkState.RUNNING, ForkState.FAILED);
        this.logger.error(String.format("Fork %d of task %s failed to process data records", this.index, this.taskId), failure);
    }));
    stream = stream.mapStream(flow -> flow.doFinally(this::cleanup));

    stream.getRecordStream().subscribe(
        entity -> {
            if (entity instanceof RecordEnvelope) {
                this.writer.get().writeEnvelope((RecordEnvelope) entity);
                return;
            }
            if (entity instanceof ControlMessage) {
                // Control messages go to the writer's handler and are acked here.
                this.writer.get().getMessageHandler().handleMessage((ControlMessage) entity);
                entity.ack();
            }
        },
        failure -> logger.error("Failed to process record.", failure),
        () -> {
            if (this.writer.isPresent()) {
                this.writer.get().close();
            }
        });
}
Also used : ForkOperatorUtils(org.apache.gobblin.util.ForkOperatorUtils) Tag(org.apache.gobblin.metrics.Tag) SpeculativeAttemptAwareConstruct(org.apache.gobblin.commit.SpeculativeAttemptAwareConstruct) GobblinMetrics(org.apache.gobblin.metrics.GobblinMetrics) ExecutionModel(org.apache.gobblin.runtime.ExecutionModel) LoggerFactory(org.slf4j.LoggerFactory) ControlMessage(org.apache.gobblin.stream.ControlMessage) BoundedBlockingRecordQueue(org.apache.gobblin.runtime.BoundedBlockingRecordQueue) TaskPublisher(org.apache.gobblin.publisher.TaskPublisher) PartitionedDataWriter(org.apache.gobblin.writer.PartitionedDataWriter) AtomicReference(java.util.concurrent.atomic.AtomicReference) Task(org.apache.gobblin.runtime.Task) TaskState(org.apache.gobblin.runtime.TaskState) ImmutableList(com.google.common.collect.ImmutableList) Closer(com.google.common.io.Closer) DataWriterBuilder(org.apache.gobblin.writer.DataWriterBuilder) Optional(com.google.common.base.Optional) SuppressWarnings(edu.umd.cs.findbugs.annotations.SuppressWarnings) RecordStreamProcessor(org.apache.gobblin.records.RecordStreamProcessor) TaskLevelPolicyCheckResults(org.apache.gobblin.qualitychecker.task.TaskLevelPolicyCheckResults) Logger(org.slf4j.Logger) TaskContext(org.apache.gobblin.runtime.TaskContext) Converter(org.apache.gobblin.converter.Converter) Instrumented(org.apache.gobblin.instrumented.Instrumented) State(org.apache.gobblin.configuration.State) RowLevelPolicyCheckResults(org.apache.gobblin.qualitychecker.row.RowLevelPolicyCheckResults) TaskExecutor(org.apache.gobblin.runtime.TaskExecutor) Throwables(com.google.common.base.Throwables) IOException(java.io.IOException) FinalState(org.apache.gobblin.util.FinalState) ConfigurationKeys(org.apache.gobblin.configuration.ConfigurationKeys) DataWriter(org.apache.gobblin.writer.DataWriter) List(java.util.List) RecordEnvelope(org.apache.gobblin.stream.RecordEnvelope) WatermarkAwareWriter(org.apache.gobblin.writer.WatermarkAwareWriter) 
DataWriterWrapperBuilder(org.apache.gobblin.writer.DataWriterWrapperBuilder) Destination(org.apache.gobblin.writer.Destination) Closeable(java.io.Closeable) RowLevelPolicyChecker(org.apache.gobblin.qualitychecker.row.RowLevelPolicyChecker) RecordStreamWithMetadata(org.apache.gobblin.records.RecordStreamWithMetadata) Preconditions(com.google.common.base.Preconditions) RecordStreamConsumer(org.apache.gobblin.records.RecordStreamConsumer) DataConversionException(org.apache.gobblin.converter.DataConversionException) TaskMetrics(org.apache.gobblin.runtime.util.TaskMetrics) Constructs(org.apache.gobblin.Constructs) MultiConverter(org.apache.gobblin.runtime.MultiConverter) ConstructState(org.apache.gobblin.state.ConstructState) MultiConverter(org.apache.gobblin.runtime.MultiConverter) RecordEnvelope(org.apache.gobblin.stream.RecordEnvelope) Converter(org.apache.gobblin.converter.Converter) MultiConverter(org.apache.gobblin.runtime.MultiConverter) ControlMessage(org.apache.gobblin.stream.ControlMessage) SuppressWarnings(edu.umd.cs.findbugs.annotations.SuppressWarnings)

Aggregations

ControlMessage (org.apache.gobblin.stream.ControlMessage)2 RecordEnvelope (org.apache.gobblin.stream.RecordEnvelope)2 Optional (com.google.common.base.Optional)1 Preconditions (com.google.common.base.Preconditions)1 Throwables (com.google.common.base.Throwables)1 ImmutableList (com.google.common.collect.ImmutableList)1 Closer (com.google.common.io.Closer)1 SuppressWarnings (edu.umd.cs.findbugs.annotations.SuppressWarnings)1 Closeable (java.io.Closeable)1 IOException (java.io.IOException)1 Iterator (java.util.Iterator)1 List (java.util.List)1 AtomicReference (java.util.concurrent.atomic.AtomicReference)1 Constructs (org.apache.gobblin.Constructs)1 SpeculativeAttemptAwareConstruct (org.apache.gobblin.commit.SpeculativeAttemptAwareConstruct)1 ConfigurationKeys (org.apache.gobblin.configuration.ConfigurationKeys)1 State (org.apache.gobblin.configuration.State)1 Converter (org.apache.gobblin.converter.Converter)1 DataConversionException (org.apache.gobblin.converter.DataConversionException)1 Instrumented (org.apache.gobblin.instrumented.Instrumented)1