Search in sources :

Example 1 with Converter

use of org.apache.gobblin.converter.Converter in project incubator-gobblin by apache.

the class ConverterInitializerFactory method newInstance.

/**
 * Builds the {@link ConverterInitializer} for one branch from the converter classes named in {@code state}.
 *
 * <p>The class names are read from the branch-specific form of {@link ConfigurationKeys#CONVERTER_CLASSES_KEY}.
 * Each class is checked to be a {@link Converter} before it is instantiated through its no-argument
 * constructor, and is then asked for its own initializer.
 *
 * @param state state holding the converter configuration; must not be null
 * @param workUnits work units passed through to each converter's {@code getInitializer}
 * @param branches number of branches, used to derive the branch-specific property name
 * @param branchId id of the branch, used to derive the branch-specific property name
 * @return {@link NoopConverterInitializer#INSTANCE} when the configured value splits to no class names,
 *         otherwise a {@link MultiConverterInitializer} holding one initializer per class, in configured order
 * @throws RuntimeException if a configured class cannot be loaded, is not a {@link Converter},
 *         or cannot be instantiated
 */
private static ConverterInitializer newInstance(State state, WorkUnitStream workUnits, int branches, int branchId) {
    Preconditions.checkNotNull(state);
    String converterClassesParam = ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.CONVERTER_CLASSES_KEY, branches, branchId);
    List<String> converterClasses = COMMA_SPLITTER.splitToList(state.getProp(converterClassesParam, ""));
    if (converterClasses.isEmpty()) {
        return NoopConverterInitializer.INSTANCE;
    }
    List<ConverterInitializer> cis = Lists.newArrayList();
    for (String converterClass : converterClasses) {
        Converter<?, ?, ?, ?> converter;
        try {
            // asSubclass rejects non-Converter classes before any constructor runs; going through the
            // Constructor replaces the deprecated Class.newInstance(), which rethrows constructor
            // exceptions without wrapping them.
            converter = Class.forName(converterClass).asSubclass(Converter.class).getDeclaredConstructor().newInstance();
        } catch (ReflectiveOperationException | ClassCastException e) {
            // Name the offending class so a misconfigured job can be diagnosed from the message alone.
            throw new RuntimeException("Failed to instantiate converter class " + converterClass, e);
        }
        cis.add(converter.getInitializer(state, workUnits, branches, branchId));
    }
    return new MultiConverterInitializer(cis);
}
Also used : Converter(org.apache.gobblin.converter.Converter)

Example 2 with Converter

use of org.apache.gobblin.converter.Converter in project incubator-gobblin by apache.

the class TaskContext method getRecordStreamProcessors.

/**
 * Instantiates the post-fork {@link RecordStreamProcessor}s configured for a branch.
 *
 * <p>The class names come from the branch-specific form of
 * {@link ConfigurationKeys#RECORD_STREAM_PROCESSOR_CLASSES_KEY} in this context's task state. Any processor
 * that is a {@link Converter} is wrapped in an {@link InstrumentedConverterDecorator} and initialized with
 * {@code forkTaskState}; other processors are returned as created. When {@code index} is non-negative it is
 * also recorded on {@code forkTaskState} under {@link ConfigurationKeys#FORK_BRANCH_ID_KEY}.
 *
 * @param index branch index
 * @param forkTaskState a {@link TaskState} instance specific to the fork identified by the branch index
 * @return the processors in configured order; an empty list when the property is not set
 * @throws RuntimeException if a configured class cannot be found, instantiated or accessed
 */
@SuppressWarnings("unchecked")
public List<RecordStreamProcessor<?, ?, ?, ?>> getRecordStreamProcessors(int index, TaskState forkTaskState) {
    String processorsKey = ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.RECORD_STREAM_PROCESSOR_CLASSES_KEY, index);
    if (!this.taskState.contains(processorsKey)) {
        return Collections.emptyList();
    }
    if (index >= 0) {
        forkTaskState.setProp(ConfigurationKeys.FORK_BRANCH_ID_KEY, index);
    }
    List<RecordStreamProcessor<?, ?, ?, ?>> result = Lists.newArrayList();
    Iterable<String> classNames = Splitter.on(",").omitEmptyStrings().trimResults().split(this.taskState.getProp(processorsKey));
    for (String className : classNames) {
        try {
            Object instance = Class.forName(className).newInstance();
            RecordStreamProcessor<?, ?, ?, ?> processor = RecordStreamProcessor.class.cast(instance);
            if (!(processor instanceof Converter)) {
                result.add(processor);
                continue;
            }
            // Converters get the instrumented wrapper, initialized against the fork's own state.
            InstrumentedConverterDecorator decorated = new InstrumentedConverterDecorator<>((Converter) processor);
            decorated.init(forkTaskState);
            result.add(decorated);
        } catch (ClassNotFoundException | InstantiationException | IllegalAccessException e) {
            throw new RuntimeException(e);
        }
    }
    return result;
}
Also used : Converter(org.apache.gobblin.converter.Converter) RecordStreamProcessor(org.apache.gobblin.records.RecordStreamProcessor) InstrumentedConverterDecorator(org.apache.gobblin.instrumented.converter.InstrumentedConverterDecorator)

Example 3 with Converter

use of org.apache.gobblin.converter.Converter in project incubator-gobblin by apache.

the class StreamModelTaskRunner method run.

/**
 * Wires the extractor's record stream through the configured processors, the row checker and the fork
 * operator, starts the stream, and waits for every fork to finish.
 *
 * <p>Steps, in order: publish the extractor stream as a connectable stream; for streaming tasks, start
 * the watermark components and attach an {@link AcknowledgableWatermark} to each record; apply either the
 * configured record stream processors or, if none are configured, the converter(s); apply the row
 * checker; fork the stream and hand each non-null branch to a new {@link Fork}; connect the stream; then
 * wait until all registered forks report done.
 *
 * @throws TimeoutException if the wait for the forks returns without all of them being done
 *         (the wait is configured with a 60 minute {@code maxWait})
 * @throws Exception any other failure raised while building or running the stream
 */
protected void run() throws Exception {
    // Get the fork operator. By default IdentityForkOperator is used with a single branch.
    ForkOperator forkOperator = closer.register(this.taskContext.getForkOperator());
    RecordStreamWithMetadata<?, ?> stream = this.extractor.recordStream(this.shutdownRequested);
    // Publish as a connectable stream: records only start flowing at connect() below, after every
    // downstream stage and fork has been attached.
    ConnectableFlowable connectableStream = stream.getRecordStream().publish();
    stream = stream.withRecordStream(connectableStream);
    // Notify the task of every extracted record; the record itself passes through unchanged.
    stream = stream.mapRecords(r -> {
        this.task.onRecordExtract();
        return r;
    });
    if (this.task.isStreamingTask()) {
        // Start watermark manager and tracker
        if (this.watermarkTracker.isPresent()) {
            this.watermarkTracker.get().start();
        }
        // NOTE(review): unlike watermarkTracker, watermarkManager and watermarkStorage are dereferenced
        // without an isPresent() check - presumably always present for streaming tasks; confirm.
        this.watermarkManager.get().start();
        ((StreamingExtractor) this.taskContext.getRawSourceExtractor()).start(this.watermarkStorage.get());
        // Attach an acknowledgable watermark to each record as a callback, tracking it when a tracker exists.
        stream = stream.mapRecords(r -> {
            AcknowledgableWatermark ackableWatermark = new AcknowledgableWatermark(r.getWatermark());
            if (watermarkTracker.isPresent()) {
                watermarkTracker.get().track(ackableWatermark);
            }
            r.addCallBack(ackableWatermark);
            return r;
        });
    }
    // Use the recordStreamProcessor list if it is configured. The list may hold any kind of
    // RecordStreamProcessor. Otherwise fall back to the converter.
    if (!this.recordStreamProcessors.isEmpty()) {
        for (RecordStreamProcessor streamProcessor : this.recordStreamProcessors) {
            stream = streamProcessor.processStream(stream, this.taskState);
        }
    } else {
        if (this.converter instanceof MultiConverter) {
            // if multiconverter, unpack it
            for (Converter cverter : ((MultiConverter) this.converter).getConverters()) {
                stream = cverter.processStream(stream, this.taskState);
            }
        } else {
            stream = this.converter.processStream(stream, this.taskState);
        }
    }
    stream = this.rowChecker.processStream(stream, this.taskState);
    Forker.ForkedStream<?, ?> forkedStreams = new Forker().forkStream(stream, forkOperator, this.taskState);
    // Branches are consumed on the fork executor when there is more than one branch, or when the task
    // does not ask for synchronous single-branch execution.
    boolean isForkAsync = !this.task.areSingleBranchTasksSynchronous(this.taskContext) || forkedStreams.getForkedStreams().size() > 1;
    int bufferSize = this.taskState.getPropAsInt(ConfigurationKeys.FORK_RECORD_QUEUE_CAPACITY_KEY, ConfigurationKeys.DEFAULT_FORK_RECORD_QUEUE_CAPACITY);
    for (int fidx = 0; fidx < forkedStreams.getForkedStreams().size(); fidx++) {
        RecordStreamWithMetadata<?, ?> forkedStream = forkedStreams.getForkedStreams().get(fidx);
        // Null entries in the forked stream list are skipped; no Fork is created for them.
        if (forkedStream != null) {
            if (isForkAsync) {
                forkedStream = forkedStream.mapStream(f -> f.observeOn(Schedulers.from(this.taskExecutor.getForkExecutor()), false, bufferSize));
            }
            Fork fork = new Fork(this.taskContext, forkedStream.getGlobalMetadata().getSchema(), forkedStreams.getForkedStreams().size(), fidx, this.taskMode);
            fork.consumeRecordStream(forkedStream);
            // Register the fork together with an already-completed future.
            this.forks.put(Optional.of(fork), Optional.of(Futures.immediateFuture(null)));
            this.task.configureStreamingFork(fork, this.watermarkingStrategy);
        }
    }
    // Everything is attached; start the flow of records.
    connectableStream.connect();
    // Wait until every registered fork reports done; a false result from await() is treated as a timeout.
    if (!ExponentialBackoff.awaitCondition().callable(() -> this.forks.keySet().stream().map(Optional::get).allMatch(Fork::isDone)).initialDelay(1000L).maxDelay(1000L).maxWait(TimeUnit.MINUTES.toMillis(60)).await()) {
        throw new TimeoutException("Forks did not finish within specified timeout.");
    }
}
Also used : StreamingExtractor(org.apache.gobblin.source.extractor.StreamingExtractor) WatermarkManager(org.apache.gobblin.writer.WatermarkManager) ForkOperator(org.apache.gobblin.fork.ForkOperator) ConnectableFlowable(io.reactivex.flowables.ConnectableFlowable) TimeoutException(java.util.concurrent.TimeoutException) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) AcknowledgableWatermark(org.apache.gobblin.writer.AcknowledgableWatermark) Fork(org.apache.gobblin.runtime.fork.Fork) Future(java.util.concurrent.Future) Closer(com.google.common.io.Closer) Optional(com.google.common.base.Optional) Map(java.util.Map) Schedulers(io.reactivex.schedulers.Schedulers) RecordStreamProcessor(org.apache.gobblin.records.RecordStreamProcessor) Forker(org.apache.gobblin.fork.Forker) WatermarkStorage(org.apache.gobblin.writer.WatermarkStorage) ExponentialBackoff(org.apache.gobblin.util.ExponentialBackoff) Converter(org.apache.gobblin.converter.Converter) ConfigurationKeys(org.apache.gobblin.configuration.ConfigurationKeys) TimeUnit(java.util.concurrent.TimeUnit) Extractor(org.apache.gobblin.source.extractor.Extractor) List(java.util.List) Futures(com.google.common.util.concurrent.Futures) FineGrainedWatermarkTracker(org.apache.gobblin.writer.FineGrainedWatermarkTracker) RowLevelPolicyChecker(org.apache.gobblin.qualitychecker.row.RowLevelPolicyChecker) RecordStreamWithMetadata(org.apache.gobblin.records.RecordStreamWithMetadata) AllArgsConstructor(lombok.AllArgsConstructor) Fork(org.apache.gobblin.runtime.fork.Fork) StreamingExtractor(org.apache.gobblin.source.extractor.StreamingExtractor) ConnectableFlowable(io.reactivex.flowables.ConnectableFlowable) Forker(org.apache.gobblin.fork.Forker) RecordStreamProcessor(org.apache.gobblin.records.RecordStreamProcessor) AcknowledgableWatermark(org.apache.gobblin.writer.AcknowledgableWatermark) ForkOperator(org.apache.gobblin.fork.ForkOperator) Converter(org.apache.gobblin.converter.Converter) 
TimeoutException(java.util.concurrent.TimeoutException)

Example 4 with Converter

use of org.apache.gobblin.converter.Converter in project incubator-gobblin by apache.

the class TaskContext method getConverters.

/**
 * Instantiates the post-fork {@link Converter}s configured for a branch.
 *
 * <p>The class names come from the branch-specific form of {@link ConfigurationKeys#CONVERTER_CLASSES_KEY}
 * in this context's task state. Every converter is wrapped in an {@link InstrumentedConverterDecorator}
 * and initialized with {@code forkTaskState}. When {@code index} is non-negative it is also recorded on
 * {@code forkTaskState} under {@link ConfigurationKeys#FORK_BRANCH_ID_KEY}.
 *
 * @param index branch index
 * @param forkTaskState a {@link TaskState} instance specific to the fork identified by the branch index
 * @return the decorated converters in configured order; an empty list when the property is not set
 * @throws RuntimeException if a configured class cannot be found, instantiated or accessed
 */
@SuppressWarnings("unchecked")
public List<Converter<?, ?, ?, ?>> getConverters(int index, TaskState forkTaskState) {
    String convertersKey = ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.CONVERTER_CLASSES_KEY, index);
    if (!this.taskState.contains(convertersKey)) {
        return Collections.emptyList();
    }
    if (index >= 0) {
        forkTaskState.setProp(ConfigurationKeys.FORK_BRANCH_ID_KEY, index);
    }
    List<Converter<?, ?, ?, ?>> result = Lists.newArrayList();
    Iterable<String> classNames = Splitter.on(",").omitEmptyStrings().trimResults().split(this.taskState.getProp(convertersKey));
    for (String className : classNames) {
        try {
            Object instance = Class.forName(className).newInstance();
            Converter<?, ?, ?, ?> rawConverter = Converter.class.cast(instance);
            // Hand out the instrumented wrapper, initialized against the fork's own state.
            InstrumentedConverterDecorator decorated = new InstrumentedConverterDecorator<>(rawConverter);
            decorated.init(forkTaskState);
            result.add(decorated);
        } catch (ClassNotFoundException | InstantiationException | IllegalAccessException e) {
            throw new RuntimeException(e);
        }
    }
    return result;
}
Also used : Converter(org.apache.gobblin.converter.Converter) InstrumentedConverterDecorator(org.apache.gobblin.instrumented.converter.InstrumentedConverterDecorator)

Example 5 with Converter

use of org.apache.gobblin.converter.Converter in project incubator-gobblin by apache.

the class MultiConverter method convertSchema.

/**
 * Passes the schema through every converter in order, feeding each converter the previous one's output.
 *
 * <p>The output schema of each converter is stored in {@code convertedSchemaMap}, keyed by that converter.
 *
 * @param inputSchema schema handed to the first converter
 * @param workUnit work unit state passed to every converter
 * @return the schema produced by the last converter, or {@code inputSchema} when there are no converters
 * @throws SchemaConversionException if any converter fails to convert the schema
 */
@Override
public Object convertSchema(Object inputSchema, WorkUnitState workUnit) throws SchemaConversionException {
    Object currentSchema = inputSchema;
    for (Converter stage : this.converters) {
        Object stageOutput = stage.convertSchema(currentSchema, workUnit);
        // Keep each converter's output schema so it can be looked up by converter later.
        this.convertedSchemaMap.put(stage, stageOutput);
        currentSchema = stageOutput;
    }
    return currentSchema;
}
Also used : Converter(org.apache.gobblin.converter.Converter) IdentityConverter(org.apache.gobblin.converter.IdentityConverter)

Aggregations

Converter (org.apache.gobblin.converter.Converter): 6, RecordStreamProcessor (org.apache.gobblin.records.RecordStreamProcessor): 3, Optional (com.google.common.base.Optional): 2, Closer (com.google.common.io.Closer): 2, List (java.util.List): 2, ConfigurationKeys (org.apache.gobblin.configuration.ConfigurationKeys): 2, InstrumentedConverterDecorator (org.apache.gobblin.instrumented.converter.InstrumentedConverterDecorator): 2, RowLevelPolicyChecker (org.apache.gobblin.qualitychecker.row.RowLevelPolicyChecker): 2, RecordStreamWithMetadata (org.apache.gobblin.records.RecordStreamWithMetadata): 2, Preconditions (com.google.common.base.Preconditions): 1, Throwables (com.google.common.base.Throwables): 1, ImmutableList (com.google.common.collect.ImmutableList): 1, Futures (com.google.common.util.concurrent.Futures): 1, SuppressWarnings (edu.umd.cs.findbugs.annotations.SuppressWarnings): 1, ConnectableFlowable (io.reactivex.flowables.ConnectableFlowable): 1, Schedulers (io.reactivex.schedulers.Schedulers): 1, Closeable (java.io.Closeable): 1, IOException (java.io.IOException): 1, Map (java.util.Map): 1, Future (java.util.concurrent.Future): 1