use of org.apache.gobblin.converter.Converter in project incubator-gobblin by apache.
the class ConverterInitializerFactory method newInstance.
/**
 * Builds the {@link ConverterInitializer} for one branch from the converter classes configured on {@code state}.
 *
 * <p>The class list is read from the branch-specific variant of {@link ConfigurationKeys#CONVERTER_CLASSES_KEY}.
 * Each listed class is instantiated reflectively through its no-argument constructor and asked for its
 * initializer; the initializers are collected in list order into a {@link MultiConverterInitializer}.
 *
 * @param state state holding the converter configuration; must not be null
 * @param workUnits work units handed to each converter's {@code getInitializer}
 * @param branches total number of branches
 * @param branchId id of the branch whose converters are being initialized
 * @return {@link NoopConverterInitializer#INSTANCE} when the split class list is empty, otherwise a
 *         {@link MultiConverterInitializer} holding one initializer per listed converter class
 * @throws RuntimeException if a listed class cannot be loaded or instantiated, or is not a {@link Converter}
 */
private static ConverterInitializer newInstance(State state, WorkUnitStream workUnits, int branches, int branchId) {
  Preconditions.checkNotNull(state);
  String converterClassesParam =
      ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.CONVERTER_CLASSES_KEY, branches, branchId);
  List<String> converterClasses = COMMA_SPLITTER.splitToList(state.getProp(converterClassesParam, ""));
  if (converterClasses.isEmpty()) {
    return NoopConverterInitializer.INSTANCE;
  }

  List<ConverterInitializer> cis = Lists.newArrayList();
  for (String converterClass : converterClasses) {
    Converter<?, ?, ?, ?> converter;
    try {
      // asSubclass rejects a non-Converter class before it is instantiated; getDeclaredConstructor().newInstance()
      // replaces the deprecated Class.newInstance(), which let checked constructor exceptions escape undeclared.
      converter = Class.forName(converterClass).asSubclass(Converter.class).getDeclaredConstructor().newInstance();
    } catch (ReflectiveOperationException | ClassCastException e) {
      throw new RuntimeException("Failed to instantiate converter class " + converterClass, e);
    }
    cis.add(converter.getInitializer(state, workUnits, branches, branchId));
  }
  return new MultiConverterInitializer(cis);
}
use of org.apache.gobblin.converter.Converter in project incubator-gobblin by apache.
the class TaskContext method getRecordStreamProcessors.
/**
 * Get the list of post-fork {@link RecordStreamProcessor}s for a given branch.
 *
 * <p>The processor class names are read from the branch-specific variant of
 * {@link ConfigurationKeys#RECORD_STREAM_PROCESSOR_CLASSES_KEY} as a comma-separated list (entries are trimmed and
 * empty entries are skipped). Each class is instantiated through its no-argument constructor. Processors that are
 * {@link Converter}s are wrapped in an {@link InstrumentedConverterDecorator} that is initialized with
 * {@code forkTaskState}; all other processors are returned as instantiated.
 *
 * <p>When {@code index} is non-negative and the key is configured, {@link ConfigurationKeys#FORK_BRANCH_ID_KEY} is
 * set on {@code forkTaskState} as a side effect.
 *
 * @param index branch index
 * @param forkTaskState a {@link TaskState} instance specific to the fork identified by the branch index
 * @return list (possibly empty) of {@link RecordStreamProcessor}s
 * @throws RuntimeException if a configured class cannot be loaded or instantiated
 * @throws ClassCastException if a configured class is not a {@link RecordStreamProcessor}
 */
@SuppressWarnings("unchecked")
public List<RecordStreamProcessor<?, ?, ?, ?>> getRecordStreamProcessors(int index, TaskState forkTaskState) {
  String streamProcessorClassKey =
      ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.RECORD_STREAM_PROCESSOR_CLASSES_KEY, index);

  if (!this.taskState.contains(streamProcessorClassKey)) {
    return Collections.emptyList();
  }

  if (index >= 0) {
    forkTaskState.setProp(ConfigurationKeys.FORK_BRANCH_ID_KEY, index);
  }

  List<RecordStreamProcessor<?, ?, ?, ?>> streamProcessors = Lists.newArrayList();
  for (String streamProcessorClass : Splitter.on(",").omitEmptyStrings().trimResults()
      .split(this.taskState.getProp(streamProcessorClassKey))) {
    try {
      // getDeclaredConstructor().newInstance() replaces the deprecated Class.newInstance(), which let checked
      // exceptions thrown by the constructor escape undeclared; asSubclass performs the type check up front.
      RecordStreamProcessor<?, ?, ?, ?> streamProcessor = Class.forName(streamProcessorClass)
          .asSubclass(RecordStreamProcessor.class).getDeclaredConstructor().newInstance();
      if (streamProcessor instanceof Converter) {
        InstrumentedConverterDecorator instrumentedConverter =
            new InstrumentedConverterDecorator<>((Converter) streamProcessor);
        instrumentedConverter.init(forkTaskState);
        streamProcessors.add(instrumentedConverter);
      } else {
        streamProcessors.add(streamProcessor);
      }
    } catch (ReflectiveOperationException roe) {
      throw new RuntimeException("Failed to instantiate record stream processor " + streamProcessorClass, roe);
    }
  }

  return streamProcessors;
}
use of org.apache.gobblin.converter.Converter in project incubator-gobblin by apache.
the class StreamModelTaskRunner method run.
/**
 * Wires the extractor's record stream through the configured processors and forks, then starts the flow and
 * waits for every fork to report done.
 *
 * <p>Steps, in order: publish the extractor stream as a connectable stream; hook record-extract notification;
 * for streaming tasks, start watermark tracking and attach an acknowledgable watermark to each record; apply
 * either the configured record stream processors or the converter(s); apply the row checker; fork the stream and
 * create one {@link Fork} per non-null forked stream; finally connect the stream and await fork completion.
 *
 * @throws TimeoutException if the await on fork completion (configured with a 60-minute {@code maxWait}) returns
 *         false
 * @throws Exception any other failure raised while building or running the stream
 */
protected void run() throws Exception {
  // Get the fork operator. By default IdentityForkOperator is used with a single branch.
  ForkOperator forkOperator = closer.register(this.taskContext.getForkOperator());

  RecordStreamWithMetadata<?, ?> stream = this.extractor.recordStream(this.shutdownRequested);
  // Publish as a connectable stream so that all downstream wiring is set up before connect() is called below.
  ConnectableFlowable connectableStream = stream.getRecordStream().publish();
  stream = stream.withRecordStream(connectableStream);

  // Notify the task of every extracted record; the record itself passes through unchanged.
  stream = stream.mapRecords(r -> {
    this.task.onRecordExtract();
    return r;
  });

  if (this.task.isStreamingTask()) {
    // Start watermark manager and tracker
    if (this.watermarkTracker.isPresent()) {
      this.watermarkTracker.get().start();
    }
    // NOTE(review): unlike the tracker, the manager and storage are dereferenced without an isPresent() check —
    // presumably they are always present for streaming tasks; confirm against where these fields are set.
    this.watermarkManager.get().start();

    ((StreamingExtractor) this.taskContext.getRawSourceExtractor()).start(this.watermarkStorage.get());

    // Attach an acknowledgable watermark to each record as a callback, tracking it when a tracker is present.
    stream = stream.mapRecords(r -> {
      AcknowledgableWatermark ackableWatermark = new AcknowledgableWatermark(r.getWatermark());
      if (watermarkTracker.isPresent()) {
        watermarkTracker.get().track(ackableWatermark);
      }
      r.addCallBack(ackableWatermark);
      return r;
    });
  }

  // Use the recordStreamProcessor list if it is configured. This list can contain any RecordStreamProcessor
  // type. Otherwise fall back to the converter.
  if (!this.recordStreamProcessors.isEmpty()) {
    for (RecordStreamProcessor streamProcessor : this.recordStreamProcessors) {
      stream = streamProcessor.processStream(stream, this.taskState);
    }
  } else {
    if (this.converter instanceof MultiConverter) {
      // if multiconverter, unpack it
      for (Converter cverter : ((MultiConverter) this.converter).getConverters()) {
        stream = cverter.processStream(stream, this.taskState);
      }
    } else {
      stream = this.converter.processStream(stream, this.taskState);
    }
  }
  stream = this.rowChecker.processStream(stream, this.taskState);

  Forker.ForkedStream<?, ?> forkedStreams = new Forker().forkStream(stream, forkOperator, this.taskState);

  // Forks are asynchronous unless the task asks for synchronous single-branch execution and there is at most
  // one forked stream.
  boolean isForkAsync = !this.task.areSingleBranchTasksSynchronous(this.taskContext) || forkedStreams.getForkedStreams().size() > 1;
  int bufferSize = this.taskState.getPropAsInt(ConfigurationKeys.FORK_RECORD_QUEUE_CAPACITY_KEY, ConfigurationKeys.DEFAULT_FORK_RECORD_QUEUE_CAPACITY);

  for (int fidx = 0; fidx < forkedStreams.getForkedStreams().size(); fidx++) {
    RecordStreamWithMetadata<?, ?> forkedStream = forkedStreams.getForkedStreams().get(fidx);
    // Null entries are skipped: no Fork is created for that index.
    if (forkedStream != null) {
      if (isForkAsync) {
        // Observe the forked stream on the fork executor, using the configured record queue capacity as buffer size.
        forkedStream = forkedStream.mapStream(f -> f.observeOn(Schedulers.from(this.taskExecutor.getForkExecutor()), false, bufferSize));
      }
      Fork fork = new Fork(this.taskContext, forkedStream.getGlobalMetadata().getSchema(), forkedStreams.getForkedStreams().size(), fidx, this.taskMode);
      fork.consumeRecordStream(forkedStream);
      // The fork is registered with an already-completed future; completion is detected via Fork::isDone below.
      this.forks.put(Optional.of(fork), Optional.of(Futures.immediateFuture(null)));
      this.task.configureStreamingFork(fork, this.watermarkingStrategy);
    }
  }

  // All subscribers are wired up; start the flow of records.
  connectableStream.connect();

  if (!ExponentialBackoff.awaitCondition().callable(() -> this.forks.keySet().stream().map(Optional::get).allMatch(Fork::isDone)).initialDelay(1000L).maxDelay(1000L).maxWait(TimeUnit.MINUTES.toMillis(60)).await()) {
    throw new TimeoutException("Forks did not finish within specified timeout.");
  }
}
use of org.apache.gobblin.converter.Converter in project incubator-gobblin by apache.
the class TaskContext method getConverters.
/**
 * Get the list of post-fork {@link Converter}s for a given branch.
 *
 * <p>The converter class names are read from the branch-specific variant of
 * {@link ConfigurationKeys#CONVERTER_CLASSES_KEY} as a comma-separated list (entries are trimmed and empty entries
 * are skipped). Each class is instantiated through its no-argument constructor and wrapped in an
 * {@link InstrumentedConverterDecorator} that is initialized with {@code forkTaskState}.
 *
 * <p>When {@code index} is non-negative and the key is configured, {@link ConfigurationKeys#FORK_BRANCH_ID_KEY} is
 * set on {@code forkTaskState} as a side effect.
 *
 * @param index branch index
 * @param forkTaskState a {@link TaskState} instance specific to the fork identified by the branch index
 * @return list (possibly empty) of {@link Converter}s
 * @throws RuntimeException if a configured class cannot be loaded or instantiated
 * @throws ClassCastException if a configured class is not a {@link Converter}
 */
@SuppressWarnings("unchecked")
public List<Converter<?, ?, ?, ?>> getConverters(int index, TaskState forkTaskState) {
  String converterClassKey =
      ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.CONVERTER_CLASSES_KEY, index);

  if (!this.taskState.contains(converterClassKey)) {
    return Collections.emptyList();
  }

  if (index >= 0) {
    forkTaskState.setProp(ConfigurationKeys.FORK_BRANCH_ID_KEY, index);
  }

  List<Converter<?, ?, ?, ?>> converters = Lists.newArrayList();
  for (String converterClass : Splitter.on(",").omitEmptyStrings().trimResults()
      .split(this.taskState.getProp(converterClassKey))) {
    try {
      // getDeclaredConstructor().newInstance() replaces the deprecated Class.newInstance(), which let checked
      // exceptions thrown by the constructor escape undeclared; asSubclass performs the type check up front.
      Converter<?, ?, ?, ?> converter =
          Class.forName(converterClass).asSubclass(Converter.class).getDeclaredConstructor().newInstance();
      InstrumentedConverterDecorator instrumentedConverter = new InstrumentedConverterDecorator<>(converter);
      instrumentedConverter.init(forkTaskState);
      converters.add(instrumentedConverter);
    } catch (ReflectiveOperationException roe) {
      throw new RuntimeException("Failed to instantiate converter class " + converterClass, roe);
    }
  }

  return converters;
}
use of org.apache.gobblin.converter.Converter in project incubator-gobblin by apache.
the class MultiConverter method convertSchema.
/**
 * Passes the input schema through each converter of this chain in iteration order, feeding every converter the
 * schema produced by the one before it.
 *
 * <p>The schema produced by each converter is also stored in {@code convertedSchemaMap}, keyed by that converter.
 *
 * @param inputSchema schema handed to the first converter
 * @param workUnit work unit state passed to every converter
 * @return the schema produced by the last converter, or {@code inputSchema} itself when there are no converters
 * @throws SchemaConversionException if any converter fails to convert its input schema
 */
@Override
public Object convertSchema(Object inputSchema, WorkUnitState workUnit) throws SchemaConversionException {
  Object latestSchema = inputSchema;
  for (Converter stage : this.converters) {
    // Record what this stage produced, then hand it on as the next stage's input.
    Object stageOutput = stage.convertSchema(latestSchema, workUnit);
    this.convertedSchemaMap.put(stage, stageOutput);
    latestSchema = stageOutput;
  }
  return latestSchema;
}
Aggregations