use of org.apache.beam.runners.dataflow.util.CloudObject in project beam by apache.
the class UserParDoFnFactory method create.
@Override
public ParDoFn create(
    PipelineOptions options, CloudObject cloudUserFn, @Nullable List<SideInputInfo> sideInputInfos,
    TupleTag<?> mainOutputTag, Map<TupleTag<?>, Integer> outputTupleTagsToReceiverIndices,
    DataflowExecutionContext<?> executionContext, DataflowOperationContext operationContext)
    throws Exception {
  DoFnInstanceManager instanceManager =
      fnCache.get(
          operationContext.nameContext().systemName(),
          () -> DoFnInstanceManagers.cloningPool(doFnExtractor.getDoFnInfo(cloudUserFn), options));
  DoFnInfo<?, ?> doFnInfo = instanceManager.peek();
  DataflowExecutionContext.DataflowStepContext stepContext =
      executionContext.getStepContext(operationContext);
  Iterable<PCollectionView<?>> sideInputViews = doFnInfo.getSideInputViews();
  SideInputReader sideInputReader =
      executionContext.getSideInputReader(sideInputInfos, sideInputViews, operationContext);
  if (doFnInfo.getDoFn() instanceof BatchStatefulParDoOverrides.BatchStatefulDoFn) {
    // HACK: BatchStatefulDoFn is a class from DataflowRunner's overrides
    // that just instructs the worker to execute it differently. This will
    // be replaced by metadata in the Runner API payload.
    BatchStatefulParDoOverrides.BatchStatefulDoFn fn =
        (BatchStatefulParDoOverrides.BatchStatefulDoFn) doFnInfo.getDoFn();
    DoFn underlyingFn = fn.getUnderlyingDoFn();
    return new BatchModeUngroupingParDoFn(
        (BatchModeExecutionContext.StepContext) stepContext,
        new SimpleParDoFn(
            options, DoFnInstanceManagers.singleInstance(doFnInfo.withFn(underlyingFn)),
            sideInputReader, doFnInfo.getMainOutput(), outputTupleTagsToReceiverIndices,
            stepContext, operationContext, doFnInfo.getDoFnSchemaInformation(),
            doFnInfo.getSideInputMapping(), runnerFactory));
  } else if (doFnInfo.getDoFn() instanceof StreamingPCollectionViewWriterFn) {
    // HACK: StreamingPCollectionViewWriterFn is a class from
    // DataflowPipelineTranslator. Using the class as an indicator is a migration path
    // to simply having an indicator string.
    checkArgument(
        stepContext instanceof StreamingModeExecutionContext.StreamingModeStepContext,
        "stepContext must be a StreamingModeStepContext to use StreamingPCollectionViewWriterFn");
    DataflowRunner.StreamingPCollectionViewWriterFn<Object> writerFn =
        (StreamingPCollectionViewWriterFn<Object>) doFnInfo.getDoFn();
    return new StreamingPCollectionViewWriterParDoFn(
        (StreamingModeExecutionContext.StreamingModeStepContext) stepContext,
        writerFn.getView().getTagInternal(), writerFn.getDataCoder(),
        (Coder<BoundedWindow>) doFnInfo.getWindowingStrategy().getWindowFn().windowCoder());
  } else {
    return new SimpleParDoFn(
        options, instanceManager, sideInputReader, doFnInfo.getMainOutput(),
        outputTupleTagsToReceiverIndices, stepContext, operationContext,
        doFnInfo.getDoFnSchemaInformation(), doFnInfo.getSideInputMapping(), runnerFactory);
  }
}
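A minimal sketch of the caching pattern used above, assuming fnCache is a Guava-style Cache keyed by the step's system name (its declaration is not shown in this snippet). Repeated create() calls for the same step then reuse one pool of deserialized DoFn instances instead of re-extracting the DoFnInfo; the step name "s2" is hypothetical:
  // Assumed declaration, for illustration only.
  Cache<String, DoFnInstanceManager> fnCache = CacheBuilder.newBuilder().build();
  // Same lookup-or-load call as in create() above.
  DoFnInstanceManager manager =
      fnCache.get(
          "s2",
          () -> DoFnInstanceManagers.cloningPool(doFnExtractor.getDoFnInfo(cloudUserFn), options));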
use of org.apache.beam.runners.dataflow.util.CloudObject in project beam by apache.
the class IntrinsicMapTaskExecutorFactory method createWriteOperation.
OperationNode createWriteOperation(
    ParallelInstructionNode node, PipelineOptions options, SinkFactory sinkFactory,
    DataflowExecutionContext executionContext, DataflowOperationContext context)
    throws Exception {
  ParallelInstruction instruction = node.getParallelInstruction();
  WriteInstruction write = instruction.getWrite();
  Coder<?> coder =
      CloudObjects.coderFromCloudObject(CloudObject.fromSpec(write.getSink().getCodec()));
  CloudObject cloudSink = CloudObject.fromSpec(write.getSink().getSpec());
  Sink<?> sink = sinkFactory.create(cloudSink, coder, options, executionContext, context);
  return OperationNode.create(WriteOperation.create(sink, EMPTY_OUTPUT_RECEIVER_ARRAY, context));
}
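For context, a rough sketch of the kind of map that CloudObject.fromSpec consumes: a JSON-style dictionary whose "@type" entry names the implementation that the SinkFactory (or the coder registry) dispatches on. The type name and property below are made up for illustration:
  Map<String, Object> rawSpec = new HashMap<>();
  rawSpec.put("@type", "ExampleSink");  // hypothetical registered sink type
  Structs.addString(rawSpec, "filename", "gs://example-bucket/output-00000");  // hypothetical property
  CloudObject cloudSink = CloudObject.fromSpec(rawSpec);
  String typeName = cloudSink.getClassName();  // "ExampleSink"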
use of org.apache.beam.runners.dataflow.util.CloudObject in project beam by apache.
the class IsmSinkFactory method create.
@Override
public Sink<?> create(
    CloudObject spec, @Nullable Coder<?> coder, @Nullable PipelineOptions options,
    @Nullable DataflowExecutionContext executionContext, DataflowOperationContext operationContext)
    throws Exception {
  options = checkArgumentNotNull(options);
  coder = checkArgumentNotNull(coder);
  // The validity of this coder is checked in detail by the typed create, below.
  @SuppressWarnings("unchecked")
  Coder<WindowedValue<IsmRecord<Object>>> typedCoder = (Coder<WindowedValue<IsmRecord<Object>>>) coder;
  String filename = getString(spec, WorkerPropertyNames.FILENAME);
  checkArgument(typedCoder instanceof WindowedValueCoder,
      "%s only supports using %s but got %s.", IsmSink.class, WindowedValueCoder.class, typedCoder);
  WindowedValueCoder<IsmRecord<Object>> windowedCoder = (WindowedValueCoder<IsmRecord<Object>>) typedCoder;
  checkArgument(windowedCoder.getValueCoder() instanceof IsmRecordCoder,
      "%s only supports using %s but got %s.",
      IsmSink.class, IsmRecordCoder.class, windowedCoder.getValueCoder());
  @SuppressWarnings("unchecked")
  IsmRecordCoder<Object> ismCoder = (IsmRecordCoder<Object>) windowedCoder.getValueCoder();
  long bloomFilterSizeLimitBytes =
      Math.max(MIN_BLOOM_FILTER_SIZE_BYTES,
          DoubleMath.roundToLong(
              BLOOM_FILTER_SIZE_LIMIT_MULTIPLIER
                  * options.as(DataflowWorkerHarnessOptions.class).getWorkerCacheMb()
                  * 1024 * 1024, // Note the conversion from MiB to bytes.
              RoundingMode.DOWN));
  return new IsmSink<>(
      FileSystems.matchNewResource(filename, false), ismCoder, bloomFilterSizeLimitBytes);
}
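A hypothetical spec this factory could consume; only WorkerPropertyNames.FILENAME is taken from the code above, the type name and path are assumed for illustration. The coder passed alongside it must be a WindowedValueCoder wrapping an IsmRecordCoder, otherwise the checkArgument calls reject it:
  CloudObject ismSinkSpec = CloudObject.forClassName("IsmSink");  // assumed registered name
  Structs.addString(ismSinkSpec, WorkerPropertyNames.FILENAME, "gs://example-bucket/shard-00000.ism");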
use of org.apache.beam.runners.dataflow.util.CloudObject in project beam by apache.
the class ReaderRegistry method create.
/**
* Creates a {@link NativeReader} from a Dataflow API {@link Source} specification, using the
* {@link Coder} contained in the {@link Source} specification.
*/
public NativeReader<?> create(
    Source cloudSource, @Nullable PipelineOptions options,
    @Nullable DataflowExecutionContext executionContext, DataflowOperationContext operationContext)
    throws Exception {
  cloudSource = CloudSourceUtils.flattenBaseSpecs(cloudSource);
  CloudObject sourceSpec = CloudObject.fromSpec(cloudSource.getSpec());
  Coder<?> coder = null;
  if (cloudSource.getCodec() != null) {
    coder = CloudObjects.coderFromCloudObject(CloudObject.fromSpec(cloudSource.getCodec()));
  }
  return create(sourceSpec, coder, options, executionContext, operationContext);
}
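A rough usage sketch, assuming the Dataflow API model Source exposes setSpec/setCodec taking JSON-style maps; readerRegistry, readerSpecMap, and coderSpecMap are hypothetical names:
  Source source = new Source();
  source.setSpec(readerSpecMap);   // "@type" names a registered reader implementation
  source.setCodec(coderSpecMap);   // optional; when absent the coder stays null above
  NativeReader<?> reader =
      readerRegistry.create(source, options, executionContext, operationContext);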
use of org.apache.beam.runners.dataflow.util.CloudObject in project beam by apache.
the class PairWithConstantKeyDoFnFactory method create.
@Override
public ParDoFn create(
    PipelineOptions options, CloudObject cloudUserFn, List<SideInputInfo> sideInputInfos,
    TupleTag<?> mainOutputTag, Map<TupleTag<?>, Integer> outputTupleTagsToReceiverIndices,
    DataflowExecutionContext<?> executionContext, DataflowOperationContext operationContext)
    throws Exception {
  Coder<?> coder = CloudObjects.coderFromCloudObject(
      CloudObject.fromSpec(Structs.getObject(cloudUserFn, PropertyNames.ENCODING)));
  Object key = CoderUtils.decodeFromByteArray(
      coder, Structs.getBytes(cloudUserFn, WorkerPropertyNames.ENCODED_KEY));
  return new PairWithConstantKeyParDoFn(key);
}
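A small sketch of the key round trip this factory relies on: the constant key is serialized with the coder described under PropertyNames.ENCODING and stored as bytes under WorkerPropertyNames.ENCODED_KEY. The coder and key value here are assumptions for illustration:
  Coder<String> keyCoder = StringUtf8Coder.of();
  byte[] encodedKey = CoderUtils.encodeToByteArray(keyCoder, "example-key");
  String roundTripped = CoderUtils.decodeFromByteArray(keyCoder, encodedKey);  // "example-key"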