Use of org.apache.beam.sdk.transforms.reflect.DoFnSignature in project beam by apache.
In class PTransformMatchers, method splittableProcessKeyedBounded.
/**
 * A {@link PTransformMatcher} that matches a {@link SplittableParDo.ProcessKeyedElements} whose
 * {@link DoFn} is splittable, as signified by {@link ProcessElementMethod#isSplittable()}, and
 * whose signature reports {@link IsBounded#BOUNDED} per element.
 *
 * <p>Any other kind of transform is never matched.
 */
public static PTransformMatcher splittableProcessKeyedBounded() {
  return new PTransformMatcher() {
    @Override
    public boolean matches(AppliedPTransform<?, ?, ?> application) {
      PTransform<?, ?> candidate = application.getTransform();
      // Only the keyed splittable-process primitive is of interest here.
      if (!(candidate instanceof SplittableParDo.ProcessKeyedElements)) {
        return false;
      }
      DoFn<?, ?> wrappedFn = ((SplittableParDo.ProcessKeyedElements) candidate).getFn();
      DoFnSignature fnSignature = DoFnSignatures.signatureForDoFn(wrappedFn);
      if (!fnSignature.processElement().isSplittable()) {
        return false;
      }
      return fnSignature.isBoundedPerElement() == IsBounded.BOUNDED;
    }

    @Override
    public String toString() {
      return MoreObjects.toStringHelper("SplittableProcessKeyedBoundedMatcher").toString();
    }
  };
}
Use of org.apache.beam.sdk.transforms.reflect.DoFnSignature in project beam by apache.
In class DoFnOperator, method earlyBindStateIfNeeded.
/**
 * Binds every state cell declared on {@code doFn}, plus any system state tags exposed by a
 * {@link StatefulDoFnRunner}, against a {@link FlinkStateInternals.EarlyBinder} built from the
 * keyed state backend.
 *
 * <p>Does nothing when either {@code keyCoder} or {@code doFn} is {@code null}.
 *
 * @throws IllegalArgumentException if a declared state field cannot be read from {@code doFn}
 * @throws IllegalAccessException if a declared state field is not accessible reflectively
 */
private void earlyBindStateIfNeeded() throws IllegalArgumentException, IllegalAccessException {
  if (keyCoder == null || doFn == null) {
    return;
  }
  DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass());
  FlinkStateInternals.EarlyBinder earlyBinder =
      new FlinkStateInternals.EarlyBinder(getKeyedStateBackend(), serializedOptions);
  for (DoFnSignature.StateDeclaration declaration : signature.stateDeclarations().values()) {
    // Each declaration carries its own reflective field, so read the StateSpec from it directly.
    StateSpec<?> spec = (StateSpec<?>) declaration.field().get(doFn);
    spec.bind(declaration.id(), earlyBinder);
  }
  if (doFnRunner instanceof StatefulDoFnRunner) {
    ((StatefulDoFnRunner<InputT, OutputT, BoundedWindow>) doFnRunner)
        .getSystemStateTags()
        .forEach(tag -> tag.getSpec().bind(tag.getId(), earlyBinder));
  }
}
Use of org.apache.beam.sdk.transforms.reflect.DoFnSignature in project beam by apache.
In class BatchStatefulParDoOverrides, method verifyFnIsStateful.
/**
 * Fails with an {@link IllegalStateException} (via {@code checkState}) unless the signature of
 * {@code fn} reports that it uses state or timers.
 */
private static <InputT, OutputT> void verifyFnIsStateful(DoFn<InputT, OutputT> fn) {
  final DoFnSignature fnSignature = DoFnSignatures.getSignature(fn.getClass());
  final boolean usesStateOrTimers = fnSignature.usesState() || fnSignature.usesTimers();

  // Using this override without state or timers would still be correct, just pointless.
  // It is internal-only, so misuse is a programming error and crashing is acceptable.
  checkState(
      usesStateOrTimers,
      "%s used for %s that does not use state or timers.",
      BatchStatefulParDoOverrides.class.getSimpleName(),
      ParDo.class.getSimpleName());
}
Use of org.apache.beam.sdk.transforms.reflect.DoFnSignature in project beam by apache.
In class SamzaDoFnRunners, method create.
/**
 * Builds the {@link DoFnRunner} used to execute a Java {@link DoFn}.
 *
 * <p>A simple runner is always created and, when {@code pipelineOptions.getEnableMetrics()} is
 * set, wrapped with {@link DoFnRunnerWithMetrics}. If {@code StateUtils.isStateful(doFn)} holds,
 * state and timers come from a {@link KeyedInternals} and the result is additionally wrapped in
 * a stateful runner and a {@link DoFnRunnerWithKeyedInternals}; otherwise state and timer
 * internals are obtained from the factories for a {@code null} key and no further wrapping is
 * applied.
 *
 * @return the fully wrapped runner for {@code doFn}
 */
public static <InT, FnOutT> DoFnRunner<InT, FnOutT> create(SamzaPipelineOptions pipelineOptions, DoFn<InT, FnOutT> doFn, WindowingStrategy<?, ?> windowingStrategy, String transformFullName, String transformId, Context context, TupleTag<FnOutT> mainOutputTag, SideInputHandler sideInputHandler, SamzaTimerInternalsFactory<?> timerInternalsFactory, Coder<?> keyCoder, DoFnRunners.OutputManager outputManager, Coder<InT> inputCoder, List<TupleTag<?>> sideOutputTags, Map<TupleTag<?>, Coder<?>> outputCoders, DoFnSchemaInformation doFnSchemaInformation, Map<String, PCollectionView<?>> sideInputMapping) {
  final DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass());
  final SamzaStoreStateInternals.Factory<?> stateInternalsFactory =
      SamzaStoreStateInternals.createStateInternalsFactory(
          transformId, keyCoder, context.getTaskContext(), pipelineOptions, signature);
  final SamzaExecutionContext executionContext =
      (SamzaExecutionContext) context.getApplicationContainerContext();

  // Stateful fns get keyed internals; everything else uses the null-key internals.
  final boolean stateful = StateUtils.isStateful(doFn);
  final KeyedInternals keyedInternals =
      stateful ? new KeyedInternals(stateInternalsFactory, timerInternalsFactory) : null;
  final StateInternals stateInternals =
      stateful
          ? keyedInternals.stateInternals()
          : stateInternalsFactory.stateInternalsForKey(null);
  final TimerInternals timerInternals =
      stateful
          ? keyedInternals.timerInternals()
          : timerInternalsFactory.timerInternalsForKey(null);

  final StepContext stepContext = createStepContext(stateInternals, timerInternals);
  final DoFnRunner<InT, FnOutT> simpleRunner =
      DoFnRunners.simpleRunner(
          pipelineOptions,
          doFn,
          sideInputHandler,
          outputManager,
          mainOutputTag,
          sideOutputTags,
          stepContext,
          inputCoder,
          outputCoders,
          windowingStrategy,
          doFnSchemaInformation,
          sideInputMapping);

  final DoFnRunner<InT, FnOutT> meteredRunner;
  if (pipelineOptions.getEnableMetrics()) {
    meteredRunner =
        DoFnRunnerWithMetrics.wrap(
            simpleRunner, executionContext.getMetricsContainer(), transformFullName);
  } else {
    meteredRunner = simpleRunner;
  }

  if (keyedInternals == null) {
    return meteredRunner;
  }

  final DoFnRunner<InT, FnOutT> statefulRunner =
      DoFnRunners.defaultStatefulDoFnRunner(
          doFn,
          inputCoder,
          meteredRunner,
          stepContext,
          windowingStrategy,
          new StatefulDoFnRunner.TimeInternalsCleanupTimer(timerInternals, windowingStrategy),
          createStateCleaner(doFn, windowingStrategy, keyedInternals.stateInternals()));
  return new DoFnRunnerWithKeyedInternals<>(statefulRunner, keyedInternals);
}
Use of org.apache.beam.sdk.transforms.reflect.DoFnSignature in project beam by apache.
In class TransformTranslator, method parDo.
/**
 * Returns the {@link TransformEvaluator} that translates a {@link ParDo.MultiOutput} into RDD
 * operations.
 *
 * <p>The evaluator rejects splittable fns, runs the fn through a {@link MultiDoFnFunction}
 * (via {@code statefulParDoTransform} when the signature declares state or timers, otherwise via
 * {@code mapPartitionsToPair}), persists the tagged result when there is more than one output,
 * and registers one {@link BoundedDataset} per output tag.
 */
private static <InputT, OutputT> TransformEvaluator<ParDo.MultiOutput<InputT, OutputT>> parDo() {
  return new TransformEvaluator<ParDo.MultiOutput<InputT, OutputT>>() {
    @Override
    @SuppressWarnings("unchecked")
    public void evaluate(ParDo.MultiOutput<InputT, OutputT> transform, EvaluationContext context) {
      final String stepName = context.getCurrentTransform().getFullName();
      final DoFn<InputT, OutputT> doFn = transform.getFn();
      checkState(
          !DoFnSignatures.signatureForDoFn(doFn).processElement().isSplittable(),
          "Not expected to directly translate splittable DoFn, should have been overridden: %s",
          doFn);

      final JavaRDD<WindowedValue<InputT>> inputRdd =
          ((BoundedDataset<InputT>) context.borrowDataset(transform)).getRDD();
      final WindowingStrategy<?, ?> windowingStrategy =
          context.getInput(transform).getWindowingStrategy();
      final MetricsContainerStepMapAccumulator metricsAccum = MetricsAccumulator.getInstance();
      final Coder<InputT> inputCoder = (Coder<InputT>) context.getInput(transform).getCoder();
      final Map<TupleTag<?>, Coder<?>> outputCoders = context.getOutputCoders();

      final DoFnSignature signature = DoFnSignatures.getSignature(transform.getFn().getClass());
      // A fn counts as stateful as soon as it declares any state cell or any timer.
      final boolean stateful =
          !signature.stateDeclarations().isEmpty() || !signature.timerDeclarations().isEmpty();

      final DoFnSchemaInformation doFnSchemaInformation =
          ParDoTranslation.getSchemaInformation(context.getCurrentTransform());
      final Map<String, PCollectionView<?>> sideInputMapping =
          ParDoTranslation.getSideInputMapping(context.getCurrentTransform());

      final MultiDoFnFunction<InputT, OutputT> multiDoFnFunction =
          new MultiDoFnFunction<>(
              metricsAccum,
              stepName,
              doFn,
              context.getSerializableOptions(),
              transform.getMainOutputTag(),
              transform.getAdditionalOutputTags().getAll(),
              inputCoder,
              outputCoders,
              TranslationUtils.getSideInputs(transform.getSideInputs().values(), context),
              windowingStrategy,
              stateful,
              doFnSchemaInformation,
              sideInputMapping);

      JavaPairRDD<TupleTag<?>, WindowedValue<?>> tagged;
      if (!stateful) {
        tagged = inputRdd.mapPartitionsToPair(multiDoFnFunction);
      } else {
        // Based on the fact that the signature is stateful, DoFnSignatures ensures
        // that it is also keyed
        tagged =
            statefulParDoTransform(
                (KvCoder) context.getInput(transform).getCoder(),
                windowingStrategy.getWindowFn().windowCoder(),
                (JavaRDD) inputRdd,
                getPartitioner(context),
                (MultiDoFnFunction) multiDoFnFunction,
                signature.processElement().requiresTimeSortedInput());
      }

      final Map<TupleTag<?>, PCollection<?>> outputs = context.getOutputs(transform);
      if (outputs.size() > 1) {
        final StorageLevel level = StorageLevel.fromString(context.storageLevel());
        if (canAvoidRddSerialization(level)) {
          // if it is memory only reduce the overhead of moving to bytes
          tagged = tagged.persist(level);
        } else {
          // Caching can cause Serialization, we need to code to bytes
          // more details in https://issues.apache.org/jira/browse/BEAM-2669
          final Map<TupleTag<?>, Coder<WindowedValue<?>>> coderMap =
              TranslationUtils.getTupleTagCoders(outputs);
          tagged =
              tagged
                  .mapToPair(TranslationUtils.getTupleTagEncodeFunction(coderMap))
                  .persist(level)
                  .mapToPair(TranslationUtils.getTupleTagDecodeFunction(coderMap));
        }
      }

      for (Map.Entry<TupleTag<?>, PCollection<?>> entry : outputs.entrySet()) {
        final JavaPairRDD<TupleTag<?>, WindowedValue<?>> forTag =
            tagged.filter(new TranslationUtils.TupleTagFilter(entry.getKey()));
        // Object is the best we can do since different outputs can have different tags
        final JavaRDD<WindowedValue<Object>> values =
            (JavaRDD<WindowedValue<Object>>) (JavaRDD<?>) forTag.values();
        context.putDataset(entry.getValue(), new BoundedDataset<>(values));
      }
    }

    @Override
    public String toNativeString() {
      return "mapPartitions(new <fn>())";
    }
  };
}
Aggregations