Example usage of org.apache.beam.sdk.coders.Coder in the Apache Beam project,
taken from the method createWrappingDoFnRunner of the class DoFnOperator.
// allow overriding this, for example SplittableDoFnOperator will not create a
// stateful DoFn runner because ProcessFn, which is used for executing a Splittable DoFn
// doesn't play by the normal DoFn rules and WindowDoFnOperator uses LateDataDroppingDoFnRunner
// Overridable on purpose: SplittableDoFnOperator must NOT create a stateful DoFn runner,
// because ProcessFn (used for executing a Splittable DoFn) doesn't play by the normal DoFn
// rules, and WindowDoFnOperator uses LateDataDroppingDoFnRunner instead.
/**
 * Wraps {@code wrappedRunner} in a stateful runner that handles state/timer cleanup when a
 * key coder is present; otherwise returns the plain {@code doFnRunner} unchanged.
 *
 * @param wrappedRunner the runner to decorate
 * @param stepContext step context handed to the stateful runner
 * @return the (possibly) wrapped runner
 */
protected DoFnRunner<InputT, OutputT> createWrappingDoFnRunner(
    DoFnRunner<InputT, OutputT> wrappedRunner, StepContext stepContext) {
  if (keyCoder != null) {
    StatefulDoFnRunner.CleanupTimer<InputT> cleanupTimer =
        new StatefulDoFnRunner.TimeInternalsCleanupTimer<InputT>(
            timerInternals, windowingStrategy) {
          @Override
          public void setForWindow(InputT input, BoundedWindow window) {
            // Skip setting a cleanup timer for the global window as these timers
            // lead to potentially unbounded state growth in the runner, depending on key
            // cardinality. Cleanup for global window will be performed upon arrival of the
            // final watermark.
            // In the case of OnWindowExpiration, we still set the timer.
            if (!window.equals(GlobalWindow.INSTANCE) || usesOnWindowExpiration) {
              super.setForWindow(input, window);
            }
          }
        };

    // We don't know the concrete window type here, so the window coder is necessarily raw.
    @SuppressWarnings({"unchecked", "rawtypes"})
    Coder windowCoder = windowingStrategy.getWindowFn().windowCoder();

    @SuppressWarnings({"unchecked", "rawtypes"})
    StatefulDoFnRunner.StateCleaner<?> stateCleaner =
        new StatefulDoFnRunner.StateInternalsStateCleaner<>(
            doFn, keyedStateInternals, windowCoder);

    return DoFnRunners.defaultStatefulDoFnRunner(
        doFn,
        getInputCoder(),
        wrappedRunner,
        stepContext,
        windowingStrategy,
        cleanupTimer,
        stateCleaner,
        // NOTE(review): the final boolean presumably requests time-sorted input or similar
        // runner-specific behavior — confirm against DoFnRunners.defaultStatefulDoFnRunner.
        true);
  } else {
    // No key coder => not a stateful ParDo; the unwrapped runner field is used as-is.
    return doFnRunner;
  }
}
Example usage of org.apache.beam.sdk.coders.Coder in the Apache Beam project,
taken from the test method testStreamingGroupIntoBatchesWithShardedKeyTranslationUnifiedWorker
of the class DataflowPipelineTranslatorTest.
/**
 * Verifies that a streaming GroupIntoBatches.withShardedKey() translated for the unified
 * worker (runner v2) advertises keyed/shardable/key-preserving state on its Dataflow step,
 * and that the pipeline proto carries both the sharded-key composite transform and the
 * sharded-key coder.
 */
@Test
public void testStreamingGroupIntoBatchesWithShardedKeyTranslationUnifiedWorker() throws Exception {
  List<String> experiments =
      new ArrayList<>(
          ImmutableList.of(
              GcpOptions.STREAMING_ENGINE_EXPERIMENT,
              GcpOptions.WINDMILL_SERVICE_EXPERIMENT,
              "use_runner_v2"));
  JobSpecification jobSpec = runStreamingGroupIntoBatchesAndGetJobSpec(true, experiments);

  // The sharded-state step is the final step of the generated job.
  List<Step> steps = jobSpec.getJob().getSteps();
  Map<String, Object> stepProperties = steps.get(steps.size() - 1).getProperties();
  assertTrue(stepProperties.containsKey(PropertyNames.USES_KEYED_STATE));
  assertTrue(stepProperties.containsKey(PropertyNames.ALLOWS_SHARDABLE_STATE));
  assertEquals("true", getString(stepProperties, PropertyNames.ALLOWS_SHARDABLE_STATE));
  assertTrue(stepProperties.containsKey(PropertyNames.PRESERVES_KEYS));
  assertEquals("true", getString(stepProperties, PropertyNames.PRESERVES_KEYS));

  // Also check that the runner proto is correctly populated: the sharded-key composite
  // must contain a plain GroupIntoBatches subtransform.
  Map<String, RunnerApi.PTransform> transformMap =
      jobSpec.getPipelineProto().getComponents().getTransformsMap();
  boolean sawGroupIntoBatches = false;
  for (RunnerApi.PTransform composite : transformMap.values()) {
    String urn = composite.getSpec().getUrn();
    if (!urn.equals(PTransformTranslation.GROUP_INTO_BATCHES_WITH_SHARDED_KEY_URN)) {
      continue;
    }
    for (String subtransformId : composite.getSubtransformsList()) {
      RunnerApi.PTransform subtransform = transformMap.get(subtransformId);
      if (subtransform.getSpec().getUrn().equals(PTransformTranslation.GROUP_INTO_BATCHES_URN)) {
        sawGroupIntoBatches = true;
      }
    }
  }
  assertTrue(sawGroupIntoBatches);

  // The sharded-key coder must be registered among the pipeline's coders.
  boolean sawShardedKeyCoder =
      jobSpec.getPipelineProto().getComponents().getCodersMap().values().stream()
          .anyMatch(c -> c.getSpec().getUrn().equals(ModelCoders.SHARDED_KEY_CODER_URN));
  assertTrue(sawShardedKeyCoder);
}
Example usage of org.apache.beam.sdk.coders.Coder in the Apache Beam project,
taken from the method addComponents of the class CloudObjectTranslators.
/**
 * Serializes each component coder to a {@link CloudObject} and attaches the resulting list
 * to {@code base} under {@link PropertyNames#COMPONENT_ENCODINGS}.
 *
 * <p>If {@code components} is empty, {@code base} is returned untouched (no empty list is
 * added).
 *
 * @param base the cloud object to augment (mutated in place and returned)
 * @param components the component coders to serialize
 * @param sdkComponents component registry used during serialization
 * @return {@code base}, for call chaining
 */
private static CloudObject addComponents(
    CloudObject base, List<? extends Coder<?>> components, SdkComponents sdkComponents) {
  if (!components.isEmpty()) {
    List<CloudObject> cloudComponents = new ArrayList<>(components.size());
    // Use the wildcard-typed element rather than a raw Coder to avoid an unchecked raw-type
    // usage; asCloudObject accepts any Coder<?>.
    for (Coder<?> component : components) {
      cloudComponents.add(CloudObjects.asCloudObject(component, sdkComponents));
    }
    Structs.addList(base, PropertyNames.COMPONENT_ENCODINGS, cloudComponents);
  }
  return base;
}
Example usage of org.apache.beam.sdk.coders.Coder in the Apache Beam project,
taken from the test method byteArrayCountShouldSucceed of the class DirectRunnerTest.
/**
 * Verifies that Count.perElement works on byte[] elements: integers are encoded to byte
 * arrays, counted per distinct encoding, then decoded back and compared against the expected
 * per-value counts.
 */
@Test
public void byteArrayCountShouldSucceed() {
  Pipeline p = getPipeline();
  // Encode each int with VarIntCoder; a CoderException here is a test bug, so fail loudly.
  SerializableFunction<Integer, byte[]> getBytes = input -> {
    try {
      return CoderUtils.encodeToByteArray(VarIntCoder.of(), input);
    } catch (CoderException e) {
      fail("Unexpected Coder Exception " + e);
      throw new AssertionError("Unreachable");
    }
  };
  TypeDescriptor<byte[]> td = new TypeDescriptor<byte[]>() {
  };
  PCollection<byte[]> foos = p.apply(Create.of(1, 1, 1, 2, 2, 3)).apply(MapElements.into(td).via(getBytes));
  PCollection<byte[]> msync = p.apply(Create.of(1, -2, -8, -16)).apply(MapElements.into(td).via(getBytes));
  PCollection<byte[]> bytes = PCollectionList.of(foos).and(msync).apply(Flatten.pCollections());
  PCollection<KV<byte[], Long>> counts = bytes.apply(Count.perElement());
  // Decode the byte[] keys back to ints so the result can be compared to a plain map.
  PCollection<KV<Integer, Long>> countsBackToString = counts.apply(MapElements.via(new SimpleFunction<KV<byte[], Long>, KV<Integer, Long>>() {
    @Override
    public KV<Integer, Long> apply(KV<byte[], Long> input) {
      try {
        return KV.of(CoderUtils.decodeFromByteArray(VarIntCoder.of(), input.getKey()), input.getValue());
      } catch (CoderException e) {
        fail("Unexpected Coder Exception " + e);
        throw new AssertionError("Unreachable");
      }
    }
  }));
  Map<Integer, Long> expected = ImmutableMap.<Integer, Long>builder().put(1, 4L).put(2, 2L).put(3, 1L).put(-2, 1L).put(-8, 1L).put(-16, 1L).build();
  PAssert.thatMap(countsBackToString).isEqualTo(expected);
  // BUG FIX: PAssert assertions are only checked when the pipeline actually executes.
  // Without running the pipeline this test would pass vacuously.
  p.run();
}
Example usage of org.apache.beam.sdk.coders.Coder in the Apache Beam project,
taken from the static factory method create of the class ParDoEvaluator.
/**
 * Builds a {@link ParDoEvaluator} for the given applied ParDo: wires up the bundle output
 * manager, side-input reader, and per-output coders, then asks {@code runnerFactory} for a
 * pushback-capable runner and delegates to the private create overload.
 */
public static <InputT, OutputT> ParDoEvaluator<InputT> create(
    EvaluationContext evaluationContext,
    PipelineOptions options,
    DirectStepContext stepContext,
    AppliedPTransform<?, ?, ?> application,
    Coder<InputT> inputCoder,
    WindowingStrategy<?, ? extends BoundedWindow> windowingStrategy,
    DoFn<InputT, OutputT> fn,
    StructuralKey<?> key,
    List<PCollectionView<?>> sideInputs,
    TupleTag<OutputT> mainOutputTag,
    List<TupleTag<?>> additionalOutputTags,
    Map<TupleTag<?>, PCollection<?>> outputs,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping,
    DoFnRunnerFactory<InputT, OutputT> runnerFactory) {
  BundleOutputManager bundleOutputs = createOutputManager(evaluationContext, key, outputs);
  ReadyCheckingSideInputReader sideInputAccess =
      evaluationContext.createSideInputReader(sideInputs);
  // Map each output tag to the coder of its PCollection.
  Map<TupleTag<?>, Coder<?>> coderByTag =
      outputs.entrySet().stream()
          .collect(Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue().getCoder()));
  PushbackSideInputDoFnRunner<InputT, OutputT> pushbackRunner =
      runnerFactory.createRunner(
          options,
          fn,
          sideInputs,
          sideInputAccess,
          bundleOutputs,
          mainOutputTag,
          additionalOutputTags,
          stepContext,
          inputCoder,
          coderByTag,
          windowingStrategy,
          doFnSchemaInformation,
          sideInputMapping);
  return create(pushbackRunner, stepContext, application, bundleOutputs);
}
Aggregations