use of org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder in project beam by apache.
the class SparkBatchPortablePipelineTranslator method translate.
/**
 * Translates a portable Beam pipeline into the given Spark translation context.
 *
 * <p>The transforms are walked twice in topological order. The first walk only records
 * bookkeeping in the context (input consumption counts and output coders); the second walk hands
 * each transform to the translator registered for its URN.
 *
 * @param pipeline the portable pipeline proto to translate
 * @param context the translation context that receives the consumption counts, the coders and
 *     whatever the per-transform translators produce
 */
@Override
public void translate(final RunnerApi.Pipeline pipeline, SparkTranslationContext context) {
  final RunnerApi.Components components = pipeline.getComponents();
  QueryablePipeline queryable =
      QueryablePipeline.forTransforms(pipeline.getRootTransformIdsList(), components);

  // Pass 1: pre-scan every transform before anything is translated.
  for (PipelineNode.PTransformNode node : queryable.getTopologicallyOrderedTransforms()) {
    RunnerApi.PTransform transform = node.getTransform();

    // Count each use of a PCollection as an input, so that the corresponding RDDs can later be
    // cached.
    transform
        .getInputsMap()
        .values()
        .forEach(consumedId -> context.incrementConsumptionCountBy(consumedId, 1));

    // The result of an executable stage's computation is an intermediate RDD, which might also
    // need to be cached; it is counted once per declared output of the stage.
    if (ExecutableStage.URN.equals(transform.getSpec().getUrn())) {
      int outputCount = transform.getOutputsMap().size();
      context.incrementConsumptionCountBy(getExecutableStageIntermediateId(node), outputCount);
    }

    // Register the windowed-value coder of every output under the output's id.
    for (String producedId : transform.getOutputsMap().values()) {
      WindowedValueCoder producedCoder = getWindowedValueCoder(producedId, components);
      context.putCoder(producedId, producedCoder);
    }
  }

  // Pass 2: dispatch by URN, falling back to urnNotFound for URNs without a translator.
  for (PipelineNode.PTransformNode node : queryable.getTopologicallyOrderedTransforms()) {
    String urn = node.getTransform().getSpec().getUrn();
    urnToTransformTranslator
        .getOrDefault(urn, SparkBatchPortablePipelineTranslator::urnNotFound)
        .translate(node, pipeline, context);
  }
}
use of org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder in project beam by apache.
the class SparkExecutableStageFunction method getStateRequestHandler.
/**
 * Assembles the state request handler used while running one executable stage.
 *
 * <p>Side-input requests (iterable, multimap and multimap-keys) are all answered by a single
 * handler that decodes the broadcast byte arrays stored in {@code sideInputs}. Bag user state
 * requests are answered by {@code bagUserStateHandlerFactory} when the stage declares user
 * states, and by {@link StateRequestHandler#unsupported()} otherwise.
 *
 * @param executableStage the stage whose side inputs and user states are served
 * @param processBundleDescriptor descriptor passed to the bag user state handler factory
 * @return a handler that delegates on the state key type
 * @throws RuntimeException if building the side input handler fails with an {@link IOException}
 */
private StateRequestHandler getStateRequestHandler(ExecutableStage executableStage, ProcessBundleDescriptors.ExecutableProcessBundleDescriptor processBundleDescriptor) {
  StateRequestHandlers.SideInputHandlerFactory sideInputFactory =
      BatchSideInputHandlerFactory.forStage(
          executableStage,
          new BatchSideInputHandlerFactory.SideInputGetter() {
            @Override
            public <T> List<T> getSideInput(String pCollectionId) {
              // Each entry pairs the broadcast encoded elements with the coder to decode them.
              Tuple2<Broadcast<List<byte[]>>, WindowedValueCoder<SideInputT>> entry =
                  sideInputs.get(pCollectionId);
              Broadcast<List<byte[]>> encodedBroadcast = entry._1;
              WindowedValueCoder<SideInputT> elementCoder = entry._2;
              List<byte[]> encodedElements = encodedBroadcast.value();
              return (List<T>)
                  encodedElements.stream()
                      .map(encoded -> CoderHelpers.fromByteArray(encoded, elementCoder))
                      .collect(Collectors.toList());
            }
          });

  final StateRequestHandler sideInputHandler;
  try {
    sideInputHandler =
        StateRequestHandlers.forSideInputHandlerFactory(
            ProcessBundleDescriptors.getSideInputs(executableStage), sideInputFactory);
  } catch (IOException e) {
    throw new RuntimeException("Failed to setup state handler", e);
  }

  // Create the user state factory on first use.
  if (bagUserStateHandlerFactory == null) {
    bagUserStateHandlerFactory = new InMemoryBagUserStateFactory();
  }

  final StateRequestHandler userStateHandler;
  if (executableStage.getUserStates().isEmpty()) {
    userStateHandler = StateRequestHandler.unsupported();
  } else {
    // Need to discard the old key's state
    bagUserStateHandlerFactory.resetForNewKey();
    userStateHandler =
        StateRequestHandlers.forBagUserStateHandlerFactory(
            processBundleDescriptor, bagUserStateHandlerFactory);
  }

  EnumMap<TypeCase, StateRequestHandler> handlersByType = new EnumMap<>(StateKey.TypeCase.class);
  handlersByType.put(StateKey.TypeCase.ITERABLE_SIDE_INPUT, sideInputHandler);
  handlersByType.put(StateKey.TypeCase.MULTIMAP_SIDE_INPUT, sideInputHandler);
  handlersByType.put(StateKey.TypeCase.MULTIMAP_KEYS_SIDE_INPUT, sideInputHandler);
  handlersByType.put(StateKey.TypeCase.BAG_USER_STATE, userStateHandler);
  return StateRequestHandlers.delegateBasedUponType(handlersByType);
}
use of org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder in project beam by apache.
the class IsmReaderFactoryTest method testFactory.
/**
 * Checks that {@link IsmReaderFactory} produces an {@link IsmReader} whose coder equals the value
 * coder of the supplied windowed coder and whose resource id matches the temporary file.
 */
@Test
public void testFactory() throws Exception {
  IsmRecordCoder<?> recordCoder =
      IsmRecordCoder.of(1, 0, ImmutableList.<Coder<?>>of(StringUtf8Coder.of()), VarLongCoder.of());
  WindowedValueCoder<?> windowedCoder =
      WindowedValue.getFullCoder(recordCoder, GlobalWindow.Coder.INSTANCE);

  String path = tmpFolder.newFile().getPath();
  ResourceId expectedResourceId = FileSystems.matchSingleFileSpec(path).resourceId();

  IsmReaderFactory factory = new IsmReaderFactory();
  @SuppressWarnings("rawtypes")
  IsmReader<?> reader =
      (IsmReader)
          factory.create(
              createSpecForFilename(path),
              windowedCoder,
              options,
              executionContext,
              operationContext);

  assertEquals(windowedCoder.getValueCoder(), reader.getCoder());
  assertEquals(expectedResourceId, reader.getResourceId());
}
use of org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder in project beam by apache.
the class IsmSinkFactory method create.
/**
 * Creates an {@link IsmSink} from a cloud object specification.
 *
 * <p>The coder must be a {@link WindowedValueCoder} wrapping an {@link IsmRecordCoder}. The bloom
 * filter size limit is derived from the worker cache size in the options and is never smaller
 * than {@code MIN_BLOOM_FILTER_SIZE_BYTES}.
 *
 * @param spec specification from which the output filename is read
 * @param coder coder for the sink's elements; must not be null
 * @param options pipeline options; must not be null
 * @param executionContext not used by this method
 * @param operationContext not used by this method
 * @return a sink writing to the resource matched for the filename
 * @throws Exception if an argument check fails or the filename cannot be read from the spec
 */
@Override
public Sink<?> create(CloudObject spec, @Nullable Coder<?> coder, @Nullable PipelineOptions options, @Nullable DataflowExecutionContext executionContext, DataflowOperationContext operationContext) throws Exception {
  options = checkArgumentNotNull(options);
  coder = checkArgumentNotNull(coder);

  // The validity of this coder is checked in detail by the typed create, below
  @SuppressWarnings("unchecked")
  Coder<WindowedValue<IsmRecord<Object>>> typedCoder =
      (Coder<WindowedValue<IsmRecord<Object>>>) coder;

  String filename = getString(spec, WorkerPropertyNames.FILENAME);

  // The outer coder must be a windowed value coder ...
  checkArgument(
      typedCoder instanceof WindowedValueCoder,
      "%s only supports using %s but got %s.",
      IsmSink.class,
      WindowedValueCoder.class,
      typedCoder);
  WindowedValueCoder<IsmRecord<Object>> windowedCoder =
      (WindowedValueCoder<IsmRecord<Object>>) typedCoder;

  // ... and its value coder must be an ISM record coder.
  checkArgument(
      windowedCoder.getValueCoder() instanceof IsmRecordCoder,
      "%s only supports using %s but got %s.",
      IsmSink.class,
      IsmRecordCoder.class,
      windowedCoder.getValueCoder());
  @SuppressWarnings("unchecked")
  IsmRecordCoder<Object> ismCoder = (IsmRecordCoder<Object>) windowedCoder.getValueCoder();

  // Note the conversion from MiB to bytes
  double unroundedLimitBytes =
      BLOOM_FILTER_SIZE_LIMIT_MULTIPLIER
          * options.as(DataflowWorkerHarnessOptions.class).getWorkerCacheMb()
          * 1024
          * 1024;
  long roundedLimitBytes = DoubleMath.roundToLong(unroundedLimitBytes, RoundingMode.DOWN);
  long bloomFilterSizeLimitBytes = Math.max(MIN_BLOOM_FILTER_SIZE_BYTES, roundedLimitBytes);

  return new IsmSink<>(
      FileSystems.matchNewResource(filename, false), ismCoder, bloomFilterSizeLimitBytes);
}
use of org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder in project beam by apache.
the class ApexParDoOperator method processElementInReadyWindows.
/**
 * Processes one element through {@code pushbackDoFnRunner}, wrapped in its own
 * startBundle/finishBundle pair.
 *
 * <p>When keyed state internals are present, the element's key and key coder are extracted
 * first (from a {@link KeyedWorkItem} or, failing that, a {@link KV}) and installed on the state
 * and timer internals together with the current input and output watermarks.
 *
 * @param elem the windowed element to process
 * @return the elements returned by the pushback runner for {@code elem}
 * @throws UserCodeException rethrown unchanged; if its cause is an {@link AssertionError}, that
 *     cause is first stored in {@code ApexRunner.ASSERTION_ERROR}
 */
private Iterable<WindowedValue<InputT>> processElementInReadyWindows(WindowedValue<InputT> elem) {
  try {
    pushbackDoFnRunner.startBundle();

    if (currentKeyStateInternals != null) {
      InputT payload = elem.getValue();
      final Object currentKey;
      final Coder<Object> currentKeyCoder;
      @SuppressWarnings({"rawtypes", "unchecked"})
      WindowedValueCoder<InputT> windowedCoder = (WindowedValueCoder) inputCoder;

      if (!(payload instanceof KeyedWorkItem)) {
        // Anything that is not a keyed work item is treated as a KV.
        currentKey = ((KV) payload).getKey();
        @SuppressWarnings({"rawtypes", "unchecked"})
        KvCoder<Object, ?> kvCoder = (KvCoder) windowedCoder.getValueCoder();
        currentKeyCoder = kvCoder.getKeyCoder();
      } else {
        currentKey = ((KeyedWorkItem) payload).key();
        @SuppressWarnings({"rawtypes", "unchecked"})
        KeyedWorkItemCoder<Object, ?> workItemCoder =
            (KeyedWorkItemCoder) windowedCoder.getValueCoder();
        currentKeyCoder = workItemCoder.getKeyCoder();
      }

      ((StateInternalsProxy) currentKeyStateInternals).setKey(currentKey);
      currentKeyTimerInternals.setContext(
          currentKey,
          currentKeyCoder,
          new Instant(this.currentInputWatermark),
          new Instant(this.currentOutputWatermark));
    }

    Iterable<WindowedValue<InputT>> deferred =
        pushbackDoFnRunner.processElementInReadyWindows(elem);
    pushbackDoFnRunner.finishBundle();
    return deferred;
  } catch (UserCodeException ue) {
    // Record assertion failures raised by user code in ApexRunner before propagating.
    Throwable cause = ue.getCause();
    if (cause instanceof AssertionError) {
      ApexRunner.ASSERTION_ERROR.set((AssertionError) cause);
    }
    throw ue;
  }
}
Aggregations