Search in sources :

Example 1 with WindowedValueCoder

use of org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder in project beam by apache.

The class SparkBatchPortablePipelineTranslator, method translate.

/**
 * Translates a portable Beam pipeline into the given Spark translation context.
 *
 * <p>The transforms are visited twice in topological order. The first pass records how often
 * each input is consumed and registers a windowed-value coder for every output. The second pass
 * hands each transform to the translator registered for its URN, falling back to
 * {@code urnNotFound} when no translator is registered.
 *
 * @param pipeline the portable pipeline to translate
 * @param context the translation context that receives consumption counts, coders and the
 *     translated transforms
 */
@Override
public void translate(final RunnerApi.Pipeline pipeline, SparkTranslationContext context) {
    QueryablePipeline queryable =
            QueryablePipeline.forTransforms(
                    pipeline.getRootTransformIdsList(), pipeline.getComponents());

    // Pass 1: collect consumption counts and output coders.
    for (PipelineNode.PTransformNode node : queryable.getTopologicallyOrderedTransforms()) {
        // Count every use of a PCollection as an input so that their corresponding RDDs can
        // later be cached.
        for (String consumedId : node.getTransform().getInputsMap().values()) {
            context.incrementConsumptionCountBy(consumedId, 1);
        }

        // The result of an executable stage's computation is an intermediate RDD, which we
        // might also need to cache: count one consumption of it per stage output.
        String urn = node.getTransform().getSpec().getUrn();
        if (urn.equals(ExecutableStage.URN)) {
            context.incrementConsumptionCountBy(
                    getExecutableStageIntermediateId(node),
                    node.getTransform().getOutputsMap().size());
        }

        // Register the coder of every output under the output's id.
        for (String producedId : node.getTransform().getOutputsMap().values()) {
            WindowedValueCoder coder = getWindowedValueCoder(producedId, pipeline.getComponents());
            context.putCoder(producedId, coder);
        }
    }

    // Pass 2: dispatch each transform to the translator registered for its URN.
    for (PipelineNode.PTransformNode node : queryable.getTopologicallyOrderedTransforms()) {
        String urn = node.getTransform().getSpec().getUrn();
        urnToTransformTranslator
                .getOrDefault(urn, SparkBatchPortablePipelineTranslator::urnNotFound)
                .translate(node, pipeline, context);
    }
}
Also used : WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) PipelineTranslatorUtils.getWindowedValueCoder(org.apache.beam.runners.fnexecution.translation.PipelineTranslatorUtils.getWindowedValueCoder) QueryablePipeline(org.apache.beam.runners.core.construction.graph.QueryablePipeline) PipelineNode(org.apache.beam.runners.core.construction.graph.PipelineNode) PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode)

Example 2 with WindowedValueCoder

use of org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder in project beam by apache.

The class SparkExecutableStageFunction, method getStateRequestHandler.

/**
 * Builds the state request handler for one executable stage.
 *
 * <p>Side-input requests (iterable, multimap and multimap-keys) are served from the broadcast
 * side inputs, whose encoded elements are decoded with the coder stored next to them. Bag user
 * state requests are served by {@code bagUserStateHandlerFactory} when the stage declares user
 * states, and by an "unsupported" handler otherwise.
 *
 * @param executableStage the stage whose side inputs and user states are served
 * @param processBundleDescriptor the bundle descriptor used to create the user state handler
 * @return a handler that delegates based on the state key type
 * @throws RuntimeException if the side-input handler cannot be set up
 */
private StateRequestHandler getStateRequestHandler(ExecutableStage executableStage, ProcessBundleDescriptors.ExecutableProcessBundleDescriptor processBundleDescriptor) {
    StateRequestHandlers.SideInputHandlerFactory sideInputFactory =
            BatchSideInputHandlerFactory.forStage(
                    executableStage,
                    new BatchSideInputHandlerFactory.SideInputGetter() {

                        @Override
                        public <T> List<T> getSideInput(String pCollectionId) {
                            Tuple2<Broadcast<List<byte[]>>, WindowedValueCoder<SideInputT>> entry =
                                    sideInputs.get(pCollectionId);
                            Broadcast<List<byte[]>> encodedElements = entry._1;
                            WindowedValueCoder<SideInputT> elementCoder = entry._2;
                            // Decode every broadcast element with the coder stored alongside it.
                            return (List<T>) encodedElements.value().stream()
                                    .map(bytes -> CoderHelpers.fromByteArray(bytes, elementCoder))
                                    .collect(Collectors.toList());
                        }
                    });

    final StateRequestHandler sideInputHandler;
    try {
        sideInputHandler =
                StateRequestHandlers.forSideInputHandlerFactory(
                        ProcessBundleDescriptors.getSideInputs(executableStage), sideInputFactory);
    } catch (IOException e) {
        throw new RuntimeException("Failed to setup state handler", e);
    }

    // Create the user state factory on first use.
    if (bagUserStateHandlerFactory == null) {
        bagUserStateHandlerFactory = new InMemoryBagUserStateFactory();
    }

    final StateRequestHandler userStateHandler;
    if (executableStage.getUserStates().isEmpty()) {
        userStateHandler = StateRequestHandler.unsupported();
    } else {
        // Need to discard the old key's state
        bagUserStateHandlerFactory.resetForNewKey();
        userStateHandler =
                StateRequestHandlers.forBagUserStateHandlerFactory(
                        processBundleDescriptor, bagUserStateHandlerFactory);
    }

    // All three side-input key types share one handler; bag user state has its own.
    EnumMap<TypeCase, StateRequestHandler> handlersByType = new EnumMap<>(StateKey.TypeCase.class);
    handlersByType.put(StateKey.TypeCase.ITERABLE_SIDE_INPUT, sideInputHandler);
    handlersByType.put(StateKey.TypeCase.MULTIMAP_SIDE_INPUT, sideInputHandler);
    handlersByType.put(StateKey.TypeCase.MULTIMAP_KEYS_SIDE_INPUT, sideInputHandler);
    handlersByType.put(StateKey.TypeCase.BAG_USER_STATE, userStateHandler);
    return StateRequestHandlers.delegateBasedUponType(handlersByType);
}
Also used : WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) WindowedValue(org.apache.beam.sdk.util.WindowedValue) TimerInternals(org.apache.beam.runners.core.TimerInternals) BatchSideInputHandlerFactory(org.apache.beam.runners.fnexecution.translation.BatchSideInputHandlerFactory) Locale(java.util.Locale) JobBundleFactory(org.apache.beam.runners.fnexecution.control.JobBundleFactory) Map(java.util.Map) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) JobInfo(org.apache.beam.runners.fnexecution.provisioning.JobInfo) TimerReceiverFactory(org.apache.beam.runners.fnexecution.control.TimerReceiverFactory) FlatMapFunction(org.apache.spark.api.java.function.FlatMapFunction) Broadcast(org.apache.spark.broadcast.Broadcast) StageBundleFactory(org.apache.beam.runners.fnexecution.control.StageBundleFactory) EnumMap(java.util.EnumMap) FnDataReceiver(org.apache.beam.sdk.fn.data.FnDataReceiver) BundleProgressHandler(org.apache.beam.runners.fnexecution.control.BundleProgressHandler) ExecutableStage(org.apache.beam.runners.core.construction.graph.ExecutableStage) Tuple2(scala.Tuple2) Collectors(java.util.stream.Collectors) Serializable(java.io.Serializable) List(java.util.List) ByteArray(org.apache.beam.runners.spark.util.ByteArray) SparkPipelineOptions(org.apache.beam.runners.spark.SparkPipelineOptions) StateKey(org.apache.beam.model.fnexecution.v1.BeamFnApi.StateKey) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) ProcessBundleResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleResponse) Coder(org.apache.beam.sdk.coders.Coder) CoderHelpers(org.apache.beam.runners.spark.coders.CoderHelpers) RawUnionValue(org.apache.beam.sdk.transforms.join.RawUnionValue) RemoteBundle(org.apache.beam.runners.fnexecution.control.RemoteBundle) 
InMemoryBagUserStateFactory(org.apache.beam.runners.fnexecution.state.InMemoryBagUserStateFactory) StateRequestHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandler) ProcessBundleProgressResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleProgressResponse) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) Iterator(java.util.Iterator) OutputReceiverFactory(org.apache.beam.runners.fnexecution.control.OutputReceiverFactory) ProcessBundleDescriptors(org.apache.beam.runners.fnexecution.control.ProcessBundleDescriptors) MetricsContainerImpl(org.apache.beam.runners.core.metrics.MetricsContainerImpl) PipelineTranslatorUtils(org.apache.beam.runners.fnexecution.translation.PipelineTranslatorUtils) StateRequestHandlers(org.apache.beam.runners.fnexecution.state.StateRequestHandlers) IOException(java.io.IOException) MetricsContainerStepMapAccumulator(org.apache.beam.runners.spark.metrics.MetricsContainerStepMapAccumulator) InMemoryTimerInternals(org.apache.beam.runners.core.InMemoryTimerInternals) Timer(org.apache.beam.runners.core.construction.Timer) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Instant(org.joda.time.Instant) FileSystems(org.apache.beam.sdk.io.FileSystems) Collections(java.util.Collections) TypeCase(org.apache.beam.model.fnexecution.v1.BeamFnApi.StateKey.TypeCase) ExecutableStageContext(org.apache.beam.runners.fnexecution.control.ExecutableStageContext) StateRequestHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandler) StateKey(org.apache.beam.model.fnexecution.v1.BeamFnApi.StateKey) TypeCase(org.apache.beam.model.fnexecution.v1.BeamFnApi.StateKey.TypeCase) IOException(java.io.IOException) InMemoryBagUserStateFactory(org.apache.beam.runners.fnexecution.state.InMemoryBagUserStateFactory) WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) Broadcast(org.apache.spark.broadcast.Broadcast) 
BatchSideInputHandlerFactory(org.apache.beam.runners.fnexecution.translation.BatchSideInputHandlerFactory) Tuple2(scala.Tuple2) List(java.util.List) StateRequestHandlers(org.apache.beam.runners.fnexecution.state.StateRequestHandlers) EnumMap(java.util.EnumMap)

Example 3 with WindowedValueCoder

use of org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder in project beam by apache.

The class IsmReaderFactoryTest, method testFactory.

/**
 * Checks that {@code IsmReaderFactory} creates a reader whose coder is the value coder of the
 * supplied windowed coder and whose resource id matches the file named in the spec.
 */
@Test
public void testFactory() throws Exception {
    WindowedValueCoder<?> windowedCoder =
            WindowedValue.getFullCoder(
                    IsmRecordCoder.of(1, 0, ImmutableList.<Coder<?>>of(StringUtf8Coder.of()), VarLongCoder.of()),
                    GlobalWindow.Coder.INSTANCE);

    // Back the reader with a fresh temporary file and resolve its expected resource id.
    String path = tmpFolder.newFile().getPath();
    ResourceId expectedResourceId = FileSystems.matchSingleFileSpec(path).resourceId();

    @SuppressWarnings("rawtypes")
    IsmReader<?> reader =
            (IsmReader) new IsmReaderFactory()
                    .create(createSpecForFilename(path), windowedCoder, options, executionContext, operationContext);

    assertEquals(windowedCoder.getValueCoder(), reader.getCoder());
    assertEquals(expectedResourceId, reader.getResourceId());
}
Also used : WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) Coder(org.apache.beam.sdk.coders.Coder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) VarLongCoder(org.apache.beam.sdk.coders.VarLongCoder) IsmRecordCoder(org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecordCoder) ResourceId(org.apache.beam.sdk.io.fs.ResourceId) Test(org.junit.Test)

Example 4 with WindowedValueCoder

use of org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder in project beam by apache.

The class IsmSinkFactory, method create.

/**
 * Creates an {@code IsmSink} for the file named in {@code spec}.
 *
 * <p>The coder must be a {@code WindowedValueCoder} whose value coder is an
 * {@code IsmRecordCoder}. The bloom filter size limit is derived from the worker cache size
 * option and is never smaller than {@code MIN_BLOOM_FILTER_SIZE_BYTES}.
 *
 * @param spec the cloud object carrying the target filename
 * @param coder the coder of the sink's elements; must not be null
 * @param options the pipeline options; must not be null
 * @param executionContext not used by this method
 * @param operationContext not used by this method
 * @return the sink writing to the resolved file resource
 * @throws Exception declared by the overridden method
 */
@Override
public Sink<?> create(CloudObject spec, @Nullable Coder<?> coder, @Nullable PipelineOptions options, @Nullable DataflowExecutionContext executionContext, DataflowOperationContext operationContext) throws Exception {
    options = checkArgumentNotNull(options);
    coder = checkArgumentNotNull(coder);

    // Unchecked on purpose: the checks below validate the coder's shape in detail.
    @SuppressWarnings("unchecked")
    Coder<WindowedValue<IsmRecord<Object>>> typedCoder = (Coder<WindowedValue<IsmRecord<Object>>>) coder;
    String filename = getString(spec, WorkerPropertyNames.FILENAME);

    checkArgument(
            typedCoder instanceof WindowedValueCoder,
            "%s only supports using %s but got %s.",
            IsmSink.class,
            WindowedValueCoder.class,
            typedCoder);
    WindowedValueCoder<IsmRecord<Object>> windowedCoder = (WindowedValueCoder<IsmRecord<Object>>) typedCoder;

    checkArgument(
            windowedCoder.getValueCoder() instanceof IsmRecordCoder,
            "%s only supports using %s but got %s.",
            IsmSink.class,
            IsmRecordCoder.class,
            windowedCoder.getValueCoder());
    @SuppressWarnings("unchecked")
    IsmRecordCoder<Object> ismCoder = (IsmRecordCoder<Object>) windowedCoder.getValueCoder();

    // Scale the worker cache size (MiB) by the multiplier and convert it to bytes.
    double scaledCacheBytes =
            BLOOM_FILTER_SIZE_LIMIT_MULTIPLIER
                    * options.as(DataflowWorkerHarnessOptions.class).getWorkerCacheMb()
                    * 1024
                    * 1024;
    long roundedCacheBytes = DoubleMath.roundToLong(scaledCacheBytes, RoundingMode.DOWN);
    long bloomFilterSizeLimitBytes = Math.max(MIN_BLOOM_FILTER_SIZE_BYTES, roundedCacheBytes);

    return new IsmSink<>(FileSystems.matchNewResource(filename, false), ismCoder, bloomFilterSizeLimitBytes);
}
Also used : WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) Coder(org.apache.beam.sdk.coders.Coder) IsmRecordCoder(org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecordCoder) IsmRecord(org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecord) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) IsmRecordCoder(org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecordCoder) WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) WindowedValue(org.apache.beam.sdk.util.WindowedValue) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject)

Example 5 with WindowedValueCoder

use of org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder in project beam by apache.

The class ApexParDoOperator, method processElementInReadyWindows.

/**
 * Processes one element in its own bundle and returns whatever the pushback runner pushed back.
 *
 * <p>When keyed state internals are present, the element's key and key coder are extracted
 * first (from a {@code KeyedWorkItem} or a {@code KV}) and installed on the state and timer
 * internals together with the current input and output watermarks.
 *
 * @param elem the windowed element to process
 * @return the elements pushed back by the runner
 * @throws UserCodeException rethrown from user code; an {@code AssertionError} cause is also
 *     recorded in {@code ApexRunner.ASSERTION_ERROR}
 */
private Iterable<WindowedValue<InputT>> processElementInReadyWindows(WindowedValue<InputT> elem) {
    try {
        pushbackDoFnRunner.startBundle();

        if (currentKeyStateInternals != null) {
            InputT payload = elem.getValue();
            @SuppressWarnings({ "rawtypes", "unchecked" })
            WindowedValueCoder<InputT> windowedCoder = (WindowedValueCoder) inputCoder;

            final Object currentKey;
            final Coder<Object> currentKeyCoder;
            if (payload instanceof KeyedWorkItem) {
                currentKey = ((KeyedWorkItem) payload).key();
                @SuppressWarnings({ "rawtypes", "unchecked" })
                KeyedWorkItemCoder<Object, ?> workItemCoder = (KeyedWorkItemCoder) windowedCoder.getValueCoder();
                currentKeyCoder = workItemCoder.getKeyCoder();
            } else {
                // Anything that is not a KeyedWorkItem is treated as a KV.
                currentKey = ((KV) payload).getKey();
                @SuppressWarnings({ "rawtypes", "unchecked" })
                KvCoder<Object, ?> kvCoder = (KvCoder) windowedCoder.getValueCoder();
                currentKeyCoder = kvCoder.getKeyCoder();
            }

            ((StateInternalsProxy) currentKeyStateInternals).setKey(currentKey);
            currentKeyTimerInternals.setContext(
                    currentKey,
                    currentKeyCoder,
                    new Instant(this.currentInputWatermark),
                    new Instant(this.currentOutputWatermark));
        }

        Iterable<WindowedValue<InputT>> pushedBackElements = pushbackDoFnRunner.processElementInReadyWindows(elem);
        pushbackDoFnRunner.finishBundle();
        return pushedBackElements;
    } catch (UserCodeException ue) {
        // Record assertion failures raised in user code before rethrowing.
        Throwable cause = ue.getCause();
        if (cause instanceof AssertionError) {
            ApexRunner.ASSERTION_ERROR.set((AssertionError) cause);
        }
        throw ue;
    }
}
Also used : StateInternalsProxy(org.apache.beam.runners.apex.translation.utils.StateInternalsProxy) KeyedWorkItemCoder(org.apache.beam.runners.core.KeyedWorkItemCoder) Instant(org.joda.time.Instant) KvCoder(org.apache.beam.sdk.coders.KvCoder) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) UserCodeException(org.apache.beam.sdk.util.UserCodeException) WindowedValue(org.apache.beam.sdk.util.WindowedValue) OutputWindowedValue(org.apache.beam.runners.core.OutputWindowedValue)

Aggregations

WindowedValueCoder (org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder): 16
RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi): 9
WindowedValue (org.apache.beam.sdk.util.WindowedValue): 8
Coder (org.apache.beam.sdk.coders.Coder): 7
KvCoder (org.apache.beam.sdk.coders.KvCoder): 7
CloudObject (org.apache.beam.runners.dataflow.util.CloudObject): 6
List (java.util.List): 5
Map (java.util.Map): 5
IOException (java.io.IOException): 4
ArrayList (java.util.ArrayList): 4
HashMap (java.util.HashMap): 4
RehydratedComponents (org.apache.beam.runners.core.construction.RehydratedComponents): 4
InvalidProtocolBufferException (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException): 4
ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction): 3
Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString): 3
ParDoFn (org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn): 3
KV (org.apache.beam.sdk.values.KV): 3
PCollectionView (org.apache.beam.sdk.values.PCollectionView): 3
TupleTag (org.apache.beam.sdk.values.TupleTag): 3
ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap): 3