Search in sources :

Example 16 with RawUnionValue

Use of org.apache.beam.sdk.transforms.join.RawUnionValue in the Apache Beam project.

From the class SparkExecutableStageFunctionTest, method outputsAreTaggedCorrectly.

/**
 * Checks that every value emitted on a named output comes back from the function wrapped in a
 * {@link RawUnionValue} carrying the integer that the output tag map assigns to that output.
 */
@Test
public void outputsAreTaggedCorrectly() throws Exception {
    WindowedValue<Integer> valueForOne = WindowedValue.valueInGlobalWindow(3);
    WindowedValue<Integer> valueForTwo = WindowedValue.valueInGlobalWindow(4);
    WindowedValue<Integer> valueForThree = WindowedValue.valueInGlobalWindow(5);
    Map<String, Integer> tagIndexByOutputId = ImmutableMap.of("one", 1, "two", 2, "three", 3);

    // A hand-written StageBundleFactory (rather than a mock) is used so that the
    // OutputReceiverFactory handed to getBundle(...) is actually exercised.
    StageBundleFactory bundleFactory = new StageBundleFactory() {

        // Guards against emitting the three values more than once across close() calls.
        private boolean alreadyEmitted;

        @Override
        public RemoteBundle getBundle(OutputReceiverFactory receiverFactory, TimerReceiverFactory timerReceiverFactory, StateRequestHandler stateRequestHandler, BundleProgressHandler progressHandler, BundleFinalizationHandler finalizationHandler, BundleCheckpointHandler checkpointHandler) {
            return new RemoteBundle() {

                @Override
                public String getId() {
                    return "bundle-id";
                }

                @Override
                public Map<String, FnDataReceiver> getInputReceivers() {
                    // The single input receiver deliberately discards whatever it is given.
                    return ImmutableMap.of("input", input -> {
                    });
                }

                @Override
                public Map<KV<String, String>, FnDataReceiver<Timer>> getTimerReceivers() {
                    return Collections.emptyMap();
                }

                @Override
                public void close() throws Exception {
                    // All output is produced at bundle close, and only the first time.
                    if (!alreadyEmitted) {
                        receiverFactory.create("one").accept(valueForOne);
                        receiverFactory.create("two").accept(valueForTwo);
                        receiverFactory.create("three").accept(valueForThree);
                        alreadyEmitted = true;
                    }
                }

                @Override
                public void split(double fractionOfRemainder) {
                    throw new UnsupportedOperationException();
                }

                @Override
                public void requestProgress() {
                    throw new UnsupportedOperationException();
                }
            };
        }

        @Override
        public ProcessBundleDescriptors.ExecutableProcessBundleDescriptor getProcessBundleDescriptor() {
            return Mockito.mock(ProcessBundleDescriptors.ExecutableProcessBundleDescriptor.class);
        }

        @Override
        public InstructionRequestHandler getInstructionRequestHandler() {
            return null;
        }

        @Override
        public void close() {
        }
    };
    when(stageContext.getStageBundleFactory(any())).thenReturn(bundleFactory);

    SparkExecutableStageFunction<Integer, ?> function = getFunction(tagIndexByOutputId);

    // A single dummy element is enough to drive one bundle through the function.
    List<WindowedValue<Integer>> inputElements = new ArrayList<>();
    inputElements.add(WindowedValue.valueInGlobalWindow(0));

    Iterator<RawUnionValue> resultIterator = function.call(inputElements.iterator());
    Iterable<RawUnionValue> results = () -> resultIterator;

    assertThat(results, contains(new RawUnionValue(1, valueForOne), new RawUnionValue(2, valueForTwo), new RawUnionValue(3, valueForThree)));
}
Also used : StateRequestHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandler) FnDataReceiver(org.apache.beam.sdk.fn.data.FnDataReceiver) RawUnionValue(org.apache.beam.sdk.transforms.join.RawUnionValue) ArrayList(java.util.ArrayList) KV(org.apache.beam.sdk.values.KV) ProcessBundleDescriptors(org.apache.beam.runners.fnexecution.control.ProcessBundleDescriptors) BundleCheckpointHandler(org.apache.beam.runners.fnexecution.control.BundleCheckpointHandler) StageBundleFactory(org.apache.beam.runners.fnexecution.control.StageBundleFactory) OutputReceiverFactory(org.apache.beam.runners.fnexecution.control.OutputReceiverFactory) WindowedValue(org.apache.beam.sdk.util.WindowedValue) TimerReceiverFactory(org.apache.beam.runners.fnexecution.control.TimerReceiverFactory) BundleFinalizationHandler(org.apache.beam.runners.fnexecution.control.BundleFinalizationHandler) BundleProgressHandler(org.apache.beam.runners.fnexecution.control.BundleProgressHandler) RemoteBundle(org.apache.beam.runners.fnexecution.control.RemoteBundle) Test(org.junit.Test)

Example 17 with RawUnionValue

Use of org.apache.beam.sdk.transforms.join.RawUnionValue in the Apache Beam project.

From the class ParDoMultiOutputTranslatorBatch, method translateNode.

/**
 * Translates a multi-output ParDo into Twister2 TSets.
 *
 * <p>The input data set is run through a {@code DoFnFunction} that yields {@link RawUnionValue}s.
 * For each output of the transform, a further compute step using an {@code OutputTagFilter}
 * built from that output's integer index is registered on the context as the output's data set.
 *
 * @param transform the multi-output ParDo being translated
 * @param context the batch translation context supplying inputs, outputs and coders
 * @throws UnsupportedOperationException if the DoFn's process-element method is splittable
 * @throws RuntimeException wrapping the {@link IOException} raised while reading the main output
 *     tag from the current transform
 */
@Override
public void translateNode(ParDo.MultiOutput<InputT, OutputT> transform, Twister2BatchTranslationContext context) {
    DoFn<InputT, OutputT> doFn = transform.getFn();
    boolean splittable = DoFnSignatures.signatureForDoFn(doFn).processElement().isSplittable();
    if (splittable) {
        throw new UnsupportedOperationException(String.format("Not expected to directly translate splittable DoFn, should have been overridden: %s", doFn));
    }

    // Everything describing the main input.
    BatchTSetImpl<WindowedValue<InputT>> inputTTSet = context.getInputDataSet(context.getInput(transform));
    WindowingStrategy<?, ?> windowingStrategy = context.getInput(transform).getWindowingStrategy();
    Coder<InputT> inputCoder = (Coder<InputT>) context.getInput(transform).getCoder();

    // Everything describing the outputs and the DoFn's environment.
    Map<TupleTag<?>, PCollection<?>> outputs = context.getOutputs();
    Map<TupleTag<?>, Coder<?>> outputCoders = context.getOutputCoders();
    DoFnSchemaInformation doFnSchemaInformation = ParDoTranslation.getSchemaInformation(context.getCurrentTransform());
    Map<String, PCollectionView<?>> sideInputMapping = ParDoTranslation.getSideInputMapping(context.getCurrentTransform());
    TupleTag<OutputT> mainOutput = transform.getMainOutputTag();
    List<TupleTag<?>> additionalOutputTags = new ArrayList<>(transform.getAdditionalOutputTags().getAll());
    Map<String, PCollectionView<?>> sideInputs = transform.getSideInputs();

    // TODO : note change from List to map in sideinputs
    // Record each side input's windowing strategy so the DoFn runner can map
    // main-input windows onto side-input windows.
    Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputStrategies = new HashMap<>();
    sideInputs.values().forEach(view -> sideInputStrategies.put(view, view.getWindowingStrategyInternal()));

    TupleTag<?> mainOutputTag;
    try {
        mainOutputTag = ParDoTranslation.getMainOutputTag(context.getCurrentTransform());
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // The main output always gets index 0; every other output is numbered from 1
    // in the iteration order of the outputs map.
    Map<TupleTag<?>, Integer> outputMap = Maps.newHashMap();
    outputMap.put(mainOutputTag, 0);
    int nextIndex = 1;
    for (TupleTag<?> tag : outputs.keySet()) {
        if (outputMap.containsKey(tag)) {
            continue;
        }
        outputMap.put(tag, nextIndex);
        nextIndex++;
    }

    ComputeTSet<RawUnionValue, Iterator<WindowedValue<InputT>>> outputTSet =
        inputTTSet
            .direct()
            .<RawUnionValue>compute(
                new DoFnFunction<OutputT, InputT>(context, doFn, inputCoder, outputCoders, additionalOutputTags, windowingStrategy, sideInputStrategies, mainOutput, doFnSchemaInformation, outputMap, sideInputMapping));

    // One derived TSet per output, each built with that output's index.
    for (Map.Entry<TupleTag<?>, PCollection<?>> entry : outputs.entrySet()) {
        Integer unionTag = outputMap.get(entry.getKey());
        ComputeTSet<WindowedValue<OutputT>, Iterator<RawUnionValue>> perOutputTSet = outputTSet.direct().compute(new OutputTagFilter(unionTag));
        context.setOutputDataSet((PCollection) entry.getValue(), perOutputTSet);
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) TupleTag(org.apache.beam.sdk.values.TupleTag) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) WindowedValue(org.apache.beam.sdk.util.WindowedValue) Iterator(java.util.Iterator) OutputTagFilter(org.apache.beam.runners.twister2.translators.functions.OutputTagFilter) Coder(org.apache.beam.sdk.coders.Coder) RawUnionValue(org.apache.beam.sdk.transforms.join.RawUnionValue) IOException(java.io.IOException) PCollection(org.apache.beam.sdk.values.PCollection) PCollectionView(org.apache.beam.sdk.values.PCollectionView) DoFnSchemaInformation(org.apache.beam.sdk.transforms.DoFnSchemaInformation) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

RawUnionValue (org.apache.beam.sdk.transforms.join.RawUnionValue)17 WindowedValue (org.apache.beam.sdk.util.WindowedValue)14 Coder (org.apache.beam.sdk.coders.Coder)9 KvCoder (org.apache.beam.sdk.coders.KvCoder)8 TupleTag (org.apache.beam.sdk.values.TupleTag)8 IOException (java.io.IOException)7 ArrayList (java.util.ArrayList)7 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)7 CoderTypeInformation (org.apache.beam.runners.flink.translation.types.CoderTypeInformation)6 PipelineTranslatorUtils.instantiateCoder (org.apache.beam.runners.fnexecution.translation.PipelineTranslatorUtils.instantiateCoder)6 ByteArrayCoder (org.apache.beam.sdk.coders.ByteArrayCoder)6 VoidCoder (org.apache.beam.sdk.coders.VoidCoder)6 PCollectionView (org.apache.beam.sdk.values.PCollectionView)6 Instant (org.joda.time.Instant)6 HashMap (java.util.HashMap)5 IterableCoder (org.apache.beam.sdk.coders.IterableCoder)5 KV (org.apache.beam.sdk.values.KV)5 List (java.util.List)4 Map (java.util.Map)4 WindowingStrategy (org.apache.beam.sdk.values.WindowingStrategy)4