Search in sources :

Example 36 with WindowingStrategy

Use of org.apache.beam.sdk.values.WindowingStrategy in the Apache Beam project.

From the class ParDoMultiOutputTranslatorBatch, method translateNode.

/**
 * Translates a multi-output {@code ParDo} into batch TSets registered on the given context.
 *
 * <p>The method wraps the transform's {@code DoFn} in a {@code DoFnFunction} computed over the
 * input TSet, then registers one {@code OutputTagFilter}-filtered TSet per output of the
 * context. The main output tag is assigned index 0; every other output tag receives the next
 * free index in the iteration order of the context's output map.
 *
 * @param transform the multi-output ParDo to translate
 * @param context the batch translation context supplying inputs, outputs and coders
 * @throws UnsupportedOperationException if the DoFn's process-element method is splittable
 * @throws RuntimeException wrapping an {@code IOException} raised while reading the main output tag
 */
@Override
public void translateNode(ParDo.MultiOutput<InputT, OutputT> transform, Twister2BatchTranslationContext context) {
    final DoFn<InputT, OutputT> doFn = transform.getFn();
    final boolean splittable = DoFnSignatures.signatureForDoFn(doFn).processElement().isSplittable();
    if (splittable) {
        throw new UnsupportedOperationException(String.format("Not expected to directly translate splittable DoFn, should have been overridden: %s", doFn));
    }

    // Everything describing the main input: its data set, windowing and coder.
    BatchTSetImpl<WindowedValue<InputT>> inputTTSet = context.getInputDataSet(context.getInput(transform));
    WindowingStrategy<?, ?> windowingStrategy = context.getInput(transform).getWindowingStrategy();
    Coder<InputT> inputCoder = (Coder<InputT>) context.getInput(transform).getCoder();

    // Output collections and their coders, keyed by tag.
    Map<TupleTag<?>, PCollection<?>> outputs = context.getOutputs();
    Map<TupleTag<?>, Coder<?>> outputCoders = context.getOutputCoders();

    DoFnSchemaInformation doFnSchemaInformation = ParDoTranslation.getSchemaInformation(context.getCurrentTransform());
    Map<String, PCollectionView<?>> sideInputMapping = ParDoTranslation.getSideInputMapping(context.getCurrentTransform());

    TupleTag<OutputT> mainOutput = transform.getMainOutputTag();
    List<TupleTag<?>> additionalOutputTags = new ArrayList<>(transform.getAdditionalOutputTags().getAll());

    // TODO : note change from List to map in sideinputs
    // Map each side input view to its WindowingStrategy so that the DoFn runner
    // can map main-input windows to side input windows.
    Map<String, PCollectionView<?>> sideInputs = transform.getSideInputs();
    Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputStrategies = new HashMap<>();
    sideInputs.values().forEach(view -> sideInputStrategies.put(view, view.getWindowingStrategyInternal()));

    TupleTag<?> mainOutputTag;
    try {
        mainOutputTag = ParDoTranslation.getMainOutputTag(context.getCurrentTransform());
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // Index 0 is reserved for the main output; the remaining tags are numbered from 1.
    Map<TupleTag<?>, Integer> outputMap = new HashMap<>();
    outputMap.put(mainOutputTag, 0);
    int nextIndex = 1;
    for (TupleTag<?> tag : outputs.keySet()) {
        // putIfAbsent returns null only when the tag had no index yet, so the
        // counter advances exactly when a new index has been handed out.
        if (outputMap.putIfAbsent(tag, nextIndex) == null) {
            nextIndex++;
        }
    }

    DoFnFunction<OutputT, InputT> doFnFunction = new DoFnFunction<OutputT, InputT>(
            context,
            doFn,
            inputCoder,
            outputCoders,
            additionalOutputTags,
            windowingStrategy,
            sideInputStrategies,
            mainOutput,
            doFnSchemaInformation,
            outputMap,
            sideInputMapping);
    ComputeTSet<RawUnionValue, Iterator<WindowedValue<InputT>>> outputTSet = inputTTSet.direct().<RawUnionValue>compute(doFnFunction);

    // Register one filtered TSet per output, selected by that output's index.
    for (Map.Entry<TupleTag<?>, PCollection<?>> output : outputs.entrySet()) {
        Integer tagIndex = outputMap.get(output.getKey());
        ComputeTSet<WindowedValue<OutputT>, Iterator<RawUnionValue>> tempTSet = outputTSet.direct().compute(new OutputTagFilter(tagIndex));
        context.setOutputDataSet((PCollection) output.getValue(), tempTSet);
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) TupleTag(org.apache.beam.sdk.values.TupleTag) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) WindowedValue(org.apache.beam.sdk.util.WindowedValue) Iterator(java.util.Iterator) OutputTagFilter(org.apache.beam.runners.twister2.translators.functions.OutputTagFilter) Coder(org.apache.beam.sdk.coders.Coder) RawUnionValue(org.apache.beam.sdk.transforms.join.RawUnionValue) IOException(java.io.IOException) PCollection(org.apache.beam.sdk.values.PCollection) PCollectionView(org.apache.beam.sdk.values.PCollectionView) DoFnSchemaInformation(org.apache.beam.sdk.transforms.DoFnSchemaInformation) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

WindowingStrategy (org.apache.beam.sdk.values.WindowingStrategy)36 WindowedValue (org.apache.beam.sdk.util.WindowedValue)25 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)21 KV (org.apache.beam.sdk.values.KV)19 KvCoder (org.apache.beam.sdk.coders.KvCoder)17 Coder (org.apache.beam.sdk.coders.Coder)16 List (java.util.List)15 TupleTag (org.apache.beam.sdk.values.TupleTag)14 Instant (org.joda.time.Instant)13 Test (org.junit.Test)13 PCollection (org.apache.beam.sdk.values.PCollection)11 ArrayList (java.util.ArrayList)10 HashMap (java.util.HashMap)9 Map (java.util.Map)9 SerializablePipelineOptions (org.apache.beam.runners.core.construction.SerializablePipelineOptions)9 IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)9 Duration (org.joda.time.Duration)9 IOException (java.io.IOException)8 Collectors (java.util.stream.Collectors)8 StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder)8