Search in sources :

Example 11 with KvCoder

use of org.apache.beam.sdk.coders.KvCoder in project beam by apache.

the class FlinkStreamingPortablePipelineTranslator method translateExecutableStage.

/**
 * Translates the executable stage transform identified by {@code id} into a Flink streaming
 * operator and registers the resulting output streams on {@code context}.
 *
 * <p>The method parses the {@code ExecutableStagePayload} from the transform spec, prepares side
 * inputs when the payload declares any, builds per-output tags/coders/ids, keys the input stream
 * when the stage uses user state, timers, or a splittable process function, wraps everything in an
 * {@code ExecutableStageDoFnOperator}, and finally registers the main output and every additional
 * (side) output as data streams keyed by their PCollection id.
 *
 * @param id id of the transform to translate, looked up in the pipeline components
 * @param pipeline the pipeline proto whose components hold the transform and its coders
 * @param context translation context providing input streams, pipeline options and job info, and
 *     receiving the produced output streams
 * @throws RuntimeException if the stage payload cannot be parsed (wraps the {@code IOException})
 * @throws IllegalStateException if a stateful or splittable stage does not have the expected
 *     {@code KvCoder} input element coder
 */
private <InputT, OutputT> void translateExecutableStage(String id, RunnerApi.Pipeline pipeline, StreamingTranslationContext context) {
    // TODO: Fail on splittable DoFns.
    // TODO: Special-case single outputs to avoid multiplexing PCollections.
    RunnerApi.Components components = pipeline.getComponents();
    RunnerApi.PTransform transform = components.getTransformsOrThrow(id);
    // Maps the stage's local output names to PCollection ids.
    Map<String, String> outputs = transform.getOutputsMap();
    final RunnerApi.ExecutableStagePayload stagePayload;
    try {
        stagePayload = RunnerApi.ExecutableStagePayload.parseFrom(transform.getSpec().getPayload());
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    String inputPCollectionId = stagePayload.getInput();
    // Only build the side input union when the stage declares side inputs; otherwise use an empty
    // placeholder so the code below can treat both cases uniformly.
    final TransformedSideInputs transformedSideInputs;
    if (stagePayload.getSideInputsCount() > 0) {
        transformedSideInputs = transformSideInputs(stagePayload, components, context);
    } else {
        transformedSideInputs = new TransformedSideInputs(Collections.emptyMap(), null);
    }
    Map<TupleTag<?>, OutputTag<WindowedValue<?>>> tagsToOutputTags = Maps.newLinkedHashMap();
    Map<TupleTag<?>, Coder<WindowedValue<?>>> tagsToCoders = Maps.newLinkedHashMap();
    // TODO: does it matter which output we designate as "main"
    // The first local output name becomes the main output; null when the stage has no outputs.
    final TupleTag<OutputT> mainOutputTag = outputs.isEmpty() ? null : new TupleTag(outputs.keySet().iterator().next());
    // associate output tags with ids, output manager uses these Integer ids to serialize state
    BiMap<String, Integer> outputIndexMap = createOutputMap(outputs.keySet());
    Map<String, Coder<WindowedValue<?>>> outputCoders = Maps.newHashMap();
    Map<TupleTag<?>, Integer> tagsToIds = Maps.newHashMap();
    Map<String, TupleTag<?>> collectionIdToTupleTag = Maps.newHashMap();
    // order output names for deterministic mapping
    for (String localOutputName : new TreeMap<>(outputIndexMap).keySet()) {
        String collectionId = outputs.get(localOutputName);
        Coder<WindowedValue<?>> windowCoder = (Coder) instantiateCoder(collectionId, components);
        outputCoders.put(localOutputName, windowCoder);
        TupleTag<?> tupleTag = new TupleTag<>(localOutputName);
        CoderTypeInformation<WindowedValue<?>> typeInformation = new CoderTypeInformation(windowCoder, context.getPipelineOptions());
        tagsToOutputTags.put(tupleTag, new OutputTag<>(localOutputName, typeInformation));
        tagsToCoders.put(tupleTag, windowCoder);
        tagsToIds.put(tupleTag, outputIndexMap.get(localOutputName));
        collectionIdToTupleTag.put(collectionId, tupleTag);
    }
    final SingleOutputStreamOperator<WindowedValue<OutputT>> outputStream;
    DataStream<WindowedValue<InputT>> inputDataStream = context.getDataStreamOrThrow(inputPCollectionId);
    // Type information of the main output; null when the stage has no outputs.
    CoderTypeInformation<WindowedValue<OutputT>> outputTypeInformation = !outputs.isEmpty() ? new CoderTypeInformation(outputCoders.get(mainOutputTag.getId()), context.getPipelineOptions()) : null;
    // Every tag other than the main output is emitted as an additional (side) output.
    // NOTE(review): mainOutputTag is null when the stage has no outputs; this loop then relies on
    // tagsToCoders being empty, i.e. on createOutputMap returning an empty map for an empty key
    // set - confirm.
    ArrayList<TupleTag<?>> additionalOutputTags = Lists.newArrayList();
    for (TupleTag<?> tupleTag : tagsToCoders.keySet()) {
        if (!mainOutputTag.getId().equals(tupleTag.getId())) {
            additionalOutputTags.add(tupleTag);
        }
    }
    final Coder<WindowedValue<InputT>> windowedInputCoder = instantiateCoder(inputPCollectionId, components);
    final boolean stateful = stagePayload.getUserStatesCount() > 0 || stagePayload.getTimersCount() > 0;
    // True when any transform inside the stage is a "process sized elements and restrictions"
    // splittable DoFn step.
    final boolean hasSdfProcessFn = stagePayload.getComponents().getTransformsMap().values().stream().anyMatch(pTransform -> pTransform.getSpec().getUrn().equals(PTransformTranslation.SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN));
    Coder keyCoder = null;
    KeySelector<WindowedValue<InputT>, ?> keySelector = null;
    if (stateful || hasSdfProcessFn) {
        // Stateful and SDF stages are only allowed on KV input.
        // NOTE(review): the cast assumes the input coder is always a FullWindowedValueCoder; any
        // other coder fails here with a ClassCastException - confirm this is guaranteed upstream.
        Coder valueCoder = ((WindowedValue.FullWindowedValueCoder) windowedInputCoder).getValueCoder();
        if (!(valueCoder instanceof KvCoder)) {
            throw new IllegalStateException(String.format(Locale.ENGLISH, "The element coder for stateful DoFn '%s' must be KvCoder but is: %s", inputPCollectionId, valueCoder.getClass().getSimpleName()));
        }
        if (stateful) {
            // Stateful stage: key the stream by the key of the KV input element.
            keyCoder = ((KvCoder) valueCoder).getKeyCoder();
            keySelector = new KvToByteBufferKeySelector(keyCoder, new SerializablePipelineOptions(context.getPipelineOptions()));
        } else {
            // SDF stage: the input element is expected to be a KV whose key is itself a KV (see the
            // error message below); the key of that inner KV is used as the key.
            if (!(((KvCoder) valueCoder).getKeyCoder() instanceof KvCoder)) {
                throw new IllegalStateException(String.format(Locale.ENGLISH, "The element coder for splittable DoFn '%s' must be KVCoder(KvCoder, DoubleCoder) but is: %s", inputPCollectionId, valueCoder.getClass().getSimpleName()));
            }
            keyCoder = ((KvCoder) ((KvCoder) valueCoder).getKeyCoder()).getKeyCoder();
            keySelector = new SdfByteBufferKeySelector(keyCoder, new SerializablePipelineOptions(context.getPipelineOptions()));
        }
        inputDataStream = inputDataStream.keyBy(keySelector);
    }
    DoFnOperator.MultiOutputOutputManagerFactory<OutputT> outputManagerFactory = new DoFnOperator.MultiOutputOutputManagerFactory<>(mainOutputTag, tagsToOutputTags, tagsToCoders, tagsToIds, new SerializablePipelineOptions(context.getPipelineOptions()));
    DoFnOperator<InputT, OutputT> doFnOperator = new ExecutableStageDoFnOperator<>(transform.getUniqueName(), windowedInputCoder, Collections.emptyMap(), mainOutputTag, additionalOutputTags, outputManagerFactory, transformedSideInputs.unionTagToView, new ArrayList<>(transformedSideInputs.unionTagToView.values()), getSideInputIdToPCollectionViewMap(stagePayload, components), context.getPipelineOptions(), stagePayload, context.getJobInfo(), FlinkExecutableStageContextFactory.getInstance(), collectionIdToTupleTag, getWindowingStrategy(inputPCollectionId, components), keyCoder, keySelector);
    final String operatorName = generateNameFromStagePayload(stagePayload);
    if (transformedSideInputs.unionTagToView.isEmpty()) {
        // No side inputs: a plain one-input operator is sufficient.
        outputStream = inputDataStream.transform(operatorName, outputTypeInformation, doFnOperator);
    } else {
        // Side inputs are broadcast so that every parallel operator instance receives them.
        DataStream<RawUnionValue> sideInputStream = transformedSideInputs.unionedSideInputs.broadcast();
        if (stateful || hasSdfProcessFn) {
            // We have to manually construct the two-input transform because we're not
            // allowed to have only one input keyed, normally. Since Flink 1.5.0 it's
            // possible to use the Broadcast State Pattern which provides a more elegant
            // way to process keyed main input with broadcast state, but it's not feasible
            // here because it breaks the DoFnOperator abstraction.
            // NOTE(review): this branch names the transformation with transform.getUniqueName()
            // while the other branches use operatorName - confirm the difference is intended.
            TwoInputTransformation<WindowedValue<KV<?, InputT>>, RawUnionValue, WindowedValue<OutputT>> rawFlinkTransform = new TwoInputTransformation(inputDataStream.getTransformation(), sideInputStream.getTransformation(), transform.getUniqueName(), doFnOperator, outputTypeInformation, inputDataStream.getParallelism());
            // Only the first (main) input is keyed; the broadcast side input gets no key selector.
            rawFlinkTransform.setStateKeyType(((KeyedStream) inputDataStream).getKeyType());
            rawFlinkTransform.setStateKeySelectors(((KeyedStream) inputDataStream).getKeySelector(), null);
            outputStream = new SingleOutputStreamOperator(inputDataStream.getExecutionEnvironment(), // we have to cheat around the ctor being protected
            rawFlinkTransform) {
            };
        } else {
            outputStream = inputDataStream.connect(sideInputStream).transform(operatorName, outputTypeInformation, doFnOperator);
        }
    }
    // Assign a unique but consistent id to re-map operator state
    outputStream.uid(transform.getUniqueName());
    // Register the main output (if any) and each additional output under its PCollection id.
    if (mainOutputTag != null) {
        context.addDataStream(outputs.get(mainOutputTag.getId()), outputStream);
    }
    for (TupleTag<?> tupleTag : additionalOutputTags) {
        context.addDataStream(outputs.get(tupleTag.getId()), outputStream.getSideOutput(tagsToOutputTags.get(tupleTag)));
    }
}
Also used : KvToByteBufferKeySelector(org.apache.beam.runners.flink.translation.wrappers.streaming.KvToByteBufferKeySelector) TupleTag(org.apache.beam.sdk.values.TupleTag) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) WindowedValue(org.apache.beam.sdk.util.WindowedValue) OutputTag(org.apache.flink.util.OutputTag) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) RawUnionValue(org.apache.beam.sdk.transforms.join.RawUnionValue) SingleOutputStreamOperator(org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator) WindowDoFnOperator(org.apache.beam.runners.flink.translation.wrappers.streaming.WindowDoFnOperator) DoFnOperator(org.apache.beam.runners.flink.translation.wrappers.streaming.DoFnOperator) ExecutableStageDoFnOperator(org.apache.beam.runners.flink.translation.wrappers.streaming.ExecutableStageDoFnOperator) SdfByteBufferKeySelector(org.apache.beam.runners.flink.translation.wrappers.streaming.SdfByteBufferKeySelector) TwoInputTransformation(org.apache.flink.streaming.api.transformations.TwoInputTransformation) ExecutableStageDoFnOperator(org.apache.beam.runners.flink.translation.wrappers.streaming.ExecutableStageDoFnOperator) CoderTypeInformation(org.apache.beam.runners.flink.translation.types.CoderTypeInformation) SingletonKeyedWorkItemCoder(org.apache.beam.runners.flink.translation.wrappers.streaming.SingletonKeyedWorkItemCoder) WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) KvCoder(org.apache.beam.sdk.coders.KvCoder) PipelineTranslatorUtils.instantiateCoder(org.apache.beam.runners.fnexecution.translation.PipelineTranslatorUtils.instantiateCoder) IterableCoder(org.apache.beam.sdk.coders.IterableCoder) VoidCoder(org.apache.beam.sdk.coders.VoidCoder) UnionCoder(org.apache.beam.sdk.transforms.join.UnionCoder) Coder(org.apache.beam.sdk.coders.Coder) ByteArrayCoder(org.apache.beam.sdk.coders.ByteArrayCoder) KvCoder(org.apache.beam.sdk.coders.KvCoder) 
IOException(java.io.IOException)

Example 12 with KvCoder

use of org.apache.beam.sdk.coders.KvCoder in project beam by apache.

the class DataflowSideInputHandlerFactory method forMultimapSideInput.

/**
 * Builds a multimap side input handler for the given side input of the given PTransform.
 *
 * <p>Validates that the PTransform id is non-empty, that a side input reader and a view are
 * registered for it, that the view uses the multimap materialization, and that the view's coder is
 * a {@link KvCoder}. The key and value coders handed to the handler come from {@code elementCoder}.
 */
@Override
public <K, V, W extends BoundedWindow> MultimapSideInputHandler<K, V, W> forMultimapSideInput(String pTransformId, String sideInputId, KvCoder<K, V> elementCoder, Coder<W> windowCoder) {
    checkArgument(pTransformId != null && !pTransformId.isEmpty(), "Expect a valid PTransform ID.");

    SideInputReader reader = ptransformIdToSideInputReader.get(pTransformId);
    checkState(reader != null, String.format("Unknown PTransform '%s'", pTransformId));

    // The view map is keyed by the (transform id, local name) pair.
    RunnerApi.ExecutableStagePayload.SideInputId lookupKey = RunnerApi.ExecutableStagePayload.SideInputId.newBuilder().setTransformId(pTransformId).setLocalName(sideInputId).build();
    PCollectionView<Materializations.MultimapView<Object, Object>> multimapView = (PCollectionView<Materializations.MultimapView<Object, Object>>) sideInputIdToPCollectionViewMap.get(lookupKey);
    checkState(multimapView != null, String.format("Unknown side input '%s' on PTransform '%s'", sideInputId, pTransformId));
    checkState(Materializations.MULTIMAP_MATERIALIZATION_URN.equals(multimapView.getViewFn().getMaterialization().getUrn()), String.format("Unknown materialization for side input '%s' on PTransform '%s' with urn '%s'", sideInputId, pTransformId, multimapView.getViewFn().getMaterialization().getUrn()));
    checkState(multimapView.getCoderInternal() instanceof KvCoder, String.format("Materialization of side input '%s' on PTransform '%s' expects %s but received %s.", sideInputId, pTransformId, KvCoder.class.getSimpleName(), multimapView.getCoderInternal().getClass().getSimpleName()));

    return new DataflowMultimapSideInputHandler<>(reader, multimapView, elementCoder.getKeyCoder(), elementCoder.getValueCoder(), windowCoder);
}
Also used : PCollectionView(org.apache.beam.sdk.values.PCollectionView) KvCoder(org.apache.beam.sdk.coders.KvCoder) SideInputReader(org.apache.beam.runners.core.SideInputReader) Materializations(org.apache.beam.sdk.transforms.Materializations)

Example 13 with KvCoder

use of org.apache.beam.sdk.coders.KvCoder in project beam by apache.

the class CommonCoderTest method convertValue.

/**
 * Maps a value produced by JSON auto-deserialization onto the Java type expected by the known
 * coder identified by {@code coderSpec}'s URN, recursing into component coders where the coder is
 * composite.
 *
 * @throws IllegalStateException if the coder URN is not one of the handled standard coders
 */
private static Object convertValue(Object value, CommonCoder coderSpec, Coder coder) {
    final String urn = coderSpec.getUrn();

    if (urn.equals(getUrn(StandardCoders.Enum.BYTES))) {
        return ((String) value).getBytes(StandardCharsets.ISO_8859_1);
    }
    // Booleans and UTF-8 strings are passed through untouched.
    if (urn.equals(getUrn(StandardCoders.Enum.BOOL)) || urn.equals(getUrn(StandardCoders.Enum.STRING_UTF8))) {
        return value;
    }
    if (urn.equals(getUrn(StandardCoders.Enum.KV))) {
        Coder keyComponentCoder = ((KvCoder) coder).getKeyCoder();
        Coder valueComponentCoder = ((KvCoder) coder).getValueCoder();
        Map<String, Object> fields = (Map<String, Object>) value;
        Object convertedKey = convertValue(fields.get("key"), coderSpec.getComponents().get(0), keyComponentCoder);
        Object convertedValue = convertValue(fields.get("value"), coderSpec.getComponents().get(1), valueComponentCoder);
        return KV.of(convertedKey, convertedValue);
    }
    if (urn.equals(getUrn(StandardCoders.Enum.VARINT))) {
        return ((Number) value).longValue();
    }
    if (urn.equals(getUrn(StandardCoders.Enum.TIMER))) {
        Map<String, Object> fields = (Map<String, Object>) value;
        Coder<?> userKeyCoder = ((Timer.Coder) coder).getValueCoder();
        Coder<? extends BoundedWindow> timerWindowCoder = ((Timer.Coder) coder).getWindowCoder();
        List<BoundedWindow> timerWindows = new ArrayList<>();
        for (Object rawWindow : (List<Object>) fields.get("windows")) {
            timerWindows.add((BoundedWindow) convertValue(rawWindow, coderSpec.getComponents().get(1), timerWindowCoder));
        }
        // A cleared timer carries no timestamps or pane information.
        if ((boolean) fields.get("clearBit")) {
            return Timer.cleared(convertValue(fields.get("userKey"), coderSpec.getComponents().get(0), userKeyCoder), (String) fields.get("dynamicTimerTag"), timerWindows);
        }
        Map<String, Object> paneFields = (Map<String, Object>) fields.get("pane");
        PaneInfo pane = PaneInfo.createPane((boolean) paneFields.get("is_first"), (boolean) paneFields.get("is_last"), PaneInfo.Timing.valueOf((String) paneFields.get("timing")), (int) paneFields.get("index"), (int) paneFields.get("on_time_index"));
        return Timer.of(convertValue(fields.get("userKey"), coderSpec.getComponents().get(0), userKeyCoder), (String) fields.get("dynamicTimerTag"), timerWindows, new Instant(((Number) fields.get("fireTimestamp")).longValue()), new Instant(((Number) fields.get("holdTimestamp")).longValue()), pane);
    }
    if (urn.equals(getUrn(StandardCoders.Enum.INTERVAL_WINDOW))) {
        Map<String, Object> fields = (Map<String, Object>) value;
        Instant windowEnd = new Instant(((Number) fields.get("end")).longValue());
        Duration windowSpan = Duration.millis(((Number) fields.get("span")).longValue());
        // The JSON form stores the end and the span; the window is built from start and span.
        return new IntervalWindow(windowEnd.minus(windowSpan), windowSpan);
    }
    if (urn.equals(getUrn(StandardCoders.Enum.ITERABLE)) || urn.equals(getUrn(StandardCoders.Enum.STATE_BACKED_ITERABLE))) {
        Coder itemCoder = ((IterableLikeCoder) coder).getElemCoder();
        List<Object> rawItems = (List<Object>) value;
        List<Object> convertedItems = new ArrayList<>();
        for (Object rawItem : rawItems) {
            convertedItems.add(convertValue(rawItem, coderSpec.getComponents().get(0), itemCoder));
        }
        return convertedItems;
    }
    if (urn.equals(getUrn(StandardCoders.Enum.GLOBAL_WINDOW))) {
        return GlobalWindow.INSTANCE;
    }
    if (urn.equals(getUrn(StandardCoders.Enum.WINDOWED_VALUE)) || urn.equals(getUrn(StandardCoders.Enum.PARAM_WINDOWED_VALUE))) {
        Map<String, Object> fields = (Map<String, Object>) value;
        Coder innerValueCoder = ((WindowedValue.FullWindowedValueCoder) coder).getValueCoder();
        Coder innerWindowCoder = ((WindowedValue.FullWindowedValueCoder) coder).getWindowCoder();
        Object payload = convertValue(fields.get("value"), coderSpec.getComponents().get(0), innerValueCoder);
        Instant elementTimestamp = new Instant(((Number) fields.get("timestamp")).longValue());
        List<BoundedWindow> elementWindows = new ArrayList<>();
        for (Object rawWindow : (List<Object>) fields.get("windows")) {
            elementWindows.add((BoundedWindow) convertValue(rawWindow, coderSpec.getComponents().get(1), innerWindowCoder));
        }
        Map<String, Object> paneFields = (Map<String, Object>) fields.get("pane");
        PaneInfo pane = PaneInfo.createPane((boolean) paneFields.get("is_first"), (boolean) paneFields.get("is_last"), PaneInfo.Timing.valueOf((String) paneFields.get("timing")), (int) paneFields.get("index"), (int) paneFields.get("on_time_index"));
        return WindowedValue.of(payload, elementTimestamp, elementWindows, pane);
    }
    if (urn.equals(getUrn(StandardCoders.Enum.DOUBLE))) {
        // Doubles arrive as strings in the JSON form.
        return Double.parseDouble((String) value);
    }
    if (urn.equals(getUrn(StandardCoders.Enum.ROW))) {
        Schema rowSchema;
        try {
            rowSchema = SchemaTranslation.schemaFromProto(SchemaApi.Schema.parseFrom(coderSpec.getPayload()));
        } catch (InvalidProtocolBufferException e) {
            throw new RuntimeException("Failed to parse schema payload for row coder", e);
        }
        return parseField(value, Schema.FieldType.row(rowSchema));
    }
    if (urn.equals(getUrn(StandardCoders.Enum.SHARDED_KEY))) {
        Map<String, Object> fields = (Map<String, Object>) value;
        Coder<?> shardedKeyCoder = ((ShardedKey.Coder) coder).getKeyCoder();
        byte[] shardIdBytes = ((String) fields.get("shardId")).getBytes(StandardCharsets.ISO_8859_1);
        return ShardedKey.of(convertValue(fields.get("key"), coderSpec.getComponents().get(0), shardedKeyCoder), shardIdBytes);
    }
    if (urn.equals(getUrn(StandardCoders.Enum.CUSTOM_WINDOW))) {
        Map<String, Object> fields = (Map<String, Object>) value;
        Coder wrappedWindowCoder = ((TimestampPrefixingWindowCoder) coder).getWindowCoder();
        return convertValue(fields.get("window"), coderSpec.getComponents().get(0), wrappedWindowCoder);
    }
    throw new IllegalStateException("Unknown coder URN: " + coderSpec.getUrn());
}
Also used : DoubleCoder(org.apache.beam.sdk.coders.DoubleCoder) IntervalWindowCoder(org.apache.beam.sdk.transforms.windowing.IntervalWindow.IntervalWindowCoder) ByteCoder(org.apache.beam.sdk.coders.ByteCoder) KvCoder(org.apache.beam.sdk.coders.KvCoder) VarLongCoder(org.apache.beam.sdk.coders.VarLongCoder) BooleanCoder(org.apache.beam.sdk.coders.BooleanCoder) TimestampPrefixingWindowCoder(org.apache.beam.sdk.coders.TimestampPrefixingWindowCoder) Coder(org.apache.beam.sdk.coders.Coder) RowCoder(org.apache.beam.sdk.coders.RowCoder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) IterableCoder(org.apache.beam.sdk.coders.IterableCoder) IterableLikeCoder(org.apache.beam.sdk.coders.IterableLikeCoder) Instant(org.joda.time.Instant) Schema(org.apache.beam.sdk.schemas.Schema) InvalidProtocolBufferException(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException) Duration(org.joda.time.Duration) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Timer(org.apache.beam.runners.core.construction.Timer) WindowedValue(org.apache.beam.sdk.util.WindowedValue) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) List(java.util.List) ImmutableList.toImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) ImmutableBiMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableBiMap) Map(java.util.Map) HashMap(java.util.HashMap) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow)

Example 14 with KvCoder

use of org.apache.beam.sdk.coders.KvCoder in project beam by apache.

the class KryoCoderTest method testCodingWithKvCoderClassToBeEncoded.

/**
 * Encodes a KV whose key goes through a {@code KryoCoder} and whose value is a list of two nulls,
 * decodes it again, and checks that key and value survive the round trip.
 */
@Test
public void testCodingWithKvCoderClassToBeEncoded() throws IOException {
    final KryoRegistrar kryoRegistrar = kryo -> {
        kryo.register(TestClass.class);
        kryo.register(ClassToBeEncoded.class);
    };
    final ListCoder<Void> valueCoder = ListCoder.of(VoidCoder.of());
    final KvCoder<ClassToBeEncoded, List<Void>> coderUnderTest = KvCoder.of(KryoCoder.of(OPTIONS, kryoRegistrar), valueCoder);

    // Value side: a list holding two null entries.
    final List<Void> expectedValue = new ArrayList<>();
    expectedValue.add(null);
    expectedValue.add(null);

    final ByteArrayOutputStream sink = new ByteArrayOutputStream();
    final ClassToBeEncoded expectedKey = new ClassToBeEncoded("something", 1, 0.2);
    final KV<ClassToBeEncoded, List<Void>> original = KV.of(expectedKey, expectedValue);
    coderUnderTest.encode(original, sink);

    final byte[] encodedBytes = sink.toByteArray();
    final KV<ClassToBeEncoded, List<Void>> roundTripped = coderUnderTest.decode(new ByteArrayInputStream(encodedBytes));

    assertNotNull(roundTripped);
    assertNotNull(roundTripped.getKey());
    assertEquals(expectedKey, roundTripped.getKey());
    assertNotNull(roundTripped.getValue());
    assertEquals(expectedValue, roundTripped.getValue());
}
Also used : KvCoder(org.apache.beam.sdk.coders.KvCoder) KV(org.apache.beam.sdk.values.KV) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Assert.assertNotNull(org.junit.Assert.assertNotNull) ObjectInputStream(java.io.ObjectInputStream) ListCoder(org.apache.beam.sdk.coders.ListCoder) IOException(java.io.IOException) Test(org.junit.Test) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) ArrayList(java.util.ArrayList) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) Objects(java.util.Objects) CoderException(org.apache.beam.sdk.coders.CoderException) List(java.util.List) ByteArrayInputStream(java.io.ByteArrayInputStream) ObjectOutputStream(java.io.ObjectOutputStream) VoidCoder(org.apache.beam.sdk.coders.VoidCoder) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Assert.assertEquals(org.junit.Assert.assertEquals) Nullable(org.checkerframework.checker.nullness.qual.Nullable) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) ArrayList(java.util.ArrayList) List(java.util.List) Test(org.junit.Test)

Example 15 with KvCoder

use of org.apache.beam.sdk.coders.KvCoder in project beam by apache.

the class FnApiStateAccessor method get.

/**
 * Returns the side input value of {@code view} for the side input window that {@code window} maps
 * to, computing it through the view function on first access and serving it from
 * {@code stateKeyObjectCache} afterwards.
 *
 * @throws IllegalArgumentException if no side input spec is registered for the view's tag
 * @throws IllegalStateException if the window cannot be encoded, or if the access pattern is
 *     neither the iterable nor the multimap materialization
 */
@Override
@Nullable
public <T> T get(PCollectionView<T> view, BoundedWindow window) {
    TupleTag<?> viewTag = view.getTagInternal();
    SideInputSpec spec = sideInputSpecMap.get(viewTag);
    checkArgument(spec != null, "Attempting to access unknown side input %s.", view);

    // Encode the side input window that the main input window maps to.
    ByteString.Output windowSink = ByteString.newOutput();
    try {
        spec.getWindowCoder().encode(spec.getWindowMappingFn().getSideInputWindow(window), windowSink);
    } catch (IOException e) {
        throw new IllegalStateException(e);
    }
    ByteString windowBytes = windowSink.toByteString();

    // Build the state key that identifies this (transform, side input, window) combination.
    StateKey.Builder stateKeyBuilder = StateKey.newBuilder();
    switch(spec.getAccessPattern()) {
        case Materializations.ITERABLE_MATERIALIZATION_URN:
            stateKeyBuilder.getIterableSideInputBuilder().setTransformId(ptransformId).setSideInputId(viewTag.getId()).setWindow(windowBytes);
            break;
        case Materializations.MULTIMAP_MATERIALIZATION_URN:
            checkState(spec.getCoder() instanceof KvCoder, "Expected %s but received %s.", KvCoder.class, spec.getCoder().getClass());
            stateKeyBuilder.getMultimapKeysSideInputBuilder().setTransformId(ptransformId).setSideInputId(viewTag.getId()).setWindow(windowBytes);
            break;
        default:
            throw new IllegalStateException(String.format("This SDK is only capable of dealing with %s materializations " + "but was asked to handle %s for PCollectionView with tag %s.", ImmutableList.of(Materializations.ITERABLE_MATERIALIZATION_URN, Materializations.MULTIMAP_MATERIALIZATION_URN), spec.getAccessPattern(), viewTag));
    }
    StateKey lookupKey = stateKeyBuilder.build();

    // Materialize the view lazily; later lookups with an equal state key reuse the cached object.
    return (T) stateKeyObjectCache.computeIfAbsent(lookupKey, stateKey -> {
        switch(spec.getAccessPattern()) {
            case Materializations.ITERABLE_MATERIALIZATION_URN:
                return spec.getViewFn().apply(new IterableSideInput<>(getCacheFor(stateKey), beamFnStateClient, processBundleInstructionId.get(), stateKey, spec.getCoder()));
            case Materializations.MULTIMAP_MATERIALIZATION_URN:
                return spec.getViewFn().apply(new MultimapSideInput<>(getCacheFor(stateKey), beamFnStateClient, processBundleInstructionId.get(), stateKey, ((KvCoder) spec.getCoder()).getKeyCoder(), ((KvCoder) spec.getCoder()).getValueCoder()));
            default:
                throw new IllegalStateException(String.format("This SDK is only capable of dealing with %s materializations " + "but was asked to handle %s for PCollectionView with tag %s.", ImmutableList.of(Materializations.ITERABLE_MATERIALIZATION_URN, Materializations.MULTIMAP_MATERIALIZATION_URN), spec.getAccessPattern(), viewTag));
        }
    });
}
Also used : StateSpec(org.apache.beam.sdk.state.StateSpec) CombineFn(org.apache.beam.sdk.transforms.Combine.CombineFn) CombineFnWithContext(org.apache.beam.sdk.transforms.CombineWithContext.CombineFnWithContext) SetState(org.apache.beam.sdk.state.SetState) TimestampCombiner(org.apache.beam.sdk.transforms.windowing.TimestampCombiner) Coder(org.apache.beam.sdk.coders.Coder) ValueState(org.apache.beam.sdk.state.ValueState) StateContext(org.apache.beam.sdk.state.StateContext) Function(java.util.function.Function) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) CacheToken(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleRequest.CacheToken) MapState(org.apache.beam.sdk.state.MapState) TupleTag(org.apache.beam.sdk.values.TupleTag) Map(java.util.Map) Cache(org.apache.beam.fn.harness.Cache) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) Preconditions.checkArgument(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument) Maps(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Maps) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Materializations(org.apache.beam.sdk.transforms.Materializations) CombineFnUtil(org.apache.beam.sdk.util.CombineFnUtil) Nullable(org.checkerframework.checker.nullness.qual.Nullable) SideInputReader(org.apache.beam.runners.core.SideInputReader) KvCoder(org.apache.beam.sdk.coders.KvCoder) OrderedListState(org.apache.beam.sdk.state.OrderedListState) Iterator(java.util.Iterator) Collection(java.util.Collection) ReadableStates(org.apache.beam.sdk.state.ReadableStates) IOException(java.io.IOException) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi) List(java.util.List) BagState(org.apache.beam.sdk.state.BagState) CombiningState(org.apache.beam.sdk.state.CombiningState) WatermarkHoldState(org.apache.beam.sdk.state.WatermarkHoldState) 
Preconditions.checkState(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState) PCollectionView(org.apache.beam.sdk.values.PCollectionView) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) Caches(org.apache.beam.fn.harness.Caches) VoidCoder(org.apache.beam.sdk.coders.VoidCoder) StateBinder(org.apache.beam.sdk.state.StateBinder) StateKey(org.apache.beam.model.fnexecution.v1.BeamFnApi.StateKey) ThrowingRunnable(org.apache.beam.sdk.function.ThrowingRunnable) ReadableState(org.apache.beam.sdk.state.ReadableState) StateKey(org.apache.beam.model.fnexecution.v1.BeamFnApi.StateKey) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) KvCoder(org.apache.beam.sdk.coders.KvCoder) IOException(java.io.IOException) Nullable(org.checkerframework.checker.nullness.qual.Nullable)

Aggregations

KvCoder (org.apache.beam.sdk.coders.KvCoder)44 Coder (org.apache.beam.sdk.coders.Coder)26 WindowedValue (org.apache.beam.sdk.util.WindowedValue)25 KV (org.apache.beam.sdk.values.KV)21 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)20 Map (java.util.Map)17 List (java.util.List)16 ArrayList (java.util.ArrayList)15 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)15 IOException (java.io.IOException)14 HashMap (java.util.HashMap)14 WindowingStrategy (org.apache.beam.sdk.values.WindowingStrategy)13 StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder)11 IterableCoder (org.apache.beam.sdk.coders.IterableCoder)10 VoidCoder (org.apache.beam.sdk.coders.VoidCoder)10 PCollectionView (org.apache.beam.sdk.values.PCollectionView)10 Test (org.junit.Test)10 WindowedValueCoder (org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder)8 ViewFn (org.apache.beam.sdk.transforms.ViewFn)7 PCollection (org.apache.beam.sdk.values.PCollection)7