Search in sources :

Example 21 with Coder

use of org.apache.beam.sdk.coders.Coder in project beam by apache.

the class CommonCoderTest method convertValue.

/**
 * Converts from JSON-auto-deserialized types into the proper Java types for the known coders.
 *
 * <p>The conversion is selected by the URN of {@code coderSpec}. Composite coders (KV, timer,
 * iterable, windowed value, sharded key, custom window) recurse into the component specs of
 * {@code coderSpec}, pairing each one with the component coder obtained by casting {@code coder}
 * to the concrete coder type for that URN.
 *
 * @param value the deserialized representation of the value; the expected shape depends on the URN
 * @param coderSpec the coder specification whose URN (and components / payload) drive the conversion
 * @param coder the coder instance corresponding to {@code coderSpec}
 * @return the value converted to the Java type used for that coder
 * @throws IllegalStateException if the URN of {@code coderSpec} is not one of the handled coders
 * @throws RuntimeException if the schema payload of a row coder cannot be parsed
 */
private static Object convertValue(Object value, CommonCoder coderSpec, Coder coder) {
    String urn = coderSpec.getUrn();
    if (urn.equals(getUrn(StandardCoders.Enum.BYTES))) {
        // Bytes arrive as a string; ISO_8859_1 maps each char to exactly one byte.
        return ((String) value).getBytes(StandardCharsets.ISO_8859_1);
    } else if (urn.equals(getUrn(StandardCoders.Enum.BOOL))) {
        return value;
    } else if (urn.equals(getUrn(StandardCoders.Enum.STRING_UTF8))) {
        return value;
    } else if (urn.equals(getUrn(StandardCoders.Enum.KV))) {
        Coder keyCoder = ((KvCoder) coder).getKeyCoder();
        Coder valueCoder = ((KvCoder) coder).getValueCoder();
        Map<String, Object> kvMap = (Map<String, Object>) value;
        Object k = convertValue(kvMap.get("key"), coderSpec.getComponents().get(0), keyCoder);
        Object v = convertValue(kvMap.get("value"), coderSpec.getComponents().get(1), valueCoder);
        return KV.of(k, v);
    } else if (urn.equals(getUrn(StandardCoders.Enum.VARINT))) {
        return ((Number) value).longValue();
    } else if (urn.equals(getUrn(StandardCoders.Enum.TIMER))) {
        Map<String, Object> kvMap = (Map<String, Object>) value;
        // The coder used for the user key is read from Timer.Coder via getValueCoder().
        Coder<?> keyCoder = ((Timer.Coder) coder).getValueCoder();
        Coder<? extends BoundedWindow> windowCoder = ((Timer.Coder) coder).getWindowCoder();
        List<BoundedWindow> windows = new ArrayList<>();
        for (Object window : (List<Object>) kvMap.get("windows")) {
            windows.add((BoundedWindow) convertValue(window, coderSpec.getComponents().get(1), windowCoder));
        }
        // Both the cleared and the regular timer need the key and tag, so convert them once.
        Object userKey = convertValue(kvMap.get("userKey"), coderSpec.getComponents().get(0), keyCoder);
        String dynamicTimerTag = (String) kvMap.get("dynamicTimerTag");
        if ((boolean) kvMap.get("clearBit")) {
            // A cleared timer carries no timestamps or pane, so those entries are not read.
            return Timer.cleared(userKey, dynamicTimerTag, windows);
        }
        PaneInfo paneInfo = convertPaneInfo((Map<String, Object>) kvMap.get("pane"));
        Instant fireTimestamp = new Instant(((Number) kvMap.get("fireTimestamp")).longValue());
        Instant holdTimestamp = new Instant(((Number) kvMap.get("holdTimestamp")).longValue());
        return Timer.of(userKey, dynamicTimerTag, windows, fireTimestamp, holdTimestamp, paneInfo);
    } else if (urn.equals(getUrn(StandardCoders.Enum.INTERVAL_WINDOW))) {
        Map<String, Object> kvMap = (Map<String, Object>) value;
        Instant end = new Instant(((Number) kvMap.get("end")).longValue());
        Duration span = Duration.millis(((Number) kvMap.get("span")).longValue());
        // The map stores the end and the span; the window is built from start = end - span.
        return new IntervalWindow(end.minus(span), span);
    } else if (urn.equals(getUrn(StandardCoders.Enum.ITERABLE)) || urn.equals(getUrn(StandardCoders.Enum.STATE_BACKED_ITERABLE))) {
        Coder elementCoder = ((IterableLikeCoder) coder).getElemCoder();
        List<Object> elements = (List<Object>) value;
        List<Object> convertedElements = new ArrayList<>();
        for (Object element : elements) {
            convertedElements.add(convertValue(element, coderSpec.getComponents().get(0), elementCoder));
        }
        return convertedElements;
    } else if (urn.equals(getUrn(StandardCoders.Enum.GLOBAL_WINDOW))) {
        return GlobalWindow.INSTANCE;
    } else if (urn.equals(getUrn(StandardCoders.Enum.WINDOWED_VALUE)) || urn.equals(getUrn(StandardCoders.Enum.PARAM_WINDOWED_VALUE))) {
        Map<String, Object> kvMap = (Map<String, Object>) value;
        Coder valueCoder = ((WindowedValue.FullWindowedValueCoder) coder).getValueCoder();
        Coder windowCoder = ((WindowedValue.FullWindowedValueCoder) coder).getWindowCoder();
        Object windowValue = convertValue(kvMap.get("value"), coderSpec.getComponents().get(0), valueCoder);
        Instant timestamp = new Instant(((Number) kvMap.get("timestamp")).longValue());
        List<BoundedWindow> windows = new ArrayList<>();
        for (Object window : (List<Object>) kvMap.get("windows")) {
            windows.add((BoundedWindow) convertValue(window, coderSpec.getComponents().get(1), windowCoder));
        }
        PaneInfo paneInfo = convertPaneInfo((Map<String, Object>) kvMap.get("pane"));
        return WindowedValue.of(windowValue, timestamp, windows, paneInfo);
    } else if (urn.equals(getUrn(StandardCoders.Enum.DOUBLE))) {
        // Doubles are expected as strings here and parsed explicitly.
        return Double.parseDouble((String) value);
    } else if (urn.equals(getUrn(StandardCoders.Enum.ROW))) {
        Schema schema;
        try {
            schema = SchemaTranslation.schemaFromProto(SchemaApi.Schema.parseFrom(coderSpec.getPayload()));
        } catch (InvalidProtocolBufferException e) {
            throw new RuntimeException("Failed to parse schema payload for row coder", e);
        }
        return parseField(value, Schema.FieldType.row(schema));
    } else if (urn.equals(getUrn(StandardCoders.Enum.SHARDED_KEY))) {
        Map<String, Object> kvMap = (Map<String, Object>) value;
        Coder<?> keyCoder = ((ShardedKey.Coder) coder).getKeyCoder();
        byte[] shardId = ((String) kvMap.get("shardId")).getBytes(StandardCharsets.ISO_8859_1);
        return ShardedKey.of(convertValue(kvMap.get("key"), coderSpec.getComponents().get(0), keyCoder), shardId);
    } else if (urn.equals(getUrn(StandardCoders.Enum.CUSTOM_WINDOW))) {
        Map<String, Object> kvMap = (Map<String, Object>) value;
        Coder windowCoder = ((TimestampPrefixingWindowCoder) coder).getWindowCoder();
        // Only the nested "window" entry is converted, using the wrapped window coder.
        return convertValue(kvMap.get("window"), coderSpec.getComponents().get(0), windowCoder);
    } else {
        throw new IllegalStateException("Unknown coder URN: " + coderSpec.getUrn());
    }
}

/**
 * Builds a {@code PaneInfo} from its deserialized map form.
 *
 * <p>Reads the entries {@code is_first}, {@code is_last}, {@code timing}, {@code index} and
 * {@code on_time_index}. The two indices are read through {@link Number} so that any integral
 * boxed type is accepted; a value outside the {@code int} range fails with an
 * {@link ArithmeticException} rather than being truncated.
 *
 * @param paneInfoMap the deserialized pane description
 * @return the corresponding pane info
 */
private static PaneInfo convertPaneInfo(Map<String, Object> paneInfoMap) {
    boolean isFirst = (boolean) paneInfoMap.get("is_first");
    boolean isLast = (boolean) paneInfoMap.get("is_last");
    PaneInfo.Timing timing = PaneInfo.Timing.valueOf((String) paneInfoMap.get("timing"));
    int index = Math.toIntExact(((Number) paneInfoMap.get("index")).longValue());
    int onTimeIndex = Math.toIntExact(((Number) paneInfoMap.get("on_time_index")).longValue());
    return PaneInfo.createPane(isFirst, isLast, timing, index, onTimeIndex);
}
Also used : DoubleCoder(org.apache.beam.sdk.coders.DoubleCoder) IntervalWindowCoder(org.apache.beam.sdk.transforms.windowing.IntervalWindow.IntervalWindowCoder) ByteCoder(org.apache.beam.sdk.coders.ByteCoder) KvCoder(org.apache.beam.sdk.coders.KvCoder) VarLongCoder(org.apache.beam.sdk.coders.VarLongCoder) BooleanCoder(org.apache.beam.sdk.coders.BooleanCoder) TimestampPrefixingWindowCoder(org.apache.beam.sdk.coders.TimestampPrefixingWindowCoder) Coder(org.apache.beam.sdk.coders.Coder) RowCoder(org.apache.beam.sdk.coders.RowCoder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) IterableCoder(org.apache.beam.sdk.coders.IterableCoder) IterableLikeCoder(org.apache.beam.sdk.coders.IterableLikeCoder) Instant(org.joda.time.Instant) Schema(org.apache.beam.sdk.schemas.Schema) InvalidProtocolBufferException(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException) Duration(org.joda.time.Duration) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Timer(org.apache.beam.runners.core.construction.Timer) WindowedValue(org.apache.beam.sdk.util.WindowedValue) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) List(java.util.List) ImmutableList.toImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) ImmutableBiMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableBiMap) Map(java.util.Map) HashMap(java.util.HashMap) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow)

Example 22 with Coder

use of org.apache.beam.sdk.coders.Coder in project beam by apache.

the class FlattenP method tryProcess.

/**
 * Re-encodes one incoming item with the output coder and tries to emit it.
 *
 * <p>The item is decoded as a windowed value using the coder looked up for {@code ordinal} in
 * {@code inputOrdinalCoders}, then encoded again with {@code outputCoder}.
 *
 * @param ordinal the input ordinal used to select the decoding coder
 * @param item the encoded item; it is cast to {@code byte[]}
 * @return the result of {@code tryEmit} for the re-encoded value
 */
@Override
protected boolean tryProcess(int ordinal, @Nonnull Object item) {
    // Look up the coder first, then decode the raw bytes with it.
    Coder coderForOrdinal = inputOrdinalCoders.get(ordinal);
    WindowedValue<Object> decoded =
        Utils.decodeWindowedValue((byte[]) item, coderForOrdinal);
    return tryEmit(Utils.encode(decoded, outputCoder));
}
Also used : Coder(org.apache.beam.sdk.coders.Coder)

Example 23 with Coder

use of org.apache.beam.sdk.coders.Coder in project beam by apache.

the class ParDoTranslatorBatch method createBroadcastSideInputs.

/**
 * Collects each side input's dataset, encodes its elements to bytes and registers the encoded
 * list as a Spark broadcast.
 *
 * <p>For every view, a full windowed-value coder is built from the PCollection's element coder
 * and the window coder of its windowing strategy. The broadcast is added to the returned
 * {@code SideInputBroadcast} under the id of the view's internal tag, together with that coder.
 *
 * @param sideInputs the side input views to broadcast
 * @param context the translation context providing the Spark session and side input datasets
 * @return the holder with one entry added per side input
 */
private static SideInputBroadcast createBroadcastSideInputs(List<PCollectionView<?>> sideInputs, AbstractTranslationContext context) {
    JavaSparkContext sparkContext = JavaSparkContext.fromSparkContext(context.getSparkSession().sparkContext());
    SideInputBroadcast result = new SideInputBroadcast();
    for (PCollectionView<?> view : sideInputs) {
        Coder<? extends BoundedWindow> windowCoder =
            view.getPCollection().getWindowingStrategy().getWindowFn().windowCoder();
        Coder<WindowedValue<?>> fullCoder =
            (Coder<WindowedValue<?>>) (Coder<?>) WindowedValue.getFullCoder(view.getPCollection().getCoder(), windowCoder);

        // Pull the whole dataset into a local list so it can be encoded element by element.
        Dataset<WindowedValue<?>> sideInputDataset = context.getSideInputDataSet(view);
        List<WindowedValue<?>> collected = sideInputDataset.collectAsList();

        List<byte[]> encoded = new ArrayList<>();
        collected.forEach(windowedValue -> encoded.add(CoderHelpers.toByteArray(windowedValue, fullCoder)));

        String tagId = view.getTagInternal().getId();
        result.add(tagId, sparkContext.broadcast(encoded), fullCoder);
    }
    return result;
}
Also used : SerializableCoder(org.apache.beam.sdk.coders.SerializableCoder) Coder(org.apache.beam.sdk.coders.Coder) MultiOutputCoder(org.apache.beam.runners.spark.structuredstreaming.translation.helpers.MultiOutputCoder) SideInputBroadcast(org.apache.beam.runners.spark.structuredstreaming.translation.helpers.SideInputBroadcast) WindowedValue(org.apache.beam.sdk.util.WindowedValue) ArrayList(java.util.ArrayList) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext)

Example 24 with Coder

use of org.apache.beam.sdk.coders.Coder in project beam by apache.

the class SparkSideInputReader method initializeBroadcastVariable.

/**
 * Groups the input values by window and applies the view function once per window.
 *
 * <p>A value that belongs to several windows is listed under each of them. The way the grouped
 * values are handed to the view function depends on the materialization URN of the view:
 * iterable materialization receives a supplier of the unwrapped values, multimap materialization
 * receives an in-memory multimap view built with the key coder of the view's KV coder.
 *
 * @param inputValues the windowed values backing the side input
 * @param view the side input view whose view function and coder are used
 * @return the view function's result for every window that has at least one value
 * @throws IllegalStateException if the view requests an unknown materialization
 */
private <T> Map<BoundedWindow, T> initializeBroadcastVariable(Iterable<WindowedValue<?>> inputValues, PCollectionView<T> view) {
    // Bucket every value under each window it is assigned to.
    Map<BoundedWindow, List<WindowedValue<?>>> byWindow = new HashMap<>();
    for (WindowedValue<?> windowedValue : inputValues) {
        for (BoundedWindow window : windowedValue.getWindows()) {
            byWindow.computeIfAbsent(window, unused -> new ArrayList<>()).add(windowedValue);
        }
    }

    Map<BoundedWindow, T> materialized = new HashMap<>();
    for (Map.Entry<BoundedWindow, List<WindowedValue<?>>> entry : byWindow.entrySet()) {
        BoundedWindow window = entry.getKey();
        List<WindowedValue<?>> windowContents = entry.getValue();
        switch (view.getViewFn().getMaterialization().getUrn()) {
            case Materializations.ITERABLE_MATERIALIZATION_URN: {
                ViewFn<IterableView, T> iterableViewFn = (ViewFn<IterableView, T>) view.getViewFn();
                // The supplier re-streams the window's contents each time it is invoked.
                T viewValue = iterableViewFn.apply(() -> windowContents.stream().map(WindowedValue::getValue).collect(Collectors.toList()));
                materialized.put(window, viewValue);
                break;
            }
            case Materializations.MULTIMAP_MATERIALIZATION_URN: {
                ViewFn<MultimapView, T> multimapViewFn = (ViewFn<MultimapView, T>) view.getViewFn();
                Coder<?> keyCoder = ((KvCoder<?, ?>) view.getCoderInternal()).getKeyCoder();
                // Unwrapped eagerly and passed as a raw Iterable, as the multimap factory is called unchecked.
                Iterable rawValues = (Iterable) windowContents.stream().map(WindowedValue::getValue).collect(Collectors.toList());
                materialized.put(window, multimapViewFn.apply(InMemoryMultimapSideInputView.fromIterable(keyCoder, rawValues)));
                break;
            }
            default:
                throw new IllegalStateException(String.format("Unknown side input materialization format requested '%s'", view.getViewFn().getMaterialization().getUrn()));
        }
    }
    return materialized;
}
Also used : Coder(org.apache.beam.sdk.coders.Coder) KvCoder(org.apache.beam.sdk.coders.KvCoder) IterableView(org.apache.beam.sdk.transforms.Materializations.IterableView) HashMap(java.util.HashMap) MultimapView(org.apache.beam.sdk.transforms.Materializations.MultimapView) ViewFn(org.apache.beam.sdk.transforms.ViewFn) WindowedValue(org.apache.beam.sdk.util.WindowedValue) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map)

Example 25 with Coder

use of org.apache.beam.sdk.coders.Coder in project beam by apache.

the class SparkExecutableStageFunction method getStateRequestHandler.

/**
 * Builds the state request handler for an executable stage.
 *
 * <p>Side input requests (iterable, multimap and multimap-keys) are answered from the entries of
 * {@code sideInputs}, decoding the broadcast byte arrays with the coder stored next to them. Bag
 * user state requests go to {@code bagUserStateHandlerFactory} when the stage declares user
 * state, and to an unsupported handler otherwise.
 *
 * @param executableStage the stage whose side inputs and user states are consulted
 * @param processBundleDescriptor the descriptor passed to the bag user state handler
 * @return a handler that delegates by state key type
 * @throws RuntimeException if setting up the side input handler fails with an {@link IOException}
 */
private StateRequestHandler getStateRequestHandler(ExecutableStage executableStage, ProcessBundleDescriptors.ExecutableProcessBundleDescriptor processBundleDescriptor) {
    BatchSideInputHandlerFactory.SideInputGetter broadcastGetter = new BatchSideInputHandlerFactory.SideInputGetter() {

        @Override
        public <T> List<T> getSideInput(String pCollectionId) {
            Tuple2<Broadcast<List<byte[]>>, WindowedValueCoder<SideInputT>> encodedWithCoder = sideInputs.get(pCollectionId);
            Broadcast<List<byte[]>> encodedBroadcast = encodedWithCoder._1;
            WindowedValueCoder<SideInputT> valueCoder = encodedWithCoder._2;
            // Decode every broadcast element on each request.
            return (List<T>) encodedBroadcast.value().stream()
                .map(encoded -> CoderHelpers.fromByteArray(encoded, valueCoder))
                .collect(Collectors.toList());
        }
    };
    StateRequestHandlers.SideInputHandlerFactory sideInputFactory = BatchSideInputHandlerFactory.forStage(executableStage, broadcastGetter);

    final StateRequestHandler sideInputHandler;
    try {
        sideInputHandler = StateRequestHandlers.forSideInputHandlerFactory(ProcessBundleDescriptors.getSideInputs(executableStage), sideInputFactory);
    } catch (IOException e) {
        throw new RuntimeException("Failed to setup state handler", e);
    }

    // The factory is created on first use and kept in the field afterwards.
    if (bagUserStateHandlerFactory == null) {
        bagUserStateHandlerFactory = new InMemoryBagUserStateFactory();
    }

    final StateRequestHandler userStateHandler;
    if (!executableStage.getUserStates().isEmpty()) {
        // Need to discard the old key's state
        bagUserStateHandlerFactory.resetForNewKey();
        userStateHandler = StateRequestHandlers.forBagUserStateHandlerFactory(processBundleDescriptor, bagUserStateHandlerFactory);
    } else {
        userStateHandler = StateRequestHandler.unsupported();
    }

    // All three side input key types share one handler; bag user state has its own.
    EnumMap<TypeCase, StateRequestHandler> handlersByType = new EnumMap<>(StateKey.TypeCase.class);
    handlersByType.put(StateKey.TypeCase.ITERABLE_SIDE_INPUT, sideInputHandler);
    handlersByType.put(StateKey.TypeCase.MULTIMAP_SIDE_INPUT, sideInputHandler);
    handlersByType.put(StateKey.TypeCase.MULTIMAP_KEYS_SIDE_INPUT, sideInputHandler);
    handlersByType.put(StateKey.TypeCase.BAG_USER_STATE, userStateHandler);
    return StateRequestHandlers.delegateBasedUponType(handlersByType);
}
Also used : WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) WindowedValue(org.apache.beam.sdk.util.WindowedValue) TimerInternals(org.apache.beam.runners.core.TimerInternals) BatchSideInputHandlerFactory(org.apache.beam.runners.fnexecution.translation.BatchSideInputHandlerFactory) Locale(java.util.Locale) JobBundleFactory(org.apache.beam.runners.fnexecution.control.JobBundleFactory) Map(java.util.Map) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) JobInfo(org.apache.beam.runners.fnexecution.provisioning.JobInfo) TimerReceiverFactory(org.apache.beam.runners.fnexecution.control.TimerReceiverFactory) FlatMapFunction(org.apache.spark.api.java.function.FlatMapFunction) Broadcast(org.apache.spark.broadcast.Broadcast) StageBundleFactory(org.apache.beam.runners.fnexecution.control.StageBundleFactory) EnumMap(java.util.EnumMap) FnDataReceiver(org.apache.beam.sdk.fn.data.FnDataReceiver) BundleProgressHandler(org.apache.beam.runners.fnexecution.control.BundleProgressHandler) ExecutableStage(org.apache.beam.runners.core.construction.graph.ExecutableStage) Tuple2(scala.Tuple2) Collectors(java.util.stream.Collectors) Serializable(java.io.Serializable) List(java.util.List) ByteArray(org.apache.beam.runners.spark.util.ByteArray) SparkPipelineOptions(org.apache.beam.runners.spark.SparkPipelineOptions) StateKey(org.apache.beam.model.fnexecution.v1.BeamFnApi.StateKey) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) ProcessBundleResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleResponse) Coder(org.apache.beam.sdk.coders.Coder) CoderHelpers(org.apache.beam.runners.spark.coders.CoderHelpers) RawUnionValue(org.apache.beam.sdk.transforms.join.RawUnionValue) RemoteBundle(org.apache.beam.runners.fnexecution.control.RemoteBundle) 
InMemoryBagUserStateFactory(org.apache.beam.runners.fnexecution.state.InMemoryBagUserStateFactory) StateRequestHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandler) ProcessBundleProgressResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleProgressResponse) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) Iterator(java.util.Iterator) OutputReceiverFactory(org.apache.beam.runners.fnexecution.control.OutputReceiverFactory) ProcessBundleDescriptors(org.apache.beam.runners.fnexecution.control.ProcessBundleDescriptors) MetricsContainerImpl(org.apache.beam.runners.core.metrics.MetricsContainerImpl) PipelineTranslatorUtils(org.apache.beam.runners.fnexecution.translation.PipelineTranslatorUtils) StateRequestHandlers(org.apache.beam.runners.fnexecution.state.StateRequestHandlers) IOException(java.io.IOException) MetricsContainerStepMapAccumulator(org.apache.beam.runners.spark.metrics.MetricsContainerStepMapAccumulator) InMemoryTimerInternals(org.apache.beam.runners.core.InMemoryTimerInternals) Timer(org.apache.beam.runners.core.construction.Timer) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Instant(org.joda.time.Instant) FileSystems(org.apache.beam.sdk.io.FileSystems) Collections(java.util.Collections) TypeCase(org.apache.beam.model.fnexecution.v1.BeamFnApi.StateKey.TypeCase) ExecutableStageContext(org.apache.beam.runners.fnexecution.control.ExecutableStageContext) StateRequestHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandler) StateKey(org.apache.beam.model.fnexecution.v1.BeamFnApi.StateKey) TypeCase(org.apache.beam.model.fnexecution.v1.BeamFnApi.StateKey.TypeCase) IOException(java.io.IOException) InMemoryBagUserStateFactory(org.apache.beam.runners.fnexecution.state.InMemoryBagUserStateFactory) WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) Broadcast(org.apache.spark.broadcast.Broadcast) 
BatchSideInputHandlerFactory(org.apache.beam.runners.fnexecution.translation.BatchSideInputHandlerFactory) Tuple2(scala.Tuple2) List(java.util.List) StateRequestHandlers(org.apache.beam.runners.fnexecution.state.StateRequestHandlers) EnumMap(java.util.EnumMap)

Aggregations

Coder (org.apache.beam.sdk.coders.Coder)117 KvCoder (org.apache.beam.sdk.coders.KvCoder)74 WindowedValue (org.apache.beam.sdk.util.WindowedValue)53 StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder)44 Test (org.junit.Test)43 HashMap (java.util.HashMap)40 ArrayList (java.util.ArrayList)36 Map (java.util.Map)34 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)34 List (java.util.List)31 KV (org.apache.beam.sdk.values.KV)29 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)28 IterableCoder (org.apache.beam.sdk.coders.IterableCoder)28 PCollection (org.apache.beam.sdk.values.PCollection)28 TupleTag (org.apache.beam.sdk.values.TupleTag)23 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)23 IOException (java.io.IOException)21 PCollectionView (org.apache.beam.sdk.values.PCollectionView)21 Instant (org.joda.time.Instant)21 ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap)20