Search in sources :

Example 56 with Coder

use of org.apache.beam.sdk.coders.Coder in project beam by apache.

the class DoFnOperatorTest method keyedParDoSideInputCheckpointing.

@Test
public void keyedParDoSideInputCheckpointing() throws Exception {
    sideInputCheckpointing(() -> {
        StringUtf8Coder keyCoder = StringUtf8Coder.of();
        Coder<WindowedValue<String>> coder = WindowedValue.getFullCoder(keyCoder, IntervalWindow.getCoder());
        TupleTag<String> outputTag = new TupleTag<>("main-output");
        KeySelector<WindowedValue<String>, ByteBuffer> keySelector = e -> FlinkKeyUtils.encodeKey(e.getValue(), keyCoder);
        ImmutableMap<Integer, PCollectionView<?>> sideInputMapping = ImmutableMap.<Integer, PCollectionView<?>>builder().put(1, view1).put(2, view2).build();
        DoFnOperator<String, String> doFnOperator = new DoFnOperator<>(new IdentityDoFn<>(), "stepName", coder, Collections.emptyMap(), outputTag, Collections.emptyList(), new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, coder, new SerializablePipelineOptions(FlinkPipelineOptions.defaults())), WindowingStrategy.of(FixedWindows.of(Duration.millis(100))), sideInputMapping, /* side-input mapping */
        ImmutableList.of(view1, view2), /* side inputs */
        FlinkPipelineOptions.defaults(), keyCoder, keySelector, DoFnSchemaInformation.create(), Collections.emptyMap());
        return new KeyedTwoInputStreamOperatorTestHarness<>(doFnOperator, keySelector, // we use a dummy key for the second input since it is considered to be broadcast
        null, new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));
    });
}
Also used : StateSpec(org.apache.beam.sdk.state.StateSpec) Arrays(java.util.Arrays) StateNamespace(org.apache.beam.runners.core.StateNamespace) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) TimestampCombiner(org.apache.beam.sdk.transforms.windowing.TimestampCombiner) WindowedValue(org.apache.beam.sdk.util.WindowedValue) StreamRecordStripper.stripStreamRecordFromWindowedValue(org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue) IsIterableContainingInOrder.contains(org.hamcrest.collection.IsIterableContainingInOrder.contains) FlinkPipelineOptions(org.apache.beam.runners.flink.FlinkPipelineOptions) TimerSpecs(org.apache.beam.sdk.state.TimerSpecs) DoFnRunner(org.apache.beam.runners.core.DoFnRunner) FlinkMetricContainer(org.apache.beam.runners.flink.metrics.FlinkMetricContainer) StepContext(org.apache.beam.runners.core.StepContext) ValueState(org.apache.beam.sdk.state.ValueState) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) TimerInternals(org.apache.beam.runners.core.TimerInternals) ByteBuffer(java.nio.ByteBuffer) DoFnSchemaInformation(org.apache.beam.sdk.transforms.DoFnSchemaInformation) OneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) TypeFactory(com.fasterxml.jackson.databind.type.TypeFactory) Create(org.apache.beam.sdk.transforms.Create) TwoInputStreamOperatorTestHarness(org.apache.flink.streaming.util.TwoInputStreamOperatorTestHarness) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) LRUMap(com.fasterxml.jackson.databind.util.LRUMap) Window(org.apache.beam.sdk.transforms.windowing.Window) GlobalWindow(org.apache.beam.sdk.transforms.windowing.GlobalWindow) FluentIterable(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.FluentIterable) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) CoderTypeInformation(org.apache.beam.runners.flink.translation.types.CoderTypeInformation) KvCoder(org.apache.beam.sdk.coders.KvCoder) KeySelector(org.apache.flink.api.java.functions.KeySelector) KeyedTwoInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedTwoInputStreamOperatorTestHarness) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) FullWindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.FullWindowedValueCoder) OutputTag(org.apache.flink.util.OutputTag) VarLongCoder(org.apache.beam.sdk.coders.VarLongCoder) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) Collectors(java.util.stream.Collectors) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) Objects(java.util.Objects) List(java.util.List) WatermarkHoldState(org.apache.beam.sdk.state.WatermarkHoldState) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) Timer(org.apache.beam.sdk.state.Timer) Matchers.equalTo(org.hamcrest.Matchers.equalTo) Optional(java.util.Optional) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) Matchers.is(org.hamcrest.Matchers.is) StateTag(org.apache.beam.runners.core.StateTag) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) StatefulDoFnRunner(org.apache.beam.runners.core.StatefulDoFnRunner) Whitebox(org.powermock.reflect.Whitebox) KV(org.apache.beam.sdk.values.KV) Assert.assertThrows(org.junit.Assert.assertThrows) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) Coder(org.apache.beam.sdk.coders.Coder) HashMap(java.util.HashMap) View(org.apache.beam.sdk.transforms.View) StateNamespaces(org.apache.beam.runners.core.StateNamespaces) Supplier(java.util.function.Supplier) StateTags(org.apache.beam.runners.core.StateTags) ArrayList(java.util.ArrayList) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) RawUnionValue(org.apache.beam.sdk.transforms.join.RawUnionValue) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) TimerSpec(org.apache.beam.sdk.state.TimerSpec) CoderTypeSerializer(org.apache.beam.runners.flink.translation.types.CoderTypeSerializer) TupleTag(org.apache.beam.sdk.values.TupleTag) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Pipeline(org.apache.beam.sdk.Pipeline) Nullable(org.checkerframework.checker.nullness.qual.Nullable) Before(org.junit.Before) DoFn(org.apache.beam.sdk.transforms.DoFn) PCollectionViewTesting(org.apache.beam.sdk.testing.PCollectionViewTesting) FixedWindows(org.apache.beam.sdk.transforms.windowing.FixedWindows) Function(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Function) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) Mockito(org.mockito.Mockito) Matchers.emptyIterable(org.hamcrest.Matchers.emptyIterable) StateSpecs(org.apache.beam.sdk.state.StateSpecs) PCollectionView(org.apache.beam.sdk.values.PCollectionView) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Instant(org.joda.time.Instant) VarIntCoder(org.apache.beam.sdk.coders.VarIntCoder) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Collections(java.util.Collections) TimeDomain(org.apache.beam.sdk.state.TimeDomain) Assert.assertEquals(org.junit.Assert.assertEquals) KeyedTwoInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedTwoInputStreamOperatorTestHarness) TupleTag(org.apache.beam.sdk.values.TupleTag) ByteBuffer(java.nio.ByteBuffer) PCollectionView(org.apache.beam.sdk.values.PCollectionView) WindowedValue(org.apache.beam.sdk.util.WindowedValue) StreamRecordStripper.stripStreamRecordFromWindowedValue(org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) Test(org.junit.Test)

Example 57 with Coder

use of org.apache.beam.sdk.coders.Coder in project beam by apache.

the class CommonCoderTest method executeSingleTest.

@Test
public void executeSingleTest() throws IOException {
    assertCoderIsKnown(testSpec.getCoder());
    Coder coder = instantiateCoder(testSpec.getCoder());
    Object testValue = convertValue(testSpec.getValue(), testSpec.getCoder(), coder);
    Context context = testSpec.getNested() ? Context.NESTED : Context.OUTER;
    byte[] encoded = CoderUtils.encodeToByteArray(coder, testValue, context);
    Object decodedValue = CoderUtils.decodeFromByteArray(coder, testSpec.getSerialized(), context);
    if (!testSpec.getCoder().getNonDeterministic()) {
        assertThat(testSpec.toString(), encoded, equalTo(testSpec.getSerialized()));
    }
    verifyDecodedValue(testSpec.getCoder(), decodedValue, testValue);
}
Also used : Context(org.apache.beam.sdk.coders.Coder.Context) TranslationContext(org.apache.beam.runners.core.construction.CoderTranslation.TranslationContext) DoubleCoder(org.apache.beam.sdk.coders.DoubleCoder) IntervalWindowCoder(org.apache.beam.sdk.transforms.windowing.IntervalWindow.IntervalWindowCoder) ByteCoder(org.apache.beam.sdk.coders.ByteCoder) KvCoder(org.apache.beam.sdk.coders.KvCoder) VarLongCoder(org.apache.beam.sdk.coders.VarLongCoder) BooleanCoder(org.apache.beam.sdk.coders.BooleanCoder) TimestampPrefixingWindowCoder(org.apache.beam.sdk.coders.TimestampPrefixingWindowCoder) Coder(org.apache.beam.sdk.coders.Coder) RowCoder(org.apache.beam.sdk.coders.RowCoder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) IterableCoder(org.apache.beam.sdk.coders.IterableCoder) IterableLikeCoder(org.apache.beam.sdk.coders.IterableLikeCoder) Test(org.junit.Test)

Example 58 with Coder

use of org.apache.beam.sdk.coders.Coder in project beam by apache.

the class CommonCoderTest method convertValue.

/**
 * Converts from JSON-auto-deserialized types into the proper Java types for the known coders.
 */
private static Object convertValue(Object value, CommonCoder coderSpec, Coder coder) {
    String s = coderSpec.getUrn();
    if (s.equals(getUrn(StandardCoders.Enum.BYTES))) {
        return ((String) value).getBytes(StandardCharsets.ISO_8859_1);
    } else if (s.equals(getUrn(StandardCoders.Enum.BOOL))) {
        return value;
    } else if (s.equals(getUrn(StandardCoders.Enum.STRING_UTF8))) {
        return value;
    } else if (s.equals(getUrn(StandardCoders.Enum.KV))) {
        Coder keyCoder = ((KvCoder) coder).getKeyCoder();
        Coder valueCoder = ((KvCoder) coder).getValueCoder();
        Map<String, Object> kvMap = (Map<String, Object>) value;
        Object k = convertValue(kvMap.get("key"), coderSpec.getComponents().get(0), keyCoder);
        Object v = convertValue(kvMap.get("value"), coderSpec.getComponents().get(1), valueCoder);
        return KV.of(k, v);
    } else if (s.equals(getUrn(StandardCoders.Enum.VARINT))) {
        return ((Number) value).longValue();
    } else if (s.equals(getUrn(StandardCoders.Enum.TIMER))) {
        Map<String, Object> kvMap = (Map<String, Object>) value;
        Coder<?> keyCoder = ((Timer.Coder) coder).getValueCoder();
        Coder<? extends BoundedWindow> windowCoder = ((Timer.Coder) coder).getWindowCoder();
        List<BoundedWindow> windows = new ArrayList<>();
        for (Object window : (List<Object>) kvMap.get("windows")) {
            windows.add((BoundedWindow) convertValue(window, coderSpec.getComponents().get(1), windowCoder));
        }
        if ((boolean) kvMap.get("clearBit")) {
            return Timer.cleared(convertValue(kvMap.get("userKey"), coderSpec.getComponents().get(0), keyCoder), (String) kvMap.get("dynamicTimerTag"), windows);
        }
        Map<String, Object> paneInfoMap = (Map<String, Object>) kvMap.get("pane");
        PaneInfo paneInfo = PaneInfo.createPane((boolean) paneInfoMap.get("is_first"), (boolean) paneInfoMap.get("is_last"), PaneInfo.Timing.valueOf((String) paneInfoMap.get("timing")), (int) paneInfoMap.get("index"), (int) paneInfoMap.get("on_time_index"));
        return Timer.of(convertValue(kvMap.get("userKey"), coderSpec.getComponents().get(0), keyCoder), (String) kvMap.get("dynamicTimerTag"), windows, new Instant(((Number) kvMap.get("fireTimestamp")).longValue()), new Instant(((Number) kvMap.get("holdTimestamp")).longValue()), paneInfo);
    } else if (s.equals(getUrn(StandardCoders.Enum.INTERVAL_WINDOW))) {
        Map<String, Object> kvMap = (Map<String, Object>) value;
        Instant end = new Instant(((Number) kvMap.get("end")).longValue());
        Duration span = Duration.millis(((Number) kvMap.get("span")).longValue());
        return new IntervalWindow(end.minus(span), span);
    } else if (s.equals(getUrn(StandardCoders.Enum.ITERABLE)) || s.equals(getUrn(StandardCoders.Enum.STATE_BACKED_ITERABLE))) {
        Coder elementCoder = ((IterableLikeCoder) coder).getElemCoder();
        List<Object> elements = (List<Object>) value;
        List<Object> convertedElements = new ArrayList<>();
        for (Object element : elements) {
            convertedElements.add(convertValue(element, coderSpec.getComponents().get(0), elementCoder));
        }
        return convertedElements;
    } else if (s.equals(getUrn(StandardCoders.Enum.GLOBAL_WINDOW))) {
        return GlobalWindow.INSTANCE;
    } else if (s.equals(getUrn(StandardCoders.Enum.WINDOWED_VALUE)) || s.equals(getUrn(StandardCoders.Enum.PARAM_WINDOWED_VALUE))) {
        Map<String, Object> kvMap = (Map<String, Object>) value;
        Coder valueCoder = ((WindowedValue.FullWindowedValueCoder) coder).getValueCoder();
        Coder windowCoder = ((WindowedValue.FullWindowedValueCoder) coder).getWindowCoder();
        Object windowValue = convertValue(kvMap.get("value"), coderSpec.getComponents().get(0), valueCoder);
        Instant timestamp = new Instant(((Number) kvMap.get("timestamp")).longValue());
        List<BoundedWindow> windows = new ArrayList<>();
        for (Object window : (List<Object>) kvMap.get("windows")) {
            windows.add((BoundedWindow) convertValue(window, coderSpec.getComponents().get(1), windowCoder));
        }
        Map<String, Object> paneInfoMap = (Map<String, Object>) kvMap.get("pane");
        PaneInfo paneInfo = PaneInfo.createPane((boolean) paneInfoMap.get("is_first"), (boolean) paneInfoMap.get("is_last"), PaneInfo.Timing.valueOf((String) paneInfoMap.get("timing")), (int) paneInfoMap.get("index"), (int) paneInfoMap.get("on_time_index"));
        return WindowedValue.of(windowValue, timestamp, windows, paneInfo);
    } else if (s.equals(getUrn(StandardCoders.Enum.DOUBLE))) {
        return Double.parseDouble((String) value);
    } else if (s.equals(getUrn(StandardCoders.Enum.ROW))) {
        Schema schema;
        try {
            schema = SchemaTranslation.schemaFromProto(SchemaApi.Schema.parseFrom(coderSpec.getPayload()));
        } catch (InvalidProtocolBufferException e) {
            throw new RuntimeException("Failed to parse schema payload for row coder", e);
        }
        return parseField(value, Schema.FieldType.row(schema));
    } else if (s.equals(getUrn(StandardCoders.Enum.SHARDED_KEY))) {
        Map<String, Object> kvMap = (Map<String, Object>) value;
        Coder<?> keyCoder = ((ShardedKey.Coder) coder).getKeyCoder();
        byte[] shardId = ((String) kvMap.get("shardId")).getBytes(StandardCharsets.ISO_8859_1);
        return ShardedKey.of(convertValue(kvMap.get("key"), coderSpec.getComponents().get(0), keyCoder), shardId);
    } else if (s.equals(getUrn(StandardCoders.Enum.CUSTOM_WINDOW))) {
        Map<String, Object> kvMap = (Map<String, Object>) value;
        Coder windowCoder = ((TimestampPrefixingWindowCoder) coder).getWindowCoder();
        return convertValue(kvMap.get("window"), coderSpec.getComponents().get(0), windowCoder);
    } else {
        throw new IllegalStateException("Unknown coder URN: " + coderSpec.getUrn());
    }
}
Also used : DoubleCoder(org.apache.beam.sdk.coders.DoubleCoder) IntervalWindowCoder(org.apache.beam.sdk.transforms.windowing.IntervalWindow.IntervalWindowCoder) ByteCoder(org.apache.beam.sdk.coders.ByteCoder) KvCoder(org.apache.beam.sdk.coders.KvCoder) VarLongCoder(org.apache.beam.sdk.coders.VarLongCoder) BooleanCoder(org.apache.beam.sdk.coders.BooleanCoder) TimestampPrefixingWindowCoder(org.apache.beam.sdk.coders.TimestampPrefixingWindowCoder) Coder(org.apache.beam.sdk.coders.Coder) RowCoder(org.apache.beam.sdk.coders.RowCoder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) IterableCoder(org.apache.beam.sdk.coders.IterableCoder) IterableLikeCoder(org.apache.beam.sdk.coders.IterableLikeCoder) Instant(org.joda.time.Instant) Schema(org.apache.beam.sdk.schemas.Schema) InvalidProtocolBufferException(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException) Duration(org.joda.time.Duration) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Timer(org.apache.beam.runners.core.construction.Timer) WindowedValue(org.apache.beam.sdk.util.WindowedValue) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) List(java.util.List) ImmutableList.toImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) ImmutableBiMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableBiMap) Map(java.util.Map) HashMap(java.util.HashMap) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow)

Example 59 with Coder

use of org.apache.beam.sdk.coders.Coder in project beam by apache.

the class FlattenP method tryProcess.

@Override
protected boolean tryProcess(int ordinal, @Nonnull Object item) {
    Coder inputCoder = inputOrdinalCoders.get(ordinal);
    WindowedValue<Object> windowedValue = Utils.decodeWindowedValue((byte[]) item, inputCoder);
    return tryEmit(Utils.encode(windowedValue, outputCoder));
}
Also used : Coder(org.apache.beam.sdk.coders.Coder)

Example 60 with Coder

use of org.apache.beam.sdk.coders.Coder in project beam by apache.

the class CloudObjectTranslators method addComponents.

private static CloudObject addComponents(CloudObject base, List<? extends Coder<?>> components) {
    if (!components.isEmpty()) {
        List<CloudObject> cloudComponents = new ArrayList<>(components.size());
        for (Coder component : components) {
            cloudComponents.add(CloudObjects.asCloudObject(component));
        }
        Structs.addList(base, PropertyNames.COMPONENT_ENCODINGS, cloudComponents);
    }
    return base;
}
Also used : CoGbkResultCoder(org.apache.beam.sdk.transforms.join.CoGbkResult.CoGbkResultCoder) Coder(org.apache.beam.sdk.coders.Coder) MapCoder(org.apache.beam.sdk.coders.MapCoder) IntervalWindowCoder(org.apache.beam.sdk.transforms.windowing.IntervalWindow.IntervalWindowCoder) LengthPrefixCoder(org.apache.beam.sdk.coders.LengthPrefixCoder) CustomCoder(org.apache.beam.sdk.coders.CustomCoder) KvCoder(org.apache.beam.sdk.coders.KvCoder) IterableCoder(org.apache.beam.sdk.coders.IterableCoder) FullWindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.FullWindowedValueCoder) VarLongCoder(org.apache.beam.sdk.coders.VarLongCoder) UnionCoder(org.apache.beam.sdk.transforms.join.UnionCoder) ByteArrayCoder(org.apache.beam.sdk.coders.ByteArrayCoder) IterableLikeCoder(org.apache.beam.sdk.coders.IterableLikeCoder) NullableCoder(org.apache.beam.sdk.coders.NullableCoder) ArrayList(java.util.ArrayList)

Aggregations

Coder (org.apache.beam.sdk.coders.Coder)119 KvCoder (org.apache.beam.sdk.coders.KvCoder)75 WindowedValue (org.apache.beam.sdk.util.WindowedValue)55 StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder)44 Test (org.junit.Test)43 HashMap (java.util.HashMap)42 ArrayList (java.util.ArrayList)38 Map (java.util.Map)36 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)35 List (java.util.List)32 KV (org.apache.beam.sdk.values.KV)30 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)28 IterableCoder (org.apache.beam.sdk.coders.IterableCoder)28 PCollection (org.apache.beam.sdk.values.PCollection)28 TupleTag (org.apache.beam.sdk.values.TupleTag)24 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)23 IOException (java.io.IOException)22 PCollectionView (org.apache.beam.sdk.values.PCollectionView)22 Instant (org.joda.time.Instant)21 WindowingStrategy (org.apache.beam.sdk.values.WindowingStrategy)20