Search in sources :

Example 6 with Coder

use of org.apache.beam.sdk.coders.Coder in project beam by apache.

the class FlinkKeyGroupStateInternals method restoreKeyGroupState.

/**
 * Reads the state {@code (stateName -> (valueCoder && (namespace -> value)))}
 * of a single key-group back from a stream and merges it into the local state table
 * for that key-group.
 *
 * <p>Layout consumed from the stream: a short holding the number of states, then per state
 * its name, its serialized value coder, an int holding the number of entries, and that many
 * (namespace, value) pairs. Values are always decoded with the coder read from the stream;
 * that coder is only stored in the table when the state name was not present yet.
 *
 * @param keyGroupIdx the id of the key-group to restore.
 * @param in the stream to read from.
 * @param userCodeClassLoader the class loader that will be used to deserialize
 *                            the valueCoder.
 * @throws Exception if reading from the stream, deserializing a coder or decoding an
 *                   entry fails.
 */
public void restoreKeyGroupState(int keyGroupIdx, DataInputStream in, ClassLoader userCodeClassLoader) throws Exception {
    Map<String, Tuple2<Coder<?>, Map<String, ?>>> table = stateTables[getIndexForKeyGroup(keyGroupIdx)];
    int stateCount = in.readShort();
    for (int stateNo = 0; stateNo < stateCount; stateNo++) {
        String name = in.readUTF();
        Coder valueCoder = InstantiationUtil.deserializeObject(in, userCodeClassLoader);

        // Reuse the existing entry for this state name; create and register one otherwise.
        Tuple2<Coder<?>, Map<String, ?>> entry = table.get(name);
        if (entry == null) {
            entry = new Tuple2<>();
            entry.f0 = valueCoder;
            entry.f1 = new HashMap<>();
            table.put(name, entry);
        }

        Map<String, Object> valuesByNamespace = (Map<String, Object>) entry.f1;
        // The entry count precedes the entries; each entry is a namespace followed by its value.
        for (int remaining = in.readInt(); remaining > 0; remaining--) {
            String namespace = StringUtf8Coder.of().decode(in);
            valuesByNamespace.put(namespace, valueCoder.decode(in));
        }
    }
}
Also used : Coder(org.apache.beam.sdk.coders.Coder) ListCoder(org.apache.beam.sdk.coders.ListCoder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) Tuple2(org.apache.flink.api.java.tuple.Tuple2) HashMap(java.util.HashMap) Map(java.util.Map)

Example 7 with Coder

use of org.apache.beam.sdk.coders.Coder in project beam by apache.

the class TranslationContext method populateDAG.

/**
 * Copies the operators and streams collected by this context into the given DAG.
 *
 * <p>Every registered operator is added under its name. For each stream that has at least
 * one sink, a stream named {@code "stream" + index} is added (the index only advances for
 * streams that are actually added), stream optimization is applied when ParDo fusion is
 * enabled in the pipeline options, and every sink port gets a stream codec attribute built
 * from the PCollection's coder.
 *
 * @param dag the DAG to populate.
 */
public void populateDAG(DAG dag) {
    for (Map.Entry<String, Operator> entry : this.operators.entrySet()) {
        dag.addOperator(entry.getKey(), entry.getValue());
    }

    int nextStreamId = 0;
    for (Map.Entry<PCollection, Pair<OutputPortInfo, List<InputPortInfo>>> stream : this.streams.entrySet()) {
        List<InputPortInfo> consumers = stream.getValue().getRight();
        InputPort[] sinks = new InputPort[consumers.size()];
        int slot = 0;
        for (InputPortInfo consumer : consumers) {
            sinks[slot++] = consumer.port;
        }

        // Streams without any consumer are not added to the DAG and do not use up an index.
        if (sinks.length == 0) {
            continue;
        }

        String streamName = "stream" + nextStreamId++;
        DAG.StreamMeta meta = dag.addStream(streamName, stream.getValue().getLeft().port, sinks);
        if (pipelineOptions.isParDoFusionEnabled()) {
            optimizeStreams(meta, stream);
        }

        PCollection pc = stream.getKey();
        for (InputPort sink : sinks) {
            Coder coder = pc.getCoder();
            // When a windowing strategy is present, wrap the element coder together with
            // the window coder of that strategy's WindowFn.
            if (pc.getWindowingStrategy() != null) {
                coder = FullWindowedValueCoder.of(pc.getCoder(), pc.getWindowingStrategy().getWindowFn().windowCoder());
            }
            Coder<Object> wrapperCoder = ApexStreamTuple.ApexStreamTupleCoder.of(coder);
            dag.setInputPortAttribute(sink, PortContext.STREAM_CODEC, new CoderAdapterStreamCodec(wrapperCoder));
        }
    }
}
Also used : Operator(com.datatorrent.api.Operator) Coder(org.apache.beam.sdk.coders.Coder) FullWindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.FullWindowedValueCoder) InputPort(com.datatorrent.api.Operator.InputPort) DAG(com.datatorrent.api.DAG) CoderAdapterStreamCodec(org.apache.beam.runners.apex.translation.utils.CoderAdapterStreamCodec) PCollection(org.apache.beam.sdk.values.PCollection) HashMap(java.util.HashMap) Map(java.util.Map) Pair(org.apache.commons.lang3.tuple.Pair) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair)

Example 8 with Coder

use of org.apache.beam.sdk.coders.Coder in project component-runtime by Talend.

the class BeamProcessorChainImpl method extractDoFn.

/**
 * Applies the transform of the given step to a synthetic bounded source on a fresh
 * {@link CapturingPipeline} and returns what a {@link CapturingPipeline.SinkExtractor}
 * collects when traversing that pipeline.
 *
 * <p>If the step carries coders they are set on the result of the transform: per tag when
 * the result is a {@link PCollectionTuple}, or the first coder only when the result is a
 * single {@link PCollection}.
 *
 * @param step the captured transform and its optional coders.
 * @param coderRegistry registry to install on the capturing pipeline; ignored when null.
 * @return the outputs gathered by the sink extractor.
 */
private static Collection<DoFn<?, ?>> extractDoFn(final CapturingPipeline.TransformWithCoder step, final CoderRegistry coderRegistry) {
    final CapturingPipeline capturingPipeline = new CapturingPipeline(PipelineOptionsFactory.create());
    if (coderRegistry != null) {
        capturingPipeline.setCoderRegistry(coderRegistry);
    }

    // Root transform that only produces a primitive output for the step's transform to consume.
    final PTransform<PBegin, PCollection<Object>> source = new PTransform<PBegin, PCollection<Object>>() {

        @Override
        public PCollection<Object> expand(final PBegin input) {
            return PCollection.createPrimitiveOutputInternal(capturingPipeline, WindowingStrategy.globalDefault(), PCollection.IsBounded.BOUNDED, TypingCoder.INSTANCE);
        }

        @Override
        protected Coder<?> getDefaultOutputCoder() {
            return TypingCoder.INSTANCE;
        }
    };
    final POutput apply = capturingPipeline.apply(source).apply(step.getTransform());

    if (apply instanceof PCollectionTuple && step.getCoders() != null) {
        final Map<TupleTag<?>, PCollection<?>> outputsByTag = ((PCollectionTuple) apply).getAll();
        step.getCoders().forEach((tag, coder) -> {
            final PCollection<?> target = outputsByTag.get(tag);
            if (target == null) {
                // No output is registered for this tag, so there is nothing to configure.
                return;
            }
            target.setCoder(Coder.class.cast(coder));
        });
    } else if (apply instanceof PCollection && step.getCoders() != null && !step.getCoders().isEmpty()) {
        // Single output: only the first coder in iteration order is used.
        ((PCollection) apply).setCoder(Coder.class.cast(step.getCoders().values().iterator().next()));
    }

    final CapturingPipeline.SinkExtractor sinkExtractor = new CapturingPipeline.SinkExtractor();
    capturingPipeline.traverseTopologically(sinkExtractor);
    return sinkExtractor.getOutputs();
}
Also used : Coder(org.apache.beam.sdk.coders.Coder) TupleTag(org.apache.beam.sdk.values.TupleTag) PBegin(org.apache.beam.sdk.values.PBegin) PCollection(org.apache.beam.sdk.values.PCollection) POutput(org.apache.beam.sdk.values.POutput) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) PTransform(org.apache.beam.sdk.transforms.PTransform)

Example 9 with Coder

use of org.apache.beam.sdk.coders.Coder in project beam by apache.

the class PartialGroupByKeyParDoFnsTest method testPartialGroupByKey.

/**
 * Feeds six key/value pairs over three distinct keys through a
 * {@link SimplePartialGroupByKeyParDoFn} backed by a buffering grouping table and checks
 * that the receiver sees one grouped element per key, plus the expected element-count and
 * mean-byte-size counter updates.
 */
@Test
public void testPartialGroupByKey() throws Exception {
    Coder keyCoder = StringUtf8Coder.of();
    Coder valueCoder = BigEndianIntegerCoder.of();

    TestOutputReceiver outputReceiver =
            new TestOutputReceiver(
                    new ElementByteSizeObservableCoder(
                            WindowedValue.getValueOnlyCoder(KvCoder.of(keyCoder, IterableCoder.of(valueCoder)))),
                    counterSet,
                    NameContextsForTests.nameContextForTest());
    ParDoFn fn =
            new SimplePartialGroupByKeyParDoFn(
                    GroupingTables.buffering(
                            new WindowingCoderGroupingKeyCreator(keyCoder),
                            PairInfo.create(),
                            new CoderSizeEstimator(WindowedValue.getValueOnlyCoder(keyCoder)),
                            new CoderSizeEstimator(valueCoder)),
                    outputReceiver);

    fn.startBundle(outputReceiver);
    // Input order matters for the per-key value order asserted below.
    for (KV<String, Integer> element : Arrays.asList(
            KV.of("hi", 4),
            KV.of("there", 5),
            KV.of("hi", 6),
            KV.of("joe", 7),
            KV.of("there", 8),
            KV.of("hi", 9))) {
        fn.processElement(WindowedValue.valueInGlobalWindow(element));
    }
    fn.finishBundle();

    assertThat(
            outputReceiver.outputElems,
            IsIterableContainingInAnyOrder.<Object>containsInAnyOrder(
                    WindowedValue.valueInGlobalWindow(KV.of("hi", Arrays.asList(4, 6, 9))),
                    WindowedValue.valueInGlobalWindow(KV.of("there", Arrays.asList(5, 8))),
                    WindowedValue.valueInGlobalWindow(KV.of("joe", Arrays.asList(7)))));

    // Exact counter values depend on the size of the encoded data. If the encoding
    // changes, the expected counters below have to change with it.
    CounterUpdateExtractor<?> extractor = Mockito.mock(CounterUpdateExtractor.class);
    counterSet.extractUpdates(false, extractor);
    verify(extractor).longSum(getObjectCounterName("test_receiver_out"), false, 3L);
    verify(extractor).longMean(getMeanByteCounterName("test_receiver_out"), false, LongCounterMean.ZERO.addValue(49L, 3));
    verifyNoMoreInteractions(extractor);
}
Also used : CoderSizeEstimator(org.apache.beam.runners.dataflow.worker.PartialGroupByKeyParDoFns.CoderSizeEstimator) ElementByteSizeObservableCoder(org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.ElementByteSizeObservableCoder) KvCoder(org.apache.beam.sdk.coders.KvCoder) BigEndianIntegerCoder(org.apache.beam.sdk.coders.BigEndianIntegerCoder) Coder(org.apache.beam.sdk.coders.Coder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) IterableCoder(org.apache.beam.sdk.coders.IterableCoder) WindowingCoderGroupingKeyCreator(org.apache.beam.runners.dataflow.worker.PartialGroupByKeyParDoFns.WindowingCoderGroupingKeyCreator) ElementByteSizeObservableCoder(org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.ElementByteSizeObservableCoder) BatchSideInputPGBKParDoFn(org.apache.beam.runners.dataflow.worker.PartialGroupByKeyParDoFns.BatchSideInputPGBKParDoFn) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) StreamingSideInputPGBKParDoFn(org.apache.beam.runners.dataflow.worker.PartialGroupByKeyParDoFns.StreamingSideInputPGBKParDoFn) SimplePartialGroupByKeyParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.SimplePartialGroupByKeyParDoFn) SimplePartialGroupByKeyParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.SimplePartialGroupByKeyParDoFn) TestOutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.TestOutputReceiver) Test(org.junit.Test)

Example 10 with Coder

use of org.apache.beam.sdk.coders.Coder in project beam by apache.

the class PartialGroupByKeyParDoFnsTest method testCreateWithCombinerAndStreamingSideInputs.

/**
 * Checks that {@code PartialGroupByKeyParDoFns.create} returns a
 * {@link StreamingSideInputPGBKParDoFn} when called with streaming options, a combine
 * function and a side input reader that reports itself as non-empty.
 */
@Test
public void testCreateWithCombinerAndStreamingSideInputs() throws Exception {
    StreamingOptions streamingOptions = PipelineOptionsFactory.as(StreamingOptions.class);
    streamingOptions.setStreaming(true);

    KvCoder<String, Integer> kvCoder = KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of());
    TestOutputReceiver outputReceiver =
            new TestOutputReceiver(
                    new ElementByteSizeObservableCoder(WindowedValue.getValueOnlyCoder(kvCoder)),
                    counterSet,
                    NameContextsForTests.nameContextForTest());

    // Stub the collaborators: a non-empty side input reader and state that reads as an empty map.
    when(mockSideInputReader.isEmpty()).thenReturn(false);
    when(mockStreamingStepContext.stateInternals()).thenReturn((StateInternals) mockStateInternals);
    when(mockStateInternals.state(Matchers.<StateNamespace>any(), Matchers.<StateTag>any())).thenReturn(mockState);
    when(mockState.read()).thenReturn(Maps.newHashMap());

    ParDoFn created =
            PartialGroupByKeyParDoFns.create(
                    streamingOptions,
                    kvCoder,
                    AppliedCombineFn.withInputCoder(
                            Sum.ofIntegers(),
                            CoderRegistry.createDefault(),
                            kvCoder,
                            ImmutableList.<PCollectionView<?>>of(),
                            WindowingStrategy.globalDefault()),
                    mockSideInputReader,
                    outputReceiver,
                    mockStreamingStepContext);
    assertTrue(created instanceof StreamingSideInputPGBKParDoFn);
}
Also used : ElementByteSizeObservableCoder(org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.ElementByteSizeObservableCoder) KvCoder(org.apache.beam.sdk.coders.KvCoder) BigEndianIntegerCoder(org.apache.beam.sdk.coders.BigEndianIntegerCoder) Coder(org.apache.beam.sdk.coders.Coder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) IterableCoder(org.apache.beam.sdk.coders.IterableCoder) PCollectionView(org.apache.beam.sdk.values.PCollectionView) StreamingOptions(org.apache.beam.sdk.options.StreamingOptions) ElementByteSizeObservableCoder(org.apache.beam.runners.dataflow.worker.IntrinsicMapTaskExecutorFactory.ElementByteSizeObservableCoder) BatchSideInputPGBKParDoFn(org.apache.beam.runners.dataflow.worker.PartialGroupByKeyParDoFns.BatchSideInputPGBKParDoFn) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) StreamingSideInputPGBKParDoFn(org.apache.beam.runners.dataflow.worker.PartialGroupByKeyParDoFns.StreamingSideInputPGBKParDoFn) SimplePartialGroupByKeyParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.SimplePartialGroupByKeyParDoFn) TestOutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.TestOutputReceiver) StreamingSideInputPGBKParDoFn(org.apache.beam.runners.dataflow.worker.PartialGroupByKeyParDoFns.StreamingSideInputPGBKParDoFn) Test(org.junit.Test)

Aggregations

Coder (org.apache.beam.sdk.coders.Coder)117 KvCoder (org.apache.beam.sdk.coders.KvCoder)74 WindowedValue (org.apache.beam.sdk.util.WindowedValue)53 StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder)44 Test (org.junit.Test)43 HashMap (java.util.HashMap)40 ArrayList (java.util.ArrayList)36 Map (java.util.Map)34 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)34 List (java.util.List)31 KV (org.apache.beam.sdk.values.KV)29 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)28 IterableCoder (org.apache.beam.sdk.coders.IterableCoder)28 PCollection (org.apache.beam.sdk.values.PCollection)28 TupleTag (org.apache.beam.sdk.values.TupleTag)23 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)23 IOException (java.io.IOException)21 PCollectionView (org.apache.beam.sdk.values.PCollectionView)21 Instant (org.joda.time.Instant)21 ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap)20