use of org.apache.beam.sdk.coders.Coder in project beam by apache.
the class FlinkKeyGroupStateInternals method restoreKeyGroupState.
/**
 * Reads the state of one key-group back from a stream laid out as
 * {@code (stateName -> (valueCoder && (namespace -> value)))} and merges it
 * into the state table that belongs to that key-group.
 *
 * <p>For every state found in the stream, a table entry is created if none
 * exists yet (holding the deserialized coder and an empty namespace map). An
 * already existing entry keeps the coder it was registered with; the values
 * are nevertheless decoded with the coder read from the stream.
 *
 * @param keyGroupIdx the id of the key-group to be restored.
 * @param in the stream to read from.
 * @param userCodeClassLoader the class loader that will be used to deserialize
 *     the valueCoder.
 * @throws Exception propagated from reading the stream, deserializing the
 *     coder or decoding an entry.
 */
public void restoreKeyGroupState(int keyGroupIdx, DataInputStream in, ClassLoader userCodeClassLoader) throws Exception {
  Map<String, Tuple2<Coder<?>, Map<String, ?>>> table = stateTables[getIndexForKeyGroup(keyGroupIdx)];

  // The stream starts with the number of states, followed by one record per state.
  for (int statesLeft = in.readShort(); statesLeft > 0; statesLeft--) {
    String name = in.readUTF();
    Coder valueCoder = InstantiationUtil.deserializeObject(in, userCodeClassLoader);

    Tuple2<Coder<?>, Map<String, ?>> entry = table.get(name);
    if (entry == null) {
      entry = new Tuple2<>();
      entry.f0 = valueCoder;
      entry.f1 = new HashMap<>();
      table.put(name, entry);
    }

    Map<String, Object> valuesByNamespace = (Map<String, Object>) entry.f1;
    // Each state record carries its entry count, then (namespace, value) pairs.
    for (int entriesLeft = in.readInt(); entriesLeft > 0; entriesLeft--) {
      String namespace = StringUtf8Coder.of().decode(in);
      valuesByNamespace.put(namespace, valueCoder.decode(in));
    }
  }
}
use of org.apache.beam.sdk.coders.Coder in project beam by apache.
the class TranslationContext method populateDAG.
/**
 * Transfers the collected operators and streams of this context into the given DAG.
 *
 * <p>All registered operators are added under their names. Every stream that has at least
 * one destination port is added as {@code "stream" + index} (the index only advances for
 * streams that are actually added), optionally passed to {@code optimizeStreams} when ParDo
 * fusion is enabled, and each of its sink ports gets a stream codec built from the coder of
 * the stream's {@link PCollection}.
 *
 * @param dag the DAG to add the operators and streams to.
 */
public void populateDAG(DAG dag) {
  for (Map.Entry<String, Operator> nameAndOperator : this.operators.entrySet()) {
    dag.addOperator(nameAndOperator.getKey(), nameAndOperator.getValue());
  }

  int streamIndex = 0;
  for (Map.Entry<PCollection, Pair<OutputPortInfo, List<InputPortInfo>>> streamEntry : this.streams.entrySet()) {
    List<InputPortInfo> destInfo = streamEntry.getValue().getRight();
    InputPort[] sinks = new InputPort[destInfo.size()];
    for (int i = 0; i < sinks.length; i++) {
      sinks[i] = destInfo.get(i).port;
    }
    if (sinks.length == 0) {
      // Streams without a consumer are not added to the DAG.
      continue;
    }

    DAG.StreamMeta streamMeta = dag.addStream("stream" + streamIndex++, streamEntry.getValue().getLeft().port, sinks);
    if (pipelineOptions.isParDoFusionEnabled()) {
      optimizeStreams(streamMeta, streamEntry);
    }

    // The coder depends only on the stream's PCollection, not on the individual sink port,
    // so it is derived once per stream instead of once per port.
    PCollection pc = streamEntry.getKey();
    Coder coder = pc.getCoder();
    if (pc.getWindowingStrategy() != null) {
      coder = FullWindowedValueCoder.of(coder, pc.getWindowingStrategy().getWindowFn().windowCoder());
    }
    Coder<Object> wrapperCoder = ApexStreamTuple.ApexStreamTupleCoder.of(coder);

    // Each port still receives its own codec instance, as before.
    for (InputPort port : sinks) {
      dag.setInputPortAttribute(port, PortContext.STREAM_CODEC, new CoderAdapterStreamCodec(wrapperCoder));
    }
  }
}
use of org.apache.beam.sdk.coders.Coder in project component-runtime by Talend.
the class BeamProcessorChainImpl method extractDoFn.
/**
 * Applies the transform of {@code step} to a throw-away capturing pipeline and returns what
 * the pipeline's sink extractor collected while traversing it.
 *
 * <p>The transform is applied on top of a synthetic bounded, globally windowed root
 * collection coded with {@code TypingCoder.INSTANCE}. If the step carries coders, they are
 * set on the resulting collections: per tuple tag for a {@code PCollectionTuple}, or the
 * first coder only for a single {@code PCollection}.
 *
 * @param step the transform to apply together with its optional output coders.
 * @param coderRegistry registry to install on the capturing pipeline, ignored when null.
 * @return the outputs gathered by the sink extractor.
 */
private static Collection<DoFn<?, ?>> extractDoFn(final CapturingPipeline.TransformWithCoder step, final CoderRegistry coderRegistry) {
  final CapturingPipeline pipeline = new CapturingPipeline(PipelineOptionsFactory.create());
  if (coderRegistry != null) {
    pipeline.setCoderRegistry(coderRegistry);
  }

  // Synthetic root so that the step's transform has an input collection to be applied to.
  final PTransform<PBegin, PCollection<Object>> root = new PTransform<PBegin, PCollection<Object>>() {
    @Override
    public PCollection<Object> expand(final PBegin input) {
      return PCollection.createPrimitiveOutputInternal(pipeline, WindowingStrategy.globalDefault(), PCollection.IsBounded.BOUNDED, TypingCoder.INSTANCE);
    }

    @Override
    protected Coder<?> getDefaultOutputCoder() {
      return TypingCoder.INSTANCE;
    }
  };
  final POutput output = pipeline.apply(root).apply(step.getTransform());

  if (output instanceof PCollectionTuple && step.getCoders() != null) {
    final Map<TupleTag<?>, PCollection<?>> byTag = ((PCollectionTuple) output).getAll();
    step.getCoders().forEach((tag, tagCoder) -> {
      final PCollection<?> target = byTag.get(tag);
      if (target == null) {
        // No output registered under this tag, nothing to configure.
        return;
      }
      target.setCoder(Coder.class.cast(tagCoder));
    });
  } else if (output instanceof PCollection && step.getCoders() != null && !step.getCoders().isEmpty()) {
    // A single output collection takes the first configured coder.
    ((PCollection) output).setCoder(Coder.class.cast(step.getCoders().values().iterator().next()));
  }

  final CapturingPipeline.SinkExtractor extractor = new CapturingPipeline.SinkExtractor();
  pipeline.traverseTopologically(extractor);
  return extractor.getOutputs();
}
use of org.apache.beam.sdk.coders.Coder in project beam by apache.
the class PartialGroupByKeyParDoFnsTest method testPartialGroupByKey.
/**
 * Sends six key/value pairs spread over three keys through a buffering partial group-by-key
 * ParDoFn and checks both the grouped elements handed to the receiver and the counter
 * updates recorded for it.
 */
@Test
public void testPartialGroupByKey() throws Exception {
  Coder stringCoder = StringUtf8Coder.of();
  Coder intCoder = BigEndianIntegerCoder.of();

  TestOutputReceiver receiver =
      new TestOutputReceiver(
          new ElementByteSizeObservableCoder(
              WindowedValue.getValueOnlyCoder(KvCoder.of(stringCoder, IterableCoder.of(intCoder)))),
          counterSet,
          NameContextsForTests.nameContextForTest());

  ParDoFn pgbkParDoFn =
      new SimplePartialGroupByKeyParDoFn(
          GroupingTables.buffering(
              new WindowingCoderGroupingKeyCreator(stringCoder),
              PairInfo.create(),
              new CoderSizeEstimator(WindowedValue.getValueOnlyCoder(stringCoder)),
              new CoderSizeEstimator(intCoder)),
          receiver);

  pgbkParDoFn.startBundle(receiver);
  for (KV<String, Integer> input :
      Arrays.asList(
          KV.of("hi", 4),
          KV.of("there", 5),
          KV.of("hi", 6),
          KV.of("joe", 7),
          KV.of("there", 8),
          KV.of("hi", 9))) {
    pgbkParDoFn.processElement(WindowedValue.valueInGlobalWindow(input));
  }
  pgbkParDoFn.finishBundle();

  // One grouped element per key; values keep the order in which they were processed.
  Object hiGroup = WindowedValue.valueInGlobalWindow(KV.of("hi", Arrays.asList(4, 6, 9)));
  Object thereGroup = WindowedValue.valueInGlobalWindow(KV.of("there", Arrays.asList(5, 8)));
  Object joeGroup = WindowedValue.valueInGlobalWindow(KV.of("joe", Arrays.asList(7)));
  assertThat(
      receiver.outputElems,
      IsIterableContainingInAnyOrder.<Object>containsInAnyOrder(hiGroup, thereGroup, joeGroup));

  // Exact counter values depend on size of encoded data. If encoding
  // changes, then these expected counters should change to match.
  CounterUpdateExtractor<?> updateExtractor = Mockito.mock(CounterUpdateExtractor.class);
  counterSet.extractUpdates(false, updateExtractor);
  verify(updateExtractor).longSum(getObjectCounterName("test_receiver_out"), false, 3L);
  verify(updateExtractor)
      .longMean(
          getMeanByteCounterName("test_receiver_out"),
          false,
          LongCounterMean.ZERO.addValue(49L, 3));
  verifyNoMoreInteractions(updateExtractor);
}
use of org.apache.beam.sdk.coders.Coder in project beam by apache.
the class PartialGroupByKeyParDoFnsTest method testCreateWithCombinerAndStreamingSideInputs.
/**
 * Checks that {@code PartialGroupByKeyParDoFns.create} returns a
 * {@code StreamingSideInputPGBKParDoFn} when called with streaming options, a combine
 * function and a side input reader that reports itself as non-empty.
 */
@Test
public void testCreateWithCombinerAndStreamingSideInputs() throws Exception {
  Coder stringCoder = StringUtf8Coder.of();
  Coder intCoder = BigEndianIntegerCoder.of();
  KvCoder<String, Integer> inputCoder = KvCoder.of(stringCoder, intCoder);

  TestOutputReceiver receiver =
      new TestOutputReceiver(
          new ElementByteSizeObservableCoder(WindowedValue.getValueOnlyCoder(inputCoder)),
          counterSet,
          NameContextsForTests.nameContextForTest());

  // Stub the collaborators: a non-empty side input reader and state that reads as an empty map.
  when(mockState.read()).thenReturn(Maps.newHashMap());
  when(mockStateInternals.state(Matchers.<StateNamespace>any(), Matchers.<StateTag>any()))
      .thenReturn(mockState);
  when(mockStreamingStepContext.stateInternals()).thenReturn((StateInternals) mockStateInternals);
  when(mockSideInputReader.isEmpty()).thenReturn(false);

  StreamingOptions streamingOptions = PipelineOptionsFactory.as(StreamingOptions.class);
  streamingOptions.setStreaming(true);

  ParDoFn created =
      PartialGroupByKeyParDoFns.create(
          streamingOptions,
          inputCoder,
          AppliedCombineFn.withInputCoder(
              Sum.ofIntegers(),
              CoderRegistry.createDefault(),
              inputCoder,
              ImmutableList.<PCollectionView<?>>of(),
              WindowingStrategy.globalDefault()),
          mockSideInputReader,
          receiver,
          mockStreamingStepContext);

  assertTrue(created instanceof StreamingSideInputPGBKParDoFn);
}
Aggregations