use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
the class CombineRunners method createMergeAccumulatorsMapFunction.
/**
 * Creates a function that merges the accumulators of a keyed element into a single accumulator.
 *
 * <p>The {@code CombineFn} is deserialized once, from the {@code CombinePayload} carried in the
 * spec of {@code pTransform}, and is then captured by the returned function.
 *
 * @param pTransformId the id of the transform; not used by this method
 * @param pTransform the transform whose spec payload is parsed as a {@code CombinePayload}
 * @return a function mapping {@code KV<key, accumulators>} to {@code KV<key, mergedAccumulator>}
 * @throws IOException declared by this method; {@code CombinePayload.parseFrom} is the call that
 *     parses the payload bytes
 */
static <KeyT, AccumT> ThrowingFunction<KV<KeyT, Iterable<AccumT>>, KV<KeyT, AccumT>> createMergeAccumulatorsMapFunction(String pTransformId, PTransform pTransform) throws IOException {
  CombinePayload payload = CombinePayload.parseFrom(pTransform.getSpec().getPayload());
  byte[] serializedFn = payload.getCombineFn().getPayload().toByteArray();
  // The deserialized object is cast through the raw CombineFn type, exactly as before.
  CombineFn<?, AccumT, ?> fn =
      (CombineFn) SerializableUtils.deserializeFromByteArray(serializedFn, "CombineFn");
  return (KV<KeyT, Iterable<AccumT>> element) -> {
    KeyT key = element.getKey();
    AccumT merged = fn.mergeAccumulators(element.getValue());
    return KV.of(key, merged);
  };
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
the class CombineRunners method createExtractOutputsMapFunction.
/**
 * Creates a function that turns the accumulator of a keyed element into its output value.
 *
 * <p>The {@code CombineFn} is deserialized once, from the {@code CombinePayload} carried in the
 * spec of {@code pTransform}, and is then captured by the returned function.
 *
 * @param pTransformId the id of the transform; not used by this method
 * @param pTransform the transform whose spec payload is parsed as a {@code CombinePayload}
 * @return a function mapping {@code KV<key, accumulator>} to {@code KV<key, output>}
 * @throws IOException declared by this method; {@code CombinePayload.parseFrom} is the call that
 *     parses the payload bytes
 */
static <KeyT, AccumT, OutputT> ThrowingFunction<KV<KeyT, AccumT>, KV<KeyT, OutputT>> createExtractOutputsMapFunction(String pTransformId, PTransform pTransform) throws IOException {
  CombinePayload payload = CombinePayload.parseFrom(pTransform.getSpec().getPayload());
  byte[] serializedFn = payload.getCombineFn().getPayload().toByteArray();
  // The deserialized object is cast through the raw CombineFn type, exactly as before.
  CombineFn<?, AccumT, OutputT> fn =
      (CombineFn) SerializableUtils.deserializeFromByteArray(serializedFn, "CombineFn");
  return (KV<KeyT, AccumT> element) -> {
    KeyT key = element.getKey();
    OutputT output = fn.extractOutput(element.getValue());
    return KV.of(key, output);
  };
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
the class CombineRunners method createConvertToAccumulatorsMapFunction.
/**
 * Creates a function that converts the value of a keyed element into an accumulator holding
 * just that value.
 *
 * <p>For every element the returned function asks the {@code CombineFn} for a new accumulator
 * and adds the element's value to it. The {@code CombineFn} itself is deserialized once, from
 * the {@code CombinePayload} carried in the spec of {@code pTransform}.
 *
 * @param pTransformId the id of the transform; not used by this method
 * @param pTransform the transform whose spec payload is parsed as a {@code CombinePayload}
 * @return a function mapping {@code KV<key, input>} to {@code KV<key, accumulator>}
 * @throws IOException declared by this method; {@code CombinePayload.parseFrom} is the call that
 *     parses the payload bytes
 */
static <KeyT, InputT, AccumT> ThrowingFunction<KV<KeyT, InputT>, KV<KeyT, AccumT>> createConvertToAccumulatorsMapFunction(String pTransformId, PTransform pTransform) throws IOException {
  CombinePayload payload = CombinePayload.parseFrom(pTransform.getSpec().getPayload());
  byte[] serializedFn = payload.getCombineFn().getPayload().toByteArray();
  // The deserialized object is cast through the raw CombineFn type, exactly as before.
  CombineFn<InputT, AccumT, ?> fn =
      (CombineFn) SerializableUtils.deserializeFromByteArray(serializedFn, "CombineFn");
  return (KV<KeyT, InputT> element) -> {
    KeyT key = element.getKey();
    // A new accumulator is requested per element; nothing is shared between invocations here.
    AccumT emptyAccumulator = fn.createAccumulator();
    AccumT accumulator = fn.addInput(emptyAccumulator, element.getValue());
    return KV.of(key, accumulator);
  };
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
the class RemoteGrpcPortWriteTest method toFromPTransform.
/**
 * Checks that a {@link RemoteGrpcPortWrite} is unchanged after being converted to a {@link
 * PTransform}, serialized to bytes, parsed back, and rebuilt with {@code fromPTransform}.
 */
@Test
public void toFromPTransform() throws InvalidProtocolBufferException {
  ApiServiceDescriptor descriptor =
      ApiServiceDescriptor.newBuilder()
          .setUrl("foo")
          .setAuthentication(AuthenticationSpec.getDefaultInstance())
          .build();
  RemoteGrpcPort port = RemoteGrpcPort.newBuilder().setApiServiceDescriptor(descriptor).build();
  RemoteGrpcPortWrite original = RemoteGrpcPortWrite.writeToPort("myPort", port);

  // Go through the wire format so the comparison covers proto serialization as well.
  byte[] serialized = original.toPTransform().toByteArray();
  PTransform parsed = PTransform.parseFrom(serialized);
  RemoteGrpcPortWrite roundTripped = RemoteGrpcPortWrite.fromPTransform(parsed);

  assertThat(roundTripped, equalTo(original));
  assertThat(roundTripped.getPort(), equalTo(original.getPort()));
  assertThat(roundTripped.toPTransform(), equalTo(parsed));
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
the class GreedyStageFuser method forGrpcPortRead.
/**
 * Builds an {@link ExecutableStage} whose input is {@code inputPCollection} and whose transforms
 * are {@code initialNodes} plus every downstream per-element consumer that can be fused with
 * them.
 *
 * <p>Starting from the outputs of the initial nodes, each reachable {@link PCollectionNode
 * PCollection} is classified by {@code canFuse}: it is either fused (its per-element consumers
 * join the stage and their outputs become new candidates) or materialized (it becomes an output
 * of the stage).
 *
 * @param pipeline the pipeline used to look up consumers, outputs, side inputs, user state and
 *     timers
 * @param inputPCollection the PCollection passed to the stage as its input
 * @param initialNodes the initial set of sibling transforms to fuse into this stage; must not be
 *     empty. All of the transforms must consume the {@code inputPCollection} on a per-element
 *     basis, and must all be mutually compatible.
 * @return the stage containing the fused transforms and the materialized output PCollections
 * @throws IllegalArgumentException if {@code initialNodes} is empty
 * @throws IllegalStateException if {@code canFuse} returns a value other than {@code FUSE} or
 *     {@code MATERIALIZE}
 */
public static ExecutableStage forGrpcPortRead(QueryablePipeline pipeline, PCollectionNode inputPCollection, Set<PTransformNode> initialNodes) {
  checkArgument(
      !initialNodes.isEmpty(),
      "%s must contain at least one %s.",
      GreedyStageFuser.class.getSimpleName(),
      PTransformNode.class.getSimpleName());
  // The stage needs at least one processing node (a bare gRPC Read -> gRPC Write would do
  // nothing), so the environment can be taken from the initial nodes.
  Environment environment = getStageEnvironment(pipeline, initialNodes);

  ImmutableSet.Builder<PTransformNode> stageTransforms = ImmutableSet.builder();
  stageTransforms.addAll(initialNodes);

  Set<SideInputReference> sideInputs = new LinkedHashSet<>();
  Set<UserStateReference> userStates = new LinkedHashSet<>();
  Set<TimerReference> timers = new LinkedHashSet<>();
  Set<PCollectionNode> fused = new LinkedHashSet<>();
  Set<PCollectionNode> materialized = new LinkedHashSet<>();

  // Worklist of PCollections still waiting for a fuse-or-materialize decision.
  Queue<PCollectionNode> pending = new ArrayDeque<>();
  for (PTransformNode root : initialNodes) {
    pending.addAll(pipeline.getOutputPCollections(root));
    sideInputs.addAll(pipeline.getSideInputs(root));
    userStates.addAll(pipeline.getUserStates(root));
    timers.addAll(pipeline.getTimers(root));
  }

  while (!pending.isEmpty()) {
    PCollectionNode candidate = pending.poll();

    boolean alreadyFused = fused.contains(candidate);
    if (alreadyFused || materialized.contains(candidate)) {
      // Typically a Flatten reached along several paths; the decision for its output has
      // already been made.
      LOG.debug(
          "Skipping fusion candidate {} because it is {} in this {}",
          candidate,
          alreadyFused ? "fused" : "materialized",
          ExecutableStage.class.getSimpleName());
      continue;
    }

    PCollectionFusibility fusibility = canFuse(pipeline, candidate, environment, fused);
    switch (fusibility) {
      case MATERIALIZE:
        materialized.add(candidate);
        break;
      case FUSE:
        // Every per-element consumer of the candidate joins this stage.
        fused.add(candidate);
        stageTransforms.addAll(pipeline.getPerElementConsumers(candidate));
        for (PTransformNode downstream : pipeline.getPerElementConsumers(candidate)) {
          // Outputs of a fused transform must themselves be fused or materialized, so they are
          // queued for a decision.
          pending.addAll(pipeline.getOutputPCollections(downstream));
          // NOTE(review): only side inputs are collected for fused consumers; user state and
          // timers come from the initial nodes alone. Presumably canFuse never fuses a stateful
          // consumer - confirm.
          sideInputs.addAll(pipeline.getSideInputs(downstream));
        }
        break;
      default:
        throw new IllegalStateException(
            String.format(
                "Unknown type of %s %s",
                PCollectionFusibility.class.getSimpleName(), fusibility));
    }
  }

  return ImmutableExecutableStage.ofFullComponents(
      pipeline.getComponents(),
      environment,
      inputPCollection,
      sideInputs,
      userStates,
      timers,
      stageTransforms.build(),
      materialized,
      DEFAULT_WIRE_CODER_SETTINGS);
}
Aggregations