Use of org.apache.beam.runners.core.construction.graph.QueryablePipeline in project beam by apache.
From the class SparkBatchPortablePipelineTranslator, the method translate:
/**
 * Translates the pipeline from Beam into the Spark context.
 */
@Override
public void translate(final RunnerApi.Pipeline pipeline, SparkTranslationContext context) {
  QueryablePipeline p =
      QueryablePipeline.forTransforms(
          pipeline.getRootTransformIdsList(), pipeline.getComponents());
  for (PipelineNode.PTransformNode transformNode : p.getTopologicallyOrderedTransforms()) {
    // Count how many times each PCollection is consumed as an input, so that
    // their corresponding RDDs can later be cached.
    for (String inputId : transformNode.getTransform().getInputsMap().values()) {
      context.incrementConsumptionCountBy(inputId, 1);
    }
    // For an executable stage, the result
    // of computation is an intermediate RDD, which we might also need to cache.
    if (transformNode.getTransform().getSpec().getUrn().equals(ExecutableStage.URN)) {
      context.incrementConsumptionCountBy(
          getExecutableStageIntermediateId(transformNode),
          transformNode.getTransform().getOutputsMap().size());
    }
    for (String outputId : transformNode.getTransform().getOutputsMap().values()) {
      WindowedValueCoder outputCoder = getWindowedValueCoder(outputId, pipeline.getComponents());
      context.putCoder(outputId, outputCoder);
    }
  }
  for (PipelineNode.PTransformNode transformNode : p.getTopologicallyOrderedTransforms()) {
    urnToTransformTranslator
        .getOrDefault(
            transformNode.getTransform().getSpec().getUrn(),
            SparkBatchPortablePipelineTranslator::urnNotFound)
        .translate(transformNode, pipeline, context);
  }
}
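The pre-scan in the first loop boils down to counting, per PCollection id, how many transforms consume it. A minimal, self-contained sketch of that bookkeeping, assuming a built RunnerApi.Pipeline proto; countConsumers and the plain HashMap standing in for SparkTranslationContext are illustrative, not Beam API:

import java.util.HashMap;
import java.util.Map;
import org.apache.beam.model.pipeline.v1.RunnerApi;
import org.apache.beam.runners.core.construction.graph.PipelineNode;
import org.apache.beam.runners.core.construction.graph.QueryablePipeline;

/** Counts how many transforms consume each PCollection id (illustrative sketch only). */
static Map<String, Integer> countConsumers(RunnerApi.Pipeline pipelineProto) {
  Map<String, Integer> consumptionCounts = new HashMap<>();
  QueryablePipeline q =
      QueryablePipeline.forTransforms(
          pipelineProto.getRootTransformIdsList(), pipelineProto.getComponents());
  for (PipelineNode.PTransformNode node : q.getTopologicallyOrderedTransforms()) {
    // Every value in the inputs map is the id of a PCollection this transform consumes.
    for (String inputId : node.getTransform().getInputsMap().values()) {
      consumptionCounts.merge(inputId, 1, Integer::sum);
    }
  }
  return consumptionCounts;
}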
Use of org.apache.beam.runners.core.construction.graph.QueryablePipeline in project beam by apache.
From the class PortablePipelineDotRenderer, the method toDot:
private String toDot(RunnerApi.Pipeline pipeline) {
  final QueryablePipeline p =
      QueryablePipeline.forTransforms(
          pipeline.getRootTransformIdsList(), pipeline.getComponents());
  begin();
  for (PipelineNode.PTransformNode transform : p.getTopologicallyOrderedTransforms()) {
    visitTransform(transform);
  }
  end();
  return dotBuilder.toString();
}
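Here begin, end, visitTransform, and dotBuilder live elsewhere in the class. As a rough idea of what such a walk can produce, a hedged, self-contained sketch that emits one DOT node per transform and one edge per consumed PCollection id; toSimpleDot and its output format are illustrative, not the renderer's actual behavior:

import org.apache.beam.model.pipeline.v1.RunnerApi;
import org.apache.beam.runners.core.construction.graph.PipelineNode;
import org.apache.beam.runners.core.construction.graph.QueryablePipeline;

/** Renders a very rough DOT view of the pipeline graph (illustrative sketch only). */
static String toSimpleDot(RunnerApi.Pipeline pipeline) {
  QueryablePipeline q =
      QueryablePipeline.forTransforms(
          pipeline.getRootTransformIdsList(), pipeline.getComponents());
  StringBuilder dot = new StringBuilder("digraph pipeline {\n");
  for (PipelineNode.PTransformNode node : q.getTopologicallyOrderedTransforms()) {
    // One node per transform, labeled with its unique name.
    dot.append(String.format("  \"%s\";%n", node.getTransform().getUniqueName()));
    // One edge per PCollection id consumed by this transform.
    for (String inputId : node.getTransform().getInputsMap().values()) {
      dot.append(
          String.format("  \"%s\" -> \"%s\";%n", inputId, node.getTransform().getUniqueName()));
    }
  }
  return dot.append("}\n").toString();
}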
Use of org.apache.beam.runners.core.construction.graph.QueryablePipeline in project beam by apache.
From the class ParDoBoundMultiTranslator, the method doTranslatePortable:
// static for serializing anonymous functions
private static <InT, OutT> void doTranslatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {
  Map<String, String> outputs = transform.getTransform().getOutputsMap();
  final RunnerApi.ExecutableStagePayload stagePayload;
  try {
    stagePayload =
        RunnerApi.ExecutableStagePayload.parseFrom(
            transform.getTransform().getSpec().getPayload());
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  String inputId = stagePayload.getInput();
  final MessageStream<OpMessage<InT>> inputStream = ctx.getMessageStreamById(inputId);
  // Analyze side inputs
  final List<MessageStream<OpMessage<Iterable<?>>>> sideInputStreams = new ArrayList<>();
  final Map<SideInputId, PCollectionView<?>> sideInputMapping = new HashMap<>();
  final Map<String, PCollectionView<?>> idToViewMapping = new HashMap<>();
  final RunnerApi.Components components = stagePayload.getComponents();
  for (SideInputId sideInputId : stagePayload.getSideInputsList()) {
    final String sideInputCollectionId =
        components
            .getTransformsOrThrow(sideInputId.getTransformId())
            .getInputsOrThrow(sideInputId.getLocalName());
    final WindowingStrategy<?, BoundedWindow> windowingStrategy =
        WindowUtils.getWindowStrategy(sideInputCollectionId, components);
    final WindowedValue.WindowedValueCoder<?> coder =
        (WindowedValue.WindowedValueCoder) instantiateCoder(sideInputCollectionId, components);
    // Create a runner-side view
    final PCollectionView<?> view = createPCollectionView(sideInputId, coder, windowingStrategy);
    // Use GBK to aggregate the side inputs and then broadcast it out
    final MessageStream<OpMessage<Iterable<?>>> broadcastSideInput =
        groupAndBroadcastSideInput(
            sideInputId,
            sideInputCollectionId,
            components.getPcollectionsOrThrow(sideInputCollectionId),
            (WindowingStrategy) windowingStrategy,
            coder,
            ctx);
    sideInputStreams.add(broadcastSideInput);
    sideInputMapping.put(sideInputId, view);
    idToViewMapping.put(getSideInputUniqueId(sideInputId), view);
  }
  final Map<TupleTag<?>, Integer> tagToIndexMap = new HashMap<>();
  final Map<Integer, String> indexToIdMap = new HashMap<>();
  final Map<String, TupleTag<?>> idToTupleTagMap = new HashMap<>();
  // first output as the main output
  final TupleTag<OutT> mainOutputTag =
      outputs.isEmpty() ? null : new TupleTag(outputs.keySet().iterator().next());
  AtomicInteger index = new AtomicInteger(0);
  outputs
      .keySet()
      .iterator()
      .forEachRemaining(
          outputName -> {
            TupleTag<?> tupleTag = new TupleTag<>(outputName);
            tagToIndexMap.put(tupleTag, index.get());
            String collectionId = outputs.get(outputName);
            indexToIdMap.put(index.get(), collectionId);
            idToTupleTagMap.put(collectionId, tupleTag);
            index.incrementAndGet();
          });
  WindowedValue.WindowedValueCoder<InT> windowedInputCoder =
      WindowUtils.instantiateWindowedCoder(inputId, pipeline.getComponents());
  // TODO: support schema and side inputs for portable runner
  // Note: transform.getTransform() is an ExecutableStage, not a ParDo, so we need to extract
  // this info from its components.
  final DoFnSchemaInformation doFnSchemaInformation = null;
  final RunnerApi.PCollection input = pipeline.getComponents().getPcollectionsOrThrow(inputId);
  final PCollection.IsBounded isBounded = SamzaPipelineTranslatorUtils.isBounded(input);
  final Coder<?> keyCoder =
      StateUtils.isStateful(stagePayload)
          ? ((KvCoder) ((WindowedValue.FullWindowedValueCoder) windowedInputCoder).getValueCoder())
              .getKeyCoder()
          : null;
  final DoFnOp<InT, OutT, RawUnionValue> op =
      new DoFnOp<>(
          mainOutputTag,
          new NoOpDoFn<>(),
          keyCoder,
          windowedInputCoder.getValueCoder(), // input coder not in use
          windowedInputCoder,
          Collections.emptyMap(), // output coders not in use
          new ArrayList<>(sideInputMapping.values()),
          new ArrayList<>(idToTupleTagMap.values()), // used by java runner only
          WindowUtils.getWindowStrategy(inputId, stagePayload.getComponents()),
          idToViewMapping,
          new DoFnOp.MultiOutputManagerFactory(tagToIndexMap),
          ctx.getTransformFullName(),
          ctx.getTransformId(),
          isBounded,
          true,
          stagePayload,
          ctx.getJobInfo(),
          idToTupleTagMap,
          doFnSchemaInformation,
          sideInputMapping);
  final MessageStream<OpMessage<InT>> mergedStreams;
  if (sideInputStreams.isEmpty()) {
    mergedStreams = inputStream;
  } else {
    MessageStream<OpMessage<InT>> mergedSideInputStreams =
        MessageStream.mergeAll(sideInputStreams).flatMap(new SideInputWatermarkFn());
    mergedStreams = inputStream.merge(Collections.singletonList(mergedSideInputStreams));
  }
  final MessageStream<OpMessage<RawUnionValue>> taggedOutputStream =
      mergedStreams.flatMapAsync(OpAdapter.adapt(op));
  for (int outputIndex : tagToIndexMap.values()) {
    @SuppressWarnings("unchecked")
    final MessageStream<OpMessage<OutT>> outputStream =
        taggedOutputStream
            .filter(
                message ->
                    message.getType() != OpMessage.Type.ELEMENT
                        || message.getElement().getValue().getUnionTag() == outputIndex)
            .flatMapAsync(OpAdapter.adapt(new RawUnionValueToValue()));
    ctx.registerMessageStream(indexToIdMap.get(outputIndex), outputStream);
  }
}
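The output bookkeeping in the method above pairs each output with a TupleTag, a positional index, and its PCollection id. The same step in isolation, as a small sketch; indexOutputs is an illustrative name, and the outputs argument is the local-name-to-PCollection-id map returned by getOutputsMap():

import java.util.HashMap;
import java.util.Map;
import org.apache.beam.sdk.values.TupleTag;

// Assign each output a TupleTag and a stable index; keep lookups in both directions.
static void indexOutputs(
    Map<String, String> outputs, // local output name -> PCollection id
    Map<TupleTag<?>, Integer> tagToIndexMap,
    Map<Integer, String> indexToIdMap,
    Map<String, TupleTag<?>> idToTupleTagMap) {
  int index = 0;
  for (Map.Entry<String, String> output : outputs.entrySet()) {
    TupleTag<?> tag = new TupleTag<>(output.getKey());
    tagToIndexMap.put(tag, index);
    indexToIdMap.put(index, output.getValue());
    idToTupleTagMap.put(output.getValue(), tag);
    index++;
  }
}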
Use of org.apache.beam.runners.core.construction.graph.QueryablePipeline in project beam by apache.
From the class SamzaPortablePipelineTranslator, the method createConfig:
public static void createConfig(
    RunnerApi.Pipeline pipeline, ConfigBuilder configBuilder, SamzaPipelineOptions options) {
  QueryablePipeline queryablePipeline =
      QueryablePipeline.forTransforms(
          pipeline.getRootTransformIdsList(), pipeline.getComponents());
  for (PipelineNode.PTransformNode transform :
      queryablePipeline.getTopologicallyOrderedTransforms()) {
    TransformTranslator<?> translator =
        TRANSLATORS.get(transform.getTransform().getSpec().getUrn());
    if (translator instanceof TransformConfigGenerator) {
      TransformConfigGenerator configGenerator = (TransformConfigGenerator) translator;
      configBuilder.putAll(configGenerator.createPortableConfig(transform, options));
    }
  }
}
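TRANSLATORS here is a URN-keyed registry, and only entries that also implement TransformConfigGenerator contribute configuration. A minimal sketch of that capability-check pattern, with placeholder interfaces; Translator, ConfigGenerating, and collectConfig are illustrative stand-ins, not the Samza runner's actual types:

import java.util.HashMap;
import java.util.Map;

// Illustrative stand-ins for the runner's translator registry; the real
// TransformTranslator / TransformConfigGenerator interfaces have richer signatures.
interface Translator {}

interface ConfigGenerating extends Translator {
  Map<String, String> createConfig();
}

static Map<String, String> collectConfig(Map<String, Translator> translatorsByUrn) {
  Map<String, String> config = new HashMap<>();
  for (Translator translator : translatorsByUrn.values()) {
    // Only translators that also implement the config-generating capability add entries.
    if (translator instanceof ConfigGenerating) {
      config.putAll(((ConfigGenerating) translator).createConfig());
    }
  }
  return config;
}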
Use of org.apache.beam.runners.core.construction.graph.QueryablePipeline in project beam by apache.
From the class SamzaPortablePipelineTranslator, the method translate:
public static void translate(RunnerApi.Pipeline pipeline, PortableTranslationContext ctx) {
  QueryablePipeline queryablePipeline =
      QueryablePipeline.forTransforms(
          pipeline.getRootTransformIdsList(), pipeline.getComponents());
  for (PipelineNode.PTransformNode transform :
      queryablePipeline.getTopologicallyOrderedTransforms()) {
    ctx.setCurrentTransform(transform);
    LOG.info("Translating transform urn: {}", transform.getTransform().getSpec().getUrn());
    TRANSLATORS
        .get(transform.getTransform().getSpec().getUrn())
        .translatePortable(transform, queryablePipeline, ctx);
    ctx.clearCurrentTransform();
  }
}
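Unlike the Spark translator above, which falls back to urnNotFound via getOrDefault, this lookup dereferences TRANSLATORS.get(...) directly, so an unregistered URN would surface as a NullPointerException. A small defensive variant, sketched under the assumption that TRANSLATORS and translatePortable have the shapes shown in these snippets:

String urn = transform.getTransform().getSpec().getUrn();
TransformTranslator<?> translator = TRANSLATORS.get(urn);
if (translator == null) {
  // Fail with a descriptive message instead of a bare NullPointerException.
  throw new UnsupportedOperationException("No portable translator registered for URN " + urn);
}
translator.translatePortable(transform, queryablePipeline, ctx);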