Use of org.apache.beam.sdk.transforms.PTransform in project beam by apache.
The class ReadSourceTranslatorStreaming, method translateTransform. The method recovers the UnboundedSource behind an applied Read, hands it to a custom Spark streaming source provider as a serialized option, and decodes the resulting rows back into WindowedValues:
@SuppressWarnings("unchecked")
@Override
public void translateTransform(
    PTransform<PBegin, PCollection<T>> transform, AbstractTranslationContext context) {
  AppliedPTransform<PBegin, PCollection<T>, PTransform<PBegin, PCollection<T>>> rootTransform =
      (AppliedPTransform<PBegin, PCollection<T>, PTransform<PBegin, PCollection<T>>>)
          context.getCurrentTransform();
  UnboundedSource<T, UnboundedSource.CheckpointMark> source;
  try {
    source = ReadTranslation.unboundedSourceFromTransform(rootTransform);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  SparkSession sparkSession = context.getSparkSession();
  // Ship the source to the Spark DataSource as a Base64-encoded string option.
  String serializedSource = Base64Serializer.serializeUnchecked(source);
  Dataset<Row> rowDataset =
      sparkSession
          .readStream()
          .format(sourceProviderClass)
          .option(BEAM_SOURCE_OPTION, serializedSource)
          .option(
              DEFAULT_PARALLELISM,
              String.valueOf(context.getSparkSession().sparkContext().defaultParallelism()))
          .option(PIPELINE_OPTIONS, context.getSerializableOptions().toString())
          .load();
  // Extract the WindowedValue<T> encoded in each Row.
  WindowedValue.FullWindowedValueCoder<T> windowedValueCoder =
      WindowedValue.FullWindowedValueCoder.of(
          source.getOutputCoder(), GlobalWindow.Coder.INSTANCE);
  Dataset<WindowedValue<T>> dataset =
      rowDataset.map(
          RowHelpers.extractWindowedValueFromRowMapFunction(windowedValueCoder),
          EncoderHelpers.fromBeamCoder(windowedValueCoder));
  PCollection<T> output = (PCollection<T>) context.getOutput();
  context.putDataset(output, dataset);
}
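The only nonobvious step above is the Base64 hand-off: a Spark DataSource option can only carry a string, so the UnboundedSource travels as Base64-encoded serialized bytes. A minimal sketch of that round trip, assuming plain Java serialization (this Base64RoundTrip class is a hypothetical stand-in, not Beam's internal Base64Serializer):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.Base64;

// Hypothetical stand-in for Beam's internal helper: Java-serialize the value,
// Base64-encode the bytes so they fit in a string-valued option, and reverse
// the process on the executor side.
final class Base64RoundTrip {

  static String serializeUnchecked(Serializable value) {
    try (ByteArrayOutputStream bos = new ByteArrayOutputStream();
        ObjectOutputStream oos = new ObjectOutputStream(bos)) {
      oos.writeObject(value);
      oos.flush();
      return Base64.getEncoder().encodeToString(bos.toByteArray());
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  @SuppressWarnings("unchecked")
  static <T> T deserializeUnchecked(String encoded) {
    byte[] bytes = Base64.getDecoder().decode(encoded);
    try (ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(bytes))) {
      return (T) ois.readObject();
    } catch (IOException | ClassNotFoundException e) {
      throw new RuntimeException(e);
    }
  }
}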
Use of org.apache.beam.sdk.transforms.PTransform in project beam by apache.
The class PCollectionViewTranslatorBatch, method translateNode. The method materializes a PCollectionView as a Twister2 side-input data set, choosing the gather strategy by the view's materialization URN:
@Override
public void translateNode(
    View.CreatePCollectionView<ElemT, ViewT> transform, Twister2BatchTranslationContext context) {
  BatchTSet<WindowedValue<ElemT>> inputDataSet =
      context.getInputDataSet(context.getInput(transform));
  @SuppressWarnings("unchecked")
  AppliedPTransform<
          PCollection<ElemT>, PCollection<ElemT>,
          PTransform<PCollection<ElemT>, PCollection<ElemT>>>
      application =
          (AppliedPTransform<
                  PCollection<ElemT>, PCollection<ElemT>,
                  PTransform<PCollection<ElemT>, PCollection<ElemT>>>)
              context.getCurrentTransform();
  org.apache.beam.sdk.values.PCollectionView<ViewT> input;
  PCollection<ElemT> inputPCol = context.getInput(transform);
  final Coder coder = inputPCol.getCoder();
  WindowingStrategy windowingStrategy = inputPCol.getWindowingStrategy();
  WindowFn windowFn = windowingStrategy.getWindowFn();
  try {
    input = CreatePCollectionViewTranslation.getView(application);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  // Dispatch on the view's materialization URN.
  switch (input.getViewFn().getMaterialization().getUrn()) {
    case Materializations.MULTIMAP_MATERIALIZATION_URN:
      // Multimap views arrive as KVs: encode keyed tuples, gather them on
      // every worker, then decode back to windowed values.
      KvCoder kvCoder = (KvCoder<?, ?>) coder;
      final Coder keyCoder = kvCoder.getKeyCoder();
      final WindowedValue.WindowedValueCoder kvwvCoder =
          WindowedValue.FullWindowedValueCoder.of(
              kvCoder.getValueCoder(), windowFn.windowCoder());
      BatchTSet<WindowedValue<ElemT>> multimapMaterialization =
          inputDataSet
              .direct()
              .map(new MapToTupleFunction<>(keyCoder, kvwvCoder))
              .allGather()
              .map(new ByteToWindowFunctionPrimitive(keyCoder, kvwvCoder));
      context.setSideInputDataSet(input.getTagInternal().getId(), multimapMaterialization);
      break;
    case Materializations.ITERABLE_MATERIALIZATION_URN:
      // Iterable views need no keys: serialize each element, gather, decode.
      final WindowedValue.WindowedValueCoder wvCoder =
          WindowedValue.FullWindowedValueCoder.of(coder, windowFn.windowCoder());
      BatchTSet<WindowedValue<ElemT>> iterableMaterialization =
          inputDataSet
              .direct()
              .map(new ElemToBytesFunction<>(wvCoder))
              .allGather()
              .map(new ByteToElemFunction(wvCoder));
      context.setSideInputDataSet(input.getTagInternal().getId(), iterableMaterialization);
      break;
    default:
      throw new UnsupportedOperationException(
          "Unknown side input materialization " + input.getViewFn().getMaterialization().getUrn());
  }
}
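Which case runs is decided upstream, when the view is created. For orientation, a sketch of the two pipeline shapes that produce each URN (my own illustration using the public View API, not code from this translator):

// Illustrative only: View.asIterable() produces an ITERABLE-materialized view,
// View.asMultimap() a MULTIMAP-materialized one.
PCollection<String> words = pipeline.apply(Create.of("a", "b"));
PCollectionView<Iterable<String>> iterableView = words.apply(View.asIterable());

PCollection<KV<String, Integer>> pairs = pipeline.apply(Create.of(KV.of("a", 1)));
PCollectionView<Map<String, Iterable<Integer>>> multimapView = pairs.apply(View.asMultimap());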
Use of org.apache.beam.sdk.transforms.PTransform in project beam by apache.
The class Twister2BatchPipelineTranslator, method visitPrimitiveTransform. Each primitive transform is dispatched to a registered translator, failing fast when none exists:
@Override
public void visitPrimitiveTransform(TransformHierarchy.Node node) {
  LOG.fine(String.format("visiting transform %s", node.getTransform()));
  PTransform transform = node.getTransform();
  BatchTransformTranslator translator = getTransformTranslator(transform);
  if (null == translator) {
    throw new IllegalStateException("no translator registered for " + transform);
  }
  translationContext.setCurrentTransform(node.toAppliedPTransform(getPipeline()));
  translator.translateNode(transform, translationContext);
}
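getTransformTranslator is not shown here; in runners of this shape it is typically a static map keyed by the transform's concrete class. A minimal sketch of such a registry (field and translator names are illustrative, not necessarily Twister2's actual ones):

// Hypothetical registry mapping each supported primitive transform class
// to the translator that knows how to lower it.
private static final Map<Class<? extends PTransform>, BatchTransformTranslator>
    TRANSLATORS = new HashMap<>();

static {
  // Each supported primitive registers once; the translator name below is illustrative.
  TRANSLATORS.put(ParDo.MultiOutput.class, new ParDoTranslatorBatch());
}

private BatchTransformTranslator getTransformTranslator(PTransform<?, ?> transform) {
  // Null signals an untranslatable transform; the visitor above fails fast on it.
  return TRANSLATORS.get(transform.getClass());
}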
Use of org.apache.beam.sdk.transforms.PTransform in project beam by apache.
The class GenerateSequenceTest, method testUnboundedDisplayData. The test asserts that an unbounded GenerateSequence exposes its maxReadTime and timestampFn through DisplayData:
@Test
public void testUnboundedDisplayData() {
  Duration maxReadTime = Duration.standardHours(5);
  SerializableFunction<Long, Instant> timestampFn = input -> Instant.now();
  PTransform<?, ?> input =
      GenerateSequence.from(0).to(1234).withMaxReadTime(maxReadTime).withTimestampFn(timestampFn);
  DisplayData displayData = DisplayData.from(input);
  assertThat(displayData, hasDisplayItem("maxReadTime", maxReadTime));
  assertThat(displayData, hasDisplayItem("timestampFn", timestampFn.getClass()));
}
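For context, the transform under test is built with the ordinary GenerateSequence API; a minimal pipeline usage (my own example, independent of this test):

// Emit 0..99 as a bounded PCollection<Long>.
Pipeline p = Pipeline.create();
PCollection<Long> bounded = p.apply(GenerateSequence.from(0).to(100));

// Omitting .to(n) makes the source unbounded; withMaxReadTime caps it by wall
// time, which is what flips on the "maxReadTime" display item asserted above.
PCollection<Long> capped =
    p.apply("Capped", GenerateSequence.from(0).withMaxReadTime(Duration.standardMinutes(1)));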
Use of org.apache.beam.sdk.transforms.PTransform in project beam by apache.
The class TransformHierarchyTest, method visitDoesNotVisitSkippedNodes. The test builds a small hierarchy by hand and checks that a visitor returning DO_NOT_ENTER_TRANSFORM for a composite never sees the nodes nested inside it:
@Test
public void visitDoesNotVisitSkippedNodes() {
  PCollection<String> one =
      PCollection.createPrimitiveOutputInternal(
          pipeline, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, StringUtf8Coder.of());
  final PCollection<Integer> two =
      PCollection.createPrimitiveOutputInternal(
          pipeline, WindowingStrategy.globalDefault(), IsBounded.UNBOUNDED, VarIntCoder.of());
  final PDone done = PDone.in(pipeline);
  final TupleTag<String> oneTag = new TupleTag<String>() {};
  final TupleTag<Integer> twoTag = new TupleTag<Integer>() {};
  final PCollectionTuple oneAndTwo = PCollectionTuple.of(oneTag, one).and(twoTag, two);
  hierarchy.pushNode(
      "consumes_both",
      one,
      new PTransform<PCollection<String>, PDone>() {
        @Override
        public PDone expand(PCollection<String> input) {
          return done;
        }

        @Override
        public Map<TupleTag<?>, PValue> getAdditionalInputs() {
          return Collections.singletonMap(twoTag, two);
        }
      });
  hierarchy.setOutput(done);
  hierarchy.popNode();
  final PTransform<PBegin, PCollectionTuple> producer =
      new PTransform<PBegin, PCollectionTuple>() {
        @Override
        public PCollectionTuple expand(PBegin input) {
          return oneAndTwo;
        }
      };
  final Node enclosing =
      hierarchy.pushNode(
          "encloses_producer",
          PBegin.in(pipeline),
          new PTransform<PBegin, PCollectionTuple>() {
            @Override
            public PCollectionTuple expand(PBegin input) {
              return input.apply(producer);
            }
          });
  Node enclosed = hierarchy.pushNode("creates_one_and_two", PBegin.in(pipeline), producer);
  hierarchy.setOutput(oneAndTwo);
  hierarchy.popNode();
  hierarchy.setOutput(oneAndTwo);
  hierarchy.popNode();
  final Set<Node> visitedNodes = new HashSet<>();
  hierarchy.visit(
      new PipelineVisitor.Defaults() {
        @Override
        public CompositeBehavior enterCompositeTransform(Node node) {
          visitedNodes.add(node);
          return node.equals(enclosing)
              ? CompositeBehavior.DO_NOT_ENTER_TRANSFORM
              : CompositeBehavior.ENTER_TRANSFORM;
        }

        @Override
        public void visitPrimitiveTransform(Node node) {
          visitedNodes.add(node);
        }
      });
  assertThat(visitedNodes, hasItem(enclosing));
  assertThat(visitedNodes, not(hasItem(enclosed)));
}
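The same pruning applies when traversing a real pipeline rather than a hand-built hierarchy; a sketch using the public traverseTopologically entry point (illustrative, assuming the pipeline contains a composite whose full name is "encloses_producer"):

// Traverse a real pipeline while pruning one composite by full name; nodes
// inside the pruned composite are never visited, mirroring the test above.
final Set<TransformHierarchy.Node> seen = new HashSet<>();
pipeline.traverseTopologically(
    new Pipeline.PipelineVisitor.Defaults() {
      @Override
      public CompositeBehavior enterCompositeTransform(TransformHierarchy.Node node) {
        seen.add(node);
        return "encloses_producer".equals(node.getFullName())
            ? CompositeBehavior.DO_NOT_ENTER_TRANSFORM
            : CompositeBehavior.ENTER_TRANSFORM;
      }

      @Override
      public void visitPrimitiveTransform(TransformHierarchy.Node node) {
        seen.add(node);
      }
    });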