Search in sources :

Example 26 with PTransform

use of org.apache.beam.sdk.transforms.PTransform in project beam by apache.

The method translateTransform of the class ReadSourceTranslatorStreaming.

/**
 * Translates a Beam unbounded {@code Read} into a Spark structured-streaming source.
 *
 * <p>The {@link UnboundedSource} is recovered from the applied transform, serialized to a
 * Base64 string, and handed to the Spark DataSource provider via reader options. The resulting
 * {@code Dataset<Row>} is then decoded back into {@code WindowedValue<T>} elements using the
 * source's output coder, and registered as the dataset for this transform's output.
 *
 * @param transform the Read transform being translated (unused directly; the applied form is
 *     taken from the context)
 * @param context translation context holding the current transform, the SparkSession and the
 *     pipeline options
 */
@SuppressWarnings("unchecked")
@Override
public void translateTransform(PTransform<PBegin, PCollection<T>> transform, AbstractTranslationContext context) {
    AppliedPTransform<PBegin, PCollection<T>, PTransform<PBegin, PCollection<T>>> rootTransform =
        (AppliedPTransform<PBegin, PCollection<T>, PTransform<PBegin, PCollection<T>>>)
            context.getCurrentTransform();
    UnboundedSource<T, UnboundedSource.CheckpointMark> source;
    try {
        source = ReadTranslation.unboundedSourceFromTransform(rootTransform);
    } catch (IOException e) {
        // Translation cannot proceed without the source; surface as unchecked with cause kept.
        throw new RuntimeException(e);
    }
    SparkSession sparkSession = context.getSparkSession();
    // The source object itself is shipped to the provider as a Base64-encoded option value.
    String serializedSource = Base64Serializer.serializeUnchecked(source);
    Dataset<Row> rowDataset =
        sparkSession
            .readStream()
            .format(sourceProviderClass)
            .option(BEAM_SOURCE_OPTION, serializedSource)
            // Reuse the sparkSession local instead of re-fetching it through the context.
            .option(DEFAULT_PARALLELISM,
                String.valueOf(sparkSession.sparkContext().defaultParallelism()))
            .option(PIPELINE_OPTIONS, context.getSerializableOptions().toString())
            .load();
    // Rows carry encoded windowed values; decode with the source's output coder in the
    // global window (windowing is applied downstream).
    WindowedValue.FullWindowedValueCoder<T> windowedValueCoder =
        WindowedValue.FullWindowedValueCoder.of(source.getOutputCoder(), GlobalWindow.Coder.INSTANCE);
    Dataset<WindowedValue<T>> dataset =
        rowDataset.map(
            RowHelpers.extractWindowedValueFromRowMapFunction(windowedValueCoder),
            EncoderHelpers.fromBeamCoder(windowedValueCoder));
    PCollection<T> output = (PCollection<T>) context.getOutput();
    context.putDataset(output, dataset);
}
Also used : SparkSession(org.apache.spark.sql.SparkSession) IOException(java.io.IOException) PBegin(org.apache.beam.sdk.values.PBegin) PCollection(org.apache.beam.sdk.values.PCollection) AppliedPTransform(org.apache.beam.sdk.runners.AppliedPTransform) WindowedValue(org.apache.beam.sdk.util.WindowedValue) Row(org.apache.spark.sql.Row) PTransform(org.apache.beam.sdk.transforms.PTransform) AppliedPTransform(org.apache.beam.sdk.runners.AppliedPTransform)

Example 27 with PTransform

use of org.apache.beam.sdk.transforms.PTransform in project beam by apache.

The method translateNode of the class PCollectionViewTranslatorBatch.

/**
 * Translates a {@code View.CreatePCollectionView} into Twister2 side-input datasets.
 *
 * <p>Depending on the view's materialization URN, the input dataset is either keyed
 * (MULTIMAP) or kept as plain elements (ITERABLE), gathered to all workers, and registered
 * with the context under the view's tag id.
 *
 * @param transform the view-creating transform being translated
 * @param context batch translation context holding datasets and the current applied transform
 */
@Override
public void translateNode(View.CreatePCollectionView<ElemT, ViewT> transform, Twister2BatchTranslationContext context) {
    BatchTSet<WindowedValue<ElemT>> inputDataSet = context.getInputDataSet(context.getInput(transform));
    @SuppressWarnings("unchecked") AppliedPTransform<PCollection<ElemT>, PCollection<ElemT>, PTransform<PCollection<ElemT>, PCollection<ElemT>>> application = (AppliedPTransform<PCollection<ElemT>, PCollection<ElemT>, PTransform<PCollection<ElemT>, PCollection<ElemT>>>) context.getCurrentTransform();
    org.apache.beam.sdk.values.PCollectionView<ViewT> input;
    PCollection<ElemT> inputPCol = context.getInput(transform);
    final Coder coder = inputPCol.getCoder();
    WindowingStrategy windowingStrategy = inputPCol.getWindowingStrategy();
    WindowFn windowFn = windowingStrategy.getWindowFn();
    try {
        input = CreatePCollectionViewTranslation.getView(application);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    switch(input.getViewFn().getMaterialization().getUrn()) {
        case Materializations.MULTIMAP_MATERIALIZATION_URN:
            // Multimap views are keyed: split the KvCoder so keys and windowed values can be
            // serialized independently for the gather.
            KvCoder kvCoder = (KvCoder<?, ?>) coder;
            final Coder keyCoder = kvCoder.getKeyCoder();
            final WindowedValue.WindowedValueCoder kvwvCoder = WindowedValue.FullWindowedValueCoder.of(kvCoder.getValueCoder(), windowFn.windowCoder());
            BatchTSet<WindowedValue<ElemT>> multimapMaterialization = inputDataSet.direct().map(new MapToTupleFunction<>(keyCoder, kvwvCoder)).allGather().map(new ByteToWindowFunctionPrimitive(keyCoder, kvwvCoder));
            context.setSideInputDataSet(input.getTagInternal().getId(), multimapMaterialization);
            break;
        case Materializations.ITERABLE_MATERIALIZATION_URN:
            // Iterable views gather whole windowed elements; no key split needed.
            final WindowedValue.WindowedValueCoder wvCoder = WindowedValue.FullWindowedValueCoder.of(coder, windowFn.windowCoder());
            BatchTSet<WindowedValue<ElemT>> iterableMaterialization = inputDataSet.direct().map(new ElemToBytesFunction<>(wvCoder)).allGather().map(new ByteToElemFunction(wvCoder));
            // Note: `input` was already resolved via getView(application) above; the original
            // code re-resolved it here redundantly — removed.
            context.setSideInputDataSet(input.getTagInternal().getId(), iterableMaterialization);
            break;
        default:
            throw new UnsupportedOperationException("Unknown side input materialization " + input.getViewFn().getMaterialization().getUrn());
    }
}
Also used : WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) WindowedValue(org.apache.beam.sdk.util.WindowedValue) AppliedPTransform(org.apache.beam.sdk.runners.AppliedPTransform) PTransform(org.apache.beam.sdk.transforms.PTransform) AppliedPTransform(org.apache.beam.sdk.runners.AppliedPTransform) KvCoder(org.apache.beam.sdk.coders.KvCoder) Coder(org.apache.beam.sdk.coders.Coder) ByteToElemFunction(org.apache.beam.runners.twister2.translators.functions.ByteToElemFunction) WindowFn(org.apache.beam.sdk.transforms.windowing.WindowFn) KvCoder(org.apache.beam.sdk.coders.KvCoder) IOException(java.io.IOException) PCollection(org.apache.beam.sdk.values.PCollection) ByteToWindowFunctionPrimitive(org.apache.beam.runners.twister2.translators.functions.ByteToWindowFunctionPrimitive)

Example 28 with PTransform

use of org.apache.beam.sdk.transforms.PTransform in project beam by apache.

The method visitPrimitiveTransform of the class Twister2BatchPipelineTranslator.

/**
 * Dispatches a primitive (leaf) transform to its registered batch translator.
 *
 * @param node the hierarchy node wrapping the primitive transform
 * @throws IllegalStateException if no translator has been registered for the transform
 */
@Override
public void visitPrimitiveTransform(TransformHierarchy.Node node) {
    PTransform transform = node.getTransform();
    LOG.fine(String.format("visiting transform %s", transform));
    BatchTransformTranslator translator = getTransformTranslator(transform);
    if (translator == null) {
        // Unknown primitive: the pipeline cannot be executed on this runner.
        throw new IllegalStateException("no translator registered for " + transform);
    }
    // Record the applied form so translators can inspect the full transform context.
    translationContext.setCurrentTransform(node.toAppliedPTransform(getPipeline()));
    translator.translateNode(transform, translationContext);
}
Also used : PTransform(org.apache.beam.sdk.transforms.PTransform)

Example 29 with PTransform

use of org.apache.beam.sdk.transforms.PTransform in project beam by apache.

The method testUnboundedDisplayData of the class GenerateSequenceTest.

/** Verifies that an unbounded GenerateSequence exposes its configuration via DisplayData. */
@Test
public void testUnboundedDisplayData() {
    final Duration maxReadTime = Duration.standardHours(5);
    final SerializableFunction<Long, Instant> timestampFn = ignored -> Instant.now();
    // withMaxReadTime / withTimestampFn make the sequence unbounded.
    PTransform<?, ?> sequence =
        GenerateSequence.from(0)
            .to(1234)
            .withMaxReadTime(maxReadTime)
            .withTimestampFn(timestampFn);
    DisplayData displayData = DisplayData.from(sequence);
    // Both unbounded-only settings must be surfaced as display items.
    assertThat(displayData, hasDisplayItem("maxReadTime", maxReadTime));
    assertThat(displayData, hasDisplayItem("timestampFn", timestampFn.getClass()));
}
Also used : Count(org.apache.beam.sdk.transforms.Count) DoFn(org.apache.beam.sdk.transforms.DoFn) Min(org.apache.beam.sdk.transforms.Min) DisplayData(org.apache.beam.sdk.transforms.display.DisplayData) PAssert(org.apache.beam.sdk.testing.PAssert) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) NeedsRunner(org.apache.beam.sdk.testing.NeedsRunner) PCollection(org.apache.beam.sdk.values.PCollection) Category(org.junit.experimental.categories.Category) PTransform(org.apache.beam.sdk.transforms.PTransform) Max(org.apache.beam.sdk.transforms.Max) Rule(org.junit.Rule) ParDo(org.apache.beam.sdk.transforms.ParDo) Distinct(org.apache.beam.sdk.transforms.Distinct) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Instant(org.joda.time.Instant) Matchers.is(org.hamcrest.Matchers.is) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) DisplayDataMatchers.hasDisplayItem(org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasDisplayItem) Instant(org.joda.time.Instant) Duration(org.joda.time.Duration) DisplayData(org.apache.beam.sdk.transforms.display.DisplayData) Test(org.junit.Test)

Example 30 with PTransform

use of org.apache.beam.sdk.transforms.PTransform in project beam by apache.

The method visitDoesNotVisitSkippedNodes of the class TransformHierarchyTest.

/**
 * Verifies that when a visitor returns {@code DO_NOT_ENTER_TRANSFORM} for a composite node,
 * the hierarchy skips that composite's children entirely: the enclosing node is visited, but
 * the node nested inside it is not.
 */
@Test
public void visitDoesNotVisitSkippedNodes() {
    // Two primitive-output PCollections: `one` is the main input, `two` is an additional
    // (side) input of the consuming transform.
    PCollection<String> one = PCollection.createPrimitiveOutputInternal(pipeline, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, StringUtf8Coder.of());
    final PCollection<Integer> two = PCollection.createPrimitiveOutputInternal(pipeline, WindowingStrategy.globalDefault(), IsBounded.UNBOUNDED, VarIntCoder.of());
    final PDone done = PDone.in(pipeline);
    final TupleTag<String> oneTag = new TupleTag<String>() {
    };
    final TupleTag<Integer> twoTag = new TupleTag<Integer>() {
    };
    final PCollectionTuple oneAndTwo = PCollectionTuple.of(oneTag, one).and(twoTag, two);
    // A transform consuming `one` as main input and declaring `two` as an additional input.
    hierarchy.pushNode("consumes_both", one, new PTransform<PCollection<String>, PDone>() {

        @Override
        public PDone expand(PCollection<String> input) {
            return done;
        }

        @Override
        public Map<TupleTag<?>, PValue> getAdditionalInputs() {
            return Collections.singletonMap(twoTag, two);
        }
    });
    hierarchy.setOutput(done);
    hierarchy.popNode();
    // The transform that produces both collections, nested inside an enclosing composite.
    final PTransform<PBegin, PCollectionTuple> producer = new PTransform<PBegin, PCollectionTuple>() {

        @Override
        public PCollectionTuple expand(PBegin input) {
            return oneAndTwo;
        }
    };
    final Node enclosing = hierarchy.pushNode("encloses_producer", PBegin.in(pipeline), new PTransform<PBegin, PCollectionTuple>() {

        @Override
        public PCollectionTuple expand(PBegin input) {
            return input.apply(producer);
        }
    });
    // `enclosed` lives inside `enclosing`; it should be skipped during the visit below.
    Node enclosed = hierarchy.pushNode("creates_one_and_two", PBegin.in(pipeline), producer);
    hierarchy.setOutput(oneAndTwo);
    hierarchy.popNode();
    hierarchy.setOutput(oneAndTwo);
    hierarchy.popNode();
    final Set<Node> visitedNodes = new HashSet<>();
    // Visitor that refuses to enter `enclosing`, recording every node it does see.
    hierarchy.visit(new PipelineVisitor.Defaults() {

        @Override
        public CompositeBehavior enterCompositeTransform(Node node) {
            visitedNodes.add(node);
            return node.equals(enclosing) ? CompositeBehavior.DO_NOT_ENTER_TRANSFORM : CompositeBehavior.ENTER_TRANSFORM;
        }

        @Override
        public void visitPrimitiveTransform(Node node) {
            visitedNodes.add(node);
        }
    });
    // The skipped composite itself is visited, but nothing nested within it is.
    assertThat(visitedNodes, hasItem(enclosing));
    assertThat(visitedNodes, not(hasItem(enclosed)));
}
Also used : Node(org.apache.beam.sdk.runners.TransformHierarchy.Node) Defaults(org.apache.beam.sdk.Pipeline.PipelineVisitor.Defaults) TupleTag(org.apache.beam.sdk.values.TupleTag) PBegin(org.apache.beam.sdk.values.PBegin) PCollection(org.apache.beam.sdk.values.PCollection) PDone(org.apache.beam.sdk.values.PDone) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) PipelineVisitor(org.apache.beam.sdk.Pipeline.PipelineVisitor) Map(java.util.Map) PTransform(org.apache.beam.sdk.transforms.PTransform) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

PTransform (org.apache.beam.sdk.transforms.PTransform)41 PCollection (org.apache.beam.sdk.values.PCollection)29 Test (org.junit.Test)18 AppliedPTransform (org.apache.beam.sdk.runners.AppliedPTransform)11 PBegin (org.apache.beam.sdk.values.PBegin)11 IOException (java.io.IOException)10 ArrayList (java.util.ArrayList)10 List (java.util.List)10 Map (java.util.Map)10 TupleTag (org.apache.beam.sdk.values.TupleTag)10 DoFn (org.apache.beam.sdk.transforms.DoFn)9 Coder (org.apache.beam.sdk.coders.Coder)8 Create (org.apache.beam.sdk.transforms.Create)8 ParDo (org.apache.beam.sdk.transforms.ParDo)7 PDone (org.apache.beam.sdk.values.PDone)7 PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple)6 Collection (java.util.Collection)5 HashMap (java.util.HashMap)5 Collectors.toList (java.util.stream.Collectors.toList)5 Schema (org.apache.beam.sdk.schemas.Schema)5