Search in sources:

Example 21 with DoFn

use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.

the class DoFnSignatures method analyzeTruncateRestrictionMethod.

/**
 * Validates a {@code @TruncateRestriction} method and describes it as a
 * {@link DoFnSignature.TruncateRestrictionMethod}.
 *
 * <p>The raw return type must be {@code TruncateResult}. Every declared parameter is classified
 * with {@code analyzeExtraParameter}: a schema element parameter is rejected, a restriction
 * parameter must use exactly {@code restrictionT}, and each collected parameter must be one of
 * {@code ALLOWED_TRUNCATE_RESTRICTION_PARAMETERS}.
 *
 * @param errors receives every validation failure found while analyzing {@code m}
 * @param fnT type of the DoFn declaring {@code m}; used to resolve generic parameter types
 * @param m the annotated method under analysis
 * @param inputT input element type, forwarded to parameter analysis and used in error messages
 * @param restrictionT restriction type that any restriction parameter of {@code m} must equal
 * @param fnContext DoFn-wide analysis context forwarded to {@code analyzeExtraParameter}
 * @return descriptor built from {@code m}, its window type and its analyzed parameters
 */
@VisibleForTesting
static DoFnSignature.TruncateRestrictionMethod analyzeTruncateRestrictionMethod(ErrorReporter errors, TypeDescriptor<? extends DoFn<?, ?>> fnT, Method m, TypeDescriptor<?> inputT, TypeDescriptor<?> restrictionT, FnAnalysisContext fnContext) {
    // Method is of the form:
    // @TruncateRestriction
    // TruncateResult<RestrictionT> truncateRestriction(... parameters ...);
    errors.checkArgument(TruncateResult.class.equals(m.getReturnType()), "Must return TruncateResult<Restriction>");
    Type[] params = m.getGenericParameterTypes();
    MethodAnalysisContext methodContext = MethodAnalysisContext.create();
    TypeDescriptor<? extends BoundedWindow> windowT = getWindowType(fnT, m);
    // Loop-invariant: fetched once instead of once per parameter, since every call to
    // Method#getParameterAnnotations hands back newly built arrays.
    java.lang.annotation.Annotation[][] paramAnnotations = m.getParameterAnnotations();
    for (int i = 0; i < params.length; ++i) {
        // NOTE(review): restrictionT is passed in the last position here, where the sibling
        // analyzeNewTrackerMethod passes its outputT - confirm this is intentional.
        Parameter extraParam = analyzeExtraParameter(errors, fnContext, methodContext, ParameterDescription.of(m, i, fnT.resolveType(params[i]), Arrays.asList(paramAnnotations[i])), inputT, restrictionT);
        if (extraParam instanceof SchemaElementParameter) {
            errors.throwIllegalArgument("Schema @%s are not supported for @%s method. Found %s, did you mean to use %s?", format(DoFn.Element.class), format(TruncateRestriction.class), format(((SchemaElementParameter) extraParam).elementT()), format(inputT));
        } else if (extraParam instanceof RestrictionParameter) {
            // The declared restriction type has to match the DoFn's restriction type exactly.
            TypeDescriptor<?> declaredRestrictionT = ((RestrictionParameter) extraParam).restrictionT();
            errors.checkArgument(restrictionT.equals(declaredRestrictionT), "Uses restriction type %s, but @%s method uses restriction type %s", format(declaredRestrictionT), format(DoFn.GetInitialRestriction.class), format(restrictionT));
        }
        methodContext.addParameter(extraParam);
    }
    // Second pass: only parameter kinds on the allow-list may appear on this method.
    for (Parameter parameter : methodContext.getExtraParameters()) {
        checkParameterOneOf(errors, parameter, ALLOWED_TRUNCATE_RESTRICTION_PARAMETERS);
    }
    return DoFnSignature.TruncateRestrictionMethod.create(m, windowT, methodContext.getExtraParameters());
}
Also used : RestrictionParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.RestrictionParameter) Type(java.lang.reflect.Type) ParameterizedType(java.lang.reflect.ParameterizedType) TruncateRestriction(org.apache.beam.sdk.transforms.DoFn.TruncateRestriction) DoFn(org.apache.beam.sdk.transforms.DoFn) Parameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter) WatermarkEstimatorParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WatermarkEstimatorParameter) BundleFinalizerParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.BundleFinalizerParameter) PipelineOptionsParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.PipelineOptionsParameter) WindowParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WindowParameter) TimerFamilyParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.TimerFamilyParameter) TypeParameter(org.apache.beam.sdk.values.TypeParameter) RestrictionTrackerParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.RestrictionTrackerParameter) TimerParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.TimerParameter) WatermarkEstimatorStateParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WatermarkEstimatorStateParameter) SchemaElementParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.SchemaElementParameter) RestrictionParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.RestrictionParameter) StateParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.StateParameter) TruncateResult(org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker.TruncateResult) SchemaElementParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.SchemaElementParameter) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 22 with DoFn

use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.

the class DoFnSignatures method analyzeNewTrackerMethod.

/**
 * Validates a {@code @NewTracker} method and describes it as a
 * {@link DoFnSignature.NewTrackerMethod}.
 *
 * <p>The resolved return type must be a subtype of the tracker type that
 * {@code restrictionTrackerTypeOf(restrictionT)} yields. Every declared parameter is classified
 * with {@code analyzeExtraParameter}: a schema element parameter is rejected, a restriction
 * parameter must use exactly {@code restrictionT}, and each collected parameter must be one of
 * {@code ALLOWED_NEW_TRACKER_PARAMETERS}.
 *
 * @param errors receives every validation failure found while analyzing {@code m}
 * @param fnT type of the DoFn declaring {@code m}; used to resolve generic types
 * @param m the annotated method under analysis
 * @param inputT input element type, forwarded to parameter analysis and used in error messages
 * @param outputT output element type, forwarded to parameter analysis
 * @param restrictionT restriction type that any restriction parameter of {@code m} must equal
 * @param fnContext DoFn-wide analysis context forwarded to {@code analyzeExtraParameter}
 * @return descriptor built from {@code m}, its resolved tracker type, its window type and its
 *     analyzed parameters
 */
@VisibleForTesting
static DoFnSignature.NewTrackerMethod analyzeNewTrackerMethod(ErrorReporter errors, TypeDescriptor<? extends DoFn<?, ?>> fnT, Method m, TypeDescriptor<?> inputT, TypeDescriptor<?> outputT, TypeDescriptor<?> restrictionT, FnAnalysisContext fnContext) {
    // Method is of the form:
    // @NewTracker
    // TrackerT newTracker(... parameters ...);
    Type[] params = m.getGenericParameterTypes();
    TypeDescriptor<?> trackerT = fnT.resolveType(m.getGenericReturnType());
    TypeDescriptor<?> expectedTrackerT = restrictionTrackerTypeOf(restrictionT);
    errors.checkArgument(trackerT.isSubtypeOf(expectedTrackerT), "Returns %s, but must return a subtype of %s", format(trackerT), format(expectedTrackerT));
    MethodAnalysisContext methodContext = MethodAnalysisContext.create();
    TypeDescriptor<? extends BoundedWindow> windowT = getWindowType(fnT, m);
    // Loop-invariant: fetched once instead of once per parameter, since every call to
    // Method#getParameterAnnotations hands back newly built arrays.
    java.lang.annotation.Annotation[][] paramAnnotations = m.getParameterAnnotations();
    for (int i = 0; i < params.length; ++i) {
        Parameter extraParam = analyzeExtraParameter(errors, fnContext, methodContext, ParameterDescription.of(m, i, fnT.resolveType(params[i]), Arrays.asList(paramAnnotations[i])), inputT, outputT);
        if (extraParam instanceof SchemaElementParameter) {
            errors.throwIllegalArgument("Schema @%s are not supported for @%s method. Found %s, did you mean to use %s?", format(DoFn.Element.class), format(DoFn.NewTracker.class), format(((SchemaElementParameter) extraParam).elementT()), format(inputT));
        } else if (extraParam instanceof RestrictionParameter) {
            // The declared restriction type has to match the DoFn's restriction type exactly.
            TypeDescriptor<?> declaredRestrictionT = ((RestrictionParameter) extraParam).restrictionT();
            errors.checkArgument(restrictionT.equals(declaredRestrictionT), "Uses restriction type %s, but @%s method uses restriction type %s", format(declaredRestrictionT), format(DoFn.GetInitialRestriction.class), format(restrictionT));
        }
        methodContext.addParameter(extraParam);
    }
    // Second pass: only parameter kinds on the allow-list may appear on this method.
    for (Parameter parameter : methodContext.getExtraParameters()) {
        checkParameterOneOf(errors, parameter, ALLOWED_NEW_TRACKER_PARAMETERS);
    }
    // Reuse the tracker type resolved above rather than resolving the return type a second time.
    return DoFnSignature.NewTrackerMethod.create(m, trackerT, windowT, methodContext.getExtraParameters());
}
Also used : RestrictionParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.RestrictionParameter) Type(java.lang.reflect.Type) ParameterizedType(java.lang.reflect.ParameterizedType) DoFn(org.apache.beam.sdk.transforms.DoFn) Parameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter) WatermarkEstimatorParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WatermarkEstimatorParameter) BundleFinalizerParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.BundleFinalizerParameter) PipelineOptionsParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.PipelineOptionsParameter) WindowParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WindowParameter) TimerFamilyParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.TimerFamilyParameter) TypeParameter(org.apache.beam.sdk.values.TypeParameter) RestrictionTrackerParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.RestrictionTrackerParameter) TimerParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.TimerParameter) WatermarkEstimatorStateParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WatermarkEstimatorStateParameter) SchemaElementParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.SchemaElementParameter) RestrictionParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.RestrictionParameter) StateParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.StateParameter) SchemaElementParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.SchemaElementParameter) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 23 with DoFn

use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.

the class PipelineTest method testConflictingNames.

/**
 * Applies two anonymous {@code ParDo}s without explicit names and expects an
 * {@link IllegalStateException} whose message lists the conflicting transform instances.
 */
@Test
public void testConflictingNames() {
    // A hand-written matcher is more readable than a regex for this multi-line message.
    final BaseMatcher<String> listsConflictingInstances = new BaseMatcher<String>() {

        @Override
        public boolean matches(final Object candidate) {
            /*
             example value (first 2 lines are a single one):

             Pipeline update will not be possible because the following transforms do not have stable
             unique names: ParDo(Anonymous)2.

             Conflicting instances:
             - name=ParDo(Anonymous):
                 - org.apache.beam.sdk.PipelineTest$3@75d2da2d
                 - org.apache.beam.sdk.PipelineTest$2@4278284b

             You can fix it adding a name when you call apply(): pipeline.apply(<name>, <transform>).
            */
            // Anonymous-class indices and identity hashes vary between runs; normalize them.
            final String message = (String) candidate;
            final String normalized = message.replaceAll("\\$[\\p{Alnum}]+@[\\p{Alnum}]+", "\\$x@y");
            final String expectedFragment =
                "Conflicting instances:\n"
                    + "- name=ParDo(Anonymous):\n"
                    + "    - org.apache.beam.sdk.PipelineTest$x@y\n"
                    + "    - org.apache.beam.sdk.PipelineTest$x@y\n\n"
                    + "You can fix it adding a name when you call apply(): "
                    + "pipeline.apply(<name>, <transform>).";
            return normalized.contains(expectedFragment);
        }

        @Override
        public void describeTo(final Description description) {
            description.appendText("validates the conflicting instances are listed into the exception message");
        }
    };
    // Both DoFns stay anonymous on purpose so the two ParDos end up with the same default name.
    final DoFn<String, String> echoFn = new DoFn<String, String>() {

        @ProcessElement
        public void onElement(final ProcessContext ctx) {
            ctx.output(ctx.element());
        }
    };
    final DoFn<String, String> noOpFn = new DoFn<String, String>() {

        @ProcessElement
        public void onElement(final ProcessContext ctx) {
            // no-op
        }
    };

    final PipelineOptions options = TestPipeline.testingPipelineOptions();
    final Pipeline p = Pipeline.create(options);
    // Check pipeline runner correctly catches user errors.
    thrown.expect(IllegalStateException.class);
    thrown.expectMessage(listsConflictingInstances);
    p.apply(Create.of("a"))
        .apply(ParDo.of(echoFn))
        .apply(ParDo.of(noOpFn));
    p.run();
}
Also used : Description(org.hamcrest.Description) DoFn(org.apache.beam.sdk.transforms.DoFn) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Matchers.containsString(org.hamcrest.Matchers.containsString) PTransformOverride(org.apache.beam.sdk.runners.PTransformOverride) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Test(org.junit.Test)

Example 24 with DoFn

use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.

the class TransformHierarchyTest method visitAfterReplace.

/**
 * Tests that visiting the {@link TransformHierarchy} after replacing nodes does not visit any of
 * the original nodes or inaccessible values but does visit all of the replacement nodes, new
 * inaccessible replacement values, and the original output values.
 *
 * <p>The test registers an "Upstream" node and an "Original" ParDo node, replaces "Original" with
 * a composite that contains a multi-output ParDo, maps the replacement output back onto the
 * original output, and then checks which nodes and values a visitor sees.
 */
@Test
public void visitAfterReplace() {
    // Captured before any node is pushed; asserted below as one of the visited composites.
    Node root = hierarchy.getCurrent();
    // The transform that will later be replaced: emits each element plus one.
    final SingleOutput<Long, Long> originalParDo = ParDo.of(new DoFn<Long, Long>() {

        @ProcessElement
        public void processElement(ProcessContext ctxt) {
            ctxt.output(ctxt.element() + 1L);
        }
    });
    GenerateSequence genUpstream = GenerateSequence.from(0);
    // Apply the transforms to the pipeline to obtain the PCollections used as node inputs/outputs.
    PCollection<Long> upstream = pipeline.apply(genUpstream);
    PCollection<Long> output = upstream.apply("Original", originalParDo);
    // Register "Upstream" in the hierarchy under test: push, finish input, set output, pop.
    Node upstreamNode = hierarchy.pushNode("Upstream", pipeline.begin(), genUpstream);
    hierarchy.finishSpecifyingInput();
    hierarchy.setOutput(upstream);
    hierarchy.popNode();
    // Register "Original" the same way; this is the node handed to replaceNode below.
    Node original = hierarchy.pushNode("Original", upstream, originalParDo);
    hierarchy.finishSpecifyingInput();
    hierarchy.setOutput(output);
    hierarchy.popNode();
    final TupleTag<Long> longs = new TupleTag<>();
    // Replacement ParDo: same per-element logic, but multi-output with `longs` as the main tag
    // and no additional output tags.
    final MultiOutput<Long, Long> replacementParDo = ParDo.of(new DoFn<Long, Long>() {

        @ProcessElement
        public void processElement(ProcessContext ctxt) {
            ctxt.output(ctxt.element() + 1L);
        }
    }).withOutputTags(longs, TupleTagList.empty());
    // Composite wrapper that applies the replacement ParDo and exposes only its `longs` output.
    PTransform<PCollection<Long>, PCollection<Long>> replacementComposite = new PTransform<PCollection<Long>, PCollection<Long>>() {

        @Override
        public PCollection<Long> expand(PCollection<Long> input) {
            return input.apply("Contained", replacementParDo).get(longs);
        }
    };
    PCollectionTuple replacementOutput = upstream.apply("Contained", replacementParDo);
    // Swap the "Original" node for the composite, then register the contained ParDo beneath it.
    Node compositeNode = hierarchy.replaceNode(original, upstream, replacementComposite);
    Node replacementParNode = hierarchy.pushNode("Original/Contained", upstream, replacementParDo);
    hierarchy.finishSpecifyingInput();
    hierarchy.setOutput(replacementOutput);
    hierarchy.popNode();
    // NOTE(review): after the pop above this presumably sets the output of the composite node
    // that replaceNode made current - confirm against TransformHierarchy.
    hierarchy.setOutput(replacementOutput.get(longs));
    // Raw cast of the expanded outputs; the tuple is expected to hold exactly one entry, which
    // Iterables.getOnlyElement enforces.
    Map<TupleTag<?>, PCollection<?>> expandedReplacementOutput = (Map) replacementOutput.expand();
    Entry<TupleTag<?>, PCollection<?>> replacementLongs = Iterables.getOnlyElement(expandedReplacementOutput.entrySet());
    // Map the replacement's `longs` output onto the original `output` value.
    hierarchy.replaceOutputs(Collections.singletonMap(replacementOutput.get(longs), ReplacementOutput.of(TaggedPValue.ofExpandedValue(output), TaggedPValue.of(replacementLongs.getKey(), replacementLongs.getValue()))));
    hierarchy.popNode();
    // Collect every composite and primitive node the visitor is shown.
    final Set<Node> visitedCompositeNodes = new HashSet<>();
    final Set<Node> visitedPrimitiveNodes = new HashSet<>();
    Set<PValue> visitedValues = hierarchy.visit(new Defaults() {

        @Override
        public CompositeBehavior enterCompositeTransform(Node node) {
            visitedCompositeNodes.add(node);
            return CompositeBehavior.ENTER_TRANSFORM;
        }

        @Override
        public void visitPrimitiveTransform(Node node) {
            visitedPrimitiveNodes.add(node);
        }
    });
    /*
    Final Graph:
    Upstream -> Upstream.out -> Composite -> (ReplacementParDo -> OriginalParDo.out)
    */
    // The replaced `original` node must not appear; the original `output` value must.
    assertThat(visitedCompositeNodes, containsInAnyOrder(root, compositeNode));
    assertThat(visitedPrimitiveNodes, containsInAnyOrder(upstreamNode, replacementParNode));
    assertThat(visitedValues, containsInAnyOrder(upstream, output));
}
Also used : Node(org.apache.beam.sdk.runners.TransformHierarchy.Node) TupleTag(org.apache.beam.sdk.values.TupleTag) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) PTransform(org.apache.beam.sdk.transforms.PTransform) HashSet(java.util.HashSet) TaggedPValue(org.apache.beam.sdk.values.TaggedPValue) PValue(org.apache.beam.sdk.values.PValue) GenerateSequence(org.apache.beam.sdk.io.GenerateSequence) PCollection(org.apache.beam.sdk.values.PCollection) DoFn(org.apache.beam.sdk.transforms.DoFn) Defaults(org.apache.beam.sdk.Pipeline.PipelineVisitor.Defaults) Map(java.util.Map) Test(org.junit.Test)

Example 25 with DoFn

use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.

the class GroupTest method testAggregateLogicalValuesGlobally.

/**
 * Globally aggregates the "enumeration" field of two {@code BasicEnum} elements with a sampling
 * combiner and asserts that the resulting array field holds both enumeration values.
 */
@Test
@Category(NeedsRunner.class)
public void testAggregateLogicalValuesGlobally() {
    // Output field: an array of the enumeration logical type.
    Field sampleListField = Field.of("sampleList", FieldType.array(FieldType.logicalType(BASIC_ENUM_ENUMERATION)));
    CombineFn<EnumerationType.Value, ?, Iterable<EnumerationType.Value>> sampler = Sample.anyCombineFn(100);
    Collection<BasicEnum> input = Lists.newArrayList(BasicEnum.of("a", BasicEnum.TestEnum.ONE), BasicEnum.of("a", BasicEnum.TestEnum.TWO));

    // Verification step: emits nothing, only checks the aggregated row's first field.
    DoFn<Row, List<Integer>> verifySampledValues = new DoFn<Row, List<Integer>>() {

        @ProcessElement
        public void process(@Element Row value) {
            assertThat(value.getArray(0), containsInAnyOrder(BASIC_ENUM_ENUMERATION.valueOf(1), BASIC_ENUM_ENUMERATION.valueOf(2)));
        }
    };

    pipeline
        .apply(Create.of(input))
        .apply(Group.<BasicEnum>globally().aggregateField("enumeration", sampler, sampleListField))
        .apply(ParDo.of(verifySampledValues));
    pipeline.run();
}
Also used : Field(org.apache.beam.sdk.schemas.Schema.Field) DoFn(org.apache.beam.sdk.transforms.DoFn) AutoValue(com.google.auto.value.AutoValue) EnumerationType(org.apache.beam.sdk.schemas.logicaltypes.EnumerationType) Row(org.apache.beam.sdk.values.Row) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Aggregations

DoFn (org.apache.beam.sdk.transforms.DoFn): 154, Test (org.junit.Test): 98, Pipeline (org.apache.beam.sdk.Pipeline): 60, KV (org.apache.beam.sdk.values.KV): 45, TupleTag (org.apache.beam.sdk.values.TupleTag): 28, StateSpec (org.apache.beam.sdk.state.StateSpec): 26, Instant (org.joda.time.Instant): 26, ArrayList (java.util.ArrayList): 23, TestPipeline (org.apache.beam.sdk.testing.TestPipeline): 23, BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow): 22, PCollection (org.apache.beam.sdk.values.PCollection): 21, TimerSpec (org.apache.beam.sdk.state.TimerSpec): 19, WindowedValue (org.apache.beam.sdk.util.WindowedValue): 18, PCollectionView (org.apache.beam.sdk.values.PCollectionView): 18, HashMap (java.util.HashMap): 17, Coder (org.apache.beam.sdk.coders.Coder): 17, List (java.util.List): 16, Map (java.util.Map): 14, ValueState (org.apache.beam.sdk.state.ValueState): 14, RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi): 13