Use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.
The class DoFnSignatures, method analyzeTruncateRestrictionMethod:
@VisibleForTesting
static DoFnSignature.TruncateRestrictionMethod analyzeTruncateRestrictionMethod(
    ErrorReporter errors,
    TypeDescriptor<? extends DoFn<?, ?>> fnT,
    Method m,
    TypeDescriptor<?> inputT,
    TypeDescriptor<?> restrictionT,
    FnAnalysisContext fnContext) {
  // Method is of the form:
  // @TruncateRestriction
  // TruncateResult<RestrictionT> truncateRestriction(... parameters ...);
  errors.checkArgument(
      TruncateResult.class.equals(m.getReturnType()), "Must return TruncateResult<Restriction>");
  Type[] params = m.getGenericParameterTypes();
  MethodAnalysisContext methodContext = MethodAnalysisContext.create();
  TypeDescriptor<? extends BoundedWindow> windowT = getWindowType(fnT, m);
  for (int i = 0; i < params.length; ++i) {
    Parameter extraParam =
        analyzeExtraParameter(
            errors,
            fnContext,
            methodContext,
            ParameterDescription.of(
                m, i, fnT.resolveType(params[i]), Arrays.asList(m.getParameterAnnotations()[i])),
            inputT,
            restrictionT);
    if (extraParam instanceof SchemaElementParameter) {
      errors.throwIllegalArgument(
          "Schema @%s are not supported for @%s method. Found %s, did you mean to use %s?",
          format(DoFn.Element.class),
          format(TruncateRestriction.class),
          format(((SchemaElementParameter) extraParam).elementT()),
          format(inputT));
    } else if (extraParam instanceof RestrictionParameter) {
      errors.checkArgument(
          restrictionT.equals(((RestrictionParameter) extraParam).restrictionT()),
          "Uses restriction type %s, but @%s method uses restriction type %s",
          format(((RestrictionParameter) extraParam).restrictionT()),
          format(DoFn.GetInitialRestriction.class),
          format(restrictionT));
    }
    methodContext.addParameter(extraParam);
  }
  for (Parameter parameter : methodContext.getExtraParameters()) {
    checkParameterOneOf(errors, parameter, ALLOWED_TRUNCATE_RESTRICTION_PARAMETERS);
  }
  return DoFnSignature.TruncateRestrictionMethod.create(
      m, windowT, methodContext.getExtraParameters());
}
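For orientation, here is a minimal sketch of a splittable DoFn whose @TruncateRestriction method would pass this analysis. The class name, range bounds, and the use of the built-in OffsetRange/OffsetRangeTracker pair are illustrative assumptions, not something the analyzer mandates:

import org.apache.beam.sdk.io.range.OffsetRange;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.splittabledofn.OffsetRangeTracker;
import org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker;

class TruncatingReadFn extends DoFn<String, Long> {

  @GetInitialRestriction
  public OffsetRange getInitialRestriction(@Element String element) {
    return new OffsetRange(0, 100);
  }

  @NewTracker
  public OffsetRangeTracker newTracker(@Restriction OffsetRange restriction) {
    return new OffsetRangeTracker(restriction);
  }

  // Satisfies both checks above: the raw return type is TruncateResult, and the
  // @Restriction parameter matches the type produced by @GetInitialRestriction.
  @TruncateRestriction
  public TruncateResult<OffsetRange> truncateRestriction(@Restriction OffsetRange restriction) {
    // Truncate to an empty range so a draining pipeline stops reading promptly.
    return TruncateResult.of(new OffsetRange(restriction.getFrom(), restriction.getFrom()));
  }

  @ProcessElement
  public void process(RestrictionTracker<OffsetRange, Long> tracker, OutputReceiver<Long> out) {
    for (long i = tracker.currentRestriction().getFrom(); tracker.tryClaim(i); ++i) {
      out.output(i);
    }
  }
}

Note that the analyzer only verifies the method's shape; what the restriction is truncated to is entirely up to the DoFn.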
Use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.
The class DoFnSignatures, method analyzeNewTrackerMethod:
@VisibleForTesting
static DoFnSignature.NewTrackerMethod analyzeNewTrackerMethod(
    ErrorReporter errors,
    TypeDescriptor<? extends DoFn<?, ?>> fnT,
    Method m,
    TypeDescriptor<?> inputT,
    TypeDescriptor<?> outputT,
    TypeDescriptor<?> restrictionT,
    FnAnalysisContext fnContext) {
  // Method is of the form:
  // @NewTracker
  // TrackerT newTracker(... parameters ...);
  Type[] params = m.getGenericParameterTypes();
  TypeDescriptor<?> trackerT = fnT.resolveType(m.getGenericReturnType());
  TypeDescriptor<?> expectedTrackerT = restrictionTrackerTypeOf(restrictionT);
  errors.checkArgument(
      trackerT.isSubtypeOf(expectedTrackerT),
      "Returns %s, but must return a subtype of %s",
      format(trackerT),
      format(expectedTrackerT));
  MethodAnalysisContext methodContext = MethodAnalysisContext.create();
  TypeDescriptor<? extends BoundedWindow> windowT = getWindowType(fnT, m);
  for (int i = 0; i < params.length; ++i) {
    Parameter extraParam =
        analyzeExtraParameter(
            errors,
            fnContext,
            methodContext,
            ParameterDescription.of(
                m, i, fnT.resolveType(params[i]), Arrays.asList(m.getParameterAnnotations()[i])),
            inputT,
            outputT);
    if (extraParam instanceof SchemaElementParameter) {
      errors.throwIllegalArgument(
          "Schema @%s are not supported for @%s method. Found %s, did you mean to use %s?",
          format(DoFn.Element.class),
          format(DoFn.NewTracker.class),
          format(((SchemaElementParameter) extraParam).elementT()),
          format(inputT));
    } else if (extraParam instanceof RestrictionParameter) {
      errors.checkArgument(
          restrictionT.equals(((RestrictionParameter) extraParam).restrictionT()),
          "Uses restriction type %s, but @%s method uses restriction type %s",
          format(((RestrictionParameter) extraParam).restrictionT()),
          format(DoFn.GetInitialRestriction.class),
          format(restrictionT));
    }
    methodContext.addParameter(extraParam);
  }
  for (Parameter parameter : methodContext.getExtraParameters()) {
    checkParameterOneOf(errors, parameter, ALLOWED_NEW_TRACKER_PARAMETERS);
  }
  return DoFnSignature.NewTrackerMethod.create(
      m, fnT.resolveType(m.getGenericReturnType()), windowT, methodContext.getExtraParameters());
}
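The subtype check above (trackerT.isSubtypeOf(expectedTrackerT)) is what lets a @NewTracker method declare a concrete tracker class rather than the RestrictionTracker interface itself. A hedged sketch, again assuming the built-in OffsetRange/OffsetRangeTracker pair; the DoFn itself is illustrative:

import org.apache.beam.sdk.io.range.OffsetRange;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.splittabledofn.OffsetRangeTracker;
import org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker;

class PerCharFn extends DoFn<String, String> {

  @GetInitialRestriction
  public OffsetRange getInitialRestriction(@Element String element) {
    return new OffsetRange(0, element.length());
  }

  // OffsetRangeTracker extends RestrictionTracker<OffsetRange, Long>, so this return
  // type passes the subtype check when restrictionT is OffsetRange.
  @NewTracker
  public OffsetRangeTracker newTracker(@Restriction OffsetRange restriction) {
    return new OffsetRangeTracker(restriction);
  }

  @ProcessElement
  public void process(
      @Element String element,
      RestrictionTracker<OffsetRange, Long> tracker,
      OutputReceiver<String> out) {
    for (long i = tracker.currentRestriction().getFrom(); tracker.tryClaim(i); ++i) {
      out.output(element.substring((int) i, (int) i + 1));
    }
  }
}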
Use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.
The class PipelineTest, method testConflictingNames:
@Test
public void testConflictingNames() {
  final PipelineOptions options = TestPipeline.testingPipelineOptions();
  final Pipeline p = Pipeline.create(options);
  // Check that the pipeline runner correctly catches user errors.
  thrown.expect(IllegalStateException.class);
  thrown.expectMessage(
      new BaseMatcher<String>() { // more readable than a regex
        @Override
        public void describeTo(final Description description) {
          description.appendText(
              "validates that the conflicting instances are listed in the exception message");
        }

        @Override
        public boolean matches(final Object o) {
          /*
           * Example value (the first two lines are a single one):
           *
           * Pipeline update will not be possible because the following transforms do not have stable
           * unique names: ParDo(Anonymous)2.
           *
           * Conflicting instances:
           * - name=ParDo(Anonymous):
           *   - org.apache.beam.sdk.PipelineTest$3@75d2da2d
           *   - org.apache.beam.sdk.PipelineTest$2@4278284b
           *
           * You can fix it adding a name when you call apply(): pipeline.apply(<name>, <transform>).
           */
          final String sanitized =
              String.class.cast(o).replaceAll("\\$[\\p{Alnum}]+@[\\p{Alnum}]+", "\\$x@y");
          return sanitized.contains(
              "Conflicting instances:\n"
                  + "- name=ParDo(Anonymous):\n"
                  + " - org.apache.beam.sdk.PipelineTest$x@y\n"
                  + " - org.apache.beam.sdk.PipelineTest$x@y\n\n"
                  + "You can fix it adding a name when you call apply(): "
                  + "pipeline.apply(<name>, <transform>).");
        }
      });
  p.apply(Create.of("a"))
      .apply(
          ParDo.of(
              new DoFn<String, String>() {
                @ProcessElement
                public void onElement(final ProcessContext ctx) {
                  ctx.output(ctx.element());
                }
              }))
      .apply(
          ParDo.of(
              new DoFn<String, String>() {
                @ProcessElement
                public void onElement(final ProcessContext ctx) {
                  // no-op
                }
              }));
  p.run();
}
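The remedy named in the expected message is the two-argument apply(). A minimal sketch of the corrected pipeline; the transform names are illustrative, the only requirement is that they are stable and unique:

p.apply(Create.of("a"))
    .apply(
        "EmitElement", // explicit, stable, unique name
        ParDo.of(
            new DoFn<String, String>() {
              @ProcessElement
              public void onElement(final ProcessContext ctx) {
                ctx.output(ctx.element());
              }
            }))
    .apply(
        "DropElement",
        ParDo.of(
            new DoFn<String, String>() {
              @ProcessElement
              public void onElement(final ProcessContext ctx) {
                // no-op
              }
            }));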
Use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.
The class TransformHierarchyTest, method visitAfterReplace:
/**
* Tests that visiting the {@link TransformHierarchy} after replacing nodes does not visit any of
* the original nodes or inaccessible values but does visit all of the replacement nodes, new
* inaccessible replacement values, and the original output values.
*/
@Test
public void visitAfterReplace() {
  Node root = hierarchy.getCurrent();
  final SingleOutput<Long, Long> originalParDo =
      ParDo.of(
          new DoFn<Long, Long>() {
            @ProcessElement
            public void processElement(ProcessContext ctxt) {
              ctxt.output(ctxt.element() + 1L);
            }
          });
  GenerateSequence genUpstream = GenerateSequence.from(0);
  PCollection<Long> upstream = pipeline.apply(genUpstream);
  PCollection<Long> output = upstream.apply("Original", originalParDo);
  Node upstreamNode = hierarchy.pushNode("Upstream", pipeline.begin(), genUpstream);
  hierarchy.finishSpecifyingInput();
  hierarchy.setOutput(upstream);
  hierarchy.popNode();
  Node original = hierarchy.pushNode("Original", upstream, originalParDo);
  hierarchy.finishSpecifyingInput();
  hierarchy.setOutput(output);
  hierarchy.popNode();
  final TupleTag<Long> longs = new TupleTag<>();
  final MultiOutput<Long, Long> replacementParDo =
      ParDo.of(
              new DoFn<Long, Long>() {
                @ProcessElement
                public void processElement(ProcessContext ctxt) {
                  ctxt.output(ctxt.element() + 1L);
                }
              })
          .withOutputTags(longs, TupleTagList.empty());
  PTransform<PCollection<Long>, PCollection<Long>> replacementComposite =
      new PTransform<PCollection<Long>, PCollection<Long>>() {
        @Override
        public PCollection<Long> expand(PCollection<Long> input) {
          return input.apply("Contained", replacementParDo).get(longs);
        }
      };
  PCollectionTuple replacementOutput = upstream.apply("Contained", replacementParDo);
  Node compositeNode = hierarchy.replaceNode(original, upstream, replacementComposite);
  Node replacementParNode = hierarchy.pushNode("Original/Contained", upstream, replacementParDo);
  hierarchy.finishSpecifyingInput();
  hierarchy.setOutput(replacementOutput);
  hierarchy.popNode();
  hierarchy.setOutput(replacementOutput.get(longs));
  Map<TupleTag<?>, PCollection<?>> expandedReplacementOutput = (Map) replacementOutput.expand();
  Entry<TupleTag<?>, PCollection<?>> replacementLongs =
      Iterables.getOnlyElement(expandedReplacementOutput.entrySet());
  hierarchy.replaceOutputs(
      Collections.singletonMap(
          replacementOutput.get(longs),
          ReplacementOutput.of(
              TaggedPValue.ofExpandedValue(output),
              TaggedPValue.of(replacementLongs.getKey(), replacementLongs.getValue()))));
  hierarchy.popNode();
  final Set<Node> visitedCompositeNodes = new HashSet<>();
  final Set<Node> visitedPrimitiveNodes = new HashSet<>();
  Set<PValue> visitedValues =
      hierarchy.visit(
          new Defaults() {
            @Override
            public CompositeBehavior enterCompositeTransform(Node node) {
              visitedCompositeNodes.add(node);
              return CompositeBehavior.ENTER_TRANSFORM;
            }

            @Override
            public void visitPrimitiveTransform(Node node) {
              visitedPrimitiveNodes.add(node);
            }
          });
  /*
   * Final graph:
   * Upstream -> Upstream.out -> Composite -> (ReplacementParDo -> OriginalParDo.out)
   */
  assertThat(visitedCompositeNodes, containsInAnyOrder(root, compositeNode));
  assertThat(visitedPrimitiveNodes, containsInAnyOrder(upstreamNode, replacementParNode));
  assertThat(visitedValues, containsInAnyOrder(upstream, output));
}
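TransformHierarchy is a runner-facing internal; user code reaches the same visitation through Pipeline#traverseTopologically with a PipelineVisitor. A minimal sketch, assuming a fully specified Pipeline p:

p.traverseTopologically(
    new Pipeline.PipelineVisitor.Defaults() {
      @Override
      public CompositeBehavior enterCompositeTransform(TransformHierarchy.Node node) {
        System.out.println("composite: " + node.getFullName());
        // Descend into the composite so its children are visited too.
        return CompositeBehavior.ENTER_TRANSFORM;
      }

      @Override
      public void visitPrimitiveTransform(TransformHierarchy.Node node) {
        System.out.println("primitive: " + node.getFullName());
      }
    });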
Use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.
The class GroupTest, method testAggregateLogicalValuesGlobally:
@Test
@Category(NeedsRunner.class)
public void testAggregateLogicalValuesGlobally() {
  Collection<BasicEnum> elements =
      Lists.newArrayList(
          BasicEnum.of("a", BasicEnum.TestEnum.ONE), BasicEnum.of("a", BasicEnum.TestEnum.TWO));
  CombineFn<EnumerationType.Value, ?, Iterable<EnumerationType.Value>> sampleAnyCombineFn =
      Sample.anyCombineFn(100);
  Field aggField =
      Field.of("sampleList", FieldType.array(FieldType.logicalType(BASIC_ENUM_ENUMERATION)));
  pipeline
      .apply(Create.of(elements))
      .apply(
          Group.<BasicEnum>globally().aggregateField("enumeration", sampleAnyCombineFn, aggField))
      .apply(
          ParDo.of(
              new DoFn<Row, List<Integer>>() {
                @ProcessElement
                public void process(@Element Row value) {
                  assertThat(
                      value.getArray(0),
                      containsInAnyOrder(
                          BASIC_ENUM_ENUMERATION.valueOf(1), BASIC_ENUM_ENUMERATION.valueOf(2)));
                }
              }));
  pipeline.run();
}
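aggregateField pairs an input field name with a CombineFn and a descriptor for the output field. The same pattern on a plain string field, as a sketch assuming a hypothetical schema-aware PCollection<Row> named users with a String field "userId" (all names here are illustrative):

PCollection<Row> sampledUsers =
    users.apply(
        Group.<Row>globally()
            .aggregateField(
                "userId", // input field to aggregate
                Sample.<String>anyCombineFn(10), // keep up to 10 arbitrary values
                Field.of("sampledUserIds", FieldType.array(FieldType.STRING))));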