Use of org.apache.beam.sdk.values.PValue in project beam by apache.
From class ReplacementOutputsTest, method singletonSucceeds:
@Test
public void singletonSucceeds() {
  Map<PValue, ReplacementOutput> replacements =
      ReplacementOutputs.singleton(ints.expand(), replacementInts);
  assertThat(replacements, Matchers.<PValue>hasKey(replacementInts));
  ReplacementOutput replacement = replacements.get(replacementInts);
  Map.Entry<TupleTag<?>, PValue> taggedInts =
      Iterables.getOnlyElement(ints.expand().entrySet());
  assertThat(
      replacement.getOriginal().getTag(),
      Matchers.<TupleTag<?>>equalTo(taggedInts.getKey()));
  assertThat(replacement.getOriginal().getValue(), equalTo(taggedInts.getValue()));
  assertThat(
      replacement.getReplacement().getValue(),
      Matchers.<PValue>equalTo(replacementInts));
}
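In runner construction code, this helper is typically invoked from a PTransformOverrideFactory when a replaced transform produces exactly one output. A minimal sketch, assuming a hypothetical factory whose replacement emits a single PCollection<Integer>:

@Override
public Map<PValue, ReplacementOutput> mapOutputs(
    Map<TupleTag<?>, PValue> outputs, PCollection<Integer> newOutput) {
  // Pair the single original tagged output with the single replacement value,
  // as exercised by the test above.
  return ReplacementOutputs.singleton(outputs, newOutput);
}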
Use of org.apache.beam.sdk.values.PValue in project beam by apache.
From class SdkComponentsTest, method translatePipeline:
@Test
public void translatePipeline() {
  BigEndianLongCoder customCoder = BigEndianLongCoder.of();
  PCollection<Long> elems = pipeline.apply(GenerateSequence.from(0L).to(207L));
  PCollection<Long> counted = elems.apply(Count.<Long>globally()).setCoder(customCoder);
  PCollection<Long> windowed =
      counted.apply(
          Window.<Long>into(FixedWindows.of(Duration.standardMinutes(7)))
              .triggering(
                  AfterWatermark.pastEndOfWindow()
                      .withEarlyFirings(AfterPane.elementCountAtLeast(19)))
              .accumulatingFiredPanes()
              .withAllowedLateness(Duration.standardMinutes(3L)));
  final WindowingStrategy<?, ?> windowedStrategy = windowed.getWindowingStrategy();
  PCollection<KV<String, Long>> keyed = windowed.apply(WithKeys.<String, Long>of("foo"));
  PCollection<KV<String, Iterable<Long>>> grouped =
      keyed.apply(GroupByKey.<String, Long>create());
  final RunnerApi.Pipeline pipelineProto = SdkComponents.translatePipeline(pipeline);
  pipeline.traverseTopologically(
      new PipelineVisitor.Defaults() {
        Set<Node> transforms = new HashSet<>();
        Set<PCollection<?>> pcollections = new HashSet<>();
        Set<Equivalence.Wrapper<? extends Coder<?>>> coders = new HashSet<>();
        Set<WindowingStrategy<?, ?>> windowingStrategies = new HashSet<>();

        @Override
        public void leaveCompositeTransform(Node node) {
          if (node.isRootNode()) {
            assertThat(
                "Unexpected number of PTransforms",
                pipelineProto.getComponents().getTransformsCount(),
                equalTo(transforms.size()));
            assertThat(
                "Unexpected number of PCollections",
                pipelineProto.getComponents().getPcollectionsCount(),
                equalTo(pcollections.size()));
            assertThat(
                "Unexpected number of Coders",
                pipelineProto.getComponents().getCodersCount(),
                equalTo(coders.size()));
            assertThat(
                "Unexpected number of Windowing Strategies",
                pipelineProto.getComponents().getWindowingStrategiesCount(),
                equalTo(windowingStrategies.size()));
          } else {
            transforms.add(node);
          }
        }

        @Override
        public void visitPrimitiveTransform(Node node) {
          transforms.add(node);
        }

        @Override
        public void visitValue(PValue value, Node producer) {
          if (value instanceof PCollection) {
            PCollection<?> pc = (PCollection<?>) value;
            pcollections.add(pc);
            addCoders(pc.getCoder());
            windowingStrategies.add(pc.getWindowingStrategy());
            addCoders(pc.getWindowingStrategy().getWindowFn().windowCoder());
          }
        }

        private void addCoders(Coder<?> coder) {
          coders.add(Equivalence.<Coder<?>>identity().wrap(coder));
          if (coder instanceof StructuredCoder) {
            for (Coder<?> component : ((StructuredCoder<?>) coder).getComponents()) {
              addCoders(component);
            }
          }
        }
      });
}
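The proto returned here can also be inspected directly. A minimal sketch, reusing the SdkComponents.translatePipeline entry point from the test above; the trivial pipeline and the printing loop are illustrative only:

Pipeline p = Pipeline.create();
p.apply(GenerateSequence.from(0L).to(10L));
RunnerApi.Pipeline proto = SdkComponents.translatePipeline(p);
// Each PTransform appears in the proto's components map under a unique id.
for (String transformId : proto.getComponents().getTransformsMap().keySet()) {
  System.out.println(transformId);
}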
Use of org.apache.beam.sdk.values.PValue in project beam by apache.
From class PTransformTranslationTest, method multiMultiParDo:
private static AppliedPTransform<?, ?, ?> multiMultiParDo(Pipeline pipeline) {
  PCollectionView<String> view =
      pipeline.apply(Create.of("foo")).apply(View.<String>asSingleton());
  PCollection<Long> input = pipeline.apply(GenerateSequence.from(0));
  ParDo.MultiOutput<Long, KV<Long, String>> parDo =
      ParDo.of(new TestDoFn())
          .withSideInputs(view)
          .withOutputTags(
              new TupleTag<KV<Long, String>>() {},
              TupleTagList.of(new TupleTag<KV<String, Long>>() {}));
  PCollectionTuple output = input.apply(parDo);
  Map<TupleTag<?>, PValue> inputs = new HashMap<>();
  inputs.putAll(parDo.getAdditionalInputs());
  inputs.putAll(input.expand());
  return AppliedPTransform
      .<PCollection<Long>, PCollectionTuple, ParDo.MultiOutput<Long, KV<Long, String>>>of(
          "MultiParDoInAndOut", inputs, output.expand(), parDo, pipeline);
}
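The TestDoFn referenced above is not part of this excerpt. A minimal stand-in matching the ParDo's declared types (Long elements in, KV<Long, String> on the main output) might look like this; the body is hypothetical:

private static class TestDoFn extends DoFn<Long, KV<Long, String>> {
  @ProcessElement
  public void processElement(ProcessContext context) {
    // Emit a keyed value on the main output; the side input and the
    // KV<String, Long> additional output are not exercised here.
    context.output(KV.of(context.element(), "foo"));
  }
}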
Use of org.apache.beam.sdk.values.PValue in project beam by apache.
From class WatermarkManager, method refreshWatermarks:
// Refresh the watermarks of a single transform; if they advanced, return the
// downstream primitive consumers whose watermarks may now advance as well.
private Set<AppliedPTransform<?, ?, ?>> refreshWatermarks(AppliedPTransform<?, ?, ?> toRefresh) {
  TransformWatermarks myWatermarks = transformToWatermarks.get(toRefresh);
  WatermarkUpdate updateResult = myWatermarks.refresh();
  if (updateResult.isAdvanced()) {
    Set<AppliedPTransform<?, ?, ?>> additionalRefreshes = new HashSet<>();
    for (PValue outputPValue : toRefresh.getOutputs().values()) {
      additionalRefreshes.addAll(graph.getPrimitiveConsumers(outputPValue));
    }
    return additionalRefreshes;
  }
  return Collections.emptySet();
}
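A caller can drive this method to a fixed point by re-enqueuing whatever it returns until no watermark advances any further. A hedged sketch; the seeding of the work queue is an assumption, not shown in this excerpt:

private void refreshAllWatermarks(Set<AppliedPTransform<?, ?, ?>> initial) {
  Deque<AppliedPTransform<?, ?, ?>> toRefresh = new ArrayDeque<>(initial);
  while (!toRefresh.isEmpty()) {
    // Each advance may unlock downstream transforms; keep refreshing until
    // the graph quiesces.
    toRefresh.addAll(refreshWatermarks(toRefresh.poll()));
  }
}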
Use of org.apache.beam.sdk.values.PValue in project beam by apache.
From class StreamingTransformTranslator, method flattenPColl:
private static <T> TransformEvaluator<Flatten.PCollections<T>> flattenPColl() {
  return new TransformEvaluator<Flatten.PCollections<T>>() {
    @SuppressWarnings("unchecked")
    @Override
    public void evaluate(Flatten.PCollections<T> transform, EvaluationContext context) {
      Map<TupleTag<?>, PValue> pcs = context.getInputs(transform);
      // Since this is a streaming pipeline, at least one of the PCollections to
      // "flatten" is unbounded, meaning it represents a DStream. The unified
      // result may therefore also be an unbounded DStream.
      final List<JavaDStream<WindowedValue<T>>> dStreams = new ArrayList<>();
      final List<Integer> streamingSources = new ArrayList<>();
      for (PValue pv : pcs.values()) {
        checkArgument(
            pv instanceof PCollection,
            "Flatten had non-PCollection value in input: %s of type %s",
            pv,
            pv.getClass().getSimpleName());
        PCollection<T> pcol = (PCollection<T>) pv;
        Dataset dataset = context.borrowDataset(pcol);
        if (dataset instanceof UnboundedDataset) {
          UnboundedDataset<T> unboundedDataset = (UnboundedDataset<T>) dataset;
          streamingSources.addAll(unboundedDataset.getStreamSources());
          dStreams.add(unboundedDataset.getDStream());
        } else {
          // Wrap the bounded RDD in a single-element queue stream.
          Queue<JavaRDD<WindowedValue<T>>> q = new LinkedBlockingQueue<>();
          q.offer(((BoundedDataset<T>) dataset).getRDD());
          // TODO: this is not recoverable from checkpoint!
          JavaDStream<WindowedValue<T>> dStream = context.getStreamingContext().queueStream(q);
          dStreams.add(dStream);
        }
      }
      // Unify the collected streams into a single stream.
      JavaDStream<WindowedValue<T>> unifiedStreams =
          context.getStreamingContext().union(dStreams.remove(0), dStreams);
      context.putDataset(transform, new UnboundedDataset<>(unifiedStreams, streamingSources));
    }

    @Override
    public String toNativeString() {
      return "streamingContext.union(...)";
    }
  };
}
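Evaluators like this one are typically registered in a lookup table keyed by transform class, which the translator consults while walking the pipeline. A hedged sketch of that wiring; the EVALUATORS field name is an assumption for illustration:

private static final Map<Class<? extends PTransform>, TransformEvaluator<?>> EVALUATORS =
    new HashMap<>();

static {
  // Dispatch Flatten.PCollections to the streaming evaluator defined above.
  EVALUATORS.put(Flatten.PCollections.class, flattenPColl());
}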