use of org.apache.beam.sdk.values.PCollection in project beam by apache.
the class CombineTest method testCombinePerKeyWithHotKeyFanoutPrimitiveDisplayData.
@Test
@Category(ValidatesRunner.class)
public void testCombinePerKeyWithHotKeyFanoutPrimitiveDisplayData() {
int hotKeyFanout = 2;
DisplayDataEvaluator evaluator = DisplayDataEvaluator.create();
CombineTest.UniqueInts combineFn = new CombineTest.UniqueInts();
PTransform<PCollection<KV<Integer, Integer>>, PCollection<KV<Integer, Set<Integer>>>> combine = Combine.<Integer, Integer, Set<Integer>>perKey(combineFn).withHotKeyFanout(hotKeyFanout);
Set<DisplayData> displayData = evaluator.displayDataForPrimitiveTransforms(combine, KvCoder.of(VarIntCoder.of(), VarIntCoder.of()));
assertThat("Combine.perKey.withHotKeyFanout should include the combineFn in its primitive " + "transform", displayData, hasItem(hasDisplayItem("combineFn", combineFn.getClass())));
assertThat("Combine.perKey.withHotKeyFanout(int) should include the fanout in its primitive " + "transform", displayData, hasItem(hasDisplayItem("fanout", hotKeyFanout)));
}
use of org.apache.beam.sdk.values.PCollection in project beam by apache.
the class CombineTest method testCombinePerKeyPrimitiveDisplayData.
@Test
@Category(ValidatesRunner.class)
public void testCombinePerKeyPrimitiveDisplayData() {
DisplayDataEvaluator evaluator = DisplayDataEvaluator.create();
CombineTest.UniqueInts combineFn = new CombineTest.UniqueInts();
PTransform<PCollection<KV<Integer, Integer>>, ? extends POutput> combine = Combine.perKey(combineFn);
Set<DisplayData> displayData = evaluator.displayDataForPrimitiveTransforms(combine, KvCoder.of(VarIntCoder.of(), VarIntCoder.of()));
assertThat("Combine.perKey should include the combineFn in its primitive transform", displayData, hasItem(hasDisplayItem("combineFn", combineFn.getClass())));
}
use of org.apache.beam.sdk.values.PCollection in project DataflowJavaSDK-examples by GoogleCloudPlatform.
the class GameStats method main.
public static void main(String[] args) throws Exception {
Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
// Enforce that this pipeline is always run in streaming mode.
options.setStreaming(true);
ExampleUtils exampleUtils = new ExampleUtils(options);
Pipeline pipeline = Pipeline.create(options);
// Read Events from Pub/Sub using custom timestamps
PCollection<GameActionInfo> rawEvents = pipeline.apply(PubsubIO.readStrings().withTimestampAttribute(TIMESTAMP_ATTRIBUTE).fromTopic(options.getTopic())).apply("ParseGameEvent", ParDo.of(new ParseEventFn()));
// Extract username/score pairs from the event stream
PCollection<KV<String, Integer>> userEvents = rawEvents.apply("ExtractUserScore", MapElements.into(TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers())).via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore())));
// Calculate the total score per user over fixed windows, and
// cumulative updates for late data.
final PCollectionView<Map<String, Integer>> spammersView = userEvents.apply("FixedWindowsUser", Window.<KV<String, Integer>>into(FixedWindows.of(Duration.standardMinutes(options.getFixedWindowDuration())))).apply("CalculateSpammyUsers", new CalculateSpammyUsers()).apply("CreateSpammersView", View.<String, Integer>asMap());
// [START DocInclude_FilterAndCalc]
// Calculate the total score per team over fixed windows,
// and emit cumulative updates for late data. Uses the side input derived above-- the set of
// suspected robots-- to filter out scores from those users from the sum.
// Write the results to BigQuery.
rawEvents.apply("WindowIntoFixedWindows", Window.<GameActionInfo>into(FixedWindows.of(Duration.standardMinutes(options.getFixedWindowDuration())))).apply("FilterOutSpammers", ParDo.of(new DoFn<GameActionInfo, GameActionInfo>() {
@ProcessElement
public void processElement(ProcessContext c) {
// If the user is not in the spammers Map, output the data element.
if (c.sideInput(spammersView).get(c.element().getUser().trim()) == null) {
c.output(c.element());
}
}
}).withSideInputs(spammersView)).apply("ExtractTeamScore", new ExtractAndSumScore("team")).apply("WriteTeamSums", new WriteWindowedToBigQuery<KV<String, Integer>>(options.as(GcpOptions.class).getProject(), options.getDataset(), options.getGameStatsTablePrefix() + "_team", configureWindowedWrite()));
// [START DocInclude_SessionCalc]
// Detect user sessions-- that is, a burst of activity separated by a gap from further
// activity. Find and record the mean session lengths.
// This information could help the game designers track the changing user engagement
// as their set of games changes.
userEvents.apply("WindowIntoSessions", Window.<KV<String, Integer>>into(Sessions.withGapDuration(Duration.standardMinutes(options.getSessionGap()))).withTimestampCombiner(TimestampCombiner.END_OF_WINDOW)).apply(Combine.perKey(x -> 0)).apply("UserSessionActivity", ParDo.of(new UserSessionInfoFn())).apply("WindowToExtractSessionMean", Window.<Integer>into(FixedWindows.of(Duration.standardMinutes(options.getUserActivityWindowDuration())))).apply(Mean.<Integer>globally().withoutDefaults()).apply("WriteAvgSessionLength", new WriteWindowedToBigQuery<Double>(options.as(GcpOptions.class).getProject(), options.getDataset(), options.getGameStatsTablePrefix() + "_sessions", configureSessionWindowWrite()));
// [END DocInclude_Rewindow]
// Run the pipeline and wait for the pipeline to finish; capture cancellation requests from the
// command line.
PipelineResult result = pipeline.run();
exampleUtils.waitToFinish(result);
}
use of org.apache.beam.sdk.values.PCollection in project beam by apache.
the class PTransformTranslation method toProto.
/**
* Translates an {@link AppliedPTransform} into a runner API proto.
*
* <p>Does not register the {@code appliedPTransform} within the provided {@link SdkComponents}.
*/
static RunnerApi.PTransform toProto(AppliedPTransform<?, ?, ?> appliedPTransform, List<AppliedPTransform<?, ?, ?>> subtransforms, SdkComponents components) throws IOException {
RunnerApi.PTransform.Builder transformBuilder = RunnerApi.PTransform.newBuilder();
for (Map.Entry<TupleTag<?>, PValue> taggedInput : appliedPTransform.getInputs().entrySet()) {
checkArgument(taggedInput.getValue() instanceof PCollection, "Unexpected input type %s", taggedInput.getValue().getClass());
transformBuilder.putInputs(toProto(taggedInput.getKey()), components.registerPCollection((PCollection<?>) taggedInput.getValue()));
}
for (Map.Entry<TupleTag<?>, PValue> taggedOutput : appliedPTransform.getOutputs().entrySet()) {
// TODO: Remove gating
if (taggedOutput.getValue() instanceof PCollection) {
checkArgument(taggedOutput.getValue() instanceof PCollection, "Unexpected output type %s", taggedOutput.getValue().getClass());
transformBuilder.putOutputs(toProto(taggedOutput.getKey()), components.registerPCollection((PCollection<?>) taggedOutput.getValue()));
}
}
for (AppliedPTransform<?, ?, ?> subtransform : subtransforms) {
transformBuilder.addSubtransforms(components.getExistingPTransformId(subtransform));
}
transformBuilder.setUniqueName(appliedPTransform.getFullName());
// TODO: Display Data
PTransform<?, ?> transform = appliedPTransform.getTransform();
if (KNOWN_PAYLOAD_TRANSLATORS.containsKey(transform.getClass())) {
FunctionSpec payload = KNOWN_PAYLOAD_TRANSLATORS.get(transform.getClass()).translate(appliedPTransform, components);
transformBuilder.setSpec(payload);
}
return transformBuilder.build();
}
use of org.apache.beam.sdk.values.PCollection in project beam by apache.
the class SplittableProcessElementsEvaluatorFactory method createEvaluator.
@SuppressWarnings({ "unchecked", "rawtypes" })
private TransformEvaluator<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>> createEvaluator(AppliedPTransform<PCollection<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>>, PCollectionTuple, ProcessElements<InputT, OutputT, RestrictionT, TrackerT>> application, CommittedBundle<InputT> inputBundle) throws Exception {
final ProcessElements<InputT, OutputT, RestrictionT, TrackerT> transform = application.getTransform();
ProcessFn<InputT, OutputT, RestrictionT, TrackerT> processFn = transform.newProcessFn(transform.getFn());
DoFnLifecycleManager fnManager = DoFnLifecycleManager.of(processFn);
processFn = ((ProcessFn<InputT, OutputT, RestrictionT, TrackerT>) fnManager.<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>, OutputT>get());
String stepName = evaluationContext.getStepName(application);
final DirectExecutionContext.DirectStepContext stepContext = evaluationContext.getExecutionContext(application, inputBundle.getKey()).getStepContext(stepName);
final ParDoEvaluator<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>> parDoEvaluator = delegateFactory.createParDoEvaluator(application, inputBundle.getKey(), transform.getSideInputs(), transform.getMainOutputTag(), transform.getAdditionalOutputTags().getAll(), stepContext, processFn, fnManager);
processFn.setStateInternalsFactory(new StateInternalsFactory<String>() {
@SuppressWarnings({ "unchecked", "rawtypes" })
@Override
public StateInternals stateInternalsForKey(String key) {
return (StateInternals) stepContext.stateInternals();
}
});
processFn.setTimerInternalsFactory(new TimerInternalsFactory<String>() {
@Override
public TimerInternals timerInternalsForKey(String key) {
return stepContext.timerInternals();
}
});
OutputWindowedValue<OutputT> outputWindowedValue = new OutputWindowedValue<OutputT>() {
private final OutputManager outputManager = parDoEvaluator.getOutputManager();
@Override
public void outputWindowedValue(OutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
outputManager.output(transform.getMainOutputTag(), WindowedValue.of(output, timestamp, windows, pane));
}
@Override
public <AdditionalOutputT> void outputWindowedValue(TupleTag<AdditionalOutputT> tag, AdditionalOutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
outputManager.output(tag, WindowedValue.of(output, timestamp, windows, pane));
}
};
processFn.setProcessElementInvoker(new OutputAndTimeBoundedSplittableProcessElementInvoker<InputT, OutputT, RestrictionT, TrackerT>(transform.getFn(), evaluationContext.getPipelineOptions(), outputWindowedValue, evaluationContext.createSideInputReader(transform.getSideInputs()), // DirectRunner.
Executors.newSingleThreadScheduledExecutor(new ThreadFactoryBuilder().setThreadFactory(MoreExecutors.platformThreadFactory()).setDaemon(true).setNameFormat("direct-splittable-process-element-checkpoint-executor").build()), 10000, Duration.standardSeconds(10)));
return DoFnLifecycleManagerRemovingTransformEvaluator.wrapping(parDoEvaluator, fnManager);
}
Aggregations