use of org.apache.beam.sdk.transforms.windowing.GlobalWindows in project beam by apache.
the class RepeatedlyStateMachineTest method testRepeatedlyAfterFirstProcessingTime.
/**
 * Exercises a repeated "after first of" trigger built from a processing-time delay and an
 * element-count threshold, driving it only through the processing-time branch: a single element
 * is injected (below the count of 5) and processing time is advanced by the configured delay.
 */
@Test
public void testRepeatedlyAfterFirstProcessingTime() throws Exception {
  // The same delay configures the trigger and determines how far the clock is advanced below.
  Duration delay = Duration.standardMinutes(15);

  SimpleTriggerStateMachineTester<GlobalWindow> machine =
      TriggerStateMachineTester.forTrigger(
          RepeatedlyStateMachine.forever(
              AfterFirstStateMachine.of(
                  AfterProcessingTimeStateMachine.pastFirstElementInPane().plusDelayOf(delay),
                  AfterPaneStateMachine.elementCountAtLeast(5))),
          new GlobalWindows());
  GlobalWindow globalWindow = GlobalWindow.INSTANCE;

  // One element is injected; the test expects no firing yet.
  machine.injectElements(1);
  assertFalse(machine.shouldFire(globalWindow));

  // After moving processing time forward by the delay, the test expects the trigger to fire.
  Instant fireTime = new Instant(0).plus(delay);
  machine.advanceProcessingTime(fireTime);
  assertTrue(machine.shouldFire(globalWindow));

  // Once fired, the test expects the repeated trigger not to be ready again without new input.
  machine.fireIfShouldFire(globalWindow);
  assertFalse(machine.shouldFire(globalWindow));
}
use of org.apache.beam.sdk.transforms.windowing.GlobalWindows in project beam by apache.
the class AutoComplete method main.
/**
 * Entry point: builds and runs the auto-complete pipeline.
 *
 * <p>Reads text from the configured input file, extracts hashtags, windows them (sliding windows
 * when streaming, the global window otherwise), computes the top 10 completions, and writes the
 * result to Datastore and/or BigQuery depending on the options.
 *
 * @param args command-line arguments parsed into {@code Options}
 * @throws IOException declared by the original signature; the visible body does not show which
 *     call raises it
 */
public static void main(String[] args) throws IOException {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  options.setBigQuerySchema(FormatForBigquery.getSchema());
  ExampleUtils exampleUtils = new ExampleUtils(options);

  // The same pipeline runs either as a batch job (single global window)
  // or as a streaming job (30-minute sliding windows emitted every 5 seconds).
  WindowFn<Object, ?> windowFn;
  if (!options.isStreaming()) {
    windowFn = new GlobalWindows();
  } else {
    checkArgument(!options.getOutputToDatastore(), "DatastoreIO is not supported in streaming.");
    windowFn = SlidingWindows.of(Duration.standardMinutes(30)).every(Duration.standardSeconds(5));
  }

  // Assemble the pipeline up to the computed completions.
  Pipeline pipeline = Pipeline.create(options);
  PCollection<KV<String, List<CompletionCandidate>>> toWrite =
      pipeline
          .apply(TextIO.read().from(options.getInputFile()))
          .apply(ParDo.of(new ExtractHashtags()))
          .apply(Window.<String>into(windowFn))
          .apply(ComputeTopCompletions.top(10, options.getRecursive()));

  // Optional Datastore sink; the output project falls back to the default project when unset.
  if (options.getOutputToDatastore()) {
    toWrite
        .apply(
            "FormatForDatastore",
            ParDo.of(new FormatForDatastore(options.getKind(), options.getDatastoreAncestorKey())))
        .apply(
            DatastoreIO.v1()
                .write()
                .withProjectId(
                    MoreObjects.firstNonNull(options.getOutputProject(), options.getProject())));
  }

  // Optional BigQuery sink.
  if (options.getOutputToBigQuery()) {
    exampleUtils.setupBigQueryTable();

    TableReference table = new TableReference();
    table.setProjectId(options.getProject());
    table.setDatasetId(options.getBigQueryDataset());
    table.setTableId(options.getBigQueryTable());

    // Streaming runs append to the table; batch runs truncate it first.
    BigQueryIO.Write.WriteDisposition writeDisposition;
    if (options.isStreaming()) {
      writeDisposition = BigQueryIO.Write.WriteDisposition.WRITE_APPEND;
    } else {
      writeDisposition = BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE;
    }

    toWrite
        .apply(ParDo.of(new FormatForBigquery()))
        .apply(
            BigQueryIO.writeTableRows()
                .to(table)
                .withSchema(FormatForBigquery.getSchema())
                .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                .withWriteDisposition(writeDisposition));
  }

  // Launch the pipeline.
  PipelineResult pipelineResult = pipeline.run();

  // ExampleUtils will try to cancel the pipeline and the injector before the program exits.
  exampleUtils.waitToFinish(pipelineResult);
}
use of org.apache.beam.sdk.transforms.windowing.GlobalWindows in project beam by apache.
the class ReduceFnRunnerTest method fireEmptyOnDrainInGlobalWindowIfRequested.
/**
 * Verifies that advancing the input watermark to end-of-time in the GlobalWindow produces one
 * final, empty ON_TIME pane after a series of EARLY processing-time panes.
 */
@Test
public void fireEmptyOnDrainInGlobalWindowIfRequested() throws Exception {
  ReduceFnTester<Integer, Iterable<Integer>, GlobalWindow> runner =
      ReduceFnTester.nonCombining(
          WindowingStrategy.of(new GlobalWindows())
              .withTrigger(
                  Repeatedly.<GlobalWindow>forever(
                      AfterProcessingTime.pastFirstElementInPane().plusDelayOf(new Duration(3))))
              .withMode(AccumulationMode.DISCARDING_FIRED_PANES));

  final int elementCount = 20;
  // The assertions below expect this many EARLY panes, each holding four elements.
  final int expectedEarlyPanes = (elementCount + 3) / 4;

  // Element k is injected at processing time k with timestamp k.
  int k = 0;
  while (k < elementCount) {
    runner.advanceProcessingTime(new Instant(k));
    runner.injectElements(TimestampedValue.of(k, new Instant(k)));
    k++;
  }
  runner.advanceProcessingTime(new Instant(elementCount + 4));

  List<WindowedValue<Iterable<Integer>>> output = runner.extractOutput();
  assertEquals(expectedEarlyPanes, output.size());
  for (int paneIndex = 0; paneIndex < output.size(); paneIndex++) {
    WindowedValue<Iterable<Integer>> pane = output.get(paneIndex);
    assertEquals(Timing.EARLY, pane.getPane().getTiming());
    assertEquals(paneIndex, pane.getPane().getIndex());
    assertEquals(4, Iterables.size(pane.getValue()));
  }

  // Moving the watermark to the maximum timestamp should yield exactly one more pane:
  // ON_TIME, indexed right after the early panes, and containing no elements.
  runner.advanceInputWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE);
  output = runner.extractOutput();
  assertEquals(1, output.size());

  WindowedValue<Iterable<Integer>> finalPane = output.get(0);
  assertEquals(Timing.ON_TIME, finalPane.getPane().getTiming());
  assertEquals(expectedEarlyPanes, finalPane.getPane().getIndex());
  assertEquals(0, Iterables.size(finalPane.getValue()));
}
use of org.apache.beam.sdk.transforms.windowing.GlobalWindows in project beam by apache.
the class CombineTest method testHotKeyCombiningWithAccumulationMode.
/**
 * Sums the integers 1..5 globally with fanout 2 under a global window that fires on every
 * element in accumulating mode, then asserts that the output contains the full sum, 15.
 */
@Test
@Category(ValidatesRunner.class)
public void testHotKeyCombiningWithAccumulationMode() {
  PCollection<Integer> numbers = pipeline.apply(Create.of(1, 2, 3, 4, 5));

  // Global window, trigger repeating at each element, accumulating panes, zero allowed lateness.
  Window<Integer> windowing =
      Window.<Integer>into(new GlobalWindows())
          .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1)))
          .accumulatingFiredPanes()
          .withAllowedLateness(new Duration(0), ClosingBehavior.FIRE_ALWAYS);

  PCollection<Integer> output =
      numbers
          .apply(windowing)
          .apply(Sum.integersGlobally().withoutDefaults().withFanout(2))
          .apply(ParDo.of(new GetLast()));

  PAssert.that(output)
      .satisfies(
          new SerializableFunction<Iterable<Integer>, Void>() {
            @Override
            public Void apply(Iterable<Integer> sums) {
              // Only the presence of the complete sum is checked.
              assertThat(sums, hasItem(15));
              return null;
            }
          });

  pipeline.run();
}
Aggregations