Search in sources:

Example 16 with GlobalWindows

use of org.apache.beam.sdk.transforms.windowing.GlobalWindows in project beam by apache.

the class RepeatedlyStateMachineTest method testRepeatedlyAfterFirstProcessingTime.

@Test
public void testRepeatedlyAfterFirstProcessingTime() throws Exception {
    // Trigger under test: repeat forever an "after first of" trigger combining
    //   (a) processing time 15 minutes past the first element in the pane, and
    //   (b) a pane element count of at least 5.
    final Duration delay = Duration.standardMinutes(15);
    final GlobalWindow globalWindow = GlobalWindow.INSTANCE;

    SimpleTriggerStateMachineTester<GlobalWindow> tester =
        TriggerStateMachineTester.forTrigger(
            RepeatedlyStateMachine.forever(
                AfterFirstStateMachine.of(
                    AfterProcessingTimeStateMachine.pastFirstElementInPane().plusDelayOf(delay),
                    AfterPaneStateMachine.elementCountAtLeast(5))),
            new GlobalWindows());

    // One element is below the count threshold of 5 and processing time has not
    // been advanced yet, so the trigger is expected not to be ready.
    tester.injectElements(1);
    assertFalse(tester.shouldFire(globalWindow));

    // Move processing time to 15 minutes past instant 0; the trigger should now be ready.
    tester.advanceProcessingTime(new Instant(0).plus(delay));
    assertTrue(tester.shouldFire(globalWindow));

    // Once fired, the trigger must not report itself as ready again right away.
    tester.fireIfShouldFire(globalWindow);
    assertFalse(tester.shouldFire(globalWindow));
}
Also used : GlobalWindows(org.apache.beam.sdk.transforms.windowing.GlobalWindows) Instant(org.joda.time.Instant) GlobalWindow(org.apache.beam.sdk.transforms.windowing.GlobalWindow) Test(org.junit.Test)

Example 17 with GlobalWindows

use of org.apache.beam.sdk.transforms.windowing.GlobalWindows in project beam by apache.

the class AutoComplete method main.

/**
 * Entry point: parses the pipeline options from {@code args}, builds the completion
 * pipeline, attaches the requested sinks (Datastore and/or BigQuery) and runs it.
 *
 * @param args command-line arguments, parsed into {@code Options}
 * @throws IOException declared by this entry point; the visible body does not show which
 *     call raises it
 */
public static void main(String[] args) throws IOException {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    options.setBigQuerySchema(FormatForBigquery.getSchema());
    ExampleUtils exampleUtils = new ExampleUtils(options);

    // The same pipeline runs either in batch mode (one global window) or in
    // streaming mode (30-minute sliding windows emitted every 5 seconds).
    final WindowFn<Object, ?> windowFn;
    if (!options.isStreaming()) {
        windowFn = new GlobalWindows();
    } else {
        // Writing to Datastore is rejected up front when streaming.
        checkArgument(!options.getOutputToDatastore(), "DatastoreIO is not supported in streaming.");
        windowFn = SlidingWindows.of(Duration.standardMinutes(30)).every(Duration.standardSeconds(5));
    }

    // Assemble the pipeline: read text, extract hashtags, window, compute top completions.
    Pipeline p = Pipeline.create(options);
    PCollection<String> hashtags =
        p.apply(TextIO.read().from(options.getInputFile()))
            .apply(ParDo.of(new ExtractHashtags()));
    PCollection<KV<String, List<CompletionCandidate>>> toWrite =
        hashtags
            .apply(Window.<String>into(windowFn))
            .apply(ComputeTopCompletions.top(10, options.getRecursive()));

    // Optional Datastore sink; the project falls back to the pipeline's own
    // project when no explicit output project is configured.
    if (options.getOutputToDatastore()) {
        toWrite
            .apply(
                "FormatForDatastore",
                ParDo.of(new FormatForDatastore(options.getKind(), options.getDatastoreAncestorKey())))
            .apply(
                DatastoreIO.v1()
                    .write()
                    .withProjectId(
                        MoreObjects.firstNonNull(options.getOutputProject(), options.getProject())));
    }

    // Optional BigQuery sink.
    if (options.getOutputToBigQuery()) {
        exampleUtils.setupBigQueryTable();

        TableReference tableRef = new TableReference();
        tableRef.setProjectId(options.getProject());
        tableRef.setDatasetId(options.getBigQueryDataset());
        tableRef.setTableId(options.getBigQueryTable());

        // Streaming runs append to the table; batch runs replace its contents.
        final BigQueryIO.Write.WriteDisposition writeDisposition;
        if (options.isStreaming()) {
            writeDisposition = BigQueryIO.Write.WriteDisposition.WRITE_APPEND;
        } else {
            writeDisposition = BigQueryIO.Write.WriteDisposition.WRITE_TRUNCATE;
        }

        toWrite
            .apply(ParDo.of(new FormatForBigquery()))
            .apply(
                BigQueryIO.writeTableRows()
                    .to(tableRef)
                    .withSchema(FormatForBigquery.getSchema())
                    .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
                    .withWriteDisposition(writeDisposition));
    }

    // Launch the pipeline, then hand the result to ExampleUtils, which will try to
    // cancel the pipeline and the injector before the program exits.
    PipelineResult result = p.run();
    exampleUtils.waitToFinish(result);
}
Also used : StreamingOptions(org.apache.beam.sdk.options.StreamingOptions) ExampleOptions(org.apache.beam.examples.common.ExampleOptions) ExampleBigQueryTableOptions(org.apache.beam.examples.common.ExampleBigQueryTableOptions) GlobalWindows(org.apache.beam.sdk.transforms.windowing.GlobalWindows) ExampleUtils(org.apache.beam.examples.common.ExampleUtils) PipelineResult(org.apache.beam.sdk.PipelineResult) KV(org.apache.beam.sdk.values.KV) Pipeline(org.apache.beam.sdk.Pipeline) TableReference(com.google.api.services.bigquery.model.TableReference)

Example 18 with GlobalWindows

use of org.apache.beam.sdk.transforms.windowing.GlobalWindows in project beam by apache.

the class ReduceFnRunnerTest method fireEmptyOnDrainInGlobalWindowIfRequested.

/**
 * Advancing the input watermark to end-of-time should produce one empty ON_TIME pane in the
 * GlobalWindow, following the EARLY panes already emitted by the processing-time trigger.
 */
@Test
public void fireEmptyOnDrainInGlobalWindowIfRequested() throws Exception {
    ReduceFnTester<Integer, Iterable<Integer>, GlobalWindow> tester =
        ReduceFnTester.nonCombining(
            WindowingStrategy.of(new GlobalWindows())
                .withTrigger(
                    Repeatedly.<GlobalWindow>forever(
                        AfterProcessingTime.pastFirstElementInPane().plusDelayOf(new Duration(3))))
                .withMode(AccumulationMode.DISCARDING_FIRED_PANES));

    final int elementCount = 20;
    // The assertions below expect four elements per pane, i.e. ceil(elementCount / 4) panes.
    final int expectedEarlyPanes = (elementCount + 3) / 4;

    // Inject one element per processing-time tick; the element's value and
    // timestamp both equal the tick.
    for (int tick = 0; tick < elementCount; tick++) {
        Instant now = new Instant(tick);
        tester.advanceProcessingTime(now);
        tester.injectElements(TimestampedValue.of(tick, now));
    }
    tester.advanceProcessingTime(new Instant(elementCount + 4));

    // All panes so far must be EARLY, consecutively indexed, with four elements each.
    List<WindowedValue<Iterable<Integer>>> earlyPanes = tester.extractOutput();
    assertEquals(expectedEarlyPanes, earlyPanes.size());
    int expectedIndex = 0;
    for (WindowedValue<Iterable<Integer>> pane : earlyPanes) {
        assertEquals(Timing.EARLY, pane.getPane().getTiming());
        assertEquals(expectedIndex, pane.getPane().getIndex());
        assertEquals(4, Iterables.size(pane.getValue()));
        expectedIndex++;
    }

    // Push the watermark to the maximum timestamp: exactly one more pane,
    // ON_TIME, carrying the next index and no elements.
    tester.advanceInputWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE);
    List<WindowedValue<Iterable<Integer>>> finalPanes = tester.extractOutput();
    assertEquals(1, finalPanes.size());
    WindowedValue<Iterable<Integer>> onTimePane = finalPanes.get(0);
    assertEquals(Timing.ON_TIME, onTimePane.getPane().getTiming());
    assertEquals(expectedEarlyPanes, onTimePane.getPane().getIndex());
    assertEquals(0, Iterables.size(onTimePane.getValue()));
}
Also used : GlobalWindows(org.apache.beam.sdk.transforms.windowing.GlobalWindows) Matchers.emptyIterable(org.hamcrest.Matchers.emptyIterable) WindowedValue(org.apache.beam.sdk.util.WindowedValue) WindowMatchers.isWindowedValue(org.apache.beam.runners.core.WindowMatchers.isWindowedValue) WindowMatchers.isSingleWindowedValue(org.apache.beam.runners.core.WindowMatchers.isSingleWindowedValue) Instant(org.joda.time.Instant) Duration(org.joda.time.Duration) GlobalWindow(org.apache.beam.sdk.transforms.windowing.GlobalWindow) Test(org.junit.Test)

Example 19 with GlobalWindows

use of org.apache.beam.sdk.transforms.windowing.GlobalWindows in project beam by apache.

the class CombineTest method testHotKeyCombiningWithAccumulationMode.

@Test
@Category(ValidatesRunner.class)
public void testHotKeyCombiningWithAccumulationMode() {
    PCollection<Integer> input = pipeline.apply(Create.of(1, 2, 3, 4, 5));

    // Global window that fires after every element, accumulating fired panes,
    // with zero allowed lateness and a final firing on close.
    PCollection<Integer> windowed =
        input.apply(
            Window.<Integer>into(new GlobalWindows())
                .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1)))
                .accumulatingFiredPanes()
                .withAllowedLateness(new Duration(0), ClosingBehavior.FIRE_ALWAYS));

    // Global integer sum without a default value, combined with a fanout of 2.
    PCollection<Integer> sums =
        windowed.apply(Sum.integersGlobally().withoutDefaults().withFanout(2));

    // GetLast is defined outside this snippet; presumably it keeps the last pane's value.
    PCollection<Integer> output = sums.apply(ParDo.of(new GetLast()));

    PAssert.that(output).satisfies(new SerializableFunction<Iterable<Integer>, Void>() {

        @Override
        public Void apply(Iterable<Integer> values) {
            // 15 is the sum of all five inputs (1 + 2 + 3 + 4 + 5).
            assertThat(values, hasItem(15));
            return null;
        }
    });

    pipeline.run();
}
Also used : GlobalWindows(org.apache.beam.sdk.transforms.windowing.GlobalWindows) Duration(org.joda.time.Duration) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Aggregations

GlobalWindows (org.apache.beam.sdk.transforms.windowing.GlobalWindows) 19; Test (org.junit.Test) 15; Instant (org.joda.time.Instant) 10; GlobalWindow (org.apache.beam.sdk.transforms.windowing.GlobalWindow) 8; Duration (org.joda.time.Duration) 6; KV (org.apache.beam.sdk.values.KV) 4; Pipeline (org.apache.beam.sdk.Pipeline) 3; Category (org.junit.experimental.categories.Category) 3; TableRow (com.google.api.services.bigquery.model.TableRow) 2; List (java.util.List) 2; WindowMatchers.isSingleWindowedValue (org.apache.beam.runners.core.WindowMatchers.isSingleWindowedValue) 2; WindowMatchers.isWindowedValue (org.apache.beam.runners.core.WindowMatchers.isWindowedValue) 2; WindowedValue (org.apache.beam.sdk.util.WindowedValue) 2; PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple) 2; PCollectionView (org.apache.beam.sdk.values.PCollectionView) 2; TupleTag (org.apache.beam.sdk.values.TupleTag) 2; Matchers.emptyIterable (org.hamcrest.Matchers.emptyIterable) 2; TableReference (com.google.api.services.bigquery.model.TableReference) 1; ImmutableList (com.google.common.collect.ImmutableList) 1; Map (java.util.Map) 1