Search in sources :

Example 56 with PCollection

use of org.apache.beam.sdk.values.PCollection in project beam by apache.

the class PAssertTest method testPAssertThatListSatisfiesMultipleMatchersFalse.

/**
 * Verifies that a false assertion applied to a {@link PCollectionList} through several matchers
 * is reported as an assertion failure whose stack trace carries the assertion message.
 *
 * <p>Both input collections contain only positive integers, while every matcher asserts that
 * each element is negative.
 */
@Test
@Category({ ValidatesRunner.class, UsesFailureMessage.class })
public void testPAssertThatListSatisfiesMultipleMatchersFalse() {
    String failureMessage = "Elements should be less than 0";
    PCollection<Integer> lowValues = pipeline.apply("FirstCreate", Create.of(1, 2, 3));
    PCollection<Integer> highValues = pipeline.apply("SecondCreate", Create.of(4, 5, 6));
    PCollectionList<Integer> bothCollections = PCollectionList.of(lowValues).and(highValues);

    // One matcher per collection in the list; each one requires all elements to be negative.
    PAssert.thatList(bothCollections).satisfies(ImmutableList.of(
        elements -> {
            elements.forEach(value -> assertTrue(failureMessage, value < 0));
            return null;
        },
        elements -> {
            elements.forEach(value -> assertTrue(failureMessage, value < 0));
            return null;
        }));

    // The assertion message must be visible somewhere in the failure's stack trace.
    Throwable failure = runExpectingAssertionFailure(pipeline);
    assertThat(Throwables.getStackTraceAsString(failure), containsString(failureMessage));
}
Also used : SerializableCoder(org.apache.beam.sdk.coders.SerializableCoder) CoderUtils(org.apache.beam.sdk.util.CoderUtils) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) ElementByteSizeObserver(org.apache.beam.sdk.util.common.ElementByteSizeObserver) Throwables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Throwables) PCollectionList(org.apache.beam.sdk.values.PCollectionList) Create(org.apache.beam.sdk.transforms.Create) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) MatcherCheckerFn(org.apache.beam.sdk.testing.PAssert.MatcherCheckerFn) Window(org.apache.beam.sdk.transforms.windowing.Window) GlobalWindow(org.apache.beam.sdk.transforms.windowing.GlobalWindow) Assert.fail(org.junit.Assert.fail) TimestampedValue(org.apache.beam.sdk.values.TimestampedValue) Sum(org.apache.beam.sdk.transforms.Sum) GenerateSequence(org.apache.beam.sdk.io.GenerateSequence) VarLongCoder(org.apache.beam.sdk.coders.VarLongCoder) Category(org.junit.experimental.categories.Category) Serializable(java.io.Serializable) CoderException(org.apache.beam.sdk.coders.CoderException) ParDo(org.apache.beam.sdk.transforms.ParDo) Assert.assertFalse(org.junit.Assert.assertFalse) Matchers.equalTo(org.hamcrest.Matchers.equalTo) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) Matchers.is(org.hamcrest.Matchers.is) Pattern(java.util.regex.Pattern) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) SlidingWindows(org.apache.beam.sdk.transforms.windowing.SlidingWindows) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Pipeline(org.apache.beam.sdk.Pipeline) ExpectedException(org.junit.rules.ExpectedException) Nullable(org.checkerframework.checker.nullness.qual.Nullable) OutputStream(java.io.OutputStream) DoFn(org.apache.beam.sdk.transforms.DoFn) 
GroupByKey(org.apache.beam.sdk.transforms.GroupByKey) WithKeys(org.apache.beam.sdk.transforms.WithKeys) Assert.assertTrue(org.junit.Assert.assertTrue) FixedWindows(org.apache.beam.sdk.transforms.windowing.FixedWindows) IOException(java.io.IOException) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) Rule(org.junit.Rule) Instant(org.joda.time.Instant) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) AtomicCoder(org.apache.beam.sdk.coders.AtomicCoder) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) InputStream(java.io.InputStream) Matchers.containsString(org.hamcrest.Matchers.containsString) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 57 with PCollection

use of org.apache.beam.sdk.values.PCollection in project beam by apache.

the class TestStreamTest method testEarlyPanesOfWindow.

/**
 * Checks the early and on-time panes emitted for one 30-minute fixed window fed by a
 * {@link TestStream}.
 *
 * <p>The trigger fires early 5 minutes of processing time after the first element in a pane, and
 * panes accumulate. The stream advances processing time by 6 minutes after each of the three
 * elements, then moves the watermark to infinity.
 */
@Test
@Category({ ValidatesRunner.class, UsesTestStreamWithProcessingTime.class })
public void testEarlyPanesOfWindow() {
    // Each advance exceeds the 5 minute early-firing delay, so it fires an early pane.
    Duration pastEarlyDelay = Duration.standardMinutes(6);
    TestStream<Long> source =
        TestStream.create(VarLongCoder.of())
            .addElements(TimestampedValue.of(1L, new Instant(1000L)))
            .advanceProcessingTime(pastEarlyDelay)
            .addElements(TimestampedValue.of(2L, new Instant(2000L)))
            .advanceProcessingTime(pastEarlyDelay)
            .addElements(TimestampedValue.of(3L, new Instant(3000L)))
            .advanceProcessingTime(pastEarlyDelay)
            .advanceWatermarkToInfinity();

    Window<Long> windowing =
        Window.<Long>into(FixedWindows.of(Duration.standardMinutes(30)))
            .triggering(
                AfterWatermark.pastEndOfWindow()
                    .withEarlyFirings(
                        AfterProcessingTime.pastFirstElementInPane()
                            .plusDelayOf(Duration.standardMinutes(5))))
            .accumulatingFiredPanes()
            .withAllowedLateness(Duration.ZERO);

    // Put every value under the same key and sum per key.
    PCollection<KV<String, Long>> sum =
        p.apply(source)
            .apply(windowing)
            .apply(
                MapElements.into(
                        TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.longs()))
                    .via(v -> KV.of("key", v)))
            .apply(Sum.longsPerKey());

    Instant windowStart = new Instant(0L);
    IntervalWindow window =
        new IntervalWindow(windowStart, windowStart.plus(Duration.standardMinutes(30)));

    PAssert.that(sum)
        .inEarlyPane(window)
        .satisfies(
            paneContents -> {
                assertThat(
                    StreamSupport.stream(paneContents.spliterator(), false).count(), is(3L));
                return null;
            })
        .containsInAnyOrder(KV.of("key", 1L), KV.of("key", 3L), KV.of("key", 6L))
        .inOnTimePane(window)
        .satisfies(
            paneContents -> {
                assertThat(
                    StreamSupport.stream(paneContents.spliterator(), false).count(), is(1L));
                return null;
            })
        .containsInAnyOrder(KV.of("key", 6L));

    p.run().waitUntilFinish();
}
Also used : Count(org.apache.beam.sdk.transforms.Count) Window.into(org.apache.beam.sdk.transforms.windowing.Window.into) StateSpec(org.apache.beam.sdk.state.StateSpec) CoderUtils(org.apache.beam.sdk.util.CoderUtils) TimerSpecs(org.apache.beam.sdk.state.TimerSpecs) ValueState(org.apache.beam.sdk.state.ValueState) ClosingBehavior(org.apache.beam.sdk.transforms.windowing.Window.ClosingBehavior) PCollectionList(org.apache.beam.sdk.values.PCollectionList) Window(org.apache.beam.sdk.transforms.windowing.Window) GlobalWindow(org.apache.beam.sdk.transforms.windowing.GlobalWindow) TimestampedValue(org.apache.beam.sdk.values.TimestampedValue) Keys(org.apache.beam.sdk.transforms.Keys) Flatten(org.apache.beam.sdk.transforms.Flatten) MapElements(org.apache.beam.sdk.transforms.MapElements) Matchers.allOf(org.hamcrest.Matchers.allOf) Matchers.lessThanOrEqualTo(org.hamcrest.Matchers.lessThanOrEqualTo) Sum(org.apache.beam.sdk.transforms.Sum) VarLongCoder(org.apache.beam.sdk.coders.VarLongCoder) Category(org.junit.experimental.categories.Category) Serializable(java.io.Serializable) DefaultTrigger(org.apache.beam.sdk.transforms.windowing.DefaultTrigger) ParDo(org.apache.beam.sdk.transforms.ParDo) Timer(org.apache.beam.sdk.state.Timer) TypeDescriptors(org.apache.beam.sdk.values.TypeDescriptors) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) Matchers.is(org.hamcrest.Matchers.is) GlobalWindows(org.apache.beam.sdk.transforms.windowing.GlobalWindows) AfterPane(org.apache.beam.sdk.transforms.windowing.AfterPane) Values(org.apache.beam.sdk.transforms.Values) KV(org.apache.beam.sdk.values.KV) AfterWatermark(org.apache.beam.sdk.transforms.windowing.AfterWatermark) Combine(org.apache.beam.sdk.transforms.Combine) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) TimerSpec(org.apache.beam.sdk.state.TimerSpec) Never(org.apache.beam.sdk.transforms.windowing.Never) 
StreamSupport(java.util.stream.StreamSupport) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) ExpectedException(org.junit.rules.ExpectedException) DoFn(org.apache.beam.sdk.transforms.DoFn) Matchers.greaterThanOrEqualTo(org.hamcrest.Matchers.greaterThanOrEqualTo) GroupByKey(org.apache.beam.sdk.transforms.GroupByKey) WithKeys(org.apache.beam.sdk.transforms.WithKeys) FixedWindows(org.apache.beam.sdk.transforms.windowing.FixedWindows) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) StateSpecs(org.apache.beam.sdk.state.StateSpecs) Rule(org.junit.Rule) AfterProcessingTime(org.apache.beam.sdk.transforms.windowing.AfterProcessingTime) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Instant(org.joda.time.Instant) VarIntCoder(org.apache.beam.sdk.coders.VarIntCoder) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) TimeDomain(org.apache.beam.sdk.state.TimeDomain) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 58 with PCollection

use of org.apache.beam.sdk.values.PCollection in project beam by apache.

the class DocumentationExamplesTest method wordCountExample.

/**
 * Word-count walkthrough: tokenizes lines into words, counts the occurrences of each word,
 * formats every count as text and writes the result with {@link TextIO}.
 *
 * <p>The test is ignored; per the annotation, it should not actually write output files.
 */
@Ignore("We do not want to actually write output files from this test.")
@Test
public void wordCountExample() {
    Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.create());

    // Kryo acts as the fallback coder.
    KryoCoderProvider.of().registerTo(pipeline);

    // Input lines, created through a regular Beam transform.
    PCollection<String> lines =
        pipeline
            .apply(Create.of(textLineByLine))
            .setTypeDescriptor(TypeDescriptor.of(String.class));

    // FlatMap handles one input element at a time and may emit zero, one, or more outputs.
    // Here every line is split on whitespace and each resulting word is emitted.
    PCollection<String> words =
        FlatMap.named("TOKENIZER")
            .of(lines)
            .using(
                (String line, Collector<String> context) -> {
                    Splitter.onPattern("\\s+").split(line).forEach(context::collect);
                })
            .output();

    // CountByKey groups all values sharing a key (the word itself) and emits, for each key,
    // how many times it appears in 'words'.
    PCollection<KV<String, Long>> counted =
        CountByKey.named("COUNT")
            .of(words)
            .keyBy(w -> w)
            .output();

    // Render each (word, count) pair as a single text line.
    PCollection<String> output =
        MapElements.named("FORMAT")
            .of(counted)
            .using(entry -> entry.getKey() + ": " + entry.getValue())
            .output();

    // Back to a plain Beam transformation: store the words and their counts in a text file.
    output.apply(TextIO.write().to("counted_words"));

    pipeline.run();
}
Also used : Filter(org.apache.beam.sdk.extensions.euphoria.core.client.operator.Filter) Arrays(java.util.Arrays) Distinct(org.apache.beam.sdk.extensions.euphoria.core.client.operator.Distinct) TimestampCombiner(org.apache.beam.sdk.transforms.windowing.TimestampCombiner) ReduceWindow(org.apache.beam.sdk.extensions.euphoria.core.client.operator.ReduceWindow) SumByKey(org.apache.beam.sdk.extensions.euphoria.core.client.operator.SumByKey) Join(org.apache.beam.sdk.extensions.euphoria.core.client.operator.Join) Union(org.apache.beam.sdk.extensions.euphoria.core.client.operator.Union) GenericTranslatorProvider(org.apache.beam.sdk.extensions.euphoria.core.translate.provider.GenericTranslatorProvider) RightJoin(org.apache.beam.sdk.extensions.euphoria.core.client.operator.RightJoin) Create(org.apache.beam.sdk.transforms.Create) Arrays.asList(java.util.Arrays.asList) LeftJoin(org.apache.beam.sdk.extensions.euphoria.core.client.operator.LeftJoin) ReduceByKey(org.apache.beam.sdk.extensions.euphoria.core.client.operator.ReduceByKey) KryoOptions(org.apache.beam.sdk.extensions.kryo.KryoOptions) Fold(org.apache.beam.sdk.extensions.euphoria.core.client.util.Fold) Triple(org.apache.beam.sdk.extensions.euphoria.core.client.util.Triple) CompositeProvider(org.apache.beam.sdk.extensions.euphoria.core.translate.provider.CompositeProvider) CountByKey(org.apache.beam.sdk.extensions.euphoria.core.client.operator.CountByKey) Serializable(java.io.Serializable) List(java.util.List) Stream(java.util.stream.Stream) DefaultTrigger(org.apache.beam.sdk.transforms.windowing.DefaultTrigger) TypeDescriptors(org.apache.beam.sdk.values.TypeDescriptors) AssignEventTime(org.apache.beam.sdk.extensions.euphoria.core.client.operator.AssignEventTime) Optional(java.util.Optional) BroadcastHashJoinTranslator(org.apache.beam.sdk.extensions.euphoria.core.translate.BroadcastHashJoinTranslator) Collector(org.apache.beam.sdk.extensions.euphoria.core.client.io.Collector) KV(org.apache.beam.sdk.values.KV) 
TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor) FullJoin(org.apache.beam.sdk.extensions.euphoria.core.client.operator.FullJoin) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) Splitter(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Splitter) MapElements(org.apache.beam.sdk.extensions.euphoria.core.client.operator.MapElements) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Before(org.junit.Before) PAssert(org.apache.beam.sdk.testing.PAssert) FlatMapTranslator(org.apache.beam.sdk.extensions.euphoria.core.translate.FlatMapTranslator) FixedWindows(org.apache.beam.sdk.transforms.windowing.FixedWindows) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) UnaryFunction(org.apache.beam.sdk.extensions.euphoria.core.client.functional.UnaryFunction) Operator(org.apache.beam.sdk.extensions.euphoria.core.client.operator.base.Operator) PCollection(org.apache.beam.sdk.values.PCollection) TopPerKey(org.apache.beam.sdk.extensions.euphoria.core.client.operator.TopPerKey) OperatorTranslator(org.apache.beam.sdk.extensions.euphoria.core.translate.OperatorTranslator) CompositeOperator(org.apache.beam.sdk.extensions.euphoria.core.client.operator.CompositeOperator) Rule(org.junit.Rule) Ignore(org.junit.Ignore) CompositeOperatorTranslator(org.apache.beam.sdk.extensions.euphoria.core.translate.CompositeOperatorTranslator) TranslatorProvider(org.apache.beam.sdk.extensions.euphoria.core.translate.TranslatorProvider) OnTimeBehavior(org.apache.beam.sdk.transforms.windowing.Window.OnTimeBehavior) FlatMap(org.apache.beam.sdk.extensions.euphoria.core.client.operator.FlatMap) KryoCoderProvider(org.apache.beam.sdk.extensions.kryo.KryoCoderProvider) Assert(org.junit.Assert) TextIO(org.apache.beam.sdk.io.TextIO) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) 
KV(org.apache.beam.sdk.values.KV) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 59 with PCollection

use of org.apache.beam.sdk.values.PCollection in project beam by apache.

the class CountByKeyTest method testWindow_applyIf.

/**
 * Builds a CountByKey operator whose windowing is configured inside an {@code applyIf(true, ...)}
 * branch and checks that the window function, trigger and accumulation mode are recorded on the
 * resulting operator.
 */
@Test
public void testWindow_applyIf() {
    final PCollection<String> input = TestUtils.createMockDataset(TypeDescriptors.strings());
    final FixedWindows hourlyWindows = FixedWindows.of(org.joda.time.Duration.standardHours(1));
    final DefaultTrigger defaultTrigger = DefaultTrigger.of();

    final PCollection<KV<String, Long>> counted =
        CountByKey.named("CountByKey1")
            .of(input)
            .keyBy(s -> s)
            .applyIf(
                true,
                builder ->
                    builder
                        .windowBy(hourlyWindows)
                        .triggeredBy(defaultTrigger)
                        .discardingFiredPanes())
            .output();

    // Inspect the operator that produced the output collection.
    final CountByKey operator = (CountByKey) TestUtils.getProducer(counted);
    assertTrue(operator.getWindow().isPresent());

    final WindowDesc<?> windowDesc = WindowDesc.of((Window<?>) operator.getWindow().get());
    assertEquals(hourlyWindows, windowDesc.getWindowFn());
    assertEquals(defaultTrigger, windowDesc.getTrigger());
    assertEquals(AccumulationMode.DISCARDING_FIRED_PANES, windowDesc.getAccumulationMode());
}
Also used : KV(org.apache.beam.sdk.values.KV) TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor) AccumulationMode(org.apache.beam.sdk.values.WindowingStrategy.AccumulationMode) Assert.assertNotNull(org.junit.Assert.assertNotNull) WindowDesc(org.apache.beam.sdk.transforms.windowing.WindowDesc) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) Assert.assertTrue(org.junit.Assert.assertTrue) FixedWindows(org.apache.beam.sdk.transforms.windowing.FixedWindows) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) DefaultTrigger(org.apache.beam.sdk.transforms.windowing.DefaultTrigger) Assert.assertFalse(org.junit.Assert.assertFalse) TypeDescriptors(org.apache.beam.sdk.values.TypeDescriptors) Window(org.apache.beam.sdk.transforms.windowing.Window) Assert.assertEquals(org.junit.Assert.assertEquals) FixedWindows(org.apache.beam.sdk.transforms.windowing.FixedWindows) DefaultTrigger(org.apache.beam.sdk.transforms.windowing.DefaultTrigger) KV(org.apache.beam.sdk.values.KV) Test(org.junit.Test)

Example 60 with PCollection

use of org.apache.beam.sdk.values.PCollection in project beam by apache.

the class TimestampExtractTransformTest method testTransform.

/**
 * Runs {@link TimestampExtractTransform} around a CountByKey keyed by {@code KV::getValue} and
 * expects each of the three distinct input integers to be counted exactly once.
 */
@SuppressWarnings("unchecked")
@Test(timeout = 10000)
public void testTransform() {
    Pipeline pipeline = Pipeline.create();
    PCollection<Integer> numbers = pipeline.apply(Create.of(1, 2, 3));

    PCollection<KV<Integer, Long>> counts =
        numbers.apply(
            TimestampExtractTransform.of(
                stamped ->
                    CountByKey.of(stamped)
                        .keyBy(KV::getValue, TypeDescriptors.integers())
                        .output()));

    PAssert.that(counts).containsInAnyOrder(KV.of(1, 1L), KV.of(2, 1L), KV.of(3, 1L));
    pipeline.run().waitUntilFinish();
}
Also used : KV(org.apache.beam.sdk.values.KV) PAssert(org.apache.beam.sdk.testing.PAssert) Create(org.apache.beam.sdk.transforms.Create) TypeDescriptors(org.apache.beam.sdk.values.TypeDescriptors) RunWith(org.junit.runner.RunWith) Test(org.junit.Test) Pipeline(org.apache.beam.sdk.Pipeline) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) CountByKey(org.apache.beam.sdk.extensions.euphoria.core.client.operator.CountByKey) KV(org.apache.beam.sdk.values.KV) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Aggregations

PCollection (org.apache.beam.sdk.values.PCollection)199 Test (org.junit.Test)133 KV (org.apache.beam.sdk.values.KV)62 TestPipeline (org.apache.beam.sdk.testing.TestPipeline)61 Map (java.util.Map)59 List (java.util.List)58 Rule (org.junit.Rule)57 RunWith (org.junit.runner.RunWith)54 PAssert (org.apache.beam.sdk.testing.PAssert)52 Instant (org.joda.time.Instant)46 Duration (org.joda.time.Duration)45 JUnit4 (org.junit.runners.JUnit4)45 ParDo (org.apache.beam.sdk.transforms.ParDo)44 TupleTag (org.apache.beam.sdk.values.TupleTag)42 Pipeline (org.apache.beam.sdk.Pipeline)41 Create (org.apache.beam.sdk.transforms.Create)41 ArrayList (java.util.ArrayList)40 Serializable (java.io.Serializable)39 PTransform (org.apache.beam.sdk.transforms.PTransform)37 Row (org.apache.beam.sdk.values.Row)37