use of org.apache.beam.sdk.values.PCollection in project beam by apache.
the class PAssertTest method testPAssertThatListSatisfiesMultipleMatchersFalse.
/**
 * Verifies that a false assertion made through several list matchers is reported as an
 * assertion failure whose stack trace contains the matcher's failure message.
 */
@Test
@Category({ ValidatesRunner.class, UsesFailureMessage.class })
public void testPAssertThatListSatisfiesMultipleMatchersFalse() {
  String failureMessage = "Elements should be less than 0";

  PCollection<Integer> lowValues = pipeline.apply("FirstCreate", Create.of(1, 2, 3));
  PCollection<Integer> highValues = pipeline.apply("SecondCreate", Create.of(4, 5, 6));
  PCollectionList<Integer> bothCollections = PCollectionList.of(lowValues).and(highValues);

  // Two identical matchers are supplied. Every created element is positive, so the
  // "less than 0" check inside them cannot hold.
  PAssert.thatList(bothCollections)
      .satisfies(
          ImmutableList.of(
              values -> {
                values.forEach(value -> assertTrue(failureMessage, value < 0));
                return null;
              },
              values -> {
                values.forEach(value -> assertTrue(failureMessage, value < 0));
                return null;
              }));

  // The failure message must be visible somewhere in the reported stack trace.
  String failureTrace = Throwables.getStackTraceAsString(runExpectingAssertionFailure(pipeline));
  assertThat(failureTrace, containsString(failureMessage));
}
use of org.apache.beam.sdk.values.PCollection in project beam by apache.
the class TestStreamTest method testEarlyPanesOfWindow.
/**
 * Feeds three timestamped elements through a 30 minute fixed window with early firings and
 * accumulating panes, then checks the contents of the early panes and of the on-time pane.
 */
@Test
@Category({ ValidatesRunner.class, UsesTestStreamWithProcessingTime.class })
public void testEarlyPanesOfWindow() {
  // Processing time moves forward by 6 minutes after each element, which is longer than the
  // 5 minute early-firing delay configured on the trigger below.
  Duration processingTimeStep = Duration.standardMinutes(6);
  Duration earlyFiringDelay = Duration.standardMinutes(5);
  Duration windowSize = Duration.standardMinutes(30);

  TestStream<Long> source =
      TestStream.create(VarLongCoder.of())
          .addElements(TimestampedValue.of(1L, new Instant(1000L)))
          // Fire early pane
          .advanceProcessingTime(processingTimeStep)
          .addElements(TimestampedValue.of(2L, new Instant(2000L)))
          // Fire early pane
          .advanceProcessingTime(processingTimeStep)
          .addElements(TimestampedValue.of(3L, new Instant(3000L)))
          // Fire early pane
          .advanceProcessingTime(processingTimeStep)
          .advanceWatermarkToInfinity();

  PCollection<KV<String, Long>> sum =
      p.apply(source)
          .apply(
              Window.<Long>into(FixedWindows.of(windowSize))
                  .triggering(
                      AfterWatermark.pastEndOfWindow()
                          .withEarlyFirings(
                              AfterProcessingTime.pastFirstElementInPane()
                                  .plusDelayOf(earlyFiringDelay)))
                  .accumulatingFiredPanes()
                  .withAllowedLateness(Duration.ZERO))
          // Put every value under the same key so they are summed together.
          .apply(
              MapElements.into(
                      TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.longs()))
                  .via(value -> KV.of("key", value)))
          .apply(Sum.longsPerKey());

  Instant windowStart = new Instant(0L);
  IntervalWindow window = new IntervalWindow(windowStart, windowStart.plus(windowSize));

  PAssert.that(sum)
      .inEarlyPane(window)
      .satisfies(
          earlyPanes -> {
            long paneCount = StreamSupport.stream(earlyPanes.spliterator(), false).count();
            assertThat(paneCount, is(3L));
            return null;
          })
      // Running sums of 1, 1 + 2 and 1 + 2 + 3.
      .containsInAnyOrder(KV.of("key", 1L), KV.of("key", 3L), KV.of("key", 6L))
      .inOnTimePane(window)
      .satisfies(
          onTimePane -> {
            long paneCount = StreamSupport.stream(onTimePane.spliterator(), false).count();
            assertThat(paneCount, is(1L));
            return null;
          })
      .containsInAnyOrder(KV.of("key", 6L));

  p.run().waitUntilFinish();
}
use of org.apache.beam.sdk.values.PCollection in project beam by apache.
the class DocumentationExamplesTest method wordCountExample.
/**
 * Word-count example: tokenizes input lines, counts occurrences of each word, formats the
 * counts as text and writes them out. Ignored so that no output files are produced.
 */
@Ignore("We do not want to actually write output files from this test.")
@Test
public void wordCountExample() {
  Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.create());

  // Register Kryo as the fallback coder provider.
  KryoCoderProvider.of().registerTo(pipeline);

  // Input data, read through a Beam transform.
  PCollection<String> lines =
      pipeline
          .apply(Create.of(textLineByLine))
          .setTypeDescriptor(TypeDescriptor.of(String.class));

  // FlatMap handles one input element at a time and may emit zero, one, or many outputs.
  // Here every line is split on whitespace and each resulting word is emitted.
  PCollection<String> words =
      FlatMap.named("TOKENIZER")
          .of(lines)
          .using(
              (String line, Collector<String> collector) -> {
                Splitter.onPattern("\\s+").split(line).forEach(collector::collect);
              })
          .output();

  // Count the words: the word itself is the key, and the operator emits the number of
  // occurrences of each key in the 'words' dataset.
  PCollection<KV<String, Long>> wordCounts =
      CountByKey.named("COUNT")
          .of(words)
          .keyBy(word -> word)
          .output();

  // Render each (word, count) pair as "word: count".
  PCollection<String> formatted =
      MapElements.named("FORMAT")
          .of(wordCounts)
          .using(entry -> entry.getKey() + ": " + entry.getValue())
          .output();

  // Back to a plain Beam transform: store the formatted counts in a text file.
  formatted.apply(TextIO.write().to("counted_words"));

  pipeline.run();
}
use of org.apache.beam.sdk.values.PCollection in project beam by apache.
the class CountByKeyTest method testWindow_applyIf.
/**
 * Checks that windowing settings supplied through {@code applyIf(true, ...)} are recorded on
 * the resulting CountByKey operator.
 */
@Test
public void testWindow_applyIf() {
  final PCollection<String> input = TestUtils.createMockDataset(TypeDescriptors.strings());
  final FixedWindows hourlyWindows = FixedWindows.of(org.joda.time.Duration.standardHours(1));
  final DefaultTrigger defaultTrigger = DefaultTrigger.of();

  // The condition is true, so the windowing builder steps inside applyIf are applied.
  final PCollection<KV<String, Long>> counted =
      CountByKey.named("CountByKey1")
          .of(input)
          .keyBy(element -> element)
          .applyIf(
              true,
              builder ->
                  builder
                      .windowBy(hourlyWindows)
                      .triggeredBy(defaultTrigger)
                      .discardingFiredPanes())
          .output();

  final CountByKey operator = (CountByKey) TestUtils.getProducer(counted);
  assertTrue(operator.getWindow().isPresent());

  // Inspect the stored window and compare it with what was configured above.
  final WindowDesc<?> windowDesc = WindowDesc.of((Window<?>) operator.getWindow().get());
  assertEquals(hourlyWindows, windowDesc.getWindowFn());
  assertEquals(defaultTrigger, windowDesc.getTrigger());
  assertEquals(AccumulationMode.DISCARDING_FIRED_PANES, windowDesc.getAccumulationMode());
}
use of org.apache.beam.sdk.values.PCollection in project beam by apache.
the class TimestampExtractTransformTest method testTransform.
/**
 * Applies a CountByKey keyed by {@code KV::getValue} inside TimestampExtractTransform and
 * expects each of the three distinct inputs to be counted exactly once.
 */
@SuppressWarnings("unchecked")
@Test(timeout = 10000)
public void testTransform() {
  Pipeline pipeline = Pipeline.create();
  PCollection<Integer> numbers = pipeline.apply(Create.of(1, 2, 3));

  PCollection<KV<Integer, Long>> counts =
      numbers.apply(
          TimestampExtractTransform.of(
              pairs ->
                  CountByKey.of(pairs)
                      .keyBy(KV::getValue, TypeDescriptors.integers())
                      .output()));

  PAssert.that(counts).containsInAnyOrder(KV.of(1, 1L), KV.of(2, 1L), KV.of(3, 1L));
  pipeline.run().waitUntilFinish();
}
Aggregations