Use of org.apache.beam.sdk.Pipeline in project beam by apache.
The class ParDoTranslatorTest, method testAssertionFailure:
@Test
public void testAssertionFailure() throws Exception {
  ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class);
  options.setRunner(TestApexRunner.class);
  Pipeline pipeline = Pipeline.create(options);
  PCollection<Integer> pcollection = pipeline.apply(Create.of(1, 2, 3, 4));
  PAssert.that(pcollection).containsInAnyOrder(2, 1, 4, 3, 7);
  Throwable exc = runExpectingAssertionFailure(pipeline);
  // A loose pattern, but should get the job done.
  Pattern expectedPattern = Pattern.compile(
      "Expected: iterable over \\[((<4>|<7>|<3>|<2>|<1>)(, )?){5}\\] in any order");
  assertTrue(
      "Expected error message from PAssert with substring matching " + expectedPattern
          + " but the message was \"" + exc.getMessage() + "\"",
      expectedPattern.matcher(exc.getMessage()).find());
}
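The helper runExpectingAssertionFailure is called above but not shown. A minimal sketch of what such a helper might look like, assuming the test runner surfaces the PAssert failure as an exception thrown from run() (the body below is an assumption, not the actual ParDoTranslatorTest implementation, which may unwrap a runner-specific exception type):

// Hedged sketch: run the pipeline and hand back the failure it is expected to produce.
private static Throwable runExpectingAssertionFailure(Pipeline pipeline) {
  try {
    pipeline.run();
  } catch (Throwable exc) {
    // Assumption: TestApexRunner rethrows the PAssert failure from run().
    return exc;
  }
  throw new AssertionError("pipeline should have failed the PAssert");
}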
Use of org.apache.beam.sdk.Pipeline in project beam by apache.
The class ParDoTranslatorTest, method testContainsInAnyOrder:
@Test
public void testContainsInAnyOrder() throws Exception {
  ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class);
  options.setRunner(TestApexRunner.class);
  Pipeline pipeline = Pipeline.create(options);
  PCollection<Integer> pcollection = pipeline.apply(Create.of(1, 2, 3, 4));
  PAssert.that(pcollection).containsInAnyOrder(2, 1, 4, 3);
  // TODO: terminate faster based on processed assertion vs. auto-shutdown
  pipeline.run();
}
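The same assertion reads more idiomatically against the direct runner using org.apache.beam.sdk.testing.TestPipeline; a sketch for comparison, assuming the direct runner is on the classpath (this variant does not appear in ParDoTranslatorTest):

// Sketch: the equivalent PAssert check via TestPipeline on the direct runner.
@Rule public final transient TestPipeline p = TestPipeline.create();

@Test
public void testContainsInAnyOrderDirect() {
  PCollection<Integer> pcollection = p.apply(Create.of(1, 2, 3, 4));
  PAssert.that(pcollection).containsInAnyOrder(2, 1, 4, 3);
  p.run().waitUntilFinish();
}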
Use of org.apache.beam.sdk.Pipeline in project beam by apache.
The class ParDoTranslatorTest, method testMultiOutputParDoWithSideInputs:
@Test
public void testMultiOutputParDoWithSideInputs() throws Exception {
  ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class);
  // non-blocking run
  options.setRunner(ApexRunner.class);
  Pipeline pipeline = Pipeline.create(options);
  List<Integer> inputs = Arrays.asList(3, -42, 666);
  final TupleTag<String> mainOutputTag = new TupleTag<>("main");
  final TupleTag<Void> additionalOutputTag = new TupleTag<>("output");
  PCollectionView<Integer> sideInput1 = pipeline
      .apply("CreateSideInput1", Create.of(11))
      .apply("ViewSideInput1", View.<Integer>asSingleton());
  PCollectionView<Integer> sideInputUnread = pipeline
      .apply("CreateSideInputUnread", Create.of(-3333))
      .apply("ViewSideInputUnread", View.<Integer>asSingleton());
  PCollectionView<Integer> sideInput2 = pipeline
      .apply("CreateSideInput2", Create.of(222))
      .apply("ViewSideInput2", View.<Integer>asSingleton());
  PCollectionTuple outputs = pipeline
      .apply(Create.of(inputs))
      .apply(ParDo
          .of(new TestMultiOutputWithSideInputsFn(
              Arrays.asList(sideInput1, sideInput2), Arrays.<TupleTag<String>>asList()))
          .withSideInputs(sideInput1)
          .withSideInputs(sideInputUnread)
          .withSideInputs(sideInput2)
          .withOutputTags(mainOutputTag, TupleTagList.of(additionalOutputTag)));
  outputs.get(mainOutputTag).apply(ParDo.of(new EmbeddedCollector()));
  outputs.get(additionalOutputTag).setCoder(VoidCoder.of());
  ApexRunnerResult result = (ApexRunnerResult) pipeline.run();
  HashSet<String> expected = Sets.newHashSet(
      "processing: 3: [11, 222]",
      "processing: -42: [11, 222]",
      "processing: 666: [11, 222]");
  long timeout = System.currentTimeMillis() + TIMEOUT_MILLIS;
  while (System.currentTimeMillis() < timeout) {
    if (EmbeddedCollector.RESULTS.containsAll(expected)) {
      break;
    }
    LOG.info("Waiting for expected results.");
    Thread.sleep(SLEEP_MILLIS);
  }
  result.cancel();
  Assert.assertEquals(Sets.newHashSet(expected), EmbeddedCollector.RESULTS);
}
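TestMultiOutputWithSideInputsFn and EmbeddedCollector are defined elsewhere in ParDoTranslatorTest. Judging from the polling loop above, EmbeddedCollector accumulates elements into the static collection EmbeddedCollector.RESULTS; a minimal sketch under that assumption (everything except the RESULTS name is guessed):

// Sketch: a DoFn that records every element in a static set, so the test can
// observe results from outside the embedded Apex cluster.
private static class EmbeddedCollector extends DoFn<Object, Void> {
  static final Set<Object> RESULTS =
      Collections.newSetFromMap(new ConcurrentHashMap<Object, Boolean>());

  @ProcessElement
  public void processElement(ProcessContext c) {
    RESULTS.add(c.element());
  }
}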
Use of org.apache.beam.sdk.Pipeline in project beam by apache.
The class GroupByNullKeyTest, method testProgram:
@Override
protected void testProgram() throws Exception {
  Pipeline p = FlinkTestPipeline.createForStreaming();
  PCollection<String> output = p
      .apply(Create.of(Arrays.asList(
          KV.<Integer, String>of(0, "user1"),
          KV.<Integer, String>of(1, "user1"),
          KV.<Integer, String>of(2, "user1"),
          KV.<Integer, String>of(10, "user2"),
          KV.<Integer, String>of(1, "user2"),
          KV.<Integer, String>of(15000, "user2"),
          KV.<Integer, String>of(12000, "user2"),
          KV.<Integer, String>of(25000, "user3"))))
      .apply(ParDo.of(new ExtractUserAndTimestamp()))
      .apply(Window.<String>into(FixedWindows.of(Duration.standardHours(1)))
          .triggering(AfterWatermark.pastEndOfWindow())
          .withAllowedLateness(Duration.ZERO)
          .discardingFiredPanes())
      .apply(ParDo.of(new DoFn<String, KV<Void, String>>() {
        @ProcessElement
        public void processElement(ProcessContext c) throws Exception {
          String elem = c.element();
          c.output(KV.<Void, String>of(null, elem));
        }
      }))
      .apply(GroupByKey.<Void, String>create())
      .apply(ParDo.of(new DoFn<KV<Void, Iterable<String>>, String>() {
        @ProcessElement
        public void processElement(ProcessContext c) throws Exception {
          KV<Void, Iterable<String>> elem = c.element();
          StringBuilder str = new StringBuilder();
          str.append("k: " + elem.getKey() + " v:");
          for (String v : elem.getValue()) {
            str.append(" " + v);
          }
          c.output(str.toString());
        }
      }));
  output.apply(TextIO.write().to(resultPath));
  p.run();
}
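ExtractUserAndTimestamp is referenced but not shown. Given its input type KV<Integer, String> and the windowing that follows, a plausible sketch, assuming the key is the element timestamp in milliseconds and the value is the user name:

// Sketch: emit each user name with the key interpreted as its event timestamp.
private static class ExtractUserAndTimestamp extends DoFn<KV<Integer, String>, String> {
  @ProcessElement
  public void processElement(ProcessContext c) {
    KV<Integer, String> record = c.element();
    String userName = record.getValue();
    if (userName != null) {
      // Assumption: the Integer key is milliseconds since the epoch.
      c.outputWithTimestamp(userName, new Instant(record.getKey().longValue()));
    }
  }
}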
Use of org.apache.beam.sdk.Pipeline in project beam by apache.
The class TopWikipediaSessionsITCase, method testProgram:
@Override
protected void testProgram() throws Exception {
  Pipeline p = FlinkTestPipeline.createForStreaming();
  Long now = (System.currentTimeMillis() + 10000) / 1000;
  PCollection<KV<String, Long>> output = p
      .apply(Create.of(Arrays.asList(
          new TableRow().set("timestamp", now).set("contributor_username", "user1"),
          new TableRow().set("timestamp", now + 10).set("contributor_username", "user3"),
          new TableRow().set("timestamp", now).set("contributor_username", "user2"),
          new TableRow().set("timestamp", now).set("contributor_username", "user1"),
          new TableRow().set("timestamp", now + 2).set("contributor_username", "user1"),
          new TableRow().set("timestamp", now).set("contributor_username", "user2"),
          new TableRow().set("timestamp", now + 1).set("contributor_username", "user2"),
          new TableRow().set("timestamp", now + 5).set("contributor_username", "user2"),
          new TableRow().set("timestamp", now + 7).set("contributor_username", "user2"),
          new TableRow().set("timestamp", now + 8).set("contributor_username", "user2"),
          new TableRow().set("timestamp", now + 200).set("contributor_username", "user2"),
          new TableRow().set("timestamp", now + 230).set("contributor_username", "user1"),
          new TableRow().set("timestamp", now + 230).set("contributor_username", "user2"),
          new TableRow().set("timestamp", now + 240).set("contributor_username", "user2"),
          new TableRow().set("timestamp", now + 245).set("contributor_username", "user3"),
          new TableRow().set("timestamp", now + 235).set("contributor_username", "user3"),
          new TableRow().set("timestamp", now + 236).set("contributor_username", "user3"),
          new TableRow().set("timestamp", now + 237).set("contributor_username", "user3"),
          new TableRow().set("timestamp", now + 238).set("contributor_username", "user3"),
          new TableRow().set("timestamp", now + 239).set("contributor_username", "user3"),
          new TableRow().set("timestamp", now + 240).set("contributor_username", "user3"),
          new TableRow().set("timestamp", now + 241).set("contributor_username", "user2"),
          new TableRow().set("timestamp", now).set("contributor_username", "user3"))))
      .apply(ParDo.of(new DoFn<TableRow, String>() {
        @ProcessElement
        public void processElement(ProcessContext c) throws Exception {
          TableRow row = c.element();
          // May round-trip through the TableRow coder as Integer or Long, so widen via Number.
          long timestamp = ((Number) row.get("timestamp")).longValue();
          String userName = (String) row.get("contributor_username");
          if (userName != null) {
            // Sets the timestamp field to be used in windowing.
            c.outputWithTimestamp(userName, new Instant(timestamp * 1000L));
          }
        }
      }))
      .apply(Window.<String>into(Sessions.withGapDuration(Duration.standardMinutes(1))))
      .apply(Count.<String>perElement());
  PCollection<String> format = output.apply(ParDo.of(new DoFn<KV<String, Long>, String>() {
    @ProcessElement
    public void processElement(ProcessContext c) throws Exception {
      KV<String, Long> el = c.element();
      c.output("user: " + el.getKey() + " value:" + el.getValue());
    }
  }));
  format.apply(TextIO.write().to(resultPath));
  p.run();
}
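Both Flink snippets get their pipeline from FlinkTestPipeline.createForStreaming(). A sketch of what such a factory plausibly does, assuming it only wires up FlinkPipelineOptions in streaming mode with the Flink test runner (the exact wiring inside FlinkTestPipeline is an assumption):

// Sketch: build a Pipeline configured for streaming execution on the
// embedded Flink test runner.
public static Pipeline createForStreaming() {
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  options.setRunner(TestFlinkRunner.class);
  options.setStreaming(true);
  return Pipeline.create(options);
}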