use of org.apache.beam.runners.apex.ApexPipelineOptions in project beam by apache.
the class WordCountTest method testWindowedWordCount.
/**
 * Runs a windowed word count over {@code UnboundedTextSource} on the Apex runner and verifies
 * that counts for the words "foo" and "bar" reach {@code CollectResultsFn.RESULTS}.
 *
 * <p>The test polls for the expected keys for at most 30 seconds. The pipeline is always
 * cancelled and the static result holder is always cleared, even when an assertion fails or
 * waiting throws, so that no state leaks out of this test.
 *
 * @throws Exception if running, waiting on, or cancelling the pipeline fails
 */
@Test
public void testWindowedWordCount() throws Exception {
  String[] args = new String[] { "--runner=" + ApexRunner.class.getName() };
  ApexPipelineOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(ApexPipelineOptions.class);
  options.setApplicationName("StreamingWordCount");
  Pipeline p = Pipeline.create(options);

  PCollection<KV<String, Long>> wordCounts =
      p.apply(Read.from(new UnboundedTextSource()))
          .apply(ParDo.of(new ExtractWordsFn()))
          .apply(Window.<String>into(FixedWindows.of(Duration.standardSeconds(10))))
          .apply(Count.<String>perElement());
  wordCounts.apply(ParDo.of(new CollectResultsFn()));

  ApexRunnerResult result = (ApexRunnerResult) p.run();
  try {
    try {
      Assert.assertNotNull(result.getApexDAG().getOperatorMeta("Read(UnboundedTextSource)"));
      long timeout = System.currentTimeMillis() + 30000;
      while (System.currentTimeMillis() < timeout) {
        if (CollectResultsFn.RESULTS.containsKey("foo")
            && CollectResultsFn.RESULTS.containsKey("bar")) {
          break;
        }
        result.waitUntilFinish(Duration.millis(1000));
      }
    } finally {
      // Stop the pipeline even if the DAG check or the wait above threw.
      result.cancel();
    }
    Assert.assertTrue(
        CollectResultsFn.RESULTS.containsKey("foo")
            && CollectResultsFn.RESULTS.containsKey("bar"));
  } finally {
    // RESULTS is static; reset it regardless of the outcome so later runs start clean.
    CollectResultsFn.RESULTS.clear();
  }
}
use of org.apache.beam.runners.apex.ApexPipelineOptions in project beam by apache.
the class FlattenPCollectionTranslatorTest method test.
/**
 * Flattens five single-element string collections on the Apex runner and verifies that
 * {@code EmbeddedCollector.RESULTS} ends up holding exactly the union of their elements.
 *
 * <p>The test polls for up to 30 seconds until the expected number of results has arrived.
 * The pipeline is cancelled once waiting ends (also when waiting throws), matching the other
 * Apex runner tests, so it does not keep running after the test.
 *
 * @throws Exception if running or cancelling the pipeline fails, or the wait is interrupted
 */
@Test
public void test() throws Exception {
  ApexPipelineOptions options = PipelineOptionsFactory.as(ApexPipelineOptions.class);
  options.setApplicationName("FlattenPCollection");
  options.setRunner(ApexRunner.class);
  Pipeline p = Pipeline.create(options);

  String[][] collections = { { "1" }, { "2" }, { "3" }, { "4" }, { "5" } };
  Set<String> expected = Sets.newHashSet();
  List<PCollection<String>> pcList = new ArrayList<PCollection<String>>();
  for (String[] collection : collections) {
    pcList.add(
        p.apply(Create.of(ImmutableList.copyOf(collection)).withCoder(StringUtf8Coder.of())));
    expected.addAll(Arrays.asList(collection));
  }

  PCollection<String> actual = PCollectionList.of(pcList).apply(Flatten.<String>pCollections());
  actual.apply(ParDo.of(new EmbeddedCollector()));

  ApexRunnerResult result = (ApexRunnerResult) p.run();
  try {
    // TODO: verify translation
    result.getApexDAG();
    long timeout = System.currentTimeMillis() + 30000;
    while (System.currentTimeMillis() < timeout
        && EmbeddedCollector.RESULTS.size() < expected.size()) {
      LOG.info("Waiting for expected results.");
      Thread.sleep(500);
    }
  } finally {
    // Previously the pipeline was never stopped; cancel it before checking the results.
    result.cancel();
  }

  Assert.assertEquals("number results", expected.size(), EmbeddedCollector.RESULTS.size());
  Assert.assertEquals(expected, Sets.newHashSet(EmbeddedCollector.RESULTS));
}
use of org.apache.beam.runners.apex.ApexPipelineOptions in project beam by apache.
the class ParDoTranslatorTest method testAssertionFailure.
/**
 * Checks that an unsatisfied {@code PAssert} is reported with the expected message.
 *
 * <p>The pipeline creates the elements 1..4 but asserts a five-element content that also
 * includes 7. The throwable returned by {@code runExpectingAssertionFailure} must carry a
 * message matching the "Expected: iterable over [...] in any order" pattern.
 */
@Test
public void testAssertionFailure() throws Exception {
  ApexPipelineOptions apexOptions =
      PipelineOptionsFactory.create().as(ApexPipelineOptions.class);
  apexOptions.setRunner(TestApexRunner.class);
  Pipeline pipeline = Pipeline.create(apexOptions);

  PCollection<Integer> numbers = pipeline.apply(Create.of(1, 2, 3, 4));
  PAssert.that(numbers).containsInAnyOrder(2, 1, 4, 3, 7);

  Throwable failure = runExpectingAssertionFailure(pipeline);
  String actualMessage = failure.getMessage();

  // A loose pattern, but should get the job done.
  Pattern expectedPattern =
      Pattern.compile("Expected: iterable over \\[((<4>|<7>|<3>|<2>|<1>)(, )?){5}\\] in any order");
  boolean messageMatches = expectedPattern.matcher(actualMessage).find();

  assertTrue(
      "Expected error message from PAssert with substring matching "
          + expectedPattern
          + " but the message was \""
          + actualMessage
          + "\"",
      messageMatches);
}
use of org.apache.beam.runners.apex.ApexPipelineOptions in project beam by apache.
the class ParDoTranslatorTest method testContainsInAnyOrder.
/**
 * Runs a pipeline on {@code TestApexRunner} whose {@code PAssert} expects the created
 * elements 1..4 in any order.
 */
@Test
public void testContainsInAnyOrder() throws Exception {
  ApexPipelineOptions apexOptions =
      PipelineOptionsFactory.create().as(ApexPipelineOptions.class);
  apexOptions.setRunner(TestApexRunner.class);

  Pipeline testPipeline = Pipeline.create(apexOptions);
  PCollection<Integer> numbers = testPipeline.apply(Create.of(1, 2, 3, 4));
  PAssert.that(numbers).containsInAnyOrder(2, 1, 4, 3);

  // TODO: terminate faster based on processed assertion vs. auto-shutdown
  testPipeline.run();
}
use of org.apache.beam.runners.apex.ApexPipelineOptions in project beam by apache.
the class ParDoTranslatorTest method testMultiOutputParDoWithSideInputs.
/**
 * Exercises a multi-output {@code ParDo} with several side inputs on the Apex runner.
 *
 * <p>Three singleton side-input views are attached to the {@code ParDo}; only two of them
 * ({@code sideInput1} and {@code sideInput2}) are handed to the
 * {@code TestMultiOutputWithSideInputsFn}. The main output is collected through
 * {@code EmbeddedCollector}, and the test polls until the expected "processing: ..." strings
 * are present or {@code TIMEOUT_MILLIS} elapses, then cancels the pipeline and compares the
 * collected results with the expected set.
 */
@Test
public void testMultiOutputParDoWithSideInputs() throws Exception {
  ApexPipelineOptions apexOptions =
      PipelineOptionsFactory.create().as(ApexPipelineOptions.class);
  // non-blocking run
  apexOptions.setRunner(ApexRunner.class);
  Pipeline pipeline = Pipeline.create(apexOptions);

  List<Integer> inputs = Arrays.asList(3, -42, 666);
  final TupleTag<String> mainOutputTag = new TupleTag<>("main");
  final TupleTag<Void> additionalOutputTag = new TupleTag<>("output");

  PCollectionView<Integer> sideInput1 =
      pipeline
          .apply("CreateSideInput1", Create.of(11))
          .apply("ViewSideInput1", View.<Integer>asSingleton());
  PCollectionView<Integer> sideInputUnread =
      pipeline
          .apply("CreateSideInputUnread", Create.of(-3333))
          .apply("ViewSideInputUnread", View.<Integer>asSingleton());
  PCollectionView<Integer> sideInput2 =
      pipeline
          .apply("CreateSideInput2", Create.of(222))
          .apply("ViewSideInput2", View.<Integer>asSingleton());

  PCollectionTuple outputs =
      pipeline
          .apply(Create.of(inputs))
          .apply(
              ParDo.of(
                      new TestMultiOutputWithSideInputsFn(
                          Arrays.asList(sideInput1, sideInput2),
                          Arrays.<TupleTag<String>>asList()))
                  .withSideInputs(sideInput1)
                  .withSideInputs(sideInputUnread)
                  .withSideInputs(sideInput2)
                  .withOutputTags(mainOutputTag, TupleTagList.of(additionalOutputTag)));

  outputs.get(mainOutputTag).apply(ParDo.of(new EmbeddedCollector()));
  outputs.get(additionalOutputTag).setCoder(VoidCoder.of());

  ApexRunnerResult runnerResult = (ApexRunnerResult) pipeline.run();

  HashSet<String> expected =
      Sets.newHashSet(
          "processing: 3: [11, 222]",
          "processing: -42: [11, 222]",
          "processing: 666: [11, 222]");

  // Poll until every expected string has been collected or the deadline passes.
  long deadline = System.currentTimeMillis() + TIMEOUT_MILLIS;
  while (System.currentTimeMillis() < deadline
      && !EmbeddedCollector.RESULTS.containsAll(expected)) {
    LOG.info("Waiting for expected results.");
    Thread.sleep(SLEEP_MILLIS);
  }

  runnerResult.cancel();
  Assert.assertEquals(Sets.newHashSet(expected), EmbeddedCollector.RESULTS);
}
Aggregations