use of org.apache.beam.runners.apex.ApexRunnerResult in project beam by apache.
the class WordCountTest method testWindowedWordCount.
/**
 * Runs a streaming word count with 10-second fixed windows on the Apex runner and verifies that
 * counts for the words "foo" and "bar" are recorded in {@code CollectResultsFn.RESULTS}.
 *
 * <p>The pipeline is started with {@code p.run()} and polled for up to 30 seconds. The pipeline
 * is always cancelled and the static {@code RESULTS} holder is always cleared, even when an
 * assertion fails, so that a failure here cannot leak a running pipeline or stale results into
 * other tests.
 *
 * @throws Exception if running, waiting on, or cancelling the pipeline fails
 */
@Test
public void testWindowedWordCount() throws Exception {
  String[] args = new String[] {"--runner=" + ApexRunner.class.getName()};
  ApexPipelineOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(ApexPipelineOptions.class);
  options.setApplicationName("StreamingWordCount");
  Pipeline p = Pipeline.create(options);

  PCollection<KV<String, Long>> wordCounts =
      p.apply(Read.from(new UnboundedTextSource()))
          .apply(ParDo.of(new ExtractWordsFn()))
          .apply(Window.<String>into(FixedWindows.of(Duration.standardSeconds(10))))
          .apply(Count.<String>perElement());
  wordCounts.apply(ParDo.of(new CollectResultsFn()));

  ApexRunnerResult result = (ApexRunnerResult) p.run();
  try {
    try {
      // The translated DAG must contain an operator for the unbounded read.
      Assert.assertNotNull(result.getApexDAG().getOperatorMeta("Read(UnboundedTextSource)"));

      // Poll until both words have been seen or the 30 s deadline passes.
      long timeout = System.currentTimeMillis() + 30000;
      while (System.currentTimeMillis() < timeout) {
        if (CollectResultsFn.RESULTS.containsKey("foo")
            && CollectResultsFn.RESULTS.containsKey("bar")) {
          break;
        }
        result.waitUntilFinish(Duration.millis(1000));
      }
    } finally {
      // Stop the pipeline regardless of how the wait ended.
      result.cancel();
    }
    Assert.assertTrue(
        CollectResultsFn.RESULTS.containsKey("foo")
            && CollectResultsFn.RESULTS.containsKey("bar"));
  } finally {
    // RESULTS is static; reset it even on failure so later tests start clean.
    CollectResultsFn.RESULTS.clear();
  }
}
use of org.apache.beam.runners.apex.ApexRunnerResult in project beam by apache.
the class FlattenPCollectionTranslatorTest method test.
/**
 * Flattens five single-element string collections on the Apex runner and verifies that every
 * element reaches {@code EmbeddedCollector.RESULTS}.
 *
 * <p>The pipeline is polled every 500 ms for up to 30 seconds until the expected number of
 * results has arrived. The pipeline is cancelled once waiting ends (also when waiting is
 * interrupted), so the test does not leave it running.
 *
 * @throws Exception if running or cancelling the pipeline fails, or the wait is interrupted
 */
@Test
public void test() throws Exception {
  ApexPipelineOptions options = PipelineOptionsFactory.as(ApexPipelineOptions.class);
  options.setApplicationName("FlattenPCollection");
  options.setRunner(ApexRunner.class);
  Pipeline p = Pipeline.create(options);

  String[][] collections = {{"1"}, {"2"}, {"3"}, {"4"}, {"5"}};

  // Build one PCollection per input array and accumulate the union as the expected output.
  Set<String> expected = Sets.newHashSet();
  List<PCollection<String>> pcList = new ArrayList<PCollection<String>>();
  for (String[] collection : collections) {
    pcList.add(
        p.apply(Create.of(ImmutableList.copyOf(collection)).withCoder(StringUtf8Coder.of())));
    expected.addAll(Arrays.asList(collection));
  }

  PCollection<String> actual = PCollectionList.of(pcList).apply(Flatten.<String>pCollections());
  actual.apply(ParDo.of(new EmbeddedCollector()));

  ApexRunnerResult result = (ApexRunnerResult) p.run();
  try {
    // TODO: verify translation
    result.getApexDAG();
    long timeout = System.currentTimeMillis() + 30000;
    while (System.currentTimeMillis() < timeout
        && EmbeddedCollector.RESULTS.size() < expected.size()) {
      LOG.info("Waiting for expected results.");
      Thread.sleep(500);
    }
  } finally {
    // Stop the pipeline so it does not keep running after the test.
    result.cancel();
  }

  Assert.assertEquals("number results", expected.size(), EmbeddedCollector.RESULTS.size());
  Assert.assertEquals(expected, Sets.newHashSet(EmbeddedCollector.RESULTS));
}
use of org.apache.beam.runners.apex.ApexRunnerResult in project beam by apache.
the class ParDoTranslatorTest method testMultiOutputParDoWithSideInputs.
/**
 * Runs a multi-output ParDo that is given three singleton side inputs (one of which is not
 * passed to the DoFn) on the Apex runner, and checks the strings collected from the main output.
 *
 * <p>Polls {@code EmbeddedCollector.RESULTS} until it contains every expected string or
 * {@code TIMEOUT_MILLIS} elapses, then cancels the pipeline and compares the results.
 *
 * @throws Exception if running or cancelling the pipeline fails, or the wait is interrupted
 */
@Test
public void testMultiOutputParDoWithSideInputs() throws Exception {
  ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class);
  options.setRunner(ApexRunner.class); // non-blocking run
  Pipeline pipeline = Pipeline.create(options);

  List<Integer> mainInput = Arrays.asList(3, -42, 666);
  final TupleTag<String> mainOutputTag = new TupleTag<>("main");
  final TupleTag<Void> additionalOutputTag = new TupleTag<>("output");

  PCollectionView<Integer> sideInput1 =
      pipeline
          .apply("CreateSideInput1", Create.of(11))
          .apply("ViewSideInput1", View.<Integer>asSingleton());
  PCollectionView<Integer> sideInputUnread =
      pipeline
          .apply("CreateSideInputUnread", Create.of(-3333))
          .apply("ViewSideInputUnread", View.<Integer>asSingleton());
  PCollectionView<Integer> sideInput2 =
      pipeline
          .apply("CreateSideInput2", Create.of(222))
          .apply("ViewSideInput2", View.<Integer>asSingleton());

  // The DoFn is handed only sideInput1 and sideInput2; sideInputUnread is attached to the
  // transform below but not passed to the DoFn.
  TestMultiOutputWithSideInputsFn multiOutputFn =
      new TestMultiOutputWithSideInputsFn(
          Arrays.asList(sideInput1, sideInput2), Arrays.<TupleTag<String>>asList());

  PCollectionTuple taggedOutputs =
      pipeline
          .apply(Create.of(mainInput))
          .apply(
              ParDo.of(multiOutputFn)
                  .withSideInputs(sideInput1)
                  .withSideInputs(sideInputUnread)
                  .withSideInputs(sideInput2)
                  .withOutputTags(mainOutputTag, TupleTagList.of(additionalOutputTag)));

  taggedOutputs.get(mainOutputTag).apply(ParDo.of(new EmbeddedCollector()));
  taggedOutputs.get(additionalOutputTag).setCoder(VoidCoder.of());

  ApexRunnerResult result = (ApexRunnerResult) pipeline.run();

  HashSet<String> expected =
      Sets.newHashSet(
          "processing: 3: [11, 222]", "processing: -42: [11, 222]", "processing: 666: [11, 222]");

  // Keep polling while there is time left and some expected string is still missing.
  long deadline = System.currentTimeMillis() + TIMEOUT_MILLIS;
  while (System.currentTimeMillis() < deadline
      && !EmbeddedCollector.RESULTS.containsAll(expected)) {
    LOG.info("Waiting for expected results.");
    Thread.sleep(SLEEP_MILLIS);
  }

  result.cancel();
  Assert.assertEquals(Sets.newHashSet(expected), EmbeddedCollector.RESULTS);
}
use of org.apache.beam.runners.apex.ApexRunnerResult in project beam by apache.
the class ParDoTranslatorTest method test.
/**
 * Translates and runs a simple {@code Create -> ParDo(Add) -> ParDo(EmbeddedCollector)} pipeline
 * on the Apex runner.
 *
 * <p>Verifies that the translated DAG contains the operators named {@code "Create.Values"} and
 * {@code "ParDo(Add)/ParMultiDo(Add)"} with the expected operator classes, then polls
 * {@code EmbeddedCollector.RESULTS} until it contains the expected values or
 * {@code TIMEOUT_MILLIS} elapses. The pipeline is always cancelled before the final comparison
 * so that it does not keep running after the test.
 *
 * @throws Exception if running or cancelling the pipeline fails, or the wait is interrupted
 */
@Test
public void test() throws Exception {
  ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class);
  options.setApplicationName("ParDoBound");
  options.setRunner(ApexRunner.class);
  Pipeline p = Pipeline.create(options);

  List<Integer> collection = Lists.newArrayList(1, 2, 3, 4, 5);
  // Each input plus 5 -- presumably what Add(5) produces; see the Add DoFn.
  List<Integer> expected = Lists.newArrayList(6, 7, 8, 9, 10);
  p.apply(Create.of(collection).withCoder(SerializableCoder.of(Integer.class)))
      .apply(ParDo.of(new Add(5)))
      .apply(ParDo.of(new EmbeddedCollector()));

  ApexRunnerResult result = (ApexRunnerResult) p.run();
  try {
    DAG dag = result.getApexDAG();

    // JUnit's assertEquals takes (expected, actual); keep that order so failure messages read
    // correctly.
    DAG.OperatorMeta om = dag.getOperatorMeta("Create.Values");
    Assert.assertNotNull(om);
    Assert.assertEquals(ApexReadUnboundedInputOperator.class, om.getOperator().getClass());

    om = dag.getOperatorMeta("ParDo(Add)/ParMultiDo(Add)");
    Assert.assertNotNull(om);
    Assert.assertEquals(ApexParDoOperator.class, om.getOperator().getClass());

    long timeout = System.currentTimeMillis() + TIMEOUT_MILLIS;
    while (System.currentTimeMillis() < timeout) {
      if (EmbeddedCollector.RESULTS.containsAll(expected)) {
        break;
      }
      LOG.info("Waiting for expected results.");
      Thread.sleep(SLEEP_MILLIS);
    }
  } finally {
    // Stop the pipeline regardless of how the checks above ended.
    result.cancel();
  }
  Assert.assertEquals(Sets.newHashSet(expected), EmbeddedCollector.RESULTS);
}
use of org.apache.beam.runners.apex.ApexRunnerResult in project beam by apache.
the class ReadUnboundTranslatorTest method testReadBounded.
/**
 * Runs {@code GenerateSequence.from(0).to(10)} on the Apex runner and verifies that the values
 * 0 through 9 reach {@code EmbeddedCollector.RESULTS}.
 *
 * <p>Also checks that the translated DAG contains an operator named
 * {@code "GenerateSequence/Read(BoundedCountingSource)"} whose class is
 * {@code ApexReadUnboundedInputOperator}. Results are polled once per second for up to 30
 * seconds. The pipeline is always cancelled before the final comparison so that it does not keep
 * running after the test.
 *
 * @throws Exception if running or cancelling the pipeline fails, or the wait is interrupted
 */
@Test
public void testReadBounded() throws Exception {
  ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class);
  // RESULTS is static; start from an empty collection.
  EmbeddedCollector.RESULTS.clear();
  options.setApplicationName("ReadBounded");
  options.setRunner(ApexRunner.class);
  Pipeline p = Pipeline.create(options);

  Set<Long> expected = ContiguousSet.create(Range.closedOpen(0L, 10L), DiscreteDomain.longs());
  p.apply(GenerateSequence.from(0).to(10)).apply(ParDo.of(new EmbeddedCollector()));

  ApexRunnerResult result = (ApexRunnerResult) p.run();
  try {
    DAG dag = result.getApexDAG();
    String operatorName = "GenerateSequence/Read(BoundedCountingSource)";
    DAG.OperatorMeta om = dag.getOperatorMeta(operatorName);
    Assert.assertNotNull(om);
    // JUnit's assertEquals takes (expected, actual); keep that order so failure messages read
    // correctly.
    Assert.assertEquals(ApexReadUnboundedInputOperator.class, om.getOperator().getClass());

    long timeout = System.currentTimeMillis() + 30000;
    while (System.currentTimeMillis() < timeout) {
      if (EmbeddedCollector.RESULTS.containsAll(expected)) {
        break;
      }
      LOG.info("Waiting for expected results.");
      Thread.sleep(1000);
    }
  } finally {
    // Stop the pipeline regardless of how the checks above ended.
    result.cancel();
  }
  Assert.assertEquals(Sets.newHashSet(expected), EmbeddedCollector.RESULTS);
}
Aggregations