Search in sources:

Example 1 with ApexRunnerResult

use of org.apache.beam.runners.apex.ApexRunnerResult in project beam by apache.

Found in the class WordCountTest, method testWindowedWordCount.

/**
 * Runs a windowed streaming word count on the Apex runner and verifies that both
 * "foo" and "bar" show up as keys in {@code CollectResultsFn.RESULTS}.
 *
 * <p>The test polls for up to 30 seconds, letting the pipeline make progress via
 * {@code waitUntilFinish} in one-second slices. The pipeline is cancelled and the shared
 * static results are cleared in {@code finally} blocks, so a failed assertion or an
 * unexpected exception neither leaves the pipeline running nor leaks stale results
 * into later tests.
 *
 * @throws Exception if building, running or cancelling the pipeline fails
 */
@Test
public void testWindowedWordCount() throws Exception {
    String[] args = new String[] { "--runner=" + ApexRunner.class.getName() };
    ApexPipelineOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(ApexPipelineOptions.class);
    options.setApplicationName("StreamingWordCount");
    Pipeline p = Pipeline.create(options);
    PCollection<KV<String, Long>> wordCounts = p.apply(Read.from(new UnboundedTextSource())).apply(ParDo.of(new ExtractWordsFn())).apply(Window.<String>into(FixedWindows.of(Duration.standardSeconds(10)))).apply(Count.<String>perElement());
    wordCounts.apply(ParDo.of(new CollectResultsFn()));
    try {
        ApexRunnerResult result = (ApexRunnerResult) p.run();
        try {
            // The translated DAG must contain the operator for the unbounded read.
            Assert.assertNotNull(result.getApexDAG().getOperatorMeta("Read(UnboundedTextSource)"));
            long timeout = System.currentTimeMillis() + 30000;
            while (System.currentTimeMillis() < timeout) {
                if (CollectResultsFn.RESULTS.containsKey("foo") && CollectResultsFn.RESULTS.containsKey("bar")) {
                    break;
                }
                result.waitUntilFinish(Duration.millis(1000));
            }
        } finally {
            // Always stop the pipeline, even when the DAG check or the wait loop throws.
            result.cancel();
        }
        Assert.assertTrue(CollectResultsFn.RESULTS.containsKey("foo") && CollectResultsFn.RESULTS.containsKey("bar"));
    } finally {
        // RESULTS is static state; reset it regardless of the test outcome.
        CollectResultsFn.RESULTS.clear();
    }
}
Also used : ApexRunner(org.apache.beam.runners.apex.ApexRunner) TestApexRunner(org.apache.beam.runners.apex.TestApexRunner) ApexRunnerResult(org.apache.beam.runners.apex.ApexRunnerResult) KV(org.apache.beam.sdk.values.KV) ApexPipelineOptions(org.apache.beam.runners.apex.ApexPipelineOptions) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 2 with ApexRunnerResult

use of org.apache.beam.runners.apex.ApexRunnerResult in project beam by apache.

Found in the class FlattenPCollectionTranslatorTest, method test.

/**
 * Flattens five single-element string collections on the Apex runner and verifies that
 * {@code EmbeddedCollector.RESULTS} ends up holding exactly the union of their elements.
 *
 * <p>The test polls every 500 ms for up to 30 seconds until the expected number of
 * results has been collected. The pipeline is cancelled in a {@code finally} block so
 * it does not keep running after the test, whether the wait completes or throws.
 *
 * @throws Exception if building, running or cancelling the pipeline fails
 */
@Test
public void test() throws Exception {
    ApexPipelineOptions options = PipelineOptionsFactory.as(ApexPipelineOptions.class);
    options.setApplicationName("FlattenPCollection");
    options.setRunner(ApexRunner.class);
    Pipeline p = Pipeline.create(options);
    String[][] collections = { { "1" }, { "2" }, { "3" }, { "4" }, { "5" } };
    Set<String> expected = Sets.newHashSet();
    List<PCollection<String>> pcList = new ArrayList<PCollection<String>>();
    for (String[] collection : collections) {
        pcList.add(p.apply(Create.of(ImmutableList.copyOf(collection)).withCoder(StringUtf8Coder.of())));
        expected.addAll(Arrays.asList(collection));
    }
    PCollection<String> actual = PCollectionList.of(pcList).apply(Flatten.<String>pCollections());
    actual.apply(ParDo.of(new EmbeddedCollector()));
    ApexRunnerResult result = (ApexRunnerResult) p.run();
    try {
        // TODO: verify translation
        result.getApexDAG();
        long timeout = System.currentTimeMillis() + 30000;
        while (System.currentTimeMillis() < timeout && EmbeddedCollector.RESULTS.size() < expected.size()) {
            LOG.info("Waiting for expected results.");
            Thread.sleep(500);
        }
    } finally {
        // Stop the pipeline once the results are in (or the wait gave up or failed).
        result.cancel();
    }
    Assert.assertEquals("number results", expected.size(), EmbeddedCollector.RESULTS.size());
    Assert.assertEquals(expected, Sets.newHashSet(EmbeddedCollector.RESULTS));
}
Also used : ArrayList(java.util.ArrayList) ApexRunnerResult(org.apache.beam.runners.apex.ApexRunnerResult) Pipeline(org.apache.beam.sdk.Pipeline) PCollection(org.apache.beam.sdk.values.PCollection) ApexPipelineOptions(org.apache.beam.runners.apex.ApexPipelineOptions) Test(org.junit.Test)

Example 3 with ApexRunnerResult

use of org.apache.beam.runners.apex.ApexRunnerResult in project beam by apache.

Found in the class ParDoTranslatorTest, method testMultiOutputParDoWithSideInputs.

/**
 * Exercises a multi-output {@code ParDo} that is given three singleton side inputs
 * (one of which is not passed to the {@code DoFn}) on the Apex runner, and checks that
 * the main output collected in {@code EmbeddedCollector.RESULTS} matches the expected
 * "processing: ..." strings for each input element.
 *
 * <p>The test polls until the expected results are present or {@code TIMEOUT_MILLIS}
 * has elapsed, cancels the pipeline, and then compares the collected set.
 *
 * @throws Exception if building, running or cancelling the pipeline fails
 */
@Test
public void testMultiOutputParDoWithSideInputs() throws Exception {
    ApexPipelineOptions apexOptions = PipelineOptionsFactory.create().as(ApexPipelineOptions.class);
    // non-blocking run
    apexOptions.setRunner(ApexRunner.class);
    Pipeline pipeline = Pipeline.create(apexOptions);

    List<Integer> elements = Arrays.asList(3, -42, 666);
    final TupleTag<String> mainTag = new TupleTag<>("main");
    final TupleTag<Void> extraTag = new TupleTag<>("output");

    // Three singleton views; only the first and the last are handed to the DoFn.
    PCollectionView<Integer> firstView = pipeline
        .apply("CreateSideInput1", Create.of(11))
        .apply("ViewSideInput1", View.<Integer>asSingleton());
    PCollectionView<Integer> unreadView = pipeline
        .apply("CreateSideInputUnread", Create.of(-3333))
        .apply("ViewSideInputUnread", View.<Integer>asSingleton());
    PCollectionView<Integer> secondView = pipeline
        .apply("CreateSideInput2", Create.of(222))
        .apply("ViewSideInput2", View.<Integer>asSingleton());

    PCollectionTuple tagged = pipeline
        .apply(Create.of(elements))
        .apply(ParDo.of(new TestMultiOutputWithSideInputsFn(Arrays.asList(firstView, secondView), Arrays.<TupleTag<String>>asList()))
            .withSideInputs(firstView)
            .withSideInputs(unreadView)
            .withSideInputs(secondView)
            .withOutputTags(mainTag, TupleTagList.of(extraTag)));
    tagged.get(mainTag).apply(ParDo.of(new EmbeddedCollector()));
    tagged.get(extraTag).setCoder(VoidCoder.of());

    ApexRunnerResult runResult = (ApexRunnerResult) pipeline.run();
    HashSet<String> expected = Sets.newHashSet(
        "processing: 3: [11, 222]",
        "processing: -42: [11, 222]",
        "processing: 666: [11, 222]");

    // Poll until every expected string has been collected or the deadline passes.
    long deadline = System.currentTimeMillis() + TIMEOUT_MILLIS;
    while (System.currentTimeMillis() < deadline && !EmbeddedCollector.RESULTS.containsAll(expected)) {
        LOG.info("Waiting for expected results.");
        Thread.sleep(SLEEP_MILLIS);
    }
    runResult.cancel();
    Assert.assertEquals(Sets.newHashSet(expected), EmbeddedCollector.RESULTS);
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) ApexRunnerResult(org.apache.beam.runners.apex.ApexRunnerResult) Pipeline(org.apache.beam.sdk.Pipeline) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) ApexPipelineOptions(org.apache.beam.runners.apex.ApexPipelineOptions) Test(org.junit.Test)

Example 4 with ApexRunnerResult

use of org.apache.beam.runners.apex.ApexRunnerResult in project beam by apache.

Found in the class ParDoTranslatorTest, method test.

/**
 * Runs {@code Create -> ParDo(Add(5)) -> ParDo(EmbeddedCollector)} on the Apex runner and
 * verifies both the translated DAG and the collected output.
 *
 * <p>The DAG must contain a "Create.Values" operator of type
 * {@code ApexReadUnboundedInputOperator} and a "ParDo(Add)/ParMultiDo(Add)" operator of
 * type {@code ApexParDoOperator}. The test then polls until the expected values are
 * collected or {@code TIMEOUT_MILLIS} has elapsed. The pipeline is cancelled in a
 * {@code finally} block so it is stopped even when a DAG assertion fails.
 *
 * @throws Exception if building, running or cancelling the pipeline fails
 */
@Test
public void test() throws Exception {
    ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class);
    options.setApplicationName("ParDoBound");
    options.setRunner(ApexRunner.class);
    Pipeline p = Pipeline.create(options);
    List<Integer> collection = Lists.newArrayList(1, 2, 3, 4, 5);
    List<Integer> expected = Lists.newArrayList(6, 7, 8, 9, 10);
    p.apply(Create.of(collection).withCoder(SerializableCoder.of(Integer.class))).apply(ParDo.of(new Add(5))).apply(ParDo.of(new EmbeddedCollector()));
    ApexRunnerResult result = (ApexRunnerResult) p.run();
    try {
        DAG dag = result.getApexDAG();
        DAG.OperatorMeta om = dag.getOperatorMeta("Create.Values");
        Assert.assertNotNull(om);
        // JUnit's assertEquals takes (expected, actual); keep that order for useful failure messages.
        Assert.assertEquals(ApexReadUnboundedInputOperator.class, om.getOperator().getClass());
        om = dag.getOperatorMeta("ParDo(Add)/ParMultiDo(Add)");
        Assert.assertNotNull(om);
        Assert.assertEquals(ApexParDoOperator.class, om.getOperator().getClass());
        long timeout = System.currentTimeMillis() + TIMEOUT_MILLIS;
        while (System.currentTimeMillis() < timeout) {
            if (EmbeddedCollector.RESULTS.containsAll(expected)) {
                break;
            }
            LOG.info("Waiting for expected results.");
            Thread.sleep(SLEEP_MILLIS);
        }
    } finally {
        // Stop the pipeline whether or not the checks above succeeded.
        result.cancel();
    }
    Assert.assertEquals(Sets.newHashSet(expected), EmbeddedCollector.RESULTS);
}
Also used : ApexRunnerResult(org.apache.beam.runners.apex.ApexRunnerResult) DAG(com.datatorrent.api.DAG) ApexPipelineOptions(org.apache.beam.runners.apex.ApexPipelineOptions) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 5 with ApexRunnerResult

use of org.apache.beam.runners.apex.ApexRunnerResult in project beam by apache.

Found in the class ReadUnboundTranslatorTest, method testReadBounded.

/**
 * Reads the bounded sequence 0..9 via {@code GenerateSequence} on the Apex runner and
 * verifies both the translated DAG and the collected output.
 *
 * <p>The DAG must contain a "GenerateSequence/Read(BoundedCountingSource)" operator of
 * type {@code ApexReadUnboundedInputOperator}. The test then polls once per second for
 * up to 30 seconds until all expected values are present in
 * {@code EmbeddedCollector.RESULTS}. The pipeline is cancelled in a {@code finally}
 * block so it is stopped even when a DAG assertion fails.
 *
 * @throws Exception if building, running or cancelling the pipeline fails
 */
@Test
public void testReadBounded() throws Exception {
    ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class);
    // Start from a clean slate; RESULTS is shared static state.
    EmbeddedCollector.RESULTS.clear();
    options.setApplicationName("ReadBounded");
    options.setRunner(ApexRunner.class);
    Pipeline p = Pipeline.create(options);
    Set<Long> expected = ContiguousSet.create(Range.closedOpen(0L, 10L), DiscreteDomain.longs());
    p.apply(GenerateSequence.from(0).to(10)).apply(ParDo.of(new EmbeddedCollector()));
    ApexRunnerResult result = (ApexRunnerResult) p.run();
    try {
        DAG dag = result.getApexDAG();
        String operatorName = "GenerateSequence/Read(BoundedCountingSource)";
        DAG.OperatorMeta om = dag.getOperatorMeta(operatorName);
        Assert.assertNotNull(om);
        // JUnit's assertEquals takes (expected, actual); keep that order for useful failure messages.
        Assert.assertEquals(ApexReadUnboundedInputOperator.class, om.getOperator().getClass());
        long timeout = System.currentTimeMillis() + 30000;
        while (System.currentTimeMillis() < timeout) {
            if (EmbeddedCollector.RESULTS.containsAll(expected)) {
                break;
            }
            LOG.info("Waiting for expected results.");
            Thread.sleep(1000);
        }
    } finally {
        // Stop the pipeline whether or not the checks above succeeded.
        result.cancel();
    }
    Assert.assertEquals(Sets.newHashSet(expected), EmbeddedCollector.RESULTS);
}
Also used : ApexRunnerResult(org.apache.beam.runners.apex.ApexRunnerResult) DAG(com.datatorrent.api.DAG) ApexPipelineOptions(org.apache.beam.runners.apex.ApexPipelineOptions) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Aggregations

ApexPipelineOptions (org.apache.beam.runners.apex.ApexPipelineOptions): 7, ApexRunnerResult (org.apache.beam.runners.apex.ApexRunnerResult): 7, Pipeline (org.apache.beam.sdk.Pipeline): 7, Test (org.junit.Test): 7, DAG (com.datatorrent.api.DAG): 3, KV (org.apache.beam.sdk.values.KV): 2, ArrayList (java.util.ArrayList): 1, ApexRunner (org.apache.beam.runners.apex.ApexRunner): 1, TestApexRunner (org.apache.beam.runners.apex.TestApexRunner): 1, CollectionSource (org.apache.beam.runners.apex.translation.utils.CollectionSource): 1, TestPipeline (org.apache.beam.sdk.testing.TestPipeline): 1, PCollection (org.apache.beam.sdk.values.PCollection): 1, PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple): 1, TupleTag (org.apache.beam.sdk.values.TupleTag): 1, Instant (org.joda.time.Instant): 1