Search in sources :

Example 1 with ApexPipelineOptions

Use of org.apache.beam.runners.apex.ApexPipelineOptions in the Apache Beam project.

In class WordCountTest, method testWindowedWordCount.

/**
 * Runs a word count over fixed 10-second windows on the Apex runner and checks that results
 * for both "foo" and "bar" show up in {@code CollectResultsFn.RESULTS} within 30 seconds.
 *
 * <p>The pipeline is cancelled and the static {@code RESULTS} holder is cleared in
 * {@code finally} blocks, so a failing assertion or an exception while waiting cannot leave a
 * running pipeline or stale results behind.
 */
@Test
public void testWindowedWordCount() throws Exception {
    String[] args = new String[] { "--runner=" + ApexRunner.class.getName() };
    ApexPipelineOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(ApexPipelineOptions.class);
    options.setApplicationName("StreamingWordCount");
    Pipeline p = Pipeline.create(options);
    PCollection<KV<String, Long>> wordCounts = p
        .apply(Read.from(new UnboundedTextSource()))
        .apply(ParDo.of(new ExtractWordsFn()))
        .apply(Window.<String>into(FixedWindows.of(Duration.standardSeconds(10))))
        .apply(Count.<String>perElement());
    wordCounts.apply(ParDo.of(new CollectResultsFn()));
    ApexRunnerResult result = (ApexRunnerResult) p.run();
    try {
        try {
            Assert.assertNotNull(result.getApexDAG().getOperatorMeta("Read(UnboundedTextSource)"));
            // Poll until both words were seen or the 30 second budget is used up.
            long timeout = System.currentTimeMillis() + 30000;
            while (System.currentTimeMillis() < timeout) {
                if (CollectResultsFn.RESULTS.containsKey("foo") && CollectResultsFn.RESULTS.containsKey("bar")) {
                    break;
                }
                result.waitUntilFinish(Duration.millis(1000));
            }
        } finally {
            // Stop the pipeline even when the DAG check or the wait above throws.
            result.cancel();
        }
        Assert.assertTrue(CollectResultsFn.RESULTS.containsKey("foo") && CollectResultsFn.RESULTS.containsKey("bar"));
    } finally {
        // RESULTS is accessed statically; reset it on every exit path, including failures.
        CollectResultsFn.RESULTS.clear();
    }
}
Also used : ApexRunner(org.apache.beam.runners.apex.ApexRunner) TestApexRunner(org.apache.beam.runners.apex.TestApexRunner) ApexRunnerResult(org.apache.beam.runners.apex.ApexRunnerResult) KV(org.apache.beam.sdk.values.KV) ApexPipelineOptions(org.apache.beam.runners.apex.ApexPipelineOptions) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 2 with ApexPipelineOptions

Use of org.apache.beam.runners.apex.ApexPipelineOptions in the Apache Beam project.

In class FlattenPCollectionTranslatorTest, method test.

/**
 * Flattens five single-element string collections on the Apex runner and checks that
 * {@code EmbeddedCollector.RESULTS} ends up holding exactly the elements that were fed in.
 * Waits up to 30 seconds, polling every 500 ms.
 */
@Test
public void test() throws Exception {
    ApexPipelineOptions apexOptions = PipelineOptionsFactory.as(ApexPipelineOptions.class);
    apexOptions.setApplicationName("FlattenPCollection");
    apexOptions.setRunner(ApexRunner.class);
    Pipeline pipeline = Pipeline.create(apexOptions);

    String[][] inputs = { { "1" }, { "2" }, { "3" }, { "4" }, { "5" } };
    Set<String> expected = Sets.newHashSet();
    List<PCollection<String>> sources = new ArrayList<>();
    for (String[] input : inputs) {
        // One source PCollection per input array; its elements are also the expected output.
        PCollection<String> source =
            pipeline.apply(Create.of(ImmutableList.copyOf(input)).withCoder(StringUtf8Coder.of()));
        sources.add(source);
        expected.addAll(Arrays.asList(input));
    }

    PCollection<String> flattened = PCollectionList.of(sources).apply(Flatten.<String>pCollections());
    flattened.apply(ParDo.of(new EmbeddedCollector()));

    ApexRunnerResult runResult = (ApexRunnerResult) pipeline.run();
    // TODO: verify translation
    runResult.getApexDAG();

    long deadline = System.currentTimeMillis() + 30000;
    while (true) {
        boolean timedOut = System.currentTimeMillis() >= deadline;
        if (timedOut || EmbeddedCollector.RESULTS.size() >= expected.size()) {
            break;
        }
        LOG.info("Waiting for expected results.");
        Thread.sleep(500);
    }

    Assert.assertEquals("number results", expected.size(), EmbeddedCollector.RESULTS.size());
    Assert.assertEquals(expected, Sets.newHashSet(EmbeddedCollector.RESULTS));
}
Also used : ArrayList(java.util.ArrayList) ApexRunnerResult(org.apache.beam.runners.apex.ApexRunnerResult) Pipeline(org.apache.beam.sdk.Pipeline) PCollection(org.apache.beam.sdk.values.PCollection) ApexPipelineOptions(org.apache.beam.runners.apex.ApexPipelineOptions) Test(org.junit.Test)

Example 3 with ApexPipelineOptions

Use of org.apache.beam.runners.apex.ApexPipelineOptions in the Apache Beam project.

In class ParDoTranslatorTest, method testAssertionFailure.

/**
 * Asserts five values against a four-element input and checks that the failure returned by
 * {@code runExpectingAssertionFailure} carries a PAssert-style "Expected: iterable over [...]"
 * message.
 */
@Test
public void testAssertionFailure() throws Exception {
    ApexPipelineOptions apexOptions = PipelineOptionsFactory.create().as(ApexPipelineOptions.class);
    apexOptions.setRunner(TestApexRunner.class);
    Pipeline p = Pipeline.create(apexOptions);

    PCollection<Integer> numbers = p.apply(Create.of(1, 2, 3, 4));
    // 7 is not among the created elements, so this assertion cannot hold.
    PAssert.that(numbers).containsInAnyOrder(2, 1, 4, 3, 7);

    Throwable failure = runExpectingAssertionFailure(p);

    // A loose pattern, but should get the job done.
    Pattern expectedPattern = Pattern.compile("Expected: iterable over \\[((<4>|<7>|<3>|<2>|<1>)(, )?){5}\\] in any order");
    String description = "Expected error message from PAssert with substring matching " + expectedPattern
        + " but the message was \"" + failure.getMessage() + "\"";
    boolean messageMatches = expectedPattern.matcher(failure.getMessage()).find();
    assertTrue(description, messageMatches);
}
Also used : Pattern(java.util.regex.Pattern) ApexPipelineOptions(org.apache.beam.runners.apex.ApexPipelineOptions) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 4 with ApexPipelineOptions

Use of org.apache.beam.runners.apex.ApexPipelineOptions in the Apache Beam project.

In class ParDoTranslatorTest, method testContainsInAnyOrder.

/**
 * Creates the elements 1..4, registers a PAssert that they are present in any order, and runs
 * the pipeline with {@code TestApexRunner}.
 */
@Test
public void testContainsInAnyOrder() throws Exception {
    ApexPipelineOptions apexOptions = PipelineOptionsFactory.create().as(ApexPipelineOptions.class);
    apexOptions.setRunner(TestApexRunner.class);
    Pipeline p = Pipeline.create(apexOptions);

    PCollection<Integer> numbers = p.apply(Create.of(1, 2, 3, 4));
    PAssert.that(numbers).containsInAnyOrder(2, 1, 4, 3);

    // TODO: terminate faster based on processed assertion vs. auto-shutdown
    p.run();
}
Also used : ApexPipelineOptions(org.apache.beam.runners.apex.ApexPipelineOptions) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 5 with ApexPipelineOptions

Use of org.apache.beam.runners.apex.ApexPipelineOptions in the Apache Beam project.

In class ParDoTranslatorTest, method testMultiOutputParDoWithSideInputs.

/**
 * Runs a multi-output ParDo with three side inputs (one of them never passed to the fn) on the
 * non-blocking {@code ApexRunner} and checks that the main output collected in
 * {@code EmbeddedCollector.RESULTS} equals the three expected "processing: ..." strings.
 *
 * <p>The pipeline is cancelled in a {@code finally} block so that an interrupted wait does not
 * leave it running.
 */
@Test
public void testMultiOutputParDoWithSideInputs() throws Exception {
    ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class);
    // non-blocking run
    options.setRunner(ApexRunner.class);
    Pipeline pipeline = Pipeline.create(options);
    List<Integer> inputs = Arrays.asList(3, -42, 666);
    final TupleTag<String> mainOutputTag = new TupleTag<>("main");
    final TupleTag<Void> additionalOutputTag = new TupleTag<>("output");
    PCollectionView<Integer> sideInput1 = pipeline.apply("CreateSideInput1", Create.of(11)).apply("ViewSideInput1", View.<Integer>asSingleton());
    PCollectionView<Integer> sideInputUnread = pipeline.apply("CreateSideInputUnread", Create.of(-3333)).apply("ViewSideInputUnread", View.<Integer>asSingleton());
    PCollectionView<Integer> sideInput2 = pipeline.apply("CreateSideInput2", Create.of(222)).apply("ViewSideInput2", View.<Integer>asSingleton());
    // sideInputUnread is registered on the transform but not handed to the fn.
    PCollectionTuple outputs = pipeline
        .apply(Create.of(inputs))
        .apply(ParDo.of(new TestMultiOutputWithSideInputsFn(Arrays.asList(sideInput1, sideInput2), Arrays.<TupleTag<String>>asList()))
            .withSideInputs(sideInput1)
            .withSideInputs(sideInputUnread)
            .withSideInputs(sideInput2)
            .withOutputTags(mainOutputTag, TupleTagList.of(additionalOutputTag)));
    outputs.get(mainOutputTag).apply(ParDo.of(new EmbeddedCollector()));
    outputs.get(additionalOutputTag).setCoder(VoidCoder.of());
    ApexRunnerResult result = (ApexRunnerResult) pipeline.run();
    HashSet<String> expected = Sets.newHashSet("processing: 3: [11, 222]", "processing: -42: [11, 222]", "processing: 666: [11, 222]");
    try {
        // Poll until every expected string was collected or TIMEOUT_MILLIS has elapsed.
        long timeout = System.currentTimeMillis() + TIMEOUT_MILLIS;
        while (System.currentTimeMillis() < timeout && !EmbeddedCollector.RESULTS.containsAll(expected)) {
            LOG.info("Waiting for expected results.");
            Thread.sleep(SLEEP_MILLIS);
        }
    } finally {
        // Thread.sleep may throw InterruptedException; cancel the pipeline regardless.
        result.cancel();
    }
    Assert.assertEquals(expected, EmbeddedCollector.RESULTS);
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) ApexRunnerResult(org.apache.beam.runners.apex.ApexRunnerResult) Pipeline(org.apache.beam.sdk.Pipeline) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) ApexPipelineOptions(org.apache.beam.runners.apex.ApexPipelineOptions) Test(org.junit.Test)

Aggregations

ApexPipelineOptions (org.apache.beam.runners.apex.ApexPipelineOptions)12 Pipeline (org.apache.beam.sdk.Pipeline)12 Test (org.junit.Test)12 ApexRunnerResult (org.apache.beam.runners.apex.ApexRunnerResult)7 DAG (com.datatorrent.api.DAG)4 KV (org.apache.beam.sdk.values.KV)3 Sink (com.datatorrent.api.Sink)2 ApexStateInternals (org.apache.beam.runners.apex.translation.utils.ApexStateInternals)2 ApexStreamTuple (org.apache.beam.runners.apex.translation.utils.ApexStreamTuple)2 WindowedValue (org.apache.beam.sdk.util.WindowedValue)2 Instant (org.joda.time.Instant)2 ArrayList (java.util.ArrayList)1 Pattern (java.util.regex.Pattern)1 ApexRunner (org.apache.beam.runners.apex.ApexRunner)1 TestApexRunner (org.apache.beam.runners.apex.TestApexRunner)1 ApexGroupByKeyOperator (org.apache.beam.runners.apex.translation.operators.ApexGroupByKeyOperator)1 ApexParDoOperator (org.apache.beam.runners.apex.translation.operators.ApexParDoOperator)1 CollectionSource (org.apache.beam.runners.apex.translation.utils.CollectionSource)1 TestPipeline (org.apache.beam.sdk.testing.TestPipeline)1 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)1