Search in sources :

Example 6 with ApexPipelineOptions

use of org.apache.beam.runners.apex.ApexPipelineOptions in project beam by apache.

the class ParDoTranslatorTest method test.

@Test
public void test() throws Exception {
    ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class);
    options.setApplicationName("ParDoBound");
    options.setRunner(ApexRunner.class);
    Pipeline p = Pipeline.create(options);
    List<Integer> collection = Lists.newArrayList(1, 2, 3, 4, 5);
    List<Integer> expected = Lists.newArrayList(6, 7, 8, 9, 10);
    p.apply(Create.of(collection).withCoder(SerializableCoder.of(Integer.class))).apply(ParDo.of(new Add(5))).apply(ParDo.of(new EmbeddedCollector()));
    ApexRunnerResult result = (ApexRunnerResult) p.run();
    DAG dag = result.getApexDAG();
    DAG.OperatorMeta om = dag.getOperatorMeta("Create.Values");
    Assert.assertNotNull(om);
    Assert.assertEquals(om.getOperator().getClass(), ApexReadUnboundedInputOperator.class);
    om = dag.getOperatorMeta("ParDo(Add)/ParMultiDo(Add)");
    Assert.assertNotNull(om);
    Assert.assertEquals(om.getOperator().getClass(), ApexParDoOperator.class);
    long timeout = System.currentTimeMillis() + TIMEOUT_MILLIS;
    while (System.currentTimeMillis() < timeout) {
        if (EmbeddedCollector.RESULTS.containsAll(expected)) {
            break;
        }
        LOG.info("Waiting for expected results.");
        Thread.sleep(SLEEP_MILLIS);
    }
    Assert.assertEquals(Sets.newHashSet(expected), EmbeddedCollector.RESULTS);
}
Also used : ApexRunnerResult(org.apache.beam.runners.apex.ApexRunnerResult) DAG(com.datatorrent.api.DAG) ApexPipelineOptions(org.apache.beam.runners.apex.ApexPipelineOptions) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 7 with ApexPipelineOptions

use of org.apache.beam.runners.apex.ApexPipelineOptions in project beam by apache.

the class ParDoTranslatorTest method testSerialization.

@Test
public void testSerialization() throws Exception {
    ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class);
    options.setRunner(TestApexRunner.class);
    Pipeline pipeline = Pipeline.create(options);
    Coder<WindowedValue<Integer>> coder = WindowedValue.getValueOnlyCoder(VarIntCoder.of());
    PCollectionView<Integer> singletonView = pipeline.apply(Create.of(1)).apply(Sum.integersGlobally().asSingletonView());
    ApexParDoOperator<Integer, Integer> operator = new ApexParDoOperator<>(options, new Add(singletonView), new TupleTag<Integer>(), TupleTagList.empty().getAll(), WindowingStrategy.globalDefault(), Collections.<PCollectionView<?>>singletonList(singletonView), coder, new ApexStateInternals.ApexStateBackend());
    operator.setup(null);
    operator.beginWindow(0);
    WindowedValue<Integer> wv1 = WindowedValue.valueInGlobalWindow(1);
    WindowedValue<Iterable<?>> sideInput = WindowedValue.<Iterable<?>>valueInGlobalWindow(Lists.<Integer>newArrayList(22));
    // pushed back input
    operator.input.process(ApexStreamTuple.DataTuple.of(wv1));
    final List<Object> results = Lists.newArrayList();
    Sink<Object> sink = new Sink<Object>() {

        @Override
        public void put(Object tuple) {
            results.add(tuple);
        }

        @Override
        public int getCount(boolean reset) {
            return 0;
        }
    };
    // verify pushed back input checkpointing
    Assert.assertNotNull("Serialization", operator = KryoCloneUtils.cloneObject(operator));
    operator.output.setSink(sink);
    operator.setup(null);
    operator.beginWindow(1);
    WindowedValue<Integer> wv2 = WindowedValue.valueInGlobalWindow(2);
    operator.sideInput1.process(ApexStreamTuple.DataTuple.of(sideInput));
    Assert.assertEquals("number outputs", 1, results.size());
    Assert.assertEquals("result", WindowedValue.valueInGlobalWindow(23), ((ApexStreamTuple.DataTuple<?>) results.get(0)).getValue());
    // verify side input checkpointing
    results.clear();
    Assert.assertNotNull("Serialization", operator = KryoCloneUtils.cloneObject(operator));
    operator.output.setSink(sink);
    operator.setup(null);
    operator.beginWindow(2);
    operator.input.process(ApexStreamTuple.DataTuple.of(wv2));
    Assert.assertEquals("number outputs", 1, results.size());
    Assert.assertEquals("result", WindowedValue.valueInGlobalWindow(24), ((ApexStreamTuple.DataTuple<?>) results.get(0)).getValue());
}
Also used : ApexStreamTuple(org.apache.beam.runners.apex.translation.utils.ApexStreamTuple) ApexParDoOperator(org.apache.beam.runners.apex.translation.operators.ApexParDoOperator) Pipeline(org.apache.beam.sdk.Pipeline) Sink(com.datatorrent.api.Sink) WindowedValue(org.apache.beam.sdk.util.WindowedValue) ApexStateInternals(org.apache.beam.runners.apex.translation.utils.ApexStateInternals) ApexPipelineOptions(org.apache.beam.runners.apex.ApexPipelineOptions) Test(org.junit.Test)

Example 8 with ApexPipelineOptions

use of org.apache.beam.runners.apex.ApexPipelineOptions in project beam by apache.

the class ReadUnboundTranslatorTest method testReadBounded.

@Test
public void testReadBounded() throws Exception {
    ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class);
    EmbeddedCollector.RESULTS.clear();
    options.setApplicationName("ReadBounded");
    options.setRunner(ApexRunner.class);
    Pipeline p = Pipeline.create(options);
    Set<Long> expected = ContiguousSet.create(Range.closedOpen(0L, 10L), DiscreteDomain.longs());
    p.apply(GenerateSequence.from(0).to(10)).apply(ParDo.of(new EmbeddedCollector()));
    ApexRunnerResult result = (ApexRunnerResult) p.run();
    DAG dag = result.getApexDAG();
    String operatorName = "GenerateSequence/Read(BoundedCountingSource)";
    DAG.OperatorMeta om = dag.getOperatorMeta(operatorName);
    Assert.assertNotNull(om);
    Assert.assertEquals(om.getOperator().getClass(), ApexReadUnboundedInputOperator.class);
    long timeout = System.currentTimeMillis() + 30000;
    while (System.currentTimeMillis() < timeout) {
        if (EmbeddedCollector.RESULTS.containsAll(expected)) {
            break;
        }
        LOG.info("Waiting for expected results.");
        Thread.sleep(1000);
    }
    Assert.assertEquals(Sets.newHashSet(expected), EmbeddedCollector.RESULTS);
}
Also used : ApexRunnerResult(org.apache.beam.runners.apex.ApexRunnerResult) DAG(com.datatorrent.api.DAG) ApexPipelineOptions(org.apache.beam.runners.apex.ApexPipelineOptions) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 9 with ApexPipelineOptions

use of org.apache.beam.runners.apex.ApexPipelineOptions in project beam by apache.

the class ReadUnboundTranslatorTest method test.

@Test
public void test() throws Exception {
    ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class);
    EmbeddedCollector.RESULTS.clear();
    options.setApplicationName("ReadUnbound");
    options.setRunner(ApexRunner.class);
    Pipeline p = Pipeline.create(options);
    List<String> collection = Lists.newArrayList("1", "2", "3", "4", "5");
    CollectionSource<String> source = new CollectionSource<>(collection, StringUtf8Coder.of());
    p.apply(Read.from(source)).apply(ParDo.of(new EmbeddedCollector()));
    ApexRunnerResult result = (ApexRunnerResult) p.run();
    DAG dag = result.getApexDAG();
    DAG.OperatorMeta om = dag.getOperatorMeta("Read(CollectionSource)");
    Assert.assertNotNull(om);
    Assert.assertEquals(om.getOperator().getClass(), ApexReadUnboundedInputOperator.class);
    long timeout = System.currentTimeMillis() + 30000;
    while (System.currentTimeMillis() < timeout) {
        if (EmbeddedCollector.RESULTS.containsAll(collection)) {
            break;
        }
        LOG.info("Waiting for expected results.");
        Thread.sleep(1000);
    }
    Assert.assertEquals(Sets.newHashSet(collection), EmbeddedCollector.RESULTS);
}
Also used : CollectionSource(org.apache.beam.runners.apex.translation.utils.CollectionSource) ApexRunnerResult(org.apache.beam.runners.apex.ApexRunnerResult) DAG(com.datatorrent.api.DAG) ApexPipelineOptions(org.apache.beam.runners.apex.ApexPipelineOptions) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 10 with ApexPipelineOptions

use of org.apache.beam.runners.apex.ApexPipelineOptions in project beam by apache.

the class ApexGroupByKeyOperatorTest method testGlobalWindowMinTimestamp.

@Test
public void testGlobalWindowMinTimestamp() throws Exception {
    ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class);
    options.setRunner(TestApexRunner.class);
    Pipeline pipeline = Pipeline.create(options);
    WindowingStrategy<?, ?> ws = WindowingStrategy.of(FixedWindows.of(Duration.standardSeconds(10)));
    PCollection<KV<String, Integer>> input = PCollection.createPrimitiveOutputInternal(pipeline, ws, IsBounded.BOUNDED);
    input.setCoder(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()));
    ApexGroupByKeyOperator<String, Integer> operator = new ApexGroupByKeyOperator<>(options, input, new ApexStateInternals.ApexStateBackend());
    operator.setup(null);
    operator.beginWindow(1);
    Assert.assertNotNull("Serialization", operator = KryoCloneUtils.cloneObject(operator));
    final List<Object> results = Lists.newArrayList();
    Sink<Object> sink = new Sink<Object>() {

        @Override
        public void put(Object tuple) {
            results.add(tuple);
        }

        @Override
        public int getCount(boolean reset) {
            return 0;
        }
    };
    operator.output.setSink(sink);
    operator.setup(null);
    operator.beginWindow(1);
    Instant windowStart = BoundedWindow.TIMESTAMP_MIN_VALUE;
    BoundedWindow window = new IntervalWindow(windowStart, windowStart.plus(10000));
    PaneInfo paneInfo = PaneInfo.NO_FIRING;
    WindowedValue<KV<String, Integer>> wv1 = WindowedValue.of(KV.of("foo", 1), windowStart, window, paneInfo);
    operator.input.process(ApexStreamTuple.DataTuple.of(wv1));
    WindowedValue<KV<String, Integer>> wv2 = WindowedValue.of(KV.of("foo", 1), windowStart, window, paneInfo);
    operator.input.process(ApexStreamTuple.DataTuple.of(wv2));
    ApexStreamTuple<WindowedValue<KV<String, Integer>>> watermark = ApexStreamTuple.WatermarkTuple.of(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis());
    Assert.assertEquals("number outputs", 0, results.size());
    operator.input.process(watermark);
    Assert.assertEquals("number outputs", 2, results.size());
    @SuppressWarnings({ "unchecked", "rawtypes" }) ApexStreamTuple.DataTuple<WindowedValue<KV<String, Iterable<Integer>>>> dataTuple = (ApexStreamTuple.DataTuple) results.get(0);
    List<Integer> counts = Lists.newArrayList(1, 1);
    Assert.assertEquals("iterable", KV.of("foo", counts), dataTuple.getValue().getValue());
    Assert.assertEquals("expected watermark", watermark, results.get(1));
}
Also used : ApexStreamTuple(org.apache.beam.runners.apex.translation.utils.ApexStreamTuple) Sink(com.datatorrent.api.Sink) WindowedValue(org.apache.beam.sdk.util.WindowedValue) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) Pipeline(org.apache.beam.sdk.Pipeline) ApexGroupByKeyOperator(org.apache.beam.runners.apex.translation.operators.ApexGroupByKeyOperator) ApexStateInternals(org.apache.beam.runners.apex.translation.utils.ApexStateInternals) ApexPipelineOptions(org.apache.beam.runners.apex.ApexPipelineOptions) Test(org.junit.Test)

Aggregations

ApexPipelineOptions (org.apache.beam.runners.apex.ApexPipelineOptions)12 Pipeline (org.apache.beam.sdk.Pipeline)12 Test (org.junit.Test)12 ApexRunnerResult (org.apache.beam.runners.apex.ApexRunnerResult)7 DAG (com.datatorrent.api.DAG)4 KV (org.apache.beam.sdk.values.KV)3 Sink (com.datatorrent.api.Sink)2 ApexStateInternals (org.apache.beam.runners.apex.translation.utils.ApexStateInternals)2 ApexStreamTuple (org.apache.beam.runners.apex.translation.utils.ApexStreamTuple)2 WindowedValue (org.apache.beam.sdk.util.WindowedValue)2 Instant (org.joda.time.Instant)2 ArrayList (java.util.ArrayList)1 Pattern (java.util.regex.Pattern)1 ApexRunner (org.apache.beam.runners.apex.ApexRunner)1 TestApexRunner (org.apache.beam.runners.apex.TestApexRunner)1 ApexGroupByKeyOperator (org.apache.beam.runners.apex.translation.operators.ApexGroupByKeyOperator)1 ApexParDoOperator (org.apache.beam.runners.apex.translation.operators.ApexParDoOperator)1 CollectionSource (org.apache.beam.runners.apex.translation.utils.CollectionSource)1 TestPipeline (org.apache.beam.sdk.testing.TestPipeline)1 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)1