use of org.apache.beam.runners.apex.ApexPipelineOptions in project beam by apache.
the class ParDoTranslatorTest method test.
/**
 * Runs a small pipeline (Create -> ParDo(Add) -> collector) with the Apex runner and checks
 * both the translated DAG and the values that reach the collector.
 *
 * <p>The inputs 1..5 are expected to arrive at the collector as 6..10.
 *
 * @throws Exception if running the pipeline fails or the polling sleep is interrupted
 */
@Test
public void test() throws Exception {
    ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class);
    options.setApplicationName("ParDoBound");
    options.setRunner(ApexRunner.class);
    Pipeline p = Pipeline.create(options);

    List<Integer> collection = Lists.newArrayList(1, 2, 3, 4, 5);
    List<Integer> expected = Lists.newArrayList(6, 7, 8, 9, 10);
    p.apply(Create.of(collection).withCoder(SerializableCoder.of(Integer.class)))
        .apply(ParDo.of(new Add(5)))
        .apply(ParDo.of(new EmbeddedCollector()));

    ApexRunnerResult result = (ApexRunnerResult) p.run();
    DAG dag = result.getApexDAG();

    // JUnit's contract is assertEquals(expected, actual); keeping that order makes a failure
    // message report the right value as "expected".
    DAG.OperatorMeta om = dag.getOperatorMeta("Create.Values");
    Assert.assertNotNull(om);
    Assert.assertEquals(ApexReadUnboundedInputOperator.class, om.getOperator().getClass());

    om = dag.getOperatorMeta("ParDo(Add)/ParMultiDo(Add)");
    Assert.assertNotNull(om);
    Assert.assertEquals(ApexParDoOperator.class, om.getOperator().getClass());

    // Poll until every expected value has been collected or the deadline passes; the final
    // assertion below reports the failure if the deadline was hit first.
    long timeout = System.currentTimeMillis() + TIMEOUT_MILLIS;
    while (System.currentTimeMillis() < timeout) {
        if (EmbeddedCollector.RESULTS.containsAll(expected)) {
            break;
        }
        LOG.info("Waiting for expected results.");
        Thread.sleep(SLEEP_MILLIS);
    }
    Assert.assertEquals(Sets.newHashSet(expected), EmbeddedCollector.RESULTS);
}
use of org.apache.beam.runners.apex.ApexPipelineOptions in project beam by apache.
the class ParDoTranslatorTest method testSerialization.
/**
 * Checks that an {@link ApexParDoOperator} survives Kryo cloning at two points: while a main
 * input is still held back waiting for the side input, and after the side input was received.
 *
 * <p>The side input carries the value 22; the main inputs 1 and 2 are expected to come out
 * as 23 and 24.
 *
 * @throws Exception if cloning or operator processing fails
 */
@Test
public void testSerialization() throws Exception {
    ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class);
    options.setRunner(TestApexRunner.class);
    Pipeline pipeline = Pipeline.create(options);
    Coder<WindowedValue<Integer>> coder = WindowedValue.getValueOnlyCoder(VarIntCoder.of());
    PCollectionView<Integer> singletonView =
        pipeline.apply(Create.of(1)).apply(Sum.integersGlobally().asSingletonView());

    ApexParDoOperator<Integer, Integer> operator =
        new ApexParDoOperator<>(
            options,
            new Add(singletonView),
            new TupleTag<Integer>(),
            TupleTagList.empty().getAll(),
            WindowingStrategy.globalDefault(),
            Collections.<PCollectionView<?>>singletonList(singletonView),
            coder,
            new ApexStateInternals.ApexStateBackend());
    operator.setup(null);
    operator.beginWindow(0);

    WindowedValue<Integer> firstInput = WindowedValue.valueInGlobalWindow(1);
    WindowedValue<Iterable<?>> sideInput =
        WindowedValue.<Iterable<?>>valueInGlobalWindow(Lists.<Integer>newArrayList(22));

    // pushed back input: delivered before the side input has been processed
    operator.input.process(ApexStreamTuple.DataTuple.of(firstInput));

    // Sink that simply records every tuple the operator emits.
    final List<Object> emitted = Lists.newArrayList();
    Sink<Object> recordingSink =
        new Sink<Object>() {
            @Override
            public void put(Object tuple) {
                emitted.add(tuple);
            }

            @Override
            public int getCount(boolean reset) {
                return 0;
            }
        };

    // verify pushed back input checkpointing
    operator = KryoCloneUtils.cloneObject(operator);
    Assert.assertNotNull("Serialization", operator);
    operator.output.setSink(recordingSink);
    operator.setup(null);
    operator.beginWindow(1);
    operator.sideInput1.process(ApexStreamTuple.DataTuple.of(sideInput));
    Assert.assertEquals("number outputs", 1, emitted.size());
    Object firstResult = ((ApexStreamTuple.DataTuple<?>) emitted.get(0)).getValue();
    Assert.assertEquals("result", WindowedValue.valueInGlobalWindow(23), firstResult);

    // verify side input checkpointing
    emitted.clear();
    operator = KryoCloneUtils.cloneObject(operator);
    Assert.assertNotNull("Serialization", operator);
    operator.output.setSink(recordingSink);
    operator.setup(null);
    operator.beginWindow(2);
    WindowedValue<Integer> secondInput = WindowedValue.valueInGlobalWindow(2);
    operator.input.process(ApexStreamTuple.DataTuple.of(secondInput));
    Assert.assertEquals("number outputs", 1, emitted.size());
    Object secondResult = ((ApexStreamTuple.DataTuple<?>) emitted.get(0)).getValue();
    Assert.assertEquals("result", WindowedValue.valueInGlobalWindow(24), secondResult);
}
use of org.apache.beam.runners.apex.ApexPipelineOptions in project beam by apache.
the class ReadUnboundTranslatorTest method testReadBounded.
/**
 * Runs {@code GenerateSequence.from(0).to(10)} with the Apex runner and checks that the read
 * is translated to an {@link ApexReadUnboundedInputOperator} and that the values 0..9 reach
 * the collector.
 *
 * @throws Exception if running the pipeline fails or the polling sleep is interrupted
 */
@Test
public void testReadBounded() throws Exception {
    // Upper bound on how long to wait for results, and the pause between checks.
    final long timeoutMillis = 30000;
    final long pollIntervalMillis = 1000;

    ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class);
    EmbeddedCollector.RESULTS.clear();
    options.setApplicationName("ReadBounded");
    options.setRunner(ApexRunner.class);
    Pipeline p = Pipeline.create(options);

    Set<Long> expected = ContiguousSet.create(Range.closedOpen(0L, 10L), DiscreteDomain.longs());
    p.apply(GenerateSequence.from(0).to(10)).apply(ParDo.of(new EmbeddedCollector()));

    ApexRunnerResult result = (ApexRunnerResult) p.run();
    DAG dag = result.getApexDAG();
    String operatorName = "GenerateSequence/Read(BoundedCountingSource)";
    DAG.OperatorMeta om = dag.getOperatorMeta(operatorName);
    Assert.assertNotNull(om);
    // JUnit's contract is assertEquals(expected, actual); keeping that order makes a failure
    // message report the right value as "expected".
    Assert.assertEquals(ApexReadUnboundedInputOperator.class, om.getOperator().getClass());

    // Poll until every expected value has been collected or the deadline passes; the final
    // assertion below reports the failure if the deadline was hit first.
    long timeout = System.currentTimeMillis() + timeoutMillis;
    while (System.currentTimeMillis() < timeout) {
        if (EmbeddedCollector.RESULTS.containsAll(expected)) {
            break;
        }
        LOG.info("Waiting for expected results.");
        Thread.sleep(pollIntervalMillis);
    }
    Assert.assertEquals(Sets.newHashSet(expected), EmbeddedCollector.RESULTS);
}
use of org.apache.beam.runners.apex.ApexPipelineOptions in project beam by apache.
the class ReadUnboundTranslatorTest method test.
/**
 * Reads five strings from a {@code CollectionSource} with the Apex runner and checks that the
 * read is translated to an {@link ApexReadUnboundedInputOperator} and that every string
 * reaches the collector.
 *
 * @throws Exception if running the pipeline fails or the polling sleep is interrupted
 */
@Test
public void test() throws Exception {
    // Upper bound on how long to wait for results, and the pause between checks.
    final long timeoutMillis = 30000;
    final long pollIntervalMillis = 1000;

    ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class);
    EmbeddedCollector.RESULTS.clear();
    options.setApplicationName("ReadUnbound");
    options.setRunner(ApexRunner.class);
    Pipeline p = Pipeline.create(options);

    List<String> collection = Lists.newArrayList("1", "2", "3", "4", "5");
    CollectionSource<String> source = new CollectionSource<>(collection, StringUtf8Coder.of());
    p.apply(Read.from(source)).apply(ParDo.of(new EmbeddedCollector()));

    ApexRunnerResult result = (ApexRunnerResult) p.run();
    DAG dag = result.getApexDAG();
    DAG.OperatorMeta om = dag.getOperatorMeta("Read(CollectionSource)");
    Assert.assertNotNull(om);
    // JUnit's contract is assertEquals(expected, actual); keeping that order makes a failure
    // message report the right value as "expected".
    Assert.assertEquals(ApexReadUnboundedInputOperator.class, om.getOperator().getClass());

    // Poll until every expected value has been collected or the deadline passes; the final
    // assertion below reports the failure if the deadline was hit first.
    long timeout = System.currentTimeMillis() + timeoutMillis;
    while (System.currentTimeMillis() < timeout) {
        if (EmbeddedCollector.RESULTS.containsAll(collection)) {
            break;
        }
        LOG.info("Waiting for expected results.");
        Thread.sleep(pollIntervalMillis);
    }
    Assert.assertEquals(Sets.newHashSet(collection), EmbeddedCollector.RESULTS);
}
use of org.apache.beam.runners.apex.ApexPipelineOptions in project beam by apache.
the class ApexGroupByKeyOperatorTest method testGlobalWindowMinTimestamp.
/**
 * Feeds an {@link ApexGroupByKeyOperator} two elements for key "foo" that are stamped with
 * {@code BoundedWindow.TIMESTAMP_MIN_VALUE}, then a watermark at
 * {@code BoundedWindow.TIMESTAMP_MAX_VALUE}.
 *
 * <p>Nothing may be emitted before the watermark is processed; afterwards the grouped values
 * and the watermark itself are expected, in that order. The operator is Kryo-cloned before
 * any input is processed, so the test also covers serialization of the freshly set-up
 * operator.
 *
 * @throws Exception if cloning or operator processing fails
 */
@Test
public void testGlobalWindowMinTimestamp() throws Exception {
    ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class);
    options.setRunner(TestApexRunner.class);
    Pipeline pipeline = Pipeline.create(options);

    WindowingStrategy<?, ?> ws = WindowingStrategy.of(FixedWindows.of(Duration.standardSeconds(10)));
    PCollection<KV<String, Integer>> input =
        PCollection.createPrimitiveOutputInternal(pipeline, ws, IsBounded.BOUNDED);
    input.setCoder(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()));

    ApexGroupByKeyOperator<String, Integer> operator =
        new ApexGroupByKeyOperator<>(options, input, new ApexStateInternals.ApexStateBackend());
    operator.setup(null);
    operator.beginWindow(1);

    // Continue with a Kryo clone of the operator from here on.
    operator = KryoCloneUtils.cloneObject(operator);
    Assert.assertNotNull("Serialization", operator);

    // Sink that simply records every tuple the operator emits.
    final List<Object> emitted = Lists.newArrayList();
    Sink<Object> recordingSink =
        new Sink<Object>() {
            @Override
            public void put(Object tuple) {
                emitted.add(tuple);
            }

            @Override
            public int getCount(boolean reset) {
                return 0;
            }
        };
    operator.output.setSink(recordingSink);
    operator.setup(null);
    operator.beginWindow(1);

    Instant windowStart = BoundedWindow.TIMESTAMP_MIN_VALUE;
    BoundedWindow window = new IntervalWindow(windowStart, windowStart.plus(10000));
    PaneInfo paneInfo = PaneInfo.NO_FIRING;

    WindowedValue<KV<String, Integer>> first =
        WindowedValue.of(KV.of("foo", 1), windowStart, window, paneInfo);
    operator.input.process(ApexStreamTuple.DataTuple.of(first));
    WindowedValue<KV<String, Integer>> second =
        WindowedValue.of(KV.of("foo", 1), windowStart, window, paneInfo);
    operator.input.process(ApexStreamTuple.DataTuple.of(second));

    ApexStreamTuple<WindowedValue<KV<String, Integer>>> watermark =
        ApexStreamTuple.WatermarkTuple.of(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis());
    Assert.assertEquals("number outputs", 0, emitted.size());
    operator.input.process(watermark);
    Assert.assertEquals("number outputs", 2, emitted.size());

    @SuppressWarnings({ "unchecked", "rawtypes" })
    ApexStreamTuple.DataTuple<WindowedValue<KV<String, Iterable<Integer>>>> grouped =
        (ApexStreamTuple.DataTuple) emitted.get(0);
    List<Integer> expectedValues = Lists.newArrayList(1, 1);
    Assert.assertEquals("iterable", KV.of("foo", expectedValues), grouped.getValue().getValue());
    Assert.assertEquals("expected watermark", watermark, emitted.get(1));
}
Aggregations