Use of org.apache.flink.api.java.DataSet in project flink by apache.
From the class PregelCompilerTest, the method testPregelCompiler:
@SuppressWarnings("serial")
@Test
public void testPregelCompiler() {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(DEFAULT_PARALLELISM);
// compose test program
{
DataSet<Vertex<Long, Long>> initialVertices = env.fromElements(new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L)).map(new Tuple2ToVertexMap<>());
DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple2<>(1L, 2L)).map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {
public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) {
return new Edge<>(edge.f0, edge.f1, NullValue.getInstance());
}
});
Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env);
DataSet<Vertex<Long, Long>> result = graph.runVertexCentricIteration(new CCCompute(), null, 100).getVertices();
result.output(new DiscardingOutputFormat<>());
}
Plan p = env.createProgramPlan("Pregel Connected Components");
OptimizedPlan op = compileNoStats(p);
// check the sink
SinkPlanNode sink = op.getDataSinks().iterator().next();
assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
assertEquals(DEFAULT_PARALLELISM, sink.getParallelism());
// check the iteration
WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource();
assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism());
// check the solution set delta
PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode();
assertTrue(ssDelta instanceof SingleInputPlanNode);
SingleInputPlanNode ssFlatMap = (SingleInputPlanNode) ((SingleInputPlanNode) (ssDelta)).getInput().getSource();
assertEquals(DEFAULT_PARALLELISM, ssFlatMap.getParallelism());
assertEquals(ShipStrategyType.FORWARD, ssFlatMap.getInput().getShipStrategy());
// check the computation coGroup
DualInputPlanNode computationCoGroup = (DualInputPlanNode) (ssFlatMap.getInput().getSource());
assertEquals(DEFAULT_PARALLELISM, computationCoGroup.getParallelism());
assertEquals(ShipStrategyType.FORWARD, computationCoGroup.getInput1().getShipStrategy());
assertEquals(ShipStrategyType.PARTITION_HASH, computationCoGroup.getInput2().getShipStrategy());
assertTrue(computationCoGroup.getInput2().getTempMode().isCached());
assertEquals(new FieldList(0), computationCoGroup.getInput2().getShipStrategyKeys());
// check that the initial partitioning is pushed out of the loop
assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy());
assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys());
}
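CCCompute is defined elsewhere in the test class and is not shown in this snippet. For context, a minimal connected-components compute function written against the Gelly Pregel API could look like the sketch below; the class body is an assumed reconstruction of the standard min-id propagation pattern, not the test's verbatim code.

// Hypothetical sketch of CCCompute: each vertex adopts the smallest
// component id it has seen and forwards it to its neighbors.
public static final class CCCompute extends ComputeFunction<Long, Long, NullValue, Long> {

    @Override
    public void compute(Vertex<Long, Long> vertex, MessageIterator<Long> messages) {
        long minComponent = vertex.getValue();
        for (Long msg : messages) {
            minComponent = Math.min(minComponent, msg);
        }
        // Send messages on the first superstep to seed the iteration,
        // afterwards only when the component id actually shrinks.
        if (getSuperstepNumber() == 1 || minComponent < vertex.getValue()) {
            setNewVertexValue(minComponent);
            for (Edge<Long, NullValue> edge : getEdges()) {
                sendMessageTo(edge.getTarget(), minComponent);
            }
        }
    }
}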
Use of org.apache.flink.api.java.DataSet in project flink by apache.
From the class PregelCompilerTest, the method testPregelWithCombiner:
@SuppressWarnings("serial")
@Test
public void testPregelWithCombiner() {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(DEFAULT_PARALLELISM);
// compose test program
{
DataSet<Vertex<Long, Long>> initialVertices = env.fromElements(new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L)).map(new Tuple2ToVertexMap<>());
DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple2<>(1L, 2L)).map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {
public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) {
return new Edge<>(edge.f0, edge.f1, NullValue.getInstance());
}
});
Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env);
DataSet<Vertex<Long, Long>> result = graph.runVertexCentricIteration(new CCCompute(), new CCCombiner(), 100).getVertices();
result.output(new DiscardingOutputFormat<>());
}
Plan p = env.createProgramPlan("Pregel Connected Components");
OptimizedPlan op = compileNoStats(p);
// check the sink
SinkPlanNode sink = op.getDataSinks().iterator().next();
assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
assertEquals(DEFAULT_PARALLELISM, sink.getParallelism());
// check the iteration
WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource();
assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism());
// check the combiner
SingleInputPlanNode combiner = (SingleInputPlanNode) iteration.getInput2().getSource();
assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
// check the solution set delta
PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode();
assertTrue(ssDelta instanceof SingleInputPlanNode);
SingleInputPlanNode ssFlatMap = (SingleInputPlanNode) ((SingleInputPlanNode) (ssDelta)).getInput().getSource();
assertEquals(DEFAULT_PARALLELISM, ssFlatMap.getParallelism());
assertEquals(ShipStrategyType.FORWARD, ssFlatMap.getInput().getShipStrategy());
// check the computation coGroup
DualInputPlanNode computationCoGroup = (DualInputPlanNode) (ssFlatMap.getInput().getSource());
assertEquals(DEFAULT_PARALLELISM, computationCoGroup.getParallelism());
assertEquals(ShipStrategyType.FORWARD, computationCoGroup.getInput1().getShipStrategy());
assertEquals(ShipStrategyType.PARTITION_HASH, computationCoGroup.getInput2().getShipStrategy());
assertTrue(computationCoGroup.getInput2().getTempMode().isCached());
assertEquals(new FieldList(0), computationCoGroup.getInput2().getShipStrategyKeys());
// check that the initial partitioning is pushed out of the loop
assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy());
assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys());
}
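CCCombiner is likewise defined elsewhere in the test class. Assuming the Gelly MessageCombiner API and the min-propagation semantics sketched above, a plausible reconstruction is:

// Hypothetical sketch of CCCombiner: collapse all pending messages for a
// target vertex into their minimum, cutting shuffle volume.
public static final class CCCombiner extends MessageCombiner<Long, Long> {

    @Override
    public void combineMessages(MessageIterator<Long> messages) {
        long minMessage = Long.MAX_VALUE;
        for (Long msg : messages) {
            minMessage = Math.min(minMessage, msg);
        }
        sendCombinedMessage(minMessage);
    }
}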
Use of org.apache.flink.api.java.DataSet in project flink by apache.
From the class BulkIterationTranslationTest, the method testCorrectTranslation:
@Test
public void testCorrectTranslation() {
    final String jobName = "Test JobName";
    final int numIterations = 13;
    final int defaultParallelism = 133;
    final int iterationParallelism = 77;

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // ------------ construct the test program ------------------
    {
        env.setParallelism(defaultParallelism);

        @SuppressWarnings("unchecked")
        DataSet<Tuple3<Double, Long, String>> initialDataSet =
            env.fromElements(new Tuple3<>(3.44, 5L, "abc"));

        IterativeDataSet<Tuple3<Double, Long, String>> bulkIteration =
            initialDataSet.iterate(numIterations);
        bulkIteration.setParallelism(iterationParallelism);

        // test that multiple iteration consumers are supported
        DataSet<Tuple3<Double, Long, String>> identity =
            bulkIteration.map(new IdentityMapper<Tuple3<Double, Long, String>>());

        DataSet<Tuple3<Double, Long, String>> result = bulkIteration.closeWith(identity);

        result.output(new DiscardingOutputFormat<Tuple3<Double, Long, String>>());
        result.writeAsText("/dev/null");
    }

    Plan p = env.createProgramPlan(jobName);

    // ------------- validate the plan ----------------
    BulkIterationBase<?> iteration =
        (BulkIterationBase<?>) p.getDataSinks().iterator().next().getInput();

    assertEquals(jobName, p.getJobName());
    assertEquals(defaultParallelism, p.getDefaultParallelism());
    assertEquals(iterationParallelism, iteration.getParallelism());
}
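IdentityMapper is a small test helper that is not part of the snippet; a minimal reconstruction (an assumption, though there is little room for variation) simply forwards its input, which is enough to give the iteration a second consumer:

// Hypothetical sketch of IdentityMapper: a pass-through MapFunction.
public static class IdentityMapper<T> implements MapFunction<T, T> {

    @Override
    public T map(T value) {
        return value;
    }
}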
Use of org.apache.flink.api.java.DataSet in project flink by apache.
From the class WritableSavepointWindowITCase, the method testSlideWindow:
@Test
public void testSlideWindow() throws Exception {
    final String savepointPath = getTempDirPath(new AbstractID().toHexString());

    ExecutionEnvironment bEnv = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<String, Integer>> bootstrapData = bEnv.fromCollection(WORDS)
        .map(word -> Tuple2.of(word, 1))
        .returns(TUPLE_TYPE_INFO);

    WindowedOperatorTransformation<Tuple2<String, Integer>, String, TimeWindow> transformation =
        OperatorTransformation.bootstrapWith(bootstrapData)
            .assignTimestamps(record -> 2L)
            .keyBy(tuple -> tuple.f0, Types.STRING)
            .window(SlidingEventTimeWindows.of(Time.milliseconds(5), Time.milliseconds(1)));

    Savepoint.create(new MemoryStateBackend(), 128)
        .withOperator(UID, windowBootstrap.bootstrap(transformation))
        .write(savepointPath);

    bEnv.execute("write state");

    StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();

    WindowedStream<Tuple2<String, Integer>, String, TimeWindow> stream =
        sEnv.addSource(new MaxWatermarkSource<Tuple2<String, Integer>>())
            .returns(TUPLE_TYPE_INFO)
            .keyBy(tuple -> tuple.f0)
            .window(SlidingEventTimeWindows.of(Time.milliseconds(5), Time.milliseconds(1)));

    DataStream<Tuple2<String, Integer>> windowed = windowStream.window(stream).uid(UID);

    CompletableFuture<Collection<Tuple2<String, Integer>>> future = collector.collect(windowed);

    submitJob(savepointPath, sEnv);

    Collection<Tuple2<String, Integer>> results = future.get();
    Assert.assertEquals("Incorrect number of results", 15, results.size());
    Assert.assertThat("Incorrect bootstrap state", new HashSet<>(results), STANDARD_MATCHER);
}
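The test is parameterized: windowBootstrap builds the batch-side bootstrap of the window state and windowStream builds the matching streaming operator, both supplied by the surrounding test harness. As one plausible instantiation (an illustration under that assumption, not the test's only parameterization), both sides could apply the same reduce so that the restored operator matches the bootstrapped state:

// Hypothetical reduce-based parameterization: sum the counts per key.
ReduceFunction<Tuple2<String, Integer>> sum = (a, b) -> Tuple2.of(a.f0, a.f1 + b.f1);

// Batch side: materialize the window state that goes into the savepoint.
BootstrapTransformation<Tuple2<String, Integer>> bootstrap = transformation.reduce(sum);

// Streaming side: the restoring operator must apply the same function
// under the same uid to pick the state back up.
DataStream<Tuple2<String, Integer>> restored = stream.reduce(sum).uid(UID);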
Use of org.apache.flink.api.java.DataSet in project flink by apache.
From the class WritableSavepointWindowITCase, the method testSlideWindowWithEvictor:
@Test
public void testSlideWindowWithEvictor() throws Exception {
    final String savepointPath = getTempDirPath(new AbstractID().toHexString());

    ExecutionEnvironment bEnv = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<String, Integer>> bootstrapData = bEnv.fromCollection(WORDS)
        .map(word -> Tuple2.of(word, 1))
        .returns(TUPLE_TYPE_INFO);

    WindowedOperatorTransformation<Tuple2<String, Integer>, String, TimeWindow> transformation =
        OperatorTransformation.bootstrapWith(bootstrapData)
            .assignTimestamps(record -> 2L)
            .keyBy(tuple -> tuple.f0, Types.STRING)
            .window(SlidingEventTimeWindows.of(Time.milliseconds(5), Time.milliseconds(1)))
            .evictor(CountEvictor.of(1));

    Savepoint.create(new MemoryStateBackend(), 128)
        .withOperator(UID, windowBootstrap.bootstrap(transformation))
        .write(savepointPath);

    bEnv.execute("write state");

    StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();

    WindowedStream<Tuple2<String, Integer>, String, TimeWindow> stream =
        sEnv.addSource(new MaxWatermarkSource<Tuple2<String, Integer>>())
            .returns(TUPLE_TYPE_INFO)
            .keyBy(tuple -> tuple.f0)
            .window(SlidingEventTimeWindows.of(Time.milliseconds(5), Time.milliseconds(1)))
            .evictor(CountEvictor.of(1));

    DataStream<Tuple2<String, Integer>> windowed = windowStream.window(stream).uid(UID);

    CompletableFuture<Collection<Tuple2<String, Integer>>> future = collector.collect(windowed);

    submitJob(savepointPath, sEnv);

    Collection<Tuple2<String, Integer>> results = future.get();
    Assert.assertEquals("Incorrect number of results", 15, results.size());
    Assert.assertThat("Incorrect bootstrap state", new HashSet<>(results), EVICTOR_MATCHER);
}
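Relative to testSlideWindow, the only change is the CountEvictor.of(1) applied on both the bootstrap and the restore side: the evictor retains at most one element per window before the window function fires, so the bootstrapped state, and therefore the expected EVICTOR_MATCHER contents, differ from the standard case even though the window assignment and the result count (15) are the same.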