Use of org.apache.flink.optimizer.testfunctions.IdentityJoiner in project flink by apache, from the class BranchingPlansCompilerTest, method testDeltaIterationWithStaticInput.
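For context, here is a minimal sketch of what this test function presumably looks like. This is a hypothetical reconstruction, not the actual Flink source: it assumes the plain JoinFunction interface and that the joiner forwards its first input unchanged, which is consistent with the withForwardedFieldsFirst("*") annotations used in the snippets below.

import org.apache.flink.api.common.functions.JoinFunction;

// Hypothetical reconstruction of the IdentityJoiner test function.
public class IdentityJoiner<T> implements JoinFunction<T, T, T> {

    @Override
    public T join(T first, T second) {
        // "identity" join: ignore the second input and emit the first unchanged
        return first;
    }
}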
/**
 * <pre>
 *                 +----Iteration-------+
 *                 |                    |
 *      /---------< >---------join-----< >---sink
 *     / (Solution)|           /        |
 *    /            |          /         |
 *   /--map-------< >----\   /       /--|
 *  /     (Workset)|      \ /       /   |
 * src-map         |       join----/    |
 *  \              |      /             |
 *   \             +-----/--------------+
 *    \                 /
 *     \--reduce-------/
 * </pre>
 */
@Test
public void testDeltaIterationWithStaticInput() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSet<Tuple2<Long, Long>> source = env.generateSequence(0, 1).map(new Duplicator<Long>());
    DataSet<Tuple2<Long, Long>> map = source.map(new IdentityMapper<Tuple2<Long, Long>>());
    DataSet<Tuple2<Long, Long>> reduce = source.reduceGroup(new IdentityGroupReducer<Tuple2<Long, Long>>());

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> loop = source.iterateDelta(map, 10, 0);

    DataSet<Tuple2<Long, Long>> workset = loop.getWorkset()
            .join(reduce).where(0).equalTo(0)
            .with(new IdentityJoiner<Tuple2<Long, Long>>())
            .name("Next work set");
    DataSet<Tuple2<Long, Long>> delta = loop.getSolutionSet()
            .join(workset).where(0).equalTo(0)
            .with(new IdentityJoiner<Tuple2<Long, Long>>())
            .name("Solution set delta");

    DataSet<Tuple2<Long, Long>> result = loop.closeWith(delta, workset);
    result.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

    Plan plan = env.createProgramPlan();
    try {
        compileNoStats(plan);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
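The Duplicator above turns each Long produced by generateSequence into a Tuple2. A plausible sketch of its shape, inferred purely from the declared types in the snippet (hypothetical, not the actual test source):

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;

// Plausible shape of the Duplicator test function: emit each value twice as a pair.
public class Duplicator<T> implements MapFunction<T, Tuple2<T, T>> {

    @Override
    public Tuple2<T, T> map(T value) {
        return new Tuple2<T, T>(value, value);
    }
}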
Use of org.apache.flink.optimizer.testfunctions.IdentityJoiner in project flink by apache, from the class BranchingPlansCompilerTest, method testBranchingUnion.
@Test
public void testBranchingUnion() {
    try {
        // construct the plan
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);

        DataSet<Long> source1 = env.generateSequence(0, 1);
        DataSet<Long> source2 = env.generateSequence(0, 1);

        DataSet<Long> join1 = source1.join(source2).where("*").equalTo("*")
                .with(new IdentityJoiner<Long>()).name("Join 1");

        DataSet<Long> map1 = join1.map(new IdentityMapper<Long>()).name("Map 1");
        DataSet<Long> reduce1 = map1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce 1");
        DataSet<Long> reduce2 = join1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce 2");
        DataSet<Long> map2 = join1.map(new IdentityMapper<Long>()).name("Map 2");
        DataSet<Long> map3 = map2.map(new IdentityMapper<Long>()).name("Map 3");

        DataSet<Long> join2 = reduce1.union(reduce2).union(map2).union(map3)
                .join(map2, JoinHint.REPARTITION_SORT_MERGE).where("*").equalTo("*")
                .with(new IdentityJoiner<Long>()).name("Join 2");

        join2.output(new DiscardingOutputFormat<Long>());

        Plan plan = env.createProgramPlan();
        OptimizedPlan oPlan = compileNoStats(plan);

        JobGraphGenerator jobGen = new JobGraphGenerator();
        // Compile plan to verify that no error is thrown
        jobGen.compileJobGraph(oPlan);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
Use of org.apache.flink.optimizer.testfunctions.IdentityJoiner in project flink by apache, from the class ParallelismChangeTest, method checkPropertyHandlingWithTwoInputs.
/**
 * Checks that re-partitioning happens when the inputs of a two-input contract have different
 * parallelisms.
 *
 * <p>Test Plan:
 *
 * <pre>
 * (source) -> reduce -\
 *                      Match -> (sink)
 * (source) -> reduce -/
 * </pre>
 */
@Test
public void checkPropertyHandlingWithTwoInputs() {
    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSet<Long> set1 = env.generateSequence(0, 1).setParallelism(5);
    DataSet<Long> set2 = env.generateSequence(0, 1).setParallelism(7);

    DataSet<Long> reduce1 = set1.groupBy("*")
            .reduceGroup(new IdentityGroupReducer<Long>()).withForwardedFields("*")
            .setParallelism(5);
    DataSet<Long> reduce2 = set2.groupBy("*")
            .reduceGroup(new IdentityGroupReducer<Long>()).withForwardedFields("*")
            .setParallelism(7);

    reduce1.join(reduce2).where("*").equalTo("*")
            .with(new IdentityJoiner<Long>()).setParallelism(5)
            .output(new DiscardingOutputFormat<Long>()).setParallelism(5);

    Plan plan = env.createProgramPlan();
    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);

    JobGraphGenerator jobGen = new JobGraphGenerator();
    // Compile plan to verify that no error is thrown
    jobGen.compileJobGraph(oPlan);

    oPlan.accept(new Visitor<PlanNode>() {

        @Override
        public boolean preVisit(PlanNode visitable) {
            if (visitable instanceof DualInputPlanNode) {
                DualInputPlanNode node = (DualInputPlanNode) visitable;
                Channel c1 = node.getInput1();
                Channel c2 = node.getInput2();

                // the join runs with the parallelism of its first input (5), so that side can be
                // forwarded as-is, while the second input (parallelism 7) must be re-partitioned
                Assert.assertEquals("Incompatible shipping strategy chosen for match", ShipStrategyType.FORWARD, c1.getShipStrategy());
                Assert.assertEquals("Incompatible shipping strategy chosen for match", ShipStrategyType.PARTITION_HASH, c2.getShipStrategy());
                return false;
            }
            return true;
        }

        @Override
        public void postVisit(PlanNode visitable) {
            // do nothing
        }
    });
}
Use of org.apache.flink.optimizer.testfunctions.IdentityJoiner in project flink by apache, from the class WorksetIterationsRecordApiCompilerTest, method getTestPlan.
private Plan getTestPlan(boolean joinPreservesSolutionSet, boolean mapBeforeSolutionDelta) {
    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSet<Tuple2<Long, Long>> solSetInput = env.readCsvFile("/tmp/sol.csv").types(Long.class, Long.class).name("Solution Set");
    DataSet<Tuple2<Long, Long>> workSetInput = env.readCsvFile("/tmp/sol.csv").types(Long.class, Long.class).name("Workset");
    DataSet<Tuple2<Long, Long>> invariantInput = env.readCsvFile("/tmp/sol.csv").types(Long.class, Long.class).name("Invariant Input");

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> deltaIt =
            solSetInput.iterateDelta(workSetInput, 100, 0).name(ITERATION_NAME);

    DataSet<Tuple2<Long, Long>> join1 = deltaIt.getWorkset()
            .join(invariantInput).where(0).equalTo(0)
            .with(new IdentityJoiner<Tuple2<Long, Long>>())
            .withForwardedFieldsFirst("*").name(JOIN_WITH_INVARIANT_NAME);

    DataSet<Tuple2<Long, Long>> join2 = deltaIt.getSolutionSet()
            .join(join1).where(0).equalTo(0)
            .with(new IdentityJoiner<Tuple2<Long, Long>>())
            .name(JOIN_WITH_SOLUTION_SET);
    if (joinPreservesSolutionSet) {
        ((JoinOperator<?, ?, ?>) join2).withForwardedFieldsFirst("*");
    }

    DataSet<Tuple2<Long, Long>> nextWorkset = join2.groupBy(0)
            .reduceGroup(new IdentityGroupReducer<Tuple2<Long, Long>>())
            .withForwardedFields("*").name(NEXT_WORKSET_REDUCER_NAME);

    if (mapBeforeSolutionDelta) {
        DataSet<Tuple2<Long, Long>> mapper = join2.map(new IdentityMapper<Tuple2<Long, Long>>())
                .withForwardedFields("*").name(SOLUTION_DELTA_MAPPER_NAME);
        deltaIt.closeWith(mapper, nextWorkset).output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
    } else {
        deltaIt.closeWith(join2, nextWorkset).output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
    }

    return env.createProgramPlan();
}
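getTestPlan itself does not compile anything; the individual tests in WorksetIterationsRecordApiCompilerTest presumably call it with the two flags and then compile the result, along the lines of the other snippets in this section. A hedged sketch of such a caller (the flag combination shown is an arbitrary example):

Plan plan = getTestPlan(false, true);
try {
    compileNoStats(plan);
} catch (Exception e) {
    e.printStackTrace();
    Assert.fail(e.getMessage());
}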
Use of org.apache.flink.optimizer.testfunctions.IdentityJoiner in project flink by apache, from the class BranchingPlansCompilerTest, method testBranchEachContractType.
@SuppressWarnings("unchecked")
@Test
public void testBranchEachContractType() {
    try {
        // construct the plan
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);

        DataSet<Long> sourceA = env.generateSequence(0, 1);
        DataSet<Long> sourceB = env.generateSequence(0, 1);
        DataSet<Long> sourceC = env.generateSequence(0, 1);

        DataSet<Long> map1 = sourceA.map(new IdentityMapper<Long>()).name("Map 1");
        DataSet<Long> reduce1 = map1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce 1");

        DataSet<Long> join1 = sourceB.union(sourceB).union(sourceC)
                .join(sourceC).where("*").equalTo("*")
                .with(new IdentityJoiner<Long>()).name("Join 1");

        DataSet<Long> coGroup1 = sourceA.coGroup(sourceB).where("*").equalTo("*")
                .with(new IdentityCoGrouper<Long>()).name("CoGroup 1");

        DataSet<Long> cross1 = reduce1.cross(coGroup1).with(new IdentityCrosser<Long>()).name("Cross 1");

        DataSet<Long> coGroup2 = cross1.coGroup(cross1).where("*").equalTo("*")
                .with(new IdentityCoGrouper<Long>()).name("CoGroup 2");
        DataSet<Long> coGroup3 = map1.coGroup(join1).where("*").equalTo("*")
                .with(new IdentityCoGrouper<Long>()).name("CoGroup 3");

        DataSet<Long> map2 = coGroup3.map(new IdentityMapper<Long>()).name("Map 2");

        DataSet<Long> coGroup4 = map2.coGroup(join1).where("*").equalTo("*")
                .with(new IdentityCoGrouper<Long>()).name("CoGroup 4");
        DataSet<Long> coGroup5 = coGroup2.coGroup(coGroup1).where("*").equalTo("*")
                .with(new IdentityCoGrouper<Long>()).name("CoGroup 5");
        DataSet<Long> coGroup6 = reduce1.coGroup(coGroup4).where("*").equalTo("*")
                .with(new IdentityCoGrouper<Long>()).name("CoGroup 6");
        DataSet<Long> coGroup7 = coGroup5.coGroup(coGroup6).where("*").equalTo("*")
                .with(new IdentityCoGrouper<Long>()).name("CoGroup 7");

        coGroup7.union(sourceA).union(coGroup3).union(coGroup4).union(coGroup1)
                .output(new DiscardingOutputFormat<Long>());

        Plan plan = env.createProgramPlan();
        OptimizedPlan oPlan = compileNoStats(plan);

        JobGraphGenerator jobGen = new JobGraphGenerator();
        // Compile plan to verify that no error is thrown
        jobGen.compileJobGraph(oPlan);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}