Example use of org.apache.flink.api.common.operators.base.ReduceOperatorBase in the Apache Flink project.
From the class SemanticPropertiesAPIToPlanTest, method forwardFieldsTestMapReduce.
/**
 * Builds a map / reduce / map / reduce program whose operators are annotated with
 * forwarded fields, compiles it, and inspects the optimized plan.
 *
 * <p>For every single-input plan node whose program operator is a reduce or a map, each
 * input channel must use the FORWARD ship strategy, and the node must be hash partitioned
 * on field 1 and locally grouped on field 1. The visitor returns {@code false} from
 * {@code preVisit} once it has checked a map node and {@code true} otherwise.
 */
@Test
public void forwardFieldsTestMapReduce() {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Integer, Integer>> set =
            env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);

    // Two map/reduce stages: the first groups on field 0 and forwards f0 into f1,
    // the second groups on field 1 and forwards all fields.
    set = set
            .map(new MockMapper()).withForwardedFields("*")
            .groupBy(0)
            .reduce(new MockReducer()).withForwardedFields("f0->f1")
            .map(new MockMapper()).withForwardedFields("*")
            .groupBy(1)
            .reduce(new MockReducer()).withForwardedFields("*");

    set.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>());

    final Plan plan = env.createProgramPlan();
    final OptimizedPlan optimized = compileWithStats(plan);

    optimized.accept(new Visitor<PlanNode>() {

        @Override
        public boolean preVisit(PlanNode node) {
            // Only single-input nodes are checked; everything else is simply traversed.
            if (!(node instanceof SingleInputPlanNode)) {
                return true;
            }

            if (node.getProgramOperator() instanceof ReduceOperatorBase) {
                for (Channel channel : node.getInputs()) {
                    final GlobalProperties global = node.getGlobalProperties();
                    final LocalProperties local = node.getLocalProperties();

                    Assert.assertTrue(
                            "Reduce should just forward the input if it is already partitioned",
                            channel.getShipStrategy() == ShipStrategyType.FORWARD);
                    Assert.assertTrue(
                            "Wrong GlobalProperties on Reducer",
                            global.isPartitionedOnFields(new FieldSet(1)));
                    Assert.assertTrue(
                            "Wrong GlobalProperties on Reducer",
                            global.getPartitioning() == PartitioningProperty.HASH_PARTITIONED);
                    Assert.assertTrue(
                            "Wrong LocalProperties on Reducer",
                            local.getGroupedFields().contains(1));
                }
            }

            if (node.getProgramOperator() instanceof MapOperatorBase) {
                for (Channel channel : node.getInputs()) {
                    final GlobalProperties global = node.getGlobalProperties();
                    final LocalProperties local = node.getLocalProperties();

                    Assert.assertTrue(
                            "Map should just forward the input if it is already partitioned",
                            channel.getShipStrategy() == ShipStrategyType.FORWARD);
                    Assert.assertTrue(
                            "Wrong GlobalProperties on Mapper",
                            global.isPartitionedOnFields(new FieldSet(1)));
                    Assert.assertTrue(
                            "Wrong GlobalProperties on Mapper",
                            global.getPartitioning() == PartitioningProperty.HASH_PARTITIONED);
                    Assert.assertTrue(
                            "Wrong LocalProperties on Mapper",
                            local.getGroupedFields().contains(1));
                }
                // A map node was checked: signal the visitor with false, as the original did.
                return false;
            }

            return true;
        }

        @Override
        public void postVisit(PlanNode node) {
            // nothing to verify after visiting a node
        }
    });
}
Example use of org.apache.flink.api.common.operators.base.ReduceOperatorBase in the Apache Flink project.
From the class DistinctTranslationTest, method translateDistinctPlain.
/**
 * Checks the translation of {@code distinct()} without key arguments on a tuple data set.
 *
 * <p>The operator feeding the sink must be a {@code ReduceOperatorBase} whose input and
 * output types equal the type of the source data set, whose key columns are
 * {@code {0, 1, 2}}, whose parallelism is 1 or -1, and whose own input is a
 * {@code GenericDataSourceBase}.
 */
@Test
public void translateDistinctPlain() {
    try {
        final int parallelism = 8;
        ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

        DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);
        initialData.distinct().output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());

        Plan p = env.createProgramPlan();
        GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

        // currently distinct is translated to a Reduce
        ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();

        // check types
        assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
        assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());

        // check keys
        assertArrayEquals(new int[] { 0, 1, 2 }, reducer.getKeyColumns(0));

        // parallelism was not configured on the operator
        assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1);

        assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
    } catch (Exception e) {
        // Fail with the same message as before, but keep the original exception as the
        // cause so the test report shows where it was thrown instead of only stderr output.
        AssertionError failure = new AssertionError("Test caused an error: " + e.getMessage());
        failure.initCause(e);
        throw failure;
    }
}
Example use of org.apache.flink.api.common.operators.base.ReduceOperatorBase in the Apache Flink project.
From the class DistinctTranslationTest, method translateDistinctPlain2.
/**
 * Checks the translation of {@code distinct()} without key arguments on a
 * {@code CustomType} data set.
 *
 * <p>The operator feeding the sink must be a {@code ReduceOperatorBase} whose input and
 * output types equal the type of the source data set, whose key columns are {@code {0}},
 * whose parallelism is 1 or -1, and whose own input is a {@code GenericDataSourceBase}.
 */
@Test
public void translateDistinctPlain2() {
    try {
        final int parallelism = 8;
        ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

        DataSet<CustomType> initialData = getSourcePojoDataSet(env);
        initialData.distinct().output(new DiscardingOutputFormat<CustomType>());

        Plan p = env.createProgramPlan();
        GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

        // currently distinct is translated to a Reduce
        ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();

        // check types
        assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
        assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());

        // check keys
        assertArrayEquals(new int[] { 0 }, reducer.getKeyColumns(0));

        // parallelism was not configured on the operator
        assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1);

        assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
    } catch (Exception e) {
        // Fail with the same message as before, but keep the original exception as the
        // cause so the test report shows where it was thrown instead of only stderr output.
        AssertionError failure = new AssertionError("Test caused an error: " + e.getMessage());
        failure.initCause(e);
        throw failure;
    }
}
Example use of org.apache.flink.api.common.operators.base.ReduceOperatorBase in the Apache Flink project.
From the class DistinctTranslationTest, method translateDistinctPosition.
/**
 * Checks the translation of {@code distinct(1, 2)} (field positions as keys) on a tuple
 * data set.
 *
 * <p>The operator feeding the sink must be a {@code ReduceOperatorBase} whose input and
 * output types equal the type of the source data set, whose key columns are
 * {@code {1, 2}}, whose parallelism is 1 or -1, and whose own input is a
 * {@code GenericDataSourceBase}.
 */
@Test
public void translateDistinctPosition() {
    try {
        final int parallelism = 8;
        ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

        DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);
        initialData.distinct(1, 2).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());

        Plan p = env.createProgramPlan();
        GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

        // currently distinct is translated to a Reduce
        ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();

        // check types
        assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
        assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());

        // check keys
        assertArrayEquals(new int[] { 1, 2 }, reducer.getKeyColumns(0));

        // parallelism was not configured on the operator
        assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1);

        assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
    } catch (Exception e) {
        // Fail with the same message as before, but keep the original exception as the
        // cause so the test report shows where it was thrown instead of only stderr output.
        AssertionError failure = new AssertionError("Test caused an error: " + e.getMessage());
        failure.initCause(e);
        throw failure;
    }
}
Example use of org.apache.flink.api.common.operators.base.ReduceOperatorBase in the Apache Flink project.
From the class DistinctTranslationTest, method translateDistinctExpressionKey.
/**
 * Checks the translation of {@code distinct("myInt")} (a field expression as key) on a
 * {@code CustomType} data set.
 *
 * <p>The operator feeding the sink must be a {@code ReduceOperatorBase} whose input and
 * output types equal the type of the source data set, whose key columns are {@code {0}},
 * whose parallelism is 1 or -1, and whose own input is a {@code GenericDataSourceBase}.
 */
@Test
public void translateDistinctExpressionKey() {
    try {
        final int parallelism = 8;
        ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

        DataSet<CustomType> initialData = getSourcePojoDataSet(env);
        initialData.distinct("myInt").output(new DiscardingOutputFormat<CustomType>());

        Plan p = env.createProgramPlan();
        GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

        // currently distinct is translated to a Reduce
        ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();

        // check types
        assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
        assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());

        // check keys
        assertArrayEquals(new int[] { 0 }, reducer.getKeyColumns(0));

        // parallelism was not configured on the operator
        assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1);

        assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
    } catch (Exception e) {
        // Fail with the same message as before, but keep the original exception as the
        // cause so the test report shows where it was thrown instead of only stderr output.
        AssertionError failure = new AssertionError("Test caused an error: " + e.getMessage());
        failure.initCause(e);
        throw failure;
    }
}
Aggregations