
Example 1 with ReduceOperatorBase

Use of org.apache.flink.api.common.operators.base.ReduceOperatorBase in the Apache Flink project.

From the class SemanticPropertiesAPIToPlanTest, method forwardFieldsTestMapReduce:

@Test
public void forwardFieldsTestMapReduce() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple3<Integer, Integer, Integer>> set = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);
    set = set.map(new MockMapper()).withForwardedFields("*")
            .groupBy(0)
            .reduce(new MockReducer()).withForwardedFields("f0->f1")
            .map(new MockMapper()).withForwardedFields("*")
            .groupBy(1)
            .reduce(new MockReducer()).withForwardedFields("*");
    set.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>());
    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = compileWithStats(plan);
    oPlan.accept(new Visitor<PlanNode>() {

        @Override
        public boolean preVisit(PlanNode visitable) {
            if (visitable instanceof SingleInputPlanNode && visitable.getProgramOperator() instanceof ReduceOperatorBase) {
                for (Channel input : visitable.getInputs()) {
                    GlobalProperties gprops = visitable.getGlobalProperties();
                    LocalProperties lprops = visitable.getLocalProperties();
                    Assert.assertTrue("Reduce should just forward the input if it is already partitioned", input.getShipStrategy() == ShipStrategyType.FORWARD);
                    Assert.assertTrue("Wrong GlobalProperties on Reducer", gprops.isPartitionedOnFields(new FieldSet(1)));
                    Assert.assertTrue("Wrong GlobalProperties on Reducer", gprops.getPartitioning() == PartitioningProperty.HASH_PARTITIONED);
                    Assert.assertTrue("Wrong LocalProperties on Reducer", lprops.getGroupedFields().contains(1));
                }
            }
            if (visitable instanceof SingleInputPlanNode && visitable.getProgramOperator() instanceof MapOperatorBase) {
                for (Channel input : visitable.getInputs()) {
                    GlobalProperties gprops = visitable.getGlobalProperties();
                    LocalProperties lprops = visitable.getLocalProperties();
                    Assert.assertTrue("Map should just forward the input if it is already partitioned", input.getShipStrategy() == ShipStrategyType.FORWARD);
                    Assert.assertTrue("Wrong GlobalProperties on Mapper", gprops.isPartitionedOnFields(new FieldSet(1)));
                    Assert.assertTrue("Wrong GlobalProperties on Mapper", gprops.getPartitioning() == PartitioningProperty.HASH_PARTITIONED);
                    Assert.assertTrue("Wrong LocalProperties on Mapper", lprops.getGroupedFields().contains(1));
                }
                return false;
            }
            return true;
        }

        @Override
        public void postVisit(PlanNode visitable) {
        }
    });
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) ReduceOperatorBase(org.apache.flink.api.common.operators.base.ReduceOperatorBase) Channel(org.apache.flink.optimizer.plan.Channel) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) FieldSet(org.apache.flink.api.common.operators.util.FieldSet) MapOperatorBase(org.apache.flink.api.common.operators.base.MapOperatorBase) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) GlobalProperties(org.apache.flink.optimizer.dataproperties.GlobalProperties) Tuple3(org.apache.flink.api.java.tuple.Tuple3) LocalProperties(org.apache.flink.optimizer.dataproperties.LocalProperties) Test(org.junit.Test)
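
The MockMapper and MockReducer helpers referenced above are not shown on this page. A minimal sketch of plausible stand-ins follows; the names match the test, but the bodies are assumptions on our part, since the optimizer only sees the withForwardedFields(...) hints attached at the call site, not what the functions compute.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple3;

// Hypothetical identity mapper, consistent with the "*" forwarded-fields hint.
public static class MockMapper implements MapFunction<Tuple3<Integer, Integer, Integer>, Tuple3<Integer, Integer, Integer>> {

    @Override
    public Tuple3<Integer, Integer, Integer> map(Tuple3<Integer, Integer, Integer> value) {
        return value;
    }
}

// Hypothetical reducer; it simply keeps one of the grouped records. The semantic
// properties used by the optimizer come from the withForwardedFields(...) calls above.
public static class MockReducer implements ReduceFunction<Tuple3<Integer, Integer, Integer>> {

    @Override
    public Tuple3<Integer, Integer, Integer> reduce(Tuple3<Integer, Integer, Integer> value1, Tuple3<Integer, Integer, Integer> value2) {
        return value1;
    }
}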

Example 2 with ReduceOperatorBase

Use of org.apache.flink.api.common.operators.base.ReduceOperatorBase in the Apache Flink project.

From the class DistinctTranslationTest, method translateDistinctPlain:

@Test
public void translateDistinctPlain() {
    try {
        final int parallelism = 8;
        ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);
        DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);
        initialData.distinct().output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());
        Plan p = env.createProgramPlan();
        GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
        // currently distinct is translated to a Reduce
        ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();
        // check types
        assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
        assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());
        // check keys
        assertArrayEquals(new int[] { 0, 1, 2 }, reducer.getKeyColumns(0));
        // parallelism was not configured on the operator
        assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1);
        assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail("Test caused an error: " + e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) ReduceOperatorBase(org.apache.flink.api.common.operators.base.ReduceOperatorBase) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Plan(org.apache.flink.api.common.Plan) Test(org.junit.Test)
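
As the comment in the test notes, distinct() is currently translated into a reduce: the input is grouped on the distinct key (here, all tuple fields) and reduced with a function that keeps one record out of every pair of equal ones. A rough sketch of such a reduce function follows; the class name below is ours, the actual implementation is an internal Flink class.

import org.apache.flink.api.common.functions.ReduceFunction;

// Sketch of the kind of ReduceFunction behind distinct(): both inputs are equal
// on the grouping key, so returning either one of them drops the duplicate.
public static final class KeepOneReduce<T> implements ReduceFunction<T> {

    @Override
    public T reduce(T value1, T value2) {
        return value1;
    }
}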

Example 3 with ReduceOperatorBase

Use of org.apache.flink.api.common.operators.base.ReduceOperatorBase in the Apache Flink project.

From the class DistinctTranslationTest, method translateDistinctPlain2:

@Test
public void translateDistinctPlain2() {
    try {
        final int parallelism = 8;
        ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);
        DataSet<CustomType> initialData = getSourcePojoDataSet(env);
        initialData.distinct().output(new DiscardingOutputFormat<CustomType>());
        Plan p = env.createProgramPlan();
        GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
        // currently distinct is translated to a Reduce
        ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();
        // check types
        assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
        assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());
        // check keys
        assertArrayEquals(new int[] { 0 }, reducer.getKeyColumns(0));
        // parallelism was not configured on the operator
        assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1);
        assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail("Test caused an error: " + e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) ReduceOperatorBase(org.apache.flink.api.common.operators.base.ReduceOperatorBase) Plan(org.apache.flink.api.common.Plan) Test(org.junit.Test)
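
Neither CustomType nor getSourcePojoDataSet is shown on this page. The sketch below is an assumption that is at least consistent with the assertions: a POJO with a single public field matches the expected key columns { 0 } for a plain distinct(), and the field name myInt matches the expression key used in Example 5. The sample data is made up.

import java.io.Serializable;

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

// Assumed single-field POJO; the public no-arg constructor and public field make it a Flink POJO type.
public static class CustomType implements Serializable {

    public int myInt;

    public CustomType() {
    }

    public CustomType(int i) {
        this.myInt = i;
    }
}

// Hypothetical source helper; duplicates are included so that distinct() has something to remove.
private static DataSet<CustomType> getSourcePojoDataSet(ExecutionEnvironment env) {
    return env.fromElements(new CustomType(1), new CustomType(1), new CustomType(2));
}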

Example 4 with ReduceOperatorBase

Use of org.apache.flink.api.common.operators.base.ReduceOperatorBase in the Apache Flink project.

From the class DistinctTranslationTest, method translateDistinctPosition:

@Test
public void translateDistinctPosition() {
    try {
        final int parallelism = 8;
        ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);
        DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);
        initialData.distinct(1, 2).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());
        Plan p = env.createProgramPlan();
        GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
        // currently distinct is translated to a Reduce
        ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();
        // check types
        assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
        assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());
        // check keys
        assertArrayEquals(new int[] { 1, 2 }, reducer.getKeyColumns(0));
        // parallelism was not configured on the operator
        assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1);
        assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail("Test caused an error: " + e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) ReduceOperatorBase(org.apache.flink.api.common.operators.base.ReduceOperatorBase) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Plan(org.apache.flink.api.common.Plan) Test(org.junit.Test)
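
The getSourceDataSet helper used by the tuple-based tests is also not shown here. A hypothetical version matching the declared element type Tuple3<Double, StringValue, LongValue> could look like this; the sample values are invented.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.types.LongValue;
import org.apache.flink.types.StringValue;

// Hypothetical source helper; any small in-memory data set of the right element type will do.
private static DataSet<Tuple3<Double, StringValue, LongValue>> getSourceDataSet(ExecutionEnvironment env) {
    return env.fromElements(
        new Tuple3<>(0.5, new StringValue("a"), new LongValue(1L)),
        new Tuple3<>(0.5, new StringValue("a"), new LongValue(1L)),
        new Tuple3<>(1.5, new StringValue("b"), new LongValue(2L)));
}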

Example 5 with ReduceOperatorBase

Use of org.apache.flink.api.common.operators.base.ReduceOperatorBase in the Apache Flink project.

From the class DistinctTranslationTest, method translateDistinctExpressionKey:

@Test
public void translateDistinctExpressionKey() {
    try {
        final int parallelism = 8;
        ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);
        DataSet<CustomType> initialData = getSourcePojoDataSet(env);
        initialData.distinct("myInt").output(new DiscardingOutputFormat<CustomType>());
        Plan p = env.createProgramPlan();
        GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
        // currently distinct is translated to a Reduce
        ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();
        // check types
        assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
        assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());
        // check keys
        assertArrayEquals(new int[] { 0 }, reducer.getKeyColumns(0));
        // parallelism was not configured on the operator
        assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1);
        assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail("Test caused an error: " + e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) ReduceOperatorBase(org.apache.flink.api.common.operators.base.ReduceOperatorBase) Plan(org.apache.flink.api.common.Plan) Test(org.junit.Test)
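
Each test above asserts the default parallelism (-1 or 1) because none was configured on the operator. As a hypothetical counterpart, not taken from this page, explicitly configuring parallelism on the distinct operator should be reflected on the translated ReduceOperatorBase, roughly like this (reusing the CustomType and getSourcePojoDataSet sketches from Example 3):

@Test
public void translateDistinctWithExplicitParallelism() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(8);
    DataSet<CustomType> initialData = getSourcePojoDataSet(env);
    // configure parallelism on the operator itself this time
    initialData.distinct("myInt").setParallelism(4).output(new DiscardingOutputFormat<CustomType>());
    Plan p = env.createProgramPlan();
    GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
    ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();
    // the explicitly configured value is expected to be carried into the common API plan
    assertEquals(4, reducer.getParallelism());
}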

Aggregations

ReduceOperatorBase (org.apache.flink.api.common.operators.base.ReduceOperatorBase): 9 usages
Plan (org.apache.flink.api.common.Plan): 7 usages
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 7 usages
Test (org.junit.Test): 6 usages
Tuple3 (org.apache.flink.api.java.tuple.Tuple3): 5 usages
RichReduceFunction (org.apache.flink.api.common.functions.RichReduceFunction): 2 usages
MapOperatorBase (org.apache.flink.api.common.operators.base.MapOperatorBase): 2 usages
InvalidProgramException (org.apache.flink.api.common.InvalidProgramException): 1 usage
ReduceFunction (org.apache.flink.api.common.functions.ReduceFunction): 1 usage
GenericDataSinkBase (org.apache.flink.api.common.operators.GenericDataSinkBase): 1 usage
GenericDataSourceBase (org.apache.flink.api.common.operators.GenericDataSourceBase): 1 usage
SelectorFunctionKeys (org.apache.flink.api.common.operators.Keys.SelectorFunctionKeys): 1 usage
SingleInputSemanticProperties (org.apache.flink.api.common.operators.SingleInputSemanticProperties): 1 usage
UnaryOperatorInformation (org.apache.flink.api.common.operators.UnaryOperatorInformation): 1 usage
Union (org.apache.flink.api.common.operators.Union): 1 usage
BulkIterationBase (org.apache.flink.api.common.operators.base.BulkIterationBase): 1 usage
CoGroupOperatorBase (org.apache.flink.api.common.operators.base.CoGroupOperatorBase): 1 usage
CoGroupRawOperatorBase (org.apache.flink.api.common.operators.base.CoGroupRawOperatorBase): 1 usage
CrossOperatorBase (org.apache.flink.api.common.operators.base.CrossOperatorBase): 1 usage
DeltaIterationBase (org.apache.flink.api.common.operators.base.DeltaIterationBase): 1 usage