Search in sources :

Example 21 with LocalProperties

use of org.apache.flink.optimizer.dataproperties.LocalProperties in project flink by apache.

the class PropertyDataSourceTest method checkCoPartitionedSources2.

@Test
public void checkCoPartitionedSources2() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSource<Tuple2<Long, String>> data1 = env.readCsvFile("/some/path").types(Long.class, String.class);
    data1.getSplitDataProperties().splitsPartitionedBy("byCountry", 0);
    DataSource<Tuple2<Long, String>> data2 = env.readCsvFile("/some/path").types(Long.class, String.class);
    data2.getSplitDataProperties().splitsPartitionedBy("byDate", 0);
    data1.union(data2).output(new DiscardingOutputFormat<Tuple2<Long, String>>());
    Plan plan = env.createProgramPlan();
    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);
    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode1 = (SourcePlanNode) ((NAryUnionPlanNode) sinkNode.getPredecessor()).getListOfInputs().get(0).getSource();
    SourcePlanNode sourceNode2 = (SourcePlanNode) ((NAryUnionPlanNode) sinkNode.getPredecessor()).getListOfInputs().get(1).getSource();
    GlobalProperties gprops1 = sourceNode1.getGlobalProperties();
    LocalProperties lprops1 = sourceNode1.getLocalProperties();
    GlobalProperties gprops2 = sourceNode2.getGlobalProperties();
    LocalProperties lprops2 = sourceNode2.getLocalProperties();
    Assert.assertTrue((new FieldSet(gprops1.getPartitioningFields().toArray())).equals(new FieldSet(0)));
    Assert.assertTrue(gprops1.getPartitioning() == PartitioningProperty.CUSTOM_PARTITIONING);
    Assert.assertTrue(lprops1.getGroupedFields() == null);
    Assert.assertTrue(lprops1.getOrdering() == null);
    Assert.assertTrue((new FieldSet(gprops2.getPartitioningFields().toArray())).equals(new FieldSet(0)));
    Assert.assertTrue(gprops2.getPartitioning() == PartitioningProperty.CUSTOM_PARTITIONING);
    Assert.assertTrue(lprops2.getGroupedFields() == null);
    Assert.assertTrue(lprops2.getOrdering() == null);
    Assert.assertTrue(!gprops1.getCustomPartitioner().equals(gprops2.getCustomPartitioner()));
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) FieldSet(org.apache.flink.api.common.operators.util.FieldSet) GlobalProperties(org.apache.flink.optimizer.dataproperties.GlobalProperties) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) LocalProperties(org.apache.flink.optimizer.dataproperties.LocalProperties) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) Test(org.junit.Test)

Example 22 with LocalProperties

use of org.apache.flink.optimizer.dataproperties.LocalProperties in project flink by apache.

the class SemanticPropertiesAPIToPlanTest method forwardFieldsTestMapReduce.

@Test
public void forwardFieldsTestMapReduce() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple3<Integer, Integer, Integer>> set = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);
    set = set.map(new MockMapper()).withForwardedFields("*").groupBy(0).reduce(new MockReducer()).withForwardedFields("f0->f1").map(new MockMapper()).withForwardedFields("*").groupBy(1).reduce(new MockReducer()).withForwardedFields("*");
    set.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>());
    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = compileWithStats(plan);
    oPlan.accept(new Visitor<PlanNode>() {

        @Override
        public boolean preVisit(PlanNode visitable) {
            if (visitable instanceof SingleInputPlanNode && visitable.getProgramOperator() instanceof ReduceOperatorBase) {
                for (Channel input : visitable.getInputs()) {
                    GlobalProperties gprops = visitable.getGlobalProperties();
                    LocalProperties lprops = visitable.getLocalProperties();
                    Assert.assertTrue("Reduce should just forward the input if it is already partitioned", input.getShipStrategy() == ShipStrategyType.FORWARD);
                    Assert.assertTrue("Wrong GlobalProperties on Reducer", gprops.isPartitionedOnFields(new FieldSet(1)));
                    Assert.assertTrue("Wrong GlobalProperties on Reducer", gprops.getPartitioning() == PartitioningProperty.HASH_PARTITIONED);
                    Assert.assertTrue("Wrong LocalProperties on Reducer", lprops.getGroupedFields().contains(1));
                }
            }
            if (visitable instanceof SingleInputPlanNode && visitable.getProgramOperator() instanceof MapOperatorBase) {
                for (Channel input : visitable.getInputs()) {
                    GlobalProperties gprops = visitable.getGlobalProperties();
                    LocalProperties lprops = visitable.getLocalProperties();
                    Assert.assertTrue("Map should just forward the input if it is already partitioned", input.getShipStrategy() == ShipStrategyType.FORWARD);
                    Assert.assertTrue("Wrong GlobalProperties on Mapper", gprops.isPartitionedOnFields(new FieldSet(1)));
                    Assert.assertTrue("Wrong GlobalProperties on Mapper", gprops.getPartitioning() == PartitioningProperty.HASH_PARTITIONED);
                    Assert.assertTrue("Wrong LocalProperties on Mapper", lprops.getGroupedFields().contains(1));
                }
                return false;
            }
            return true;
        }

        @Override
        public void postVisit(PlanNode visitable) {
        }
    });
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) ReduceOperatorBase(org.apache.flink.api.common.operators.base.ReduceOperatorBase) Channel(org.apache.flink.optimizer.plan.Channel) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) FieldSet(org.apache.flink.api.common.operators.util.FieldSet) MapOperatorBase(org.apache.flink.api.common.operators.base.MapOperatorBase) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) GlobalProperties(org.apache.flink.optimizer.dataproperties.GlobalProperties) Tuple3(org.apache.flink.api.java.tuple.Tuple3) LocalProperties(org.apache.flink.optimizer.dataproperties.LocalProperties) Test(org.junit.Test)

Example 23 with LocalProperties

use of org.apache.flink.optimizer.dataproperties.LocalProperties in project flink by apache.

the class TwoInputNode method instantiate.

protected void instantiate(OperatorDescriptorDual operator, Channel in1, Channel in2, List<Set<? extends NamedChannel>> broadcastPlanChannels, List<PlanNode> target, CostEstimator estimator, RequestedGlobalProperties globPropsReq1, RequestedGlobalProperties globPropsReq2, RequestedLocalProperties locPropsReq1, RequestedLocalProperties locPropsReq2) {
    final PlanNode inputSource1 = in1.getSource();
    final PlanNode inputSource2 = in2.getSource();
    for (List<NamedChannel> broadcastChannelsCombination : Sets.cartesianProduct(broadcastPlanChannels)) {
        boolean validCombination = true;
        // check whether the broadcast inputs use the same plan candidate at the branching point
        for (int i = 0; i < broadcastChannelsCombination.size(); i++) {
            NamedChannel nc = broadcastChannelsCombination.get(i);
            PlanNode bcSource = nc.getSource();
            if (!(areBranchCompatible(bcSource, inputSource1) || areBranchCompatible(bcSource, inputSource2))) {
                validCombination = false;
                break;
            }
            // check branch compatibility against all other broadcast variables
            for (int k = 0; k < i; k++) {
                PlanNode otherBcSource = broadcastChannelsCombination.get(k).getSource();
                if (!areBranchCompatible(bcSource, otherBcSource)) {
                    validCombination = false;
                    break;
                }
            }
        }
        if (!validCombination) {
            continue;
        }
        placePipelineBreakersIfNecessary(operator.getStrategy(), in1, in2);
        DualInputPlanNode node = operator.instantiate(in1, in2, this);
        node.setBroadcastInputs(broadcastChannelsCombination);
        SemanticProperties semPropsGlobalPropFiltering = getSemanticPropertiesForGlobalPropertyFiltering();
        GlobalProperties gp1 = in1.getGlobalProperties().clone().filterBySemanticProperties(semPropsGlobalPropFiltering, 0);
        GlobalProperties gp2 = in2.getGlobalProperties().clone().filterBySemanticProperties(semPropsGlobalPropFiltering, 1);
        GlobalProperties combined = operator.computeGlobalProperties(gp1, gp2);
        SemanticProperties semPropsLocalPropFiltering = getSemanticPropertiesForLocalPropertyFiltering();
        LocalProperties lp1 = in1.getLocalProperties().clone().filterBySemanticProperties(semPropsLocalPropFiltering, 0);
        LocalProperties lp2 = in2.getLocalProperties().clone().filterBySemanticProperties(semPropsLocalPropFiltering, 1);
        LocalProperties locals = operator.computeLocalProperties(lp1, lp2);
        node.initProperties(combined, locals);
        node.updatePropertiesWithUniqueSets(getUniqueFields());
        target.add(node);
    }
}
Also used : DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) SemanticProperties(org.apache.flink.api.common.operators.SemanticProperties) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) RequestedGlobalProperties(org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties) GlobalProperties(org.apache.flink.optimizer.dataproperties.GlobalProperties) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel) RequestedLocalProperties(org.apache.flink.optimizer.dataproperties.RequestedLocalProperties) LocalProperties(org.apache.flink.optimizer.dataproperties.LocalProperties)

Example 24 with LocalProperties

use of org.apache.flink.optimizer.dataproperties.LocalProperties in project flink by apache.

the class DataSourceNode method setDataPropertiesFromSplitProperties.

private void setDataPropertiesFromSplitProperties(SplitDataProperties splitProps) {
    // set global properties
    int[] partitionKeys = splitProps.getSplitPartitionKeys();
    Partitioner<?> partitioner = splitProps.getSplitPartitioner();
    if (partitionKeys != null && partitioner != null) {
        this.gprops.setCustomPartitioned(new FieldList(partitionKeys), partitioner);
    } else if (partitionKeys != null) {
        this.gprops.setAnyPartitioning(new FieldList(partitionKeys));
    }
    // set local properties
    int[] groupingKeys = splitProps.getSplitGroupKeys();
    Ordering ordering = splitProps.getSplitOrder();
    // adapt split grouping and sorting
    if (ordering != null) {
        // sorting falls back to grouping because a source can read multiple,
        // randomly assigned splits
        groupingKeys = ordering.getFieldPositions();
    }
    if (groupingKeys != null && partitionKeys != null) {
        // check if grouping is also valid across splits, i.e., whether grouping keys are
        // valid superset of partition keys
        boolean allFieldsIncluded = true;
        for (int i : partitionKeys) {
            boolean fieldIncluded = false;
            for (int j : groupingKeys) {
                if (i == j) {
                    fieldIncluded = true;
                    break;
                }
            }
            if (!fieldIncluded) {
                allFieldsIncluded = false;
                break;
            }
        }
        if (allFieldsIncluded) {
            this.lprops = LocalProperties.forGrouping(new FieldList(groupingKeys));
        } else {
            this.lprops = new LocalProperties();
        }
    } else {
        this.lprops = new LocalProperties();
    }
}
Also used : Ordering(org.apache.flink.api.common.operators.Ordering) LocalProperties(org.apache.flink.optimizer.dataproperties.LocalProperties) FieldList(org.apache.flink.api.common.operators.util.FieldList)

Example 25 with LocalProperties

use of org.apache.flink.optimizer.dataproperties.LocalProperties in project flink by apache.

the class PropertyDataSourceTest method checkSinglePartitionedSource2.

@Test
public void checkSinglePartitionedSource2() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSource<Tuple2<Long, String>> data = env.readCsvFile("/some/path").types(Long.class, String.class);
    data.getSplitDataProperties().splitsPartitionedBy(1, 0);
    data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());
    Plan plan = env.createProgramPlan();
    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);
    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();
    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();
    Assert.assertTrue((new FieldSet(gprops.getPartitioningFields().toArray())).equals(new FieldSet(0, 1)));
    Assert.assertTrue(gprops.getPartitioning() == PartitioningProperty.ANY_PARTITIONING);
    Assert.assertTrue(lprops.getGroupedFields() == null);
    Assert.assertTrue(lprops.getOrdering() == null);
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) FieldSet(org.apache.flink.api.common.operators.util.FieldSet) GlobalProperties(org.apache.flink.optimizer.dataproperties.GlobalProperties) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) LocalProperties(org.apache.flink.optimizer.dataproperties.LocalProperties) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) Test(org.junit.Test)

Aggregations

LocalProperties (org.apache.flink.optimizer.dataproperties.LocalProperties)40 GlobalProperties (org.apache.flink.optimizer.dataproperties.GlobalProperties)39 Test (org.junit.Test)32 SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode)31 FieldSet (org.apache.flink.api.common.operators.util.FieldSet)28 Plan (org.apache.flink.api.common.Plan)25 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)25 OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan)25 SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode)25 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)14 SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode)13 Tuple3 (org.apache.flink.api.java.tuple.Tuple3)12 Channel (org.apache.flink.optimizer.plan.Channel)12 RequestedGlobalProperties (org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties)11 RequestedLocalProperties (org.apache.flink.optimizer.dataproperties.RequestedLocalProperties)11 FeedbackPropertiesMeetRequirementsReport (org.apache.flink.optimizer.plan.PlanNode.FeedbackPropertiesMeetRequirementsReport)9 FieldList (org.apache.flink.api.common.operators.util.FieldList)8 PlanNode (org.apache.flink.optimizer.plan.PlanNode)7 Ordering (org.apache.flink.api.common.operators.Ordering)6 DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode)6