Search in sources :

Example 41 with GlobalProperties

use of org.apache.flink.optimizer.dataproperties.GlobalProperties in project flink by apache.

the class PropertyDataSourceTest method checkSinglePartitionedGroupedSource8.

/**
 * Checks the properties the optimizer reports for a source whose splits are declared as
 * partitioned by the POJO field {@code f1} and grouped by the nested field
 * {@code f1.stringField}.
 *
 * <p>Expected result: the source is partitioned on the flat field positions 1, 2 and 3 with
 * {@code ANY_PARTITIONING}, and the local properties carry neither grouping nor ordering.
 */
@Test
public void checkSinglePartitionedGroupedSource8() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);
    data.getSplitDataProperties().splitsPartitionedBy("f1").splitsGroupedBy("f1.stringField");
    data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());
    Plan plan = env.createProgramPlan();
    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);
    // check the optimized plan: the only sink's predecessor is the source under test
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();
    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();
    // assertEquals/assertNull report the actual value on failure, unlike assertTrue(a == b)
    Assert.assertEquals(new FieldSet(1, 2, 3), new FieldSet(gprops.getPartitioningFields().toArray()));
    Assert.assertEquals(PartitioningProperty.ANY_PARTITIONING, gprops.getPartitioning());
    Assert.assertNull(lprops.getGroupedFields());
    Assert.assertNull(lprops.getOrdering());
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) FieldSet(org.apache.flink.api.common.operators.util.FieldSet) GlobalProperties(org.apache.flink.optimizer.dataproperties.GlobalProperties) Tuple3(org.apache.flink.api.java.tuple.Tuple3) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) LocalProperties(org.apache.flink.optimizer.dataproperties.LocalProperties) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) Test(org.junit.Test)

Example 42 with GlobalProperties

use of org.apache.flink.optimizer.dataproperties.GlobalProperties in project flink by apache.

the class PropertyDataSourceTest method checkSinglePartitionedOrderedSource3.

/**
 * Checks the properties the optimizer reports for a CSV source whose splits are declared as
 * partitioned by field 0 and ordered ascending by field 1.
 *
 * <p>Expected result: the source is partitioned on field 0 with {@code ANY_PARTITIONING},
 * and the local properties carry neither grouping nor ordering.
 */
@Test
public void checkSinglePartitionedOrderedSource3() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSource<Tuple2<Long, String>> data = env.readCsvFile("/some/path").types(Long.class, String.class);
    data.getSplitDataProperties().splitsPartitionedBy(0).splitsOrderedBy(new int[] { 1 }, new Order[] { Order.ASCENDING });
    data.output(new DiscardingOutputFormat<Tuple2<Long, String>>());
    Plan plan = env.createProgramPlan();
    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);
    // check the optimized plan: the only sink's predecessor is the source under test
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();
    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();
    // assertEquals/assertNull report the actual value on failure, unlike assertTrue(a == b)
    Assert.assertEquals(new FieldSet(0), new FieldSet(gprops.getPartitioningFields().toArray()));
    Assert.assertEquals(PartitioningProperty.ANY_PARTITIONING, gprops.getPartitioning());
    Assert.assertNull(lprops.getGroupedFields());
    Assert.assertNull(lprops.getOrdering());
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) FieldSet(org.apache.flink.api.common.operators.util.FieldSet) GlobalProperties(org.apache.flink.optimizer.dataproperties.GlobalProperties) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) LocalProperties(org.apache.flink.optimizer.dataproperties.LocalProperties) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) Test(org.junit.Test)

Example 43 with GlobalProperties

use of org.apache.flink.optimizer.dataproperties.GlobalProperties in project flink by apache.

the class PropertyDataSourceTest method checkSinglePartitionedGroupedSource5.

/**
 * Checks the properties the optimizer reports for a source whose splits are declared as both
 * partitioned and grouped by the tuple field {@code f2}.
 *
 * <p>Expected result: the source is partitioned on flat field position 4 with
 * {@code ANY_PARTITIONING}, grouped on the same field position, and has no ordering.
 */
@Test
public void checkSinglePartitionedGroupedSource5() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSource<Tuple3<Long, SomePojo, String>> data = env.fromCollection(tuple3PojoData, tuple3PojoType);
    data.getSplitDataProperties().splitsPartitionedBy("f2").splitsGroupedBy("f2");
    data.output(new DiscardingOutputFormat<Tuple3<Long, SomePojo, String>>());
    Plan plan = env.createProgramPlan();
    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);
    // check the optimized plan: the only sink's predecessor is the source under test
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode = (SourcePlanNode) sinkNode.getPredecessor();
    GlobalProperties gprops = sourceNode.getGlobalProperties();
    LocalProperties lprops = sourceNode.getLocalProperties();
    // assertEquals/assertNull report the actual value on failure, unlike assertTrue(a == b)
    Assert.assertEquals(new FieldSet(4), new FieldSet(gprops.getPartitioningFields().toArray()));
    Assert.assertEquals(PartitioningProperty.ANY_PARTITIONING, gprops.getPartitioning());
    Assert.assertEquals(new FieldSet(4), new FieldSet(lprops.getGroupedFields().toArray()));
    Assert.assertNull(lprops.getOrdering());
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) FieldSet(org.apache.flink.api.common.operators.util.FieldSet) GlobalProperties(org.apache.flink.optimizer.dataproperties.GlobalProperties) Tuple3(org.apache.flink.api.java.tuple.Tuple3) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) LocalProperties(org.apache.flink.optimizer.dataproperties.LocalProperties) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) Test(org.junit.Test)

Example 44 with GlobalProperties

use of org.apache.flink.optimizer.dataproperties.GlobalProperties in project flink by apache.

the class PartitioningReusageTest method checkValidJoinInputProperties.

/**
 * Checks that the global properties of both inputs of the given join form a valid combination.
 *
 * <p>Two combinations are accepted:
 * <ul>
 *   <li>both inputs are hash partitioned: the partitioning fields must have the same length,
 *       each field must be contained in the join keys of its input, and corresponding fields
 *       must sit at the same position in the respective key lists;</li>
 *   <li>one input is fully replicated and the other is randomly partitioned: no field check
 *       is needed.</li>
 * </ul>
 *
 * @param join the join plan node whose input properties are checked
 * @throws UnsupportedOperationException if the inputs use any other combination of partitionings
 */
private void checkValidJoinInputProperties(DualInputPlanNode join) {
    GlobalProperties inProps1 = join.getInput1().getGlobalProperties();
    GlobalProperties inProps2 = join.getInput2().getGlobalProperties();
    final PartitioningProperty partitioning1 = inProps1.getPartitioning();
    final PartitioningProperty partitioning2 = inProps2.getPartitioning();

    final boolean bothHashPartitioned = partitioning1 == PartitioningProperty.HASH_PARTITIONED
            && partitioning2 == PartitioningProperty.HASH_PARTITIONED;
    final boolean oneReplicatedOtherRandom =
            (partitioning1 == PartitioningProperty.FULL_REPLICATION && partitioning2 == PartitioningProperty.RANDOM_PARTITIONED)
            || (partitioning1 == PartitioningProperty.RANDOM_PARTITIONED && partitioning2 == PartitioningProperty.FULL_REPLICATION);

    if (bothHashPartitioned) {
        // check that both inputs are hash partitioned on the same fields
        FieldList pFields1 = inProps1.getPartitioningFields();
        FieldList pFields2 = inProps2.getPartitioningFields();
        assertTrue("Inputs are not the same number of fields. Input 1: " + pFields1 + ", Input 2: " + pFields2, pFields1.size() == pFields2.size());
        FieldList reqPFields1 = join.getKeysForInput1();
        FieldList reqPFields2 = join.getKeysForInput2();
        for (int i = 0; i < pFields1.size(); i++) {
            // get fields
            int f1 = pFields1.get(i);
            int f2 = pFields2.get(i);
            // check that field positions in original key field list are identical
            int pos1 = getPosInFieldList(f1, reqPFields1);
            int pos2 = getPosInFieldList(f2, reqPFields2);
            if (pos1 < 0) {
                fail("Input 1 is partitioned on field " + f1 + " which is not contained in the key set " + reqPFields1);
            }
            if (pos2 < 0) {
                fail("Input 2 is partitioned on field " + f2 + " which is not contained in the key set " + reqPFields2);
            }
            if (pos1 != pos2) {
                fail("Inputs are not partitioned on the same key fields");
            }
        }
    } else if (!oneReplicatedOtherRandom) {
        // Any combination other than hash/hash or replicated/random is not covered by this check.
        throw new UnsupportedOperationException("This method has only been implemented to check for hash partitioned or replicated join inputs."
                + " Input 1: " + partitioning1 + ", Input 2: " + partitioning2);
    }
    // replicated + random: we are good, no need to check for fields
}
Also used : GlobalProperties(org.apache.flink.optimizer.dataproperties.GlobalProperties) FieldList(org.apache.flink.api.common.operators.util.FieldList)

Example 45 with GlobalProperties

use of org.apache.flink.optimizer.dataproperties.GlobalProperties in project flink by apache.

the class RangePartitionRewriter method rewriteRangePartitionChannel.

/**
 * Replaces the given channel by a sub-plan that samples the source's output, builds range
 * boundaries from the sample, tags each record with a range index, ships the records with a
 * custom partitioner on that index and finally strips the index again.
 *
 * <p>The new plan nodes are added to {@code this.plan.getAllNodes()}. The given channel is
 * modified in place: its source becomes the last new node (the index remover) and its ship
 * strategy becomes FORWARD / PIPELINED.
 *
 * @param channel the channel to rewrite; its source and target nodes supply the parallelism
 *     of the inserted operators
 * @return the two new channels that start at the original source node (one to the sampler,
 *     one to the range-index assigner). This method does not register them as outgoing
 *     channels of the source node; presumably the caller does — verify against the call site.
 */
private List<Channel> rewriteRangePartitionChannel(Channel channel) {
    final List<Channel> sourceNewOutputChannels = new ArrayList<>();
    final PlanNode sourceNode = channel.getSource();
    final PlanNode targetNode = channel.getTarget();
    final int sourceParallelism = sourceNode.getParallelism();
    final int targetParallelism = targetNode.getParallelism();
    // All inserted nodes are assigned zero costs.
    final Costs defaultZeroCosts = new Costs(0, 0, 0);
    final TypeComparatorFactory<?> comparator = Utils.getShipComparator(channel, this.plan.getOriginalPlan().getExecutionConfig());
    // 1. Take a fixed-size sample in each partition of the source (runs with source parallelism).
    final int sampleSize = SAMPLES_PER_PARTITION * targetParallelism;
    final SampleInPartition sampleInPartition = new SampleInPartition(false, sampleSize, SEED);
    final TypeInformation<?> sourceOutputType = sourceNode.getOptimizerNode().getOperator().getOperatorInfo().getOutputType();
    final TypeInformation<IntermediateSampleData> isdTypeInformation = TypeExtractor.getForClass(IntermediateSampleData.class);
    final UnaryOperatorInformation sipOperatorInformation = new UnaryOperatorInformation(sourceOutputType, isdTypeInformation);
    final MapPartitionOperatorBase sipOperatorBase = new MapPartitionOperatorBase(sampleInPartition, sipOperatorInformation, SIP_NAME);
    final MapPartitionNode sipNode = new MapPartitionNode(sipOperatorBase);
    final Channel sipChannel = new Channel(sourceNode, TempMode.NONE);
    sipChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
    final SingleInputPlanNode sipPlanNode = new SingleInputPlanNode(sipNode, SIP_NAME, sipChannel, DriverStrategy.MAP_PARTITION);
    sipNode.setParallelism(sourceParallelism);
    sipPlanNode.setParallelism(sourceParallelism);
    sipPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
    sipPlanNode.setCosts(defaultZeroCosts);
    sipChannel.setTarget(sipPlanNode);
    this.plan.getAllNodes().add(sipPlanNode);
    // First of the two new channels leaving the source node.
    sourceNewOutputChannels.add(sipChannel);
    // 2. Combine the per-partition samples into one fixed-size sample in a single coordinator
    // (parallelism 1).
    final SampleInCoordinator sampleInCoordinator = new SampleInCoordinator(false, sampleSize, SEED);
    final UnaryOperatorInformation sicOperatorInformation = new UnaryOperatorInformation(isdTypeInformation, sourceOutputType);
    final GroupReduceOperatorBase sicOperatorBase = new GroupReduceOperatorBase(sampleInCoordinator, sicOperatorInformation, SIC_NAME);
    final GroupReduceNode sicNode = new GroupReduceNode(sicOperatorBase);
    final Channel sicChannel = new Channel(sipPlanNode, TempMode.NONE);
    sicChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
    final SingleInputPlanNode sicPlanNode = new SingleInputPlanNode(sicNode, SIC_NAME, sicChannel, DriverStrategy.ALL_GROUP_REDUCE);
    sicNode.setParallelism(1);
    sicPlanNode.setParallelism(1);
    sicPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
    sicPlanNode.setCosts(defaultZeroCosts);
    sicChannel.setTarget(sicPlanNode);
    sipPlanNode.addOutgoingChannel(sicChannel);
    this.plan.getAllNodes().add(sicPlanNode);
    // 3. Use the sampled data to build the range boundaries (parallelism 1).
    final RangeBoundaryBuilder rangeBoundaryBuilder = new RangeBoundaryBuilder(comparator, targetParallelism);
    final TypeInformation<CommonRangeBoundaries> rbTypeInformation = TypeExtractor.getForClass(CommonRangeBoundaries.class);
    final UnaryOperatorInformation rbOperatorInformation = new UnaryOperatorInformation(sourceOutputType, rbTypeInformation);
    final MapPartitionOperatorBase rbOperatorBase = new MapPartitionOperatorBase(rangeBoundaryBuilder, rbOperatorInformation, RB_NAME);
    final MapPartitionNode rbNode = new MapPartitionNode(rbOperatorBase);
    final Channel rbChannel = new Channel(sicPlanNode, TempMode.NONE);
    rbChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
    final SingleInputPlanNode rbPlanNode = new SingleInputPlanNode(rbNode, RB_NAME, rbChannel, DriverStrategy.MAP_PARTITION);
    rbNode.setParallelism(1);
    rbPlanNode.setParallelism(1);
    rbPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
    rbPlanNode.setCosts(defaultZeroCosts);
    rbChannel.setTarget(rbPlanNode);
    sicPlanNode.addOutgoingChannel(rbChannel);
    this.plan.getAllNodes().add(rbPlanNode);
    // 4. Take range boundaries as broadcast input and take the tuple of partition id and record
    // as output. This node reads the source a second time, through its own channel.
    final AssignRangeIndex assignRangeIndex = new AssignRangeIndex(comparator);
    final TypeInformation<Tuple2> ariOutputTypeInformation = new TupleTypeInfo<>(BasicTypeInfo.INT_TYPE_INFO, sourceOutputType);
    final UnaryOperatorInformation ariOperatorInformation = new UnaryOperatorInformation(sourceOutputType, ariOutputTypeInformation);
    final MapPartitionOperatorBase ariOperatorBase = new MapPartitionOperatorBase(assignRangeIndex, ariOperatorInformation, ARI_NAME);
    final MapPartitionNode ariNode = new MapPartitionNode(ariOperatorBase);
    final Channel ariChannel = new Channel(sourceNode, TempMode.NONE);
    // To avoid deadlock, set the DataExchangeMode of channel between source node and this to
    // Batch.
    ariChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.BATCH);
    final SingleInputPlanNode ariPlanNode = new SingleInputPlanNode(ariNode, ARI_NAME, ariChannel, DriverStrategy.MAP_PARTITION);
    ariNode.setParallelism(sourceParallelism);
    ariPlanNode.setParallelism(sourceParallelism);
    ariPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
    ariPlanNode.setCosts(defaultZeroCosts);
    ariChannel.setTarget(ariPlanNode);
    this.plan.getAllNodes().add(ariPlanNode);
    // Second of the two new channels leaving the source node.
    sourceNewOutputChannels.add(ariChannel);
    // Feed the boundaries computed in step 3 to the index assigner as a broadcast input.
    final NamedChannel broadcastChannel = new NamedChannel("RangeBoundaries", rbPlanNode);
    broadcastChannel.setShipStrategy(ShipStrategyType.BROADCAST, DataExchangeMode.PIPELINED);
    broadcastChannel.setTarget(ariPlanNode);
    List<NamedChannel> broadcastChannels = new ArrayList<>(1);
    broadcastChannels.add(broadcastChannel);
    ariPlanNode.setBroadcastInputs(broadcastChannels);
    // 5. Ship the (partition id, record) tuples with a custom partitioner on field 0 (the
    // partition id), then remove the partition id again (runs with target parallelism).
    final Channel partChannel = new Channel(ariPlanNode, TempMode.NONE);
    final FieldList keys = new FieldList(0);
    partChannel.setShipStrategy(ShipStrategyType.PARTITION_CUSTOM, keys, idPartitioner, DataExchangeMode.PIPELINED);
    ariPlanNode.addOutgoingChannel(partChannel);
    final RemoveRangeIndex partitionIDRemoveWrapper = new RemoveRangeIndex();
    final UnaryOperatorInformation prOperatorInformation = new UnaryOperatorInformation(ariOutputTypeInformation, sourceOutputType);
    final MapOperatorBase prOperatorBase = new MapOperatorBase(partitionIDRemoveWrapper, prOperatorInformation, PR_NAME);
    final MapNode prRemoverNode = new MapNode(prOperatorBase);
    final SingleInputPlanNode prPlanNode = new SingleInputPlanNode(prRemoverNode, PR_NAME, partChannel, DriverStrategy.MAP);
    partChannel.setTarget(prPlanNode);
    prRemoverNode.setParallelism(targetParallelism);
    prPlanNode.setParallelism(targetParallelism);
    // Unlike the other inserted nodes, the remover's output is declared as range partitioned.
    GlobalProperties globalProperties = new GlobalProperties();
    globalProperties.setRangePartitioned(new Ordering(0, null, Order.ASCENDING));
    prPlanNode.initProperties(globalProperties, new LocalProperties());
    prPlanNode.setCosts(defaultZeroCosts);
    this.plan.getAllNodes().add(prPlanNode);
    // 6. Connect to target node: the original channel now starts at the index remover and
    // simply forwards.
    channel.setSource(prPlanNode);
    channel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
    prPlanNode.addOutgoingChannel(channel);
    return sourceNewOutputChannels;
}
Also used : SampleInPartition(org.apache.flink.api.java.functions.SampleInPartition) Costs(org.apache.flink.optimizer.costs.Costs) GroupReduceNode(org.apache.flink.optimizer.dag.GroupReduceNode) ArrayList(java.util.ArrayList) SampleInCoordinator(org.apache.flink.api.java.functions.SampleInCoordinator) MapNode(org.apache.flink.optimizer.dag.MapNode) RangeBoundaryBuilder(org.apache.flink.runtime.operators.udf.RangeBoundaryBuilder) FieldList(org.apache.flink.api.common.operators.util.FieldList) MapOperatorBase(org.apache.flink.api.common.operators.base.MapOperatorBase) IterationPlanNode(org.apache.flink.optimizer.plan.IterationPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) UnaryOperatorInformation(org.apache.flink.api.common.operators.UnaryOperatorInformation) GlobalProperties(org.apache.flink.optimizer.dataproperties.GlobalProperties) RemoveRangeIndex(org.apache.flink.runtime.operators.udf.RemoveRangeIndex) Ordering(org.apache.flink.api.common.operators.Ordering) MapPartitionNode(org.apache.flink.optimizer.dag.MapPartitionNode) MapPartitionOperatorBase(org.apache.flink.api.common.operators.base.MapPartitionOperatorBase) AssignRangeIndex(org.apache.flink.runtime.operators.udf.AssignRangeIndex) Channel(org.apache.flink.optimizer.plan.Channel) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) IntermediateSampleData(org.apache.flink.api.java.sampling.IntermediateSampleData) Tuple2(org.apache.flink.api.java.tuple.Tuple2) GroupReduceOperatorBase(org.apache.flink.api.common.operators.base.GroupReduceOperatorBase) LocalProperties(org.apache.flink.optimizer.dataproperties.LocalProperties) CommonRangeBoundaries(org.apache.flink.api.common.distributions.CommonRangeBoundaries)

Aggregations

GlobalProperties (org.apache.flink.optimizer.dataproperties.GlobalProperties)50 LocalProperties (org.apache.flink.optimizer.dataproperties.LocalProperties)39 Test (org.junit.Test)36 SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode)31 FieldSet (org.apache.flink.api.common.operators.util.FieldSet)29 Plan (org.apache.flink.api.common.Plan)25 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)25 OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan)25 SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode)25 RequestedGlobalProperties (org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties)20 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)14 FieldList (org.apache.flink.api.common.operators.util.FieldList)13 Channel (org.apache.flink.optimizer.plan.Channel)13 SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode)13 Tuple3 (org.apache.flink.api.java.tuple.Tuple3)12 RequestedLocalProperties (org.apache.flink.optimizer.dataproperties.RequestedLocalProperties)12 Ordering (org.apache.flink.api.common.operators.Ordering)9 FeedbackPropertiesMeetRequirementsReport (org.apache.flink.optimizer.plan.PlanNode.FeedbackPropertiesMeetRequirementsReport)9 PlanNode (org.apache.flink.optimizer.plan.PlanNode)8 DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode)6