Example 46 with FieldList

use of org.apache.flink.api.common.operators.util.FieldList in project flink by apache.

the class PregelCompilerTest method testPregelCompiler.

@SuppressWarnings("serial")
@Test
public void testPregelCompiler() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    // compose test program
    {
        DataSet<Vertex<Long, Long>> initialVertices = env.fromElements(new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L)).map(new Tuple2ToVertexMap<>());
        DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple2<>(1L, 2L)).map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {

            public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) {
                return new Edge<>(edge.f0, edge.f1, NullValue.getInstance());
            }
        });
        Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env);
        DataSet<Vertex<Long, Long>> result = graph.runVertexCentricIteration(new CCCompute(), null, 100).getVertices();
        result.output(new DiscardingOutputFormat<>());
    }
    Plan p = env.createProgramPlan("Pregel Connected Components");
    OptimizedPlan op = compileNoStats(p);
    // check the sink
    SinkPlanNode sink = op.getDataSinks().iterator().next();
    assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
    assertEquals(DEFAULT_PARALLELISM, sink.getParallelism());
    // check the iteration
    WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource();
    assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism());
    // check the solution set delta
    PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode();
    assertTrue(ssDelta instanceof SingleInputPlanNode);
    SingleInputPlanNode ssFlatMap = (SingleInputPlanNode) ((SingleInputPlanNode) (ssDelta)).getInput().getSource();
    assertEquals(DEFAULT_PARALLELISM, ssFlatMap.getParallelism());
    assertEquals(ShipStrategyType.FORWARD, ssFlatMap.getInput().getShipStrategy());
    // check the computation coGroup
    DualInputPlanNode computationCoGroup = (DualInputPlanNode) (ssFlatMap.getInput().getSource());
    assertEquals(DEFAULT_PARALLELISM, computationCoGroup.getParallelism());
    assertEquals(ShipStrategyType.FORWARD, computationCoGroup.getInput1().getShipStrategy());
    assertEquals(ShipStrategyType.PARTITION_HASH, computationCoGroup.getInput2().getShipStrategy());
    assertTrue(computationCoGroup.getInput2().getTempMode().isCached());
    assertEquals(new FieldList(0), computationCoGroup.getInput2().getShipStrategyKeys());
    // check that the initial partitioning is pushed out of the loop
    assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy());
    assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys());
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple2ToVertexMap(org.apache.flink.graph.utils.Tuple2ToVertexMap) DataSet(org.apache.flink.api.java.DataSet) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) MapFunction(org.apache.flink.api.common.functions.MapFunction) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) FieldList(org.apache.flink.api.common.operators.util.FieldList) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) NullValue(org.apache.flink.types.NullValue) Graph(org.apache.flink.graph.Graph) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Edge(org.apache.flink.graph.Edge) Test(org.junit.Test)
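The plan checks in this test compare ship-strategy keys against a freshly built FieldList, so what is actually asserted is value equality of FieldList instances. Below is a small, hypothetical JUnit 4 sketch (not part of PregelCompilerTest) that exercises only the FieldList calls already visible on this page: the single-field constructor, the immutable addField seen in Example 49 below, and size(). It assumes org.apache.flink.api.common.operators.util.FieldList and org.junit.Test / org.junit.Assert on the classpath.

@Test
public void fieldListValueEquality() {
    // the same value the ship-strategy assertions above compare against
    FieldList keyAtZero = new FieldList(0);
    // addField returns a new FieldList rather than mutating the receiver (see Example 49)
    FieldList built = new FieldList().addField(0);

    assertEquals(1, keyAtZero.size());
    // assumed: two FieldLists with the same fields in the same order are equal,
    // which is what assertEquals(new FieldList(0), ...getShipStrategyKeys()) relies on
    assertEquals(keyAtZero, built);
}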

Example 47 with FieldList

use of org.apache.flink.api.common.operators.util.FieldList in project flink by apache.

the class PregelCompilerTest method testPregelWithCombiner.

@SuppressWarnings("serial")
@Test
public void testPregelWithCombiner() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    // compose test program
    {
        DataSet<Vertex<Long, Long>> initialVertices = env.fromElements(new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L)).map(new Tuple2ToVertexMap<>());
        DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple2<>(1L, 2L)).map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {

            public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) {
                return new Edge<>(edge.f0, edge.f1, NullValue.getInstance());
            }
        });
        Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env);
        DataSet<Vertex<Long, Long>> result = graph.runVertexCentricIteration(new CCCompute(), new CCCombiner(), 100).getVertices();
        result.output(new DiscardingOutputFormat<>());
    }
    Plan p = env.createProgramPlan("Pregel Connected Components");
    OptimizedPlan op = compileNoStats(p);
    // check the sink
    SinkPlanNode sink = op.getDataSinks().iterator().next();
    assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
    assertEquals(DEFAULT_PARALLELISM, sink.getParallelism());
    // check the iteration
    WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource();
    assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism());
    // check the combiner
    SingleInputPlanNode combiner = (SingleInputPlanNode) iteration.getInput2().getSource();
    assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
    // check the solution set delta
    PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode();
    assertTrue(ssDelta instanceof SingleInputPlanNode);
    SingleInputPlanNode ssFlatMap = (SingleInputPlanNode) ((SingleInputPlanNode) (ssDelta)).getInput().getSource();
    assertEquals(DEFAULT_PARALLELISM, ssFlatMap.getParallelism());
    assertEquals(ShipStrategyType.FORWARD, ssFlatMap.getInput().getShipStrategy());
    // check the computation coGroup
    DualInputPlanNode computationCoGroup = (DualInputPlanNode) (ssFlatMap.getInput().getSource());
    assertEquals(DEFAULT_PARALLELISM, computationCoGroup.getParallelism());
    assertEquals(ShipStrategyType.FORWARD, computationCoGroup.getInput1().getShipStrategy());
    assertEquals(ShipStrategyType.PARTITION_HASH, computationCoGroup.getInput2().getShipStrategy());
    assertTrue(computationCoGroup.getInput2().getTempMode().isCached());
    assertEquals(new FieldList(0), computationCoGroup.getInput2().getShipStrategyKeys());
    // check that the initial partitioning is pushed out of the loop
    assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy());
    assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys());
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple2ToVertexMap(org.apache.flink.graph.utils.Tuple2ToVertexMap) DataSet(org.apache.flink.api.java.DataSet) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) MapFunction(org.apache.flink.api.common.functions.MapFunction) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) FieldList(org.apache.flink.api.common.operators.util.FieldList) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) NullValue(org.apache.flink.types.NullValue) Graph(org.apache.flink.graph.Graph) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Edge(org.apache.flink.graph.Edge) Test(org.junit.Test)
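The CCCombiner passed to runVertexCentricIteration above is not shown on this page. The following is a plausible sketch of a minimum-message combiner for connected components, assuming the MessageCombiner and MessageIterator types from org.apache.flink.graph.pregel; the class name MinMessageCombiner and the exact signatures are assumptions, not taken from this page.

// hypothetical combiner, not copied from PregelCompilerTest
@SuppressWarnings("serial")
private static final class MinMessageCombiner extends MessageCombiner<Long, Long> {

    @Override
    public void combineMessages(MessageIterator<Long> messages) {
        // forward only the smallest candidate component id per target vertex
        long min = Long.MAX_VALUE;
        for (Long msg : messages) {
            min = Math.min(min, msg);
        }
        sendCombinedMessage(min);
    }
}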

Example 48 with FieldList

use of org.apache.flink.api.common.operators.util.FieldList in project flink by apache.

the class SpargelCompilerTest method testSpargelCompiler.

@SuppressWarnings("serial")
@Test
public void testSpargelCompiler() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    // compose test program
    DataSet<Vertex<Long, Long>> initialVertices = env.fromElements(new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L)).map(new Tuple2ToVertexMap<>());
    DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple2<>(1L, 2L)).map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {

        public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) {
            return new Edge<>(edge.f0, edge.f1, NullValue.getInstance());
        }
    });
    Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env);
    DataSet<Vertex<Long, Long>> result = graph.runScatterGatherIteration(new ConnectedComponents.CCMessenger<>(BasicTypeInfo.LONG_TYPE_INFO), new ConnectedComponents.CCUpdater<>(), 100).getVertices();
    result.output(new DiscardingOutputFormat<>());
    Plan p = env.createProgramPlan("Spargel Connected Components");
    OptimizedPlan op = compileNoStats(p);
    // check the sink
    SinkPlanNode sink = op.getDataSinks().iterator().next();
    assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
    assertEquals(DEFAULT_PARALLELISM, sink.getParallelism());
    // check the iteration
    WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource();
    assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism());
    // check the solution set join and the delta
    PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode();
    // this is only true if the update function preserves the partitioning
    assertTrue(ssDelta instanceof DualInputPlanNode);
    DualInputPlanNode ssJoin = (DualInputPlanNode) ssDelta;
    assertEquals(DEFAULT_PARALLELISM, ssJoin.getParallelism());
    assertEquals(ShipStrategyType.PARTITION_HASH, ssJoin.getInput1().getShipStrategy());
    assertEquals(new FieldList(0), ssJoin.getInput1().getShipStrategyKeys());
    // check the workset set join
    DualInputPlanNode edgeJoin = (DualInputPlanNode) ssJoin.getInput1().getSource();
    assertEquals(DEFAULT_PARALLELISM, edgeJoin.getParallelism());
    assertEquals(ShipStrategyType.PARTITION_HASH, edgeJoin.getInput1().getShipStrategy());
    assertEquals(ShipStrategyType.FORWARD, edgeJoin.getInput2().getShipStrategy());
    assertTrue(edgeJoin.getInput1().getTempMode().isCached());
    assertEquals(new FieldList(0), edgeJoin.getInput1().getShipStrategyKeys());
    // check that the initial partitioning is pushed out of the loop
    assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy());
    assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput2().getShipStrategy());
    assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys());
    assertEquals(new FieldList(0), iteration.getInput2().getShipStrategyKeys());
    // check that the initial workset sort is outside the loop
    assertEquals(LocalStrategy.SORT, iteration.getInput2().getLocalStrategy());
    assertEquals(new FieldList(0), iteration.getInput2().getLocalStrategyKeys());
}
Also used : Vertex(org.apache.flink.graph.Vertex) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) FieldList(org.apache.flink.api.common.operators.util.FieldList) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) NullValue(org.apache.flink.types.NullValue) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Edge(org.apache.flink.graph.Edge) Test(org.junit.Test)

Example 49 with FieldList

use of org.apache.flink.api.common.operators.util.FieldList in project flink by apache.

the class LocalProperties method filterBySemanticProperties.

// --------------------------------------------------------------------------------------------
/**
 * Filters these LocalProperties by the fields that are forwarded to the output as described by
 * the SemanticProperties.
 *
 * @param props The semantic properties holding information about forwarded fields.
 * @param input The index of the input.
 * @return The filtered LocalProperties
 */
public LocalProperties filterBySemanticProperties(SemanticProperties props, int input) {
    if (props == null) {
        throw new NullPointerException("SemanticProperties may not be null.");
    }
    LocalProperties returnProps = new LocalProperties();
    // check if sorting is preserved
    if (this.ordering != null) {
        Ordering newOrdering = new Ordering();
        for (int i = 0; i < this.ordering.getInvolvedIndexes().size(); i++) {
            int sourceField = this.ordering.getInvolvedIndexes().get(i);
            FieldSet targetField = props.getForwardingTargetFields(input, sourceField);
            if (targetField == null || targetField.size() == 0) {
                if (i == 0) {
                    // order fully destroyed
                    newOrdering = null;
                    break;
                } else {
                    // order partially preserved
                    break;
                }
            } else {
                // a field may be forwarded to several target fields; only the one with the
                // lowest index is used here (field equivalence sets could handle this better
                // in the future)
                if (targetField.size() > 1) {
                    LOG.warn("Found that a field is forwarded to more than one target field in " + "semantic forwarded field information. Will only use the field with the lowest index.");
                }
                newOrdering.appendOrdering(targetField.toArray()[0], this.ordering.getType(i), this.ordering.getOrder(i));
            }
        }
        returnProps.ordering = newOrdering;
        if (newOrdering != null) {
            returnProps.groupedFields = newOrdering.getInvolvedIndexes();
        } else {
            returnProps.groupedFields = null;
        }
    }
    // check if grouping is preserved
    else if (this.groupedFields != null) {
        FieldList newGroupedFields = new FieldList();
        for (Integer sourceField : this.groupedFields) {
            FieldSet targetField = props.getForwardingTargetFields(input, sourceField);
            if (targetField == null || targetField.size() == 0) {
                newGroupedFields = null;
                break;
            } else {
                // only the forwarded target field with the lowest index is used here
                // (field equivalence sets could handle this better in the future)
                if (targetField.size() > 1) {
                    LOG.warn("Found that a field is forwarded to more than one target field in " + "semantic forwarded field information. Will only use the field with the lowest index.");
                }
                newGroupedFields = newGroupedFields.addField(targetField.toArray()[0]);
            }
        }
        returnProps.groupedFields = newGroupedFields;
    }
    if (this.uniqueFields != null) {
        Set<FieldSet> newUniqueFields = new HashSet<FieldSet>();
        for (FieldSet fields : this.uniqueFields) {
            FieldSet newFields = new FieldSet();
            for (Integer sourceField : fields) {
                FieldSet targetField = props.getForwardingTargetFields(input, sourceField);
                if (targetField == null || targetField.size() == 0) {
                    newFields = null;
                    break;
                } else {
                    // only the forwarded target field with the lowest index is used here
                    // (field equivalence sets could handle this better in the future)
                    if (targetField.size() > 1) {
                        LOG.warn("Found that a field is forwarded to more than one target field in " + "semantic forwarded field information. Will only use the field with the lowest index.");
                    }
                    newFields = newFields.addField(targetField.toArray()[0]);
                }
            }
            if (newFields != null) {
                newUniqueFields.add(newFields);
            }
        }
        if (!newUniqueFields.isEmpty()) {
            returnProps.uniqueFields = newUniqueFields;
        } else {
            returnProps.uniqueFields = null;
        }
    }
    return returnProps;
}
Also used : FieldSet(org.apache.flink.api.common.operators.util.FieldSet) Ordering(org.apache.flink.api.common.operators.Ordering) FieldList(org.apache.flink.api.common.operators.util.FieldList) HashSet(java.util.HashSet)
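To make the filtering above concrete, here is a minimal, hypothetical usage sketch: a grouping on field 0 survives when the operator forwards field 0 to output position 2. It assumes the LocalProperties.forGrouping(FieldList) factory and SingleInputSemanticProperties.addForwardedField(sourceField, targetField), which are common in the Flink optimizer tests but do not appear on this page, so treat those exact names as assumptions.

// hypothetical sketch, not part of LocalProperties.java
SingleInputSemanticProperties semProps = new SingleInputSemanticProperties();
// input field 0 is forwarded unchanged to output field 2 (assumed API)
semProps.addForwardedField(0, 2);

// local properties of the input: grouped on field 0 (assumed factory method)
LocalProperties grouped = LocalProperties.forGrouping(new FieldList(0));

// after filtering, the grouping is expressed in output coordinates, i.e. field 2;
// if field 0 were not forwarded at all, the grouping would be dropped entirely
LocalProperties filtered = grouped.filterBySemanticProperties(semProps, 0);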

Example 50 with FieldList

use of org.apache.flink.api.common.operators.util.FieldList in project flink by apache.

the class RequestedGlobalProperties method filterBySemanticProperties.

/**
 * Filters these properties by what can be preserved by the given SemanticProperties when
 * propagated down to the given input.
 *
 * @param props The SemanticProperties which define which fields are preserved.
 * @param input The index of the operator's input.
 * @return The filtered RequestedGlobalProperties
 */
public RequestedGlobalProperties filterBySemanticProperties(SemanticProperties props, int input) {
    // no semantic properties available. All global properties are filtered.
    if (props == null) {
        throw new NullPointerException("SemanticProperties may not be null.");
    }
    RequestedGlobalProperties rgProp = new RequestedGlobalProperties();
    switch(this.partitioning) {
        case FULL_REPLICATION:
        case FORCED_REBALANCED:
        case CUSTOM_PARTITIONING:
        case RANDOM_PARTITIONED:
        case ANY_DISTRIBUTION:
            // make sure that certain properties are not pushed down
            return null;
        case HASH_PARTITIONED:
        case ANY_PARTITIONING:
            FieldSet newFields;
            if (this.partitioningFields instanceof FieldList) {
                newFields = new FieldList();
            } else {
                newFields = new FieldSet();
            }
            for (Integer targetField : this.partitioningFields) {
                int sourceField = props.getForwardingSourceField(input, targetField);
                if (sourceField >= 0) {
                    newFields = newFields.addField(sourceField);
                } else {
                    // partial partitionings are not preserved to avoid skewed partitioning
                    return null;
                }
            }
            rgProp.partitioning = this.partitioning;
            rgProp.partitioningFields = newFields;
            return rgProp;
        case RANGE_PARTITIONED:
            // range partitioning
            Ordering newOrdering = new Ordering();
            for (int i = 0; i < this.ordering.getInvolvedIndexes().size(); i++) {
                int value = this.ordering.getInvolvedIndexes().get(i);
                int sourceField = props.getForwardingSourceField(input, value);
                if (sourceField >= 0) {
                    newOrdering.appendOrdering(sourceField, this.ordering.getType(i), this.ordering.getOrder(i));
                } else {
                    return null;
                }
            }
            rgProp.partitioning = this.partitioning;
            rgProp.ordering = newOrdering;
            rgProp.dataDistribution = this.dataDistribution;
            return rgProp;
        default:
            throw new RuntimeException("Unknown partitioning type encountered.");
    }
}
Also used : FieldSet(org.apache.flink.api.common.operators.util.FieldSet) Ordering(org.apache.flink.api.common.operators.Ordering) FieldList(org.apache.flink.api.common.operators.util.FieldList)
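A similar hypothetical sketch for the requested properties: a hash-partitioning request on output field 0 is translated into input coordinates when the semantic properties say that input field 1 produces output field 0. The setHashPartitioned(...) setter and addForwardedField(...) are assumptions here (they are not shown on this page); only filterBySemanticProperties itself is taken from the code above.

// hypothetical sketch, not part of RequestedGlobalProperties.java
SingleInputSemanticProperties semProps = new SingleInputSemanticProperties();
// input field 1 appears as output field 0 (assumed API)
semProps.addForwardedField(1, 0);

RequestedGlobalProperties requested = new RequestedGlobalProperties();
// request hash partitioning on output field 0 (assumed setter)
requested.setHashPartitioned(new FieldList(0));

// pushed down to the input, the request becomes hash partitioning on field 1;
// if output field 0 had no forwarding source, the method would return null
RequestedGlobalProperties pushedDown = requested.filterBySemanticProperties(semProps, 0);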

Aggregations

FieldList (org.apache.flink.api.common.operators.util.FieldList) 80
Test (org.junit.Test) 70
SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode) 31
OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan) 28
Plan (org.apache.flink.api.common.Plan) 26
SingleInputSemanticProperties (org.apache.flink.api.common.operators.SingleInputSemanticProperties) 26
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment) 25
Tuple2 (org.apache.flink.api.java.tuple.Tuple2) 25
Ordering (org.apache.flink.api.common.operators.Ordering) 24
SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode) 23
DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat) 19
SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode) 18
FieldSet (org.apache.flink.api.common.operators.util.FieldSet) 14
GlobalProperties (org.apache.flink.optimizer.dataproperties.GlobalProperties) 12
DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode) 12
Channel (org.apache.flink.optimizer.plan.Channel) 11
RequestedGlobalProperties (org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties) 9
LocalProperties (org.apache.flink.optimizer.dataproperties.LocalProperties) 7
PlanNode (org.apache.flink.optimizer.plan.PlanNode) 7
Edge (org.apache.flink.graph.Edge) 6