Search in sources :

Example 26 with FieldList

use of org.apache.flink.api.common.operators.util.FieldList in project flink by apache.

the class LocalPropertiesFilteringTest method testGroupingPreserved2.

@Test
public void testGroupingPreserved2() {
    SingleInputSemanticProperties sp = new SingleInputSemanticProperties();
    SemanticPropUtil.getSemanticPropsSingleFromString(sp, new String[] { "0->4;2->0;3->7" }, null, null, tupleInfo, tupleInfo);
    LocalProperties lProps = LocalProperties.forGrouping(new FieldList(0, 2, 3));
    LocalProperties filtered = lProps.filterBySemanticProperties(sp, 0);
    assertNotNull(filtered.getGroupedFields());
    assertEquals(3, filtered.getGroupedFields().size());
    assertTrue(filtered.getGroupedFields().contains(4));
    assertTrue(filtered.getGroupedFields().contains(0));
    assertTrue(filtered.getGroupedFields().contains(7));
    assertNull(filtered.getOrdering());
    assertNull(filtered.getUniqueFields());
}
Also used : SingleInputSemanticProperties(org.apache.flink.api.common.operators.SingleInputSemanticProperties) FieldList(org.apache.flink.api.common.operators.util.FieldList) Test(org.junit.Test)

Example 27 with FieldList

use of org.apache.flink.api.common.operators.util.FieldList in project flink by apache.

the class PregelCompilerTest method testPregelCompilerWithBroadcastVariable.

@SuppressWarnings("serial")
@Test
public void testPregelCompilerWithBroadcastVariable() {
    try {
        final String BC_VAR_NAME = "borat variable";
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);
        // compose test program
        {
            DataSet<Long> bcVar = env.fromElements(1L);
            DataSet<Vertex<Long, Long>> initialVertices = env.fromElements(new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L)).map(new Tuple2ToVertexMap<Long, Long>());
            DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple2<>(1L, 2L)).map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {

                public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) {
                    return new Edge<>(edge.f0, edge.f1, NullValue.getInstance());
                }
            });
            Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env);
            VertexCentricConfiguration parameters = new VertexCentricConfiguration();
            parameters.addBroadcastSet(BC_VAR_NAME, bcVar);
            DataSet<Vertex<Long, Long>> result = graph.runVertexCentricIteration(new CCCompute(), null, 100, parameters).getVertices();
            result.output(new DiscardingOutputFormat<Vertex<Long, Long>>());
        }
        Plan p = env.createProgramPlan("Pregel Connected Components");
        OptimizedPlan op = compileNoStats(p);
        // check the sink
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
        assertEquals(DEFAULT_PARALLELISM, sink.getParallelism());
        // check the iteration
        WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource();
        assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism());
        // check the solution set delta
        PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode();
        assertTrue(ssDelta instanceof SingleInputPlanNode);
        SingleInputPlanNode ssFlatMap = (SingleInputPlanNode) ((SingleInputPlanNode) (ssDelta)).getInput().getSource();
        assertEquals(DEFAULT_PARALLELISM, ssFlatMap.getParallelism());
        assertEquals(ShipStrategyType.FORWARD, ssFlatMap.getInput().getShipStrategy());
        // check the computation coGroup
        DualInputPlanNode computationCoGroup = (DualInputPlanNode) (ssFlatMap.getInput().getSource());
        assertEquals(DEFAULT_PARALLELISM, computationCoGroup.getParallelism());
        assertEquals(ShipStrategyType.FORWARD, computationCoGroup.getInput1().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_HASH, computationCoGroup.getInput2().getShipStrategy());
        assertTrue(computationCoGroup.getInput2().getTempMode().isCached());
        assertEquals(new FieldList(0), computationCoGroup.getInput2().getShipStrategyKeys());
        // check that the initial partitioning is pushed out of the loop
        assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy());
        assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple2ToVertexMap(org.apache.flink.graph.utils.Tuple2ToVertexMap) DataSet(org.apache.flink.api.java.DataSet) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) MapFunction(org.apache.flink.api.common.functions.MapFunction) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) FieldList(org.apache.flink.api.common.operators.util.FieldList) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) NullValue(org.apache.flink.types.NullValue) Graph(org.apache.flink.graph.Graph) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Edge(org.apache.flink.graph.Edge) Test(org.junit.Test)

Example 28 with FieldList

use of org.apache.flink.api.common.operators.util.FieldList in project flink by apache.

the class SpargelCompilerTest method testSpargelCompilerWithBroadcastVariable.

@SuppressWarnings("serial")
@Test
public void testSpargelCompilerWithBroadcastVariable() {
    try {
        final String BC_VAR_NAME = "borat variable";
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);
        // compose test program
        {
            DataSet<Long> bcVar = env.fromElements(1L);
            DataSet<Vertex<Long, Long>> initialVertices = env.fromElements(new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L)).map(new Tuple2ToVertexMap<Long, Long>());
            DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple2<>(1L, 2L)).map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {

                public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) {
                    return new Edge<>(edge.f0, edge.f1, NullValue.getInstance());
                }
            });
            Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env);
            ScatterGatherConfiguration parameters = new ScatterGatherConfiguration();
            parameters.addBroadcastSetForScatterFunction(BC_VAR_NAME, bcVar);
            parameters.addBroadcastSetForGatherFunction(BC_VAR_NAME, bcVar);
            DataSet<Vertex<Long, Long>> result = graph.runScatterGatherIteration(new ConnectedComponents.CCMessenger<Long, Long>(BasicTypeInfo.LONG_TYPE_INFO), new ConnectedComponents.CCUpdater<Long, Long>(), 100).getVertices();
            result.output(new DiscardingOutputFormat<Vertex<Long, Long>>());
        }
        Plan p = env.createProgramPlan("Spargel Connected Components");
        OptimizedPlan op = compileNoStats(p);
        // check the sink
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
        assertEquals(DEFAULT_PARALLELISM, sink.getParallelism());
        // check the iteration
        WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource();
        assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism());
        // check the solution set join and the delta
        PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode();
        // this is only true if the update functions preserves the partitioning
        assertTrue(ssDelta instanceof DualInputPlanNode);
        DualInputPlanNode ssJoin = (DualInputPlanNode) ssDelta;
        assertEquals(DEFAULT_PARALLELISM, ssJoin.getParallelism());
        assertEquals(ShipStrategyType.PARTITION_HASH, ssJoin.getInput1().getShipStrategy());
        assertEquals(new FieldList(0), ssJoin.getInput1().getShipStrategyKeys());
        // check the workset set join
        DualInputPlanNode edgeJoin = (DualInputPlanNode) ssJoin.getInput1().getSource();
        assertEquals(DEFAULT_PARALLELISM, edgeJoin.getParallelism());
        assertEquals(ShipStrategyType.PARTITION_HASH, edgeJoin.getInput1().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, edgeJoin.getInput2().getShipStrategy());
        assertTrue(edgeJoin.getInput1().getTempMode().isCached());
        assertEquals(new FieldList(0), edgeJoin.getInput1().getShipStrategyKeys());
        // check that the initial partitioning is pushed out of the loop
        assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput2().getShipStrategy());
        assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys());
        assertEquals(new FieldList(0), iteration.getInput2().getShipStrategyKeys());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple2ToVertexMap(org.apache.flink.graph.utils.Tuple2ToVertexMap) DataSet(org.apache.flink.api.java.DataSet) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) MapFunction(org.apache.flink.api.common.functions.MapFunction) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) FieldList(org.apache.flink.api.common.operators.util.FieldList) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) NullValue(org.apache.flink.types.NullValue) Graph(org.apache.flink.graph.Graph) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) ConnectedComponents(org.apache.flink.graph.library.ConnectedComponents) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Edge(org.apache.flink.graph.Edge) Test(org.junit.Test)

Example 29 with FieldList

use of org.apache.flink.api.common.operators.util.FieldList in project flink by apache.

the class GSACompilerTest method testGSACompiler.

@Test
public void testGSACompiler() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);
        // compose test program
        {
            DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple3<>(1L, 2L, NullValue.getInstance())).map(new Tuple3ToEdgeMap<Long, NullValue>());
            Graph<Long, Long, NullValue> graph = Graph.fromDataSet(edges, new InitVertices(), env);
            DataSet<Vertex<Long, Long>> result = graph.runGatherSumApplyIteration(new GatherNeighborIds(), new SelectMinId(), new UpdateComponentId(), 100).getVertices();
            result.output(new DiscardingOutputFormat<Vertex<Long, Long>>());
        }
        Plan p = env.createProgramPlan("GSA Connected Components");
        OptimizedPlan op = compileNoStats(p);
        // check the sink
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
        assertEquals(DEFAULT_PARALLELISM, sink.getParallelism());
        assertEquals(PartitioningProperty.HASH_PARTITIONED, sink.getGlobalProperties().getPartitioning());
        // check the iteration
        WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource();
        assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism());
        // check the solution set join and the delta
        PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode();
        // this is only true if the update function preserves the partitioning
        assertTrue(ssDelta instanceof DualInputPlanNode);
        DualInputPlanNode ssJoin = (DualInputPlanNode) ssDelta;
        assertEquals(DEFAULT_PARALLELISM, ssJoin.getParallelism());
        assertEquals(ShipStrategyType.PARTITION_HASH, ssJoin.getInput1().getShipStrategy());
        assertEquals(new FieldList(0), ssJoin.getInput1().getShipStrategyKeys());
        // check the workset set join
        SingleInputPlanNode sumReducer = (SingleInputPlanNode) ssJoin.getInput1().getSource();
        SingleInputPlanNode gatherMapper = (SingleInputPlanNode) sumReducer.getInput().getSource();
        DualInputPlanNode edgeJoin = (DualInputPlanNode) gatherMapper.getInput().getSource();
        assertEquals(DEFAULT_PARALLELISM, edgeJoin.getParallelism());
        // input1 is the workset
        assertEquals(ShipStrategyType.FORWARD, edgeJoin.getInput1().getShipStrategy());
        // input2 is the edges
        assertEquals(ShipStrategyType.PARTITION_HASH, edgeJoin.getInput2().getShipStrategy());
        assertTrue(edgeJoin.getInput2().getTempMode().isCached());
        assertEquals(new FieldList(0), edgeJoin.getInput2().getShipStrategyKeys());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) DataSet(org.apache.flink.api.java.DataSet) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) FieldList(org.apache.flink.api.common.operators.util.FieldList) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) NullValue(org.apache.flink.types.NullValue) Graph(org.apache.flink.graph.Graph) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple3ToEdgeMap(org.apache.flink.graph.utils.Tuple3ToEdgeMap) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Example 30 with FieldList

use of org.apache.flink.api.common.operators.util.FieldList in project flink by apache.

the class DistinctCompilationTest method testDistinctWithFieldPositionKeyCombinable.

@Test
public void testDistinctWithFieldPositionKeyCombinable() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read").types(String.class, Double.class).name("source").setParallelism(6);
        DistinctOperator<Tuple2<String, Double>> reduced = data.distinct(1).name("reducer");
        reduced.output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");
        // get the combiner
        SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();
        // check wiring
        assertEquals(sourceNode, combineNode.getInput().getSource());
        assertEquals(reduceNode, sinkNode.getInput().getSource());
        // check that both reduce and combiner have the same strategy
        assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
        assertEquals(DriverStrategy.SORTED_PARTIAL_REDUCE, combineNode.getDriverStrategy());
        // check the keys
        assertEquals(new FieldList(1), reduceNode.getKeys(0));
        assertEquals(new FieldList(1), combineNode.getKeys(0));
        assertEquals(new FieldList(1), reduceNode.getInput().getLocalStrategyKeys());
        // check parallelism
        assertEquals(6, sourceNode.getParallelism());
        assertEquals(6, combineNode.getParallelism());
        assertEquals(8, reduceNode.getParallelism());
        assertEquals(8, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) FieldList(org.apache.flink.api.common.operators.util.FieldList) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Aggregations

FieldList (org.apache.flink.api.common.operators.util.FieldList)79 Test (org.junit.Test)69 SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode)30 OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan)27 SingleInputSemanticProperties (org.apache.flink.api.common.operators.SingleInputSemanticProperties)26 Plan (org.apache.flink.api.common.Plan)25 Ordering (org.apache.flink.api.common.operators.Ordering)24 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)24 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)24 SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode)22 DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat)21 SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode)17 FieldSet (org.apache.flink.api.common.operators.util.FieldSet)14 GlobalProperties (org.apache.flink.optimizer.dataproperties.GlobalProperties)12 DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode)12 Channel (org.apache.flink.optimizer.plan.Channel)11 RequestedGlobalProperties (org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties)9 LocalProperties (org.apache.flink.optimizer.dataproperties.LocalProperties)7 PlanNode (org.apache.flink.optimizer.plan.PlanNode)7 DataSet (org.apache.flink.api.java.DataSet)6