Example 31 with SourcePlanNode

Use of org.apache.flink.optimizer.plan.SourcePlanNode in project flink by apache.

From the class PropertyDataSourceTest, the method checkCoPartitionedSources2.

@Test
public void checkCoPartitionedSources2() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSource<Tuple2<Long, String>> data1 = env.readCsvFile("/some/path").types(Long.class, String.class);
    data1.getSplitDataProperties().splitsPartitionedBy("byCountry", 0);
    DataSource<Tuple2<Long, String>> data2 = env.readCsvFile("/some/path").types(Long.class, String.class);
    data2.getSplitDataProperties().splitsPartitionedBy("byDate", 0);
    data1.union(data2).output(new DiscardingOutputFormat<Tuple2<Long, String>>());
    Plan plan = env.createProgramPlan();
    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);
    // check the optimized Plan
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SourcePlanNode sourceNode1 = (SourcePlanNode) ((NAryUnionPlanNode) sinkNode.getPredecessor()).getListOfInputs().get(0).getSource();
    SourcePlanNode sourceNode2 = (SourcePlanNode) ((NAryUnionPlanNode) sinkNode.getPredecessor()).getListOfInputs().get(1).getSource();
    GlobalProperties gprops1 = sourceNode1.getGlobalProperties();
    LocalProperties lprops1 = sourceNode1.getLocalProperties();
    GlobalProperties gprops2 = sourceNode2.getGlobalProperties();
    LocalProperties lprops2 = sourceNode2.getLocalProperties();
    Assert.assertEquals(new FieldSet(0), new FieldSet(gprops1.getPartitioningFields().toArray()));
    Assert.assertEquals(PartitioningProperty.CUSTOM_PARTITIONING, gprops1.getPartitioning());
    Assert.assertNull(lprops1.getGroupedFields());
    Assert.assertNull(lprops1.getOrdering());
    Assert.assertEquals(new FieldSet(0), new FieldSet(gprops2.getPartitioningFields().toArray()));
    Assert.assertEquals(PartitioningProperty.CUSTOM_PARTITIONING, gprops2.getPartitioning());
    Assert.assertNull(lprops2.getGroupedFields());
    Assert.assertNull(lprops2.getOrdering());
    // The sources declare different partitioning methods, so their custom partitioners must differ.
    Assert.assertNotEquals(gprops1.getCustomPartitioner(), gprops2.getCustomPartitioner());
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) FieldSet(org.apache.flink.api.common.operators.util.FieldSet) GlobalProperties(org.apache.flink.optimizer.dataproperties.GlobalProperties) LocalProperties(org.apache.flink.optimizer.dataproperties.LocalProperties) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) Test(org.junit.Test)
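
A minimal contrasting sketch, not part of the test above: when both sources declare the same partitioning method name on the same field, the split partitionings are compatible, and the sibling test checkCoPartitionedSources1 in the same class (not shown here) is assumed to assert that the two sources end up with equal custom partitioners.

ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
DataSource<Tuple2<Long, String>> data1 = env.readCsvFile("/some/path").types(Long.class, String.class);
DataSource<Tuple2<Long, String>> data2 = env.readCsvFile("/some/path").types(Long.class, String.class);
// Same method name ("byDate") and same field (0) on both sources, unlike the
// differing "byCountry"/"byDate" pair in the test above.
data1.getSplitDataProperties().splitsPartitionedBy("byDate", 0);
data2.getSplitDataProperties().splitsPartitionedBy("byDate", 0);
data1.union(data2).output(new DiscardingOutputFormat<Tuple2<Long, String>>());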

Example 32 with SourcePlanNode

Use of org.apache.flink.optimizer.plan.SourcePlanNode in project flink by apache.

From the class ConnectedComponentsCoGroupTest, the method testWorksetConnectedComponents.

@Test
public void testWorksetConnectedComponents() throws Exception {
    Plan plan = getConnectedComponentsCoGroupPlan();
    plan.setExecutionConfig(new ExecutionConfig());
    OptimizedPlan optPlan = compileNoStats(plan);
    OptimizerPlanNodeResolver or = getOptimizerPlanNodeResolver(optPlan);
    if (PRINT_PLAN) {
        PlanJSONDumpGenerator dumper = new PlanJSONDumpGenerator();
        String json = dumper.getOptimizerPlanAsJSON(optPlan);
        System.out.println(json);
    }
    SourcePlanNode vertexSource = or.getNode(VERTEX_SOURCE);
    SourcePlanNode edgesSource = or.getNode(EDGES_SOURCE);
    SinkPlanNode sink = or.getNode(SINK);
    WorksetIterationPlanNode iter = or.getNode(ITERATION_NAME);
    DualInputPlanNode neighborsJoin = or.getNode(JOIN_NEIGHBORS_MATCH);
    DualInputPlanNode cogroup = or.getNode(MIN_ID_AND_UPDATE);
    // --------------------------------------------------------------------
    // Plan validation:
    // 
    // We expect the plan to go with a sort-merge join, because the CoGroup
    // sorts and the join in the successive iteration can re-exploit the sorting.
    // --------------------------------------------------------------------
    // test all drivers
    Assert.assertEquals(DriverStrategy.NONE, sink.getDriverStrategy());
    Assert.assertEquals(DriverStrategy.NONE, vertexSource.getDriverStrategy());
    Assert.assertEquals(DriverStrategy.NONE, edgesSource.getDriverStrategy());
    Assert.assertEquals(DriverStrategy.INNER_MERGE, neighborsJoin.getDriverStrategy());
    Assert.assertEquals(set0, neighborsJoin.getKeysForInput1());
    Assert.assertEquals(set0, neighborsJoin.getKeysForInput2());
    Assert.assertEquals(DriverStrategy.CO_GROUP, cogroup.getDriverStrategy());
    Assert.assertEquals(set0, cogroup.getKeysForInput1());
    Assert.assertEquals(set0, cogroup.getKeysForInput2());
    // test all the shipping strategies
    Assert.assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, iter.getInitialSolutionSetInput().getShipStrategy());
    Assert.assertEquals(set0, iter.getInitialSolutionSetInput().getShipStrategyKeys());
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, iter.getInitialWorksetInput().getShipStrategy());
    Assert.assertEquals(set0, iter.getInitialWorksetInput().getShipStrategyKeys());
    Assert.assertEquals(ShipStrategyType.FORWARD, neighborsJoin.getInput1().getShipStrategy()); // workset
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, neighborsJoin.getInput2().getShipStrategy()); // edges
    Assert.assertEquals(set0, neighborsJoin.getInput2().getShipStrategyKeys());
    Assert.assertTrue(neighborsJoin.getInput2().getTempMode().isCached());
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, cogroup.getInput1().getShipStrategy()); // min id
    Assert.assertEquals(ShipStrategyType.FORWARD, cogroup.getInput2().getShipStrategy()); // solution set
    // test all the local strategies
    Assert.assertEquals(LocalStrategy.NONE, sink.getInput().getLocalStrategy());
    Assert.assertEquals(LocalStrategy.NONE, iter.getInitialSolutionSetInput().getLocalStrategy());
    // the sort for the neighbor join in the first iteration is pushed out of the loop
    Assert.assertEquals(LocalStrategy.SORT, iter.getInitialWorksetInput().getLocalStrategy());
    Assert.assertEquals(LocalStrategy.NONE, neighborsJoin.getInput1().getLocalStrategy()); // workset
    Assert.assertEquals(LocalStrategy.SORT, neighborsJoin.getInput2().getLocalStrategy()); // edges
    Assert.assertEquals(LocalStrategy.SORT, cogroup.getInput1().getLocalStrategy());
    Assert.assertEquals(LocalStrategy.NONE, cogroup.getInput2().getLocalStrategy()); // solution set
    // check the caches
    Assert.assertEquals(TempMode.CACHED, neighborsJoin.getInput2().getTempMode());
    JobGraphGenerator jgg = new JobGraphGenerator();
    jgg.compileJobGraph(optPlan);
}
Also used : DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanJSONDumpGenerator(org.apache.flink.optimizer.plandump.PlanJSONDumpGenerator) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) Test(org.junit.Test)
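
A short usage note on the last step above: compileJobGraph translates the OptimizedPlan into the runtime's JobGraph, so the test also exercises plan translation, not just strategy selection. A minimal sketch, assuming a compiled optPlan as in the test; the jobGraph variable name is illustrative.

// Translate the optimized plan into a runnable JobGraph; inconsistent
// strategy choices would surface here rather than in the assertions.
JobGraphGenerator jgg = new JobGraphGenerator();
JobGraph jobGraph = jgg.compileJobGraph(optPlan);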

Example 33 with SourcePlanNode

Use of org.apache.flink.optimizer.plan.SourcePlanNode in project flink by apache.

From the class GenericFlatTypePostPass, the method traverse.

@SuppressWarnings("unchecked")
protected void traverse(PlanNode node, T parentSchema, boolean createUtilities) {
    // distinguish the node types
    if (node instanceof SinkPlanNode) {
        SinkPlanNode sn = (SinkPlanNode) node;
        Channel inchannel = sn.getInput();
        T schema = createEmptySchema();
        sn.postPassHelper = schema;
        // add the sinks information to the schema
        try {
            getSinkSchema(sn, schema);
        } catch (ConflictingFieldTypeInfoException e) {
            throw new CompilerPostPassException("Conflicting type infomation for the data sink '" + sn.getSinkNode().getOperator().getName() + "'.");
        }
        // descend to the input channel
        try {
            propagateToChannel(schema, inchannel, createUtilities);
        } catch (MissingFieldTypeInfoException ex) {
            throw new CompilerPostPassException("Missing type infomation for the channel that inputs to the data sink '" + sn.getSinkNode().getOperator().getName() + "'.");
        }
    } else if (node instanceof SourcePlanNode) {
        if (createUtilities) {
            ((SourcePlanNode) node).setSerializer(createSerializer(parentSchema, node));
        // Nothing else to do here; the source has no input and no strategy of its own.
        }
    } else if (node instanceof BulkIterationPlanNode) {
        BulkIterationPlanNode iterationNode = (BulkIterationPlanNode) node;
        // get the nodes current schema
        T schema;
        if (iterationNode.postPassHelper == null) {
            schema = createEmptySchema();
            iterationNode.postPassHelper = schema;
        } else {
            schema = (T) iterationNode.postPassHelper;
        }
        schema.increaseNumConnectionsThatContributed();
        // add the parent schema to the schema
        if (propagateParentSchemaDown) {
            addSchemaToSchema(parentSchema, schema, iterationNode.getProgramOperator().getName());
        }
        // check whether all outgoing channels have not yet contributed. come back later if not.
        if (schema.getNumConnectionsThatContributed() < iterationNode.getOutgoingChannels().size()) {
            return;
        }
        if (iterationNode.getRootOfStepFunction() instanceof NAryUnionPlanNode) {
            throw new CompilerException("Optimizer cannot compile an iteration step function where next partial solution is created by a Union node.");
        }
        // traverse the termination criterion for the first time: create the schema only, no
        // utilities. This is needed in case of an intermediate termination criterion.
        if (iterationNode.getRootOfTerminationCriterion() != null) {
            SingleInputPlanNode addMapper = (SingleInputPlanNode) iterationNode.getRootOfTerminationCriterion();
            traverse(addMapper.getInput().getSource(), createEmptySchema(), false);
            try {
                addMapper.getInput().setSerializer(createSerializer(createEmptySchema()));
            } catch (MissingFieldTypeInfoException e) {
                throw new RuntimeException(e);
            }
        }
        // traverse the step function for the first time. create schema only, no utilities
        traverse(iterationNode.getRootOfStepFunction(), schema, false);
        T pss = (T) iterationNode.getPartialSolutionPlanNode().postPassHelper;
        if (pss == null) {
            throw new CompilerException("Error in Optimizer Post Pass: Partial solution schema is null after first traversal of the step function.");
        }
        // traverse the step function for the second time, taking the schema of the partial solution
        traverse(iterationNode.getRootOfStepFunction(), pss, createUtilities);
        if (iterationNode.getRootOfTerminationCriterion() != null) {
            SingleInputPlanNode addMapper = (SingleInputPlanNode) iterationNode.getRootOfTerminationCriterion();
            traverse(addMapper.getInput().getSource(), createEmptySchema(), createUtilities);
            try {
                addMapper.getInput().setSerializer(createSerializer(createEmptySchema()));
            } catch (MissingFieldTypeInfoException e) {
                throw new RuntimeException(e);
            }
        }
        // take the schema from the partial solution node and add its fields to the iteration result schema.
        // input and output schema need to be identical, so this is essentially a sanity check
        addSchemaToSchema(pss, schema, iterationNode.getProgramOperator().getName());
        // set the serializer
        if (createUtilities) {
            iterationNode.setSerializerForIterationChannel(createSerializer(pss, iterationNode.getPartialSolutionPlanNode()));
        }
        // done, we can now propagate our info down
        try {
            propagateToChannel(schema, iterationNode.getInput(), createUtilities);
        } catch (MissingFieldTypeInfoException e) {
            throw new CompilerPostPassException("Could not set up runtime strategy for input channel to node '" + iterationNode.getProgramOperator().getName() + "'. Missing type information for key field " + e.getFieldNumber());
        }
    } else if (node instanceof WorksetIterationPlanNode) {
        WorksetIterationPlanNode iterationNode = (WorksetIterationPlanNode) node;
        // get the nodes current schema
        T schema;
        if (iterationNode.postPassHelper == null) {
            schema = createEmptySchema();
            iterationNode.postPassHelper = schema;
        } else {
            schema = (T) iterationNode.postPassHelper;
        }
        schema.increaseNumConnectionsThatContributed();
        // add the parent schema to the schema (which refers to the solution set schema)
        if (propagateParentSchemaDown) {
            addSchemaToSchema(parentSchema, schema, iterationNode.getProgramOperator().getName());
        }
        // check whether all outgoing channels have not yet contributed. come back later if not.
        if (schema.getNumConnectionsThatContributed() < iterationNode.getOutgoingChannels().size()) {
            return;
        }
        if (iterationNode.getNextWorkSetPlanNode() instanceof NAryUnionPlanNode) {
            throw new CompilerException("Optimizer cannot compile a workset iteration step function where the next workset is produced by a Union node.");
        }
        if (iterationNode.getSolutionSetDeltaPlanNode() instanceof NAryUnionPlanNode) {
            throw new CompilerException("Optimizer cannot compile a workset iteration step function where the solution set delta is produced by a Union node.");
        }
        // traverse the step function
        // pass an empty schema to the next workset and the parent schema to the solution set delta
        // these first traversals are schema only
        traverse(iterationNode.getNextWorkSetPlanNode(), createEmptySchema(), false);
        traverse(iterationNode.getSolutionSetDeltaPlanNode(), schema, false);
        T wss = (T) iterationNode.getWorksetPlanNode().postPassHelper;
        T sss = (T) iterationNode.getSolutionSetPlanNode().postPassHelper;
        if (wss == null) {
            throw new CompilerException("Error in Optimizer Post Pass: Workset schema is null after first traversal of the step function.");
        }
        if (sss == null) {
            throw new CompilerException("Error in Optimizer Post Pass: Solution set schema is null after first traversal of the step function.");
        }
        // make the second pass and instantiate the utilities
        traverse(iterationNode.getNextWorkSetPlanNode(), wss, createUtilities);
        traverse(iterationNode.getSolutionSetDeltaPlanNode(), sss, createUtilities);
        // the solution set input and the result must have the same schema, this acts as a sanity check.
        try {
            for (Map.Entry<Integer, X> entry : sss) {
                Integer pos = entry.getKey();
                schema.addType(pos, entry.getValue());
            }
        } catch (ConflictingFieldTypeInfoException e) {
            throw new CompilerPostPassException("Conflicting type information for field " + e.getFieldNumber() + " in node '" + iterationNode.getProgramOperator().getName() + "'. Contradicting types between the " + "result of the iteration and the solution set schema: " + e.getPreviousType() + " and " + e.getNewType() + ". Most probable cause: Invalid constant field annotations.");
        }
        // set the serializers and comparators
        if (createUtilities) {
            WorksetIterationNode optNode = iterationNode.getIterationNode();
            iterationNode.setWorksetSerializer(createSerializer(wss, iterationNode.getWorksetPlanNode()));
            iterationNode.setSolutionSetSerializer(createSerializer(sss, iterationNode.getSolutionSetPlanNode()));
            try {
                iterationNode.setSolutionSetComparator(createComparator(optNode.getSolutionSetKeyFields(), null, sss));
            } catch (MissingFieldTypeInfoException ex) {
                throw new CompilerPostPassException("Could not set up the solution set for workset iteration '" + optNode.getOperator().getName() + "'. Missing type information for key field " + ex.getFieldNumber() + '.');
            }
        }
        // done, we can now propagate our info down
        try {
            propagateToChannel(schema, iterationNode.getInitialSolutionSetInput(), createUtilities);
            propagateToChannel(wss, iterationNode.getInitialWorksetInput(), createUtilities);
        } catch (MissingFieldTypeInfoException ex) {
            throw new CompilerPostPassException("Could not set up runtime strategy for input channel to node '" + iterationNode.getProgramOperator().getName() + "'. Missing type information for key field " + ex.getFieldNumber());
        }
    } else if (node instanceof SingleInputPlanNode) {
        SingleInputPlanNode sn = (SingleInputPlanNode) node;
        // get the nodes current schema
        T schema;
        if (sn.postPassHelper == null) {
            schema = createEmptySchema();
            sn.postPassHelper = schema;
        } else {
            schema = (T) sn.postPassHelper;
        }
        schema.increaseNumConnectionsThatContributed();
        SingleInputNode optNode = sn.getSingleInputNode();
        // add the parent schema to the schema
        if (propagateParentSchemaDown) {
            addSchemaToSchema(parentSchema, schema, optNode, 0);
        }
        // check whether all outgoing channels have not yet contributed. come back later if not.
        if (schema.getNumConnectionsThatContributed() < sn.getOutgoingChannels().size()) {
            return;
        }
        // add the nodes local information
        try {
            getSingleInputNodeSchema(sn, schema);
        } catch (ConflictingFieldTypeInfoException e) {
            throw new CompilerPostPassException(getConflictingTypeErrorMessage(e, optNode.getOperator().getName()));
        }
        if (createUtilities) {
            // parameterize the node's driver strategy
            for (int i = 0; i < sn.getDriverStrategy().getNumRequiredComparators(); i++) {
                try {
                    sn.setComparator(createComparator(sn.getKeys(i), sn.getSortOrders(i), schema), i);
                } catch (MissingFieldTypeInfoException e) {
                    throw new CompilerPostPassException("Could not set up runtime strategy for node '" + optNode.getOperator().getName() + "'. Missing type information for key field " + e.getFieldNumber());
                }
            }
        }
        // done, we can now propagate our info down
        try {
            propagateToChannel(schema, sn.getInput(), createUtilities);
        } catch (MissingFieldTypeInfoException e) {
            throw new CompilerPostPassException("Could not set up runtime strategy for input channel to node '" + optNode.getOperator().getName() + "'. Missing type information for field " + e.getFieldNumber());
        }
        // don't forget the broadcast inputs
        for (Channel c : sn.getBroadcastInputs()) {
            try {
                propagateToChannel(createEmptySchema(), c, createUtilities);
            } catch (MissingFieldTypeInfoException e) {
                throw new CompilerPostPassException("Could not set up runtime strategy for broadcast channel in node '" + optNode.getOperator().getName() + "'. Missing type information for field " + e.getFieldNumber());
            }
        }
    } else if (node instanceof DualInputPlanNode) {
        DualInputPlanNode dn = (DualInputPlanNode) node;
        // get the nodes current schema
        T schema1;
        T schema2;
        if (dn.postPassHelper1 == null) {
            schema1 = createEmptySchema();
            schema2 = createEmptySchema();
            dn.postPassHelper1 = schema1;
            dn.postPassHelper2 = schema2;
        } else {
            schema1 = (T) dn.postPassHelper1;
            schema2 = (T) dn.postPassHelper2;
        }
        schema1.increaseNumConnectionsThatContributed();
        schema2.increaseNumConnectionsThatContributed();
        TwoInputNode optNode = dn.getTwoInputNode();
        // add the parent schema to the schema
        if (propagateParentSchemaDown) {
            addSchemaToSchema(parentSchema, schema1, optNode, 0);
            addSchemaToSchema(parentSchema, schema2, optNode, 1);
        }
        // check whether all outgoing channels have not yet contributed. come back later if not.
        if (schema1.getNumConnectionsThatContributed() < dn.getOutgoingChannels().size()) {
            return;
        }
        // add the nodes local information
        try {
            getDualInputNodeSchema(dn, schema1, schema2);
        } catch (ConflictingFieldTypeInfoException e) {
            throw new CompilerPostPassException(getConflictingTypeErrorMessage(e, optNode.getOperator().getName()));
        }
        // parameterize the node's driver strategy
        if (createUtilities) {
            if (dn.getDriverStrategy().getNumRequiredComparators() > 0) {
                // set the individual comparators
                try {
                    dn.setComparator1(createComparator(dn.getKeysForInput1(), dn.getSortOrders(), schema1));
                    dn.setComparator2(createComparator(dn.getKeysForInput2(), dn.getSortOrders(), schema2));
                } catch (MissingFieldTypeInfoException e) {
                    throw new CompilerPostPassException("Could not set up runtime strategy for node '" + optNode.getOperator().getName() + "'. Missing type information for field " + e.getFieldNumber());
                }
                // set the pair comparator
                try {
                    dn.setPairComparator(createPairComparator(dn.getKeysForInput1(), dn.getKeysForInput2(), dn.getSortOrders(), schema1, schema2));
                } catch (MissingFieldTypeInfoException e) {
                    throw new CompilerPostPassException("Could not set up runtime strategy for node '" + optNode.getOperator().getName() + "'. Missing type information for field " + e.getFieldNumber());
                }
            }
        }
        // done, we can now propagate our info down
        try {
            propagateToChannel(schema1, dn.getInput1(), createUtilities);
        } catch (MissingFieldTypeInfoException e) {
            throw new CompilerPostPassException("Could not set up runtime strategy for the first input channel to node '" + optNode.getOperator().getName() + "'. Missing type information for field " + e.getFieldNumber());
        }
        try {
            propagateToChannel(schema2, dn.getInput2(), createUtilities);
        } catch (MissingFieldTypeInfoException e) {
            throw new CompilerPostPassException("Could not set up runtime strategy for the second input channel to node '" + optNode.getOperator().getName() + "'. Missing type information for field " + e.getFieldNumber());
        }
        // don't forget the broadcast inputs
        for (Channel c : dn.getBroadcastInputs()) {
            try {
                propagateToChannel(createEmptySchema(), c, createUtilities);
            } catch (MissingFieldTypeInfoException e) {
                throw new CompilerPostPassException("Could not set up runtime strategy for broadcast channel in node '" + optNode.getOperator().getName() + "'. Missing type information for field " + e.getFieldNumber());
            }
        }
    } else if (node instanceof NAryUnionPlanNode) {
        // only propagate the info down
        try {
            for (Channel channel : node.getInputs()) {
                propagateToChannel(parentSchema, channel, createUtilities);
            }
        } catch (MissingFieldTypeInfoException ex) {
            throw new CompilerPostPassException("Could not set up runtime strategy for the input channel to " + " a union node. Missing type information for field " + ex.getFieldNumber());
        }
    } else if (node instanceof BulkPartialSolutionPlanNode || node instanceof SolutionSetPlanNode || node instanceof WorksetPlanNode) {
        // catch the sources of the iterative step functions
        // get the nodes current schema
        T schema;
        String name;
        if (node instanceof BulkPartialSolutionPlanNode) {
            BulkPartialSolutionPlanNode psn = (BulkPartialSolutionPlanNode) node;
            if (psn.postPassHelper == null) {
                schema = createEmptySchema();
                psn.postPassHelper = schema;
            } else {
                schema = (T) psn.postPassHelper;
            }
            name = "partial solution of bulk iteration '" + psn.getPartialSolutionNode().getIterationNode().getOperator().getName() + "'";
        } else if (node instanceof SolutionSetPlanNode) {
            SolutionSetPlanNode ssn = (SolutionSetPlanNode) node;
            if (ssn.postPassHelper == null) {
                schema = createEmptySchema();
                ssn.postPassHelper = schema;
            } else {
                schema = (T) ssn.postPassHelper;
            }
            name = "solution set of workset iteration '" + ssn.getSolutionSetNode().getIterationNode().getOperator().getName() + "'";
        } else if (node instanceof WorksetPlanNode) {
            WorksetPlanNode wsn = (WorksetPlanNode) node;
            if (wsn.postPassHelper == null) {
                schema = createEmptySchema();
                wsn.postPassHelper = schema;
            } else {
                schema = (T) wsn.postPassHelper;
            }
            name = "workset of workset iteration '" + wsn.getWorksetNode().getIterationNode().getOperator().getName() + "'";
        } else {
            throw new CompilerException("Unrecognized iteration source node: " + node.getClass().getName());
        }
        schema.increaseNumConnectionsThatContributed();
        // add the parent schema to the schema
        addSchemaToSchema(parentSchema, schema, name);
    } else {
        throw new CompilerPostPassException("Unknown node type encountered: " + node.getClass().getName());
    }
}
Also used : SingleInputNode(org.apache.flink.optimizer.dag.SingleInputNode) SolutionSetPlanNode(org.apache.flink.optimizer.plan.SolutionSetPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) BulkPartialSolutionPlanNode(org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode) Channel(org.apache.flink.optimizer.plan.Channel) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) WorksetIterationNode(org.apache.flink.optimizer.dag.WorksetIterationNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) CompilerException(org.apache.flink.optimizer.CompilerException) WorksetPlanNode(org.apache.flink.optimizer.plan.WorksetPlanNode) CompilerPostPassException(org.apache.flink.optimizer.CompilerPostPassException) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) TwoInputNode(org.apache.flink.optimizer.dag.TwoInputNode)
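
The recurring pattern in traverse (increaseNumConnectionsThatContributed followed by an early return) is a reference count: a node with several outgoing channels defers its own work until every consumer has pushed a schema down to it. A self-contained sketch of just that idea; all names here are hypothetical stand-ins for the postPassHelper field and the schema type T.

import java.util.HashMap;
import java.util.Map;

final class PostPassRefCountDemo {
    // Stands in for postPassHelper: one shared counter (the "schema") per node.
    private final Map<String, Integer> contributed = new HashMap<>();

    // Returns true only on the visit coming from the node's last outgoing
    // channel; earlier visits mirror the early "return" in traverse above.
    boolean readyToDescend(String nodeId, int outgoingChannels) {
        return contributed.merge(nodeId, 1, Integer::sum) >= outgoingChannels;
    }
}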

Example 34 with SourcePlanNode

Use of org.apache.flink.optimizer.plan.SourcePlanNode in project flink by apache.

From the class ConnectedComponentsTest, the method testWorksetConnectedComponentsWithSolutionSetAsFirstInput.

@Test
public void testWorksetConnectedComponentsWithSolutionSetAsFirstInput() {
    Plan plan = getConnectedComponentsPlan(DEFAULT_PARALLELISM, 100, true);
    OptimizedPlan optPlan = compileNoStats(plan);
    OptimizerPlanNodeResolver or = getOptimizerPlanNodeResolver(optPlan);
    SourcePlanNode vertexSource = or.getNode(VERTEX_SOURCE);
    SourcePlanNode edgesSource = or.getNode(EDGES_SOURCE);
    SinkPlanNode sink = or.getNode(SINK);
    WorksetIterationPlanNode iter = or.getNode(ITERATION_NAME);
    DualInputPlanNode neighborsJoin = or.getNode(JOIN_NEIGHBORS_MATCH);
    SingleInputPlanNode minIdReducer = or.getNode(MIN_ID_REDUCER);
    SingleInputPlanNode minIdCombiner = (SingleInputPlanNode) minIdReducer.getPredecessor();
    DualInputPlanNode updatingMatch = or.getNode(UPDATE_ID_MATCH);
    // test all drivers
    Assert.assertEquals(DriverStrategy.NONE, sink.getDriverStrategy());
    Assert.assertEquals(DriverStrategy.NONE, vertexSource.getDriverStrategy());
    Assert.assertEquals(DriverStrategy.NONE, edgesSource.getDriverStrategy());
    Assert.assertEquals(DriverStrategy.HYBRIDHASH_BUILD_SECOND_CACHED, neighborsJoin.getDriverStrategy());
    Assert.assertFalse(neighborsJoin.getInput1().getTempMode().isCached());
    Assert.assertFalse(neighborsJoin.getInput2().getTempMode().isCached());
    Assert.assertEquals(set0, neighborsJoin.getKeysForInput1());
    Assert.assertEquals(set0, neighborsJoin.getKeysForInput2());
    Assert.assertEquals(DriverStrategy.HYBRIDHASH_BUILD_FIRST, updatingMatch.getDriverStrategy());
    Assert.assertEquals(set0, updatingMatch.getKeysForInput1());
    Assert.assertEquals(set0, updatingMatch.getKeysForInput2());
    // test all the shipping strategies
    Assert.assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, iter.getInitialSolutionSetInput().getShipStrategy());
    Assert.assertEquals(set0, iter.getInitialSolutionSetInput().getShipStrategyKeys());
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, iter.getInitialWorksetInput().getShipStrategy());
    Assert.assertEquals(set0, iter.getInitialWorksetInput().getShipStrategyKeys());
    Assert.assertEquals(ShipStrategyType.FORWARD, neighborsJoin.getInput1().getShipStrategy()); // workset
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, neighborsJoin.getInput2().getShipStrategy()); // edges
    Assert.assertEquals(set0, neighborsJoin.getInput2().getShipStrategyKeys());
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, minIdReducer.getInput().getShipStrategy());
    Assert.assertEquals(set0, minIdReducer.getInput().getShipStrategyKeys());
    Assert.assertEquals(ShipStrategyType.FORWARD, minIdCombiner.getInput().getShipStrategy());
    Assert.assertEquals(ShipStrategyType.FORWARD, updatingMatch.getInput1().getShipStrategy()); // solution set
    Assert.assertEquals(ShipStrategyType.FORWARD, updatingMatch.getInput2().getShipStrategy()); // min id
    // test all the local strategies
    Assert.assertEquals(LocalStrategy.NONE, sink.getInput().getLocalStrategy());
    Assert.assertEquals(LocalStrategy.NONE, iter.getInitialSolutionSetInput().getLocalStrategy());
    Assert.assertEquals(LocalStrategy.NONE, iter.getInitialWorksetInput().getLocalStrategy());
    Assert.assertEquals(LocalStrategy.NONE, neighborsJoin.getInput1().getLocalStrategy()); // workset
    Assert.assertEquals(LocalStrategy.NONE, neighborsJoin.getInput2().getLocalStrategy()); // edges
    Assert.assertEquals(LocalStrategy.COMBININGSORT, minIdReducer.getInput().getLocalStrategy());
    Assert.assertEquals(set0, minIdReducer.getInput().getLocalStrategyKeys());
    Assert.assertEquals(LocalStrategy.NONE, minIdCombiner.getInput().getLocalStrategy());
    Assert.assertEquals(LocalStrategy.NONE, updatingMatch.getInput1().getLocalStrategy()); // solution set
    Assert.assertEquals(LocalStrategy.NONE, updatingMatch.getInput2().getLocalStrategy()); // min id
    // check the dams
    Assert.assertEquals(TempMode.NONE, iter.getInitialWorksetInput().getTempMode());
    Assert.assertEquals(TempMode.NONE, iter.getInitialSolutionSetInput().getTempMode());
    Assert.assertEquals(DataExchangeMode.BATCH, iter.getInitialWorksetInput().getDataExchangeMode());
    Assert.assertEquals(DataExchangeMode.BATCH, iter.getInitialSolutionSetInput().getDataExchangeMode());
    JobGraphGenerator jgg = new JobGraphGenerator();
    jgg.compileJobGraph(optPlan);
}
Also used : DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) Test(org.junit.Test)
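
A hedged sketch of the kind of program getConnectedComponentsPlan(parallelism, 100, true) is assumed to build (the builder itself is not shown on this page): a delta iteration over (vertexId, componentId) tuples whose update join takes the solution set as the first input, which is what yields the HYBRIDHASH_BUILD_FIRST strategy asserted above. The join function names are illustrative.

DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
        verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

// Workset joined with the edges, then reduced to the minimum component id
// per vertex (the "min id" reducer/combiner in the assertions above).
DataSet<Tuple2<Long, Long>> minNeighbors = iteration.getWorkset()
        .join(edges).where(0).equalTo(0).with(new NeighborWithComponentIdJoin())
        .groupBy(0).aggregate(Aggregations.MIN, 1);

// Solution set as the FIRST input of the update join: the hash table is
// built on that side.
DataSet<Tuple2<Long, Long>> updates = iteration.getSolutionSet()
        .join(minNeighbors).where(0).equalTo(0).with(new UpdateComponentIdMatch());

DataSet<Tuple2<Long, Long>> result = iteration.closeWith(updates, updates);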

Example 35 with SourcePlanNode

Use of org.apache.flink.optimizer.plan.SourcePlanNode in project flink by apache.

From the class ConnectedComponentsTest, the method testWorksetConnectedComponents.

@Test
public void testWorksetConnectedComponents() {
    Plan plan = getConnectedComponentsPlan(DEFAULT_PARALLELISM, 100, false);
    OptimizedPlan optPlan = compileNoStats(plan);
    OptimizerPlanNodeResolver or = getOptimizerPlanNodeResolver(optPlan);
    SourcePlanNode vertexSource = or.getNode(VERTEX_SOURCE);
    SourcePlanNode edgesSource = or.getNode(EDGES_SOURCE);
    SinkPlanNode sink = or.getNode(SINK);
    WorksetIterationPlanNode iter = or.getNode(ITERATION_NAME);
    DualInputPlanNode neighborsJoin = or.getNode(JOIN_NEIGHBORS_MATCH);
    SingleInputPlanNode minIdReducer = or.getNode(MIN_ID_REDUCER);
    SingleInputPlanNode minIdCombiner = (SingleInputPlanNode) minIdReducer.getPredecessor();
    DualInputPlanNode updatingMatch = or.getNode(UPDATE_ID_MATCH);
    // test all drivers
    Assert.assertEquals(DriverStrategy.NONE, sink.getDriverStrategy());
    Assert.assertEquals(DriverStrategy.NONE, vertexSource.getDriverStrategy());
    Assert.assertEquals(DriverStrategy.NONE, edgesSource.getDriverStrategy());
    Assert.assertEquals(DriverStrategy.HYBRIDHASH_BUILD_SECOND_CACHED, neighborsJoin.getDriverStrategy());
    Assert.assertFalse(neighborsJoin.getInput1().getTempMode().isCached());
    Assert.assertFalse(neighborsJoin.getInput2().getTempMode().isCached());
    Assert.assertEquals(set0, neighborsJoin.getKeysForInput1());
    Assert.assertEquals(set0, neighborsJoin.getKeysForInput2());
    Assert.assertEquals(DriverStrategy.HYBRIDHASH_BUILD_SECOND, updatingMatch.getDriverStrategy());
    Assert.assertEquals(set0, updatingMatch.getKeysForInput1());
    Assert.assertEquals(set0, updatingMatch.getKeysForInput2());
    // test all the shipping strategies
    Assert.assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, iter.getInitialSolutionSetInput().getShipStrategy());
    Assert.assertEquals(set0, iter.getInitialSolutionSetInput().getShipStrategyKeys());
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, iter.getInitialWorksetInput().getShipStrategy());
    Assert.assertEquals(set0, iter.getInitialWorksetInput().getShipStrategyKeys());
    Assert.assertEquals(ShipStrategyType.FORWARD, neighborsJoin.getInput1().getShipStrategy()); // workset
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, neighborsJoin.getInput2().getShipStrategy()); // edges
    Assert.assertEquals(set0, neighborsJoin.getInput2().getShipStrategyKeys());
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, minIdReducer.getInput().getShipStrategy());
    Assert.assertEquals(set0, minIdReducer.getInput().getShipStrategyKeys());
    Assert.assertEquals(ShipStrategyType.FORWARD, minIdCombiner.getInput().getShipStrategy());
    Assert.assertEquals(ShipStrategyType.FORWARD, updatingMatch.getInput1().getShipStrategy()); // min id
    Assert.assertEquals(ShipStrategyType.FORWARD, updatingMatch.getInput2().getShipStrategy()); // solution set
    // test all the local strategies
    Assert.assertEquals(LocalStrategy.NONE, sink.getInput().getLocalStrategy());
    Assert.assertEquals(LocalStrategy.NONE, iter.getInitialSolutionSetInput().getLocalStrategy());
    Assert.assertEquals(LocalStrategy.NONE, iter.getInitialWorksetInput().getLocalStrategy());
    Assert.assertEquals(LocalStrategy.NONE, neighborsJoin.getInput1().getLocalStrategy()); // workset
    Assert.assertEquals(LocalStrategy.NONE, neighborsJoin.getInput2().getLocalStrategy()); // edges
    Assert.assertEquals(LocalStrategy.COMBININGSORT, minIdReducer.getInput().getLocalStrategy());
    Assert.assertEquals(set0, minIdReducer.getInput().getLocalStrategyKeys());
    Assert.assertEquals(LocalStrategy.NONE, minIdCombiner.getInput().getLocalStrategy());
    Assert.assertEquals(LocalStrategy.NONE, updatingMatch.getInput1().getLocalStrategy()); // min id
    Assert.assertEquals(LocalStrategy.NONE, updatingMatch.getInput2().getLocalStrategy()); // solution set
    // check the dams
    Assert.assertEquals(TempMode.NONE, iter.getInitialWorksetInput().getTempMode());
    Assert.assertEquals(TempMode.NONE, iter.getInitialSolutionSetInput().getTempMode());
    Assert.assertEquals(DataExchangeMode.BATCH, iter.getInitialWorksetInput().getDataExchangeMode());
    Assert.assertEquals(DataExchangeMode.BATCH, iter.getInitialSolutionSetInput().getDataExchangeMode());
    JobGraphGenerator jgg = new JobGraphGenerator();
    jgg.compileJobGraph(optPlan);
}
Also used : DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) Test(org.junit.Test)
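
The only assumed difference from testWorksetConnectedComponentsWithSolutionSetAsFirstInput above is the order of the update join's inputs, which flips the hash-build side from HYBRIDHASH_BUILD_FIRST to HYBRIDHASH_BUILD_SECOND; in both cases the hash table is built on the solution-set side. A sketch of the two variants, with illustrative names:

// Solution set as first input: build side is input 1 (previous test).
DataSet<Tuple2<Long, Long>> updatesFirst = iteration.getSolutionSet()
        .join(minNeighbors).where(0).equalTo(0).with(new UpdateComponentIdMatch());

// Solution set as second input: build side is input 2 (this test).
DataSet<Tuple2<Long, Long>> updatesSecond = minNeighbors
        .join(iteration.getSolutionSet()).where(0).equalTo(0).with(new UpdateComponentIdMatch());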

Aggregations

SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode): 61 uses
Test (org.junit.Test): 55 uses
SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode): 51 uses
OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan): 48 uses
Plan (org.apache.flink.api.common.Plan): 45 uses
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 45 uses
SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode): 33 uses
GlobalProperties (org.apache.flink.optimizer.dataproperties.GlobalProperties): 31 uses
LocalProperties (org.apache.flink.optimizer.dataproperties.LocalProperties): 31 uses
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 30 uses
FieldSet (org.apache.flink.api.common.operators.util.FieldSet): 27 uses
FieldList (org.apache.flink.api.common.operators.util.FieldList): 18 uses
DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat): 18 uses
Channel (org.apache.flink.optimizer.plan.Channel): 17 uses
Tuple3 (org.apache.flink.api.java.tuple.Tuple3): 11 uses
DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode): 11 uses
WorksetIterationPlanNode (org.apache.flink.optimizer.plan.WorksetIterationPlanNode): 8 uses
RequestedGlobalProperties (org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties): 7 uses
RequestedLocalProperties (org.apache.flink.optimizer.dataproperties.RequestedLocalProperties): 7 uses
FeedbackPropertiesMeetRequirementsReport (org.apache.flink.optimizer.plan.PlanNode.FeedbackPropertiesMeetRequirementsReport): 7 uses