Search in sources :

Example 46 with SingleInputPlanNode

use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.

the class PregelCompilerTest method testPregelWithCombiner.

/**
 * Compiles a small vertex-centric ("Pregel Connected Components") program that uses a
 * message combiner and verifies the optimizer's choices: parallelism and ship strategies
 * of the sink, the workset iteration, the combiner, the solution set delta, and the
 * computation coGroup, plus that the initial hash partitioning happens outside the loop.
 */
@SuppressWarnings("serial")
@Test
public void testPregelWithCombiner() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);
        // compose test program
        {
            DataSet<Vertex<Long, Long>> initialVertices = env.fromElements(new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L)).map(new Tuple2ToVertexMap<Long, Long>());
            DataSet<Edge<Long, NullValue>> edges = env.fromElements(new Tuple2<>(1L, 2L)).map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {

                @Override
                public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) {
                    return new Edge<>(edge.f0, edge.f1, NullValue.getInstance());
                }
            });
            Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env);
            DataSet<Vertex<Long, Long>> result = graph.runVertexCentricIteration(new CCCompute(), new CCCombiner(), 100).getVertices();
            result.output(new DiscardingOutputFormat<Vertex<Long, Long>>());
        }
        Plan p = env.createProgramPlan("Pregel Connected Components");
        OptimizedPlan op = compileNoStats(p);
        // check the sink
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
        assertEquals(DEFAULT_PARALLELISM, sink.getParallelism());
        // check the iteration
        WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource();
        assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism());
        // check the combiner
        SingleInputPlanNode combiner = (SingleInputPlanNode) iteration.getInput2().getSource();
        assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
        // check the solution set delta
        PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode();
        assertTrue(ssDelta instanceof SingleInputPlanNode);
        SingleInputPlanNode ssFlatMap = (SingleInputPlanNode) ((SingleInputPlanNode) (ssDelta)).getInput().getSource();
        assertEquals(DEFAULT_PARALLELISM, ssFlatMap.getParallelism());
        assertEquals(ShipStrategyType.FORWARD, ssFlatMap.getInput().getShipStrategy());
        // check the computation coGroup
        DualInputPlanNode computationCoGroup = (DualInputPlanNode) (ssFlatMap.getInput().getSource());
        assertEquals(DEFAULT_PARALLELISM, computationCoGroup.getParallelism());
        assertEquals(ShipStrategyType.FORWARD, computationCoGroup.getInput1().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_HASH, computationCoGroup.getInput2().getShipStrategy());
        assertTrue(computationCoGroup.getInput2().getTempMode().isCached());
        assertEquals(new FieldList(0), computationCoGroup.getInput2().getShipStrategyKeys());
        // check that the initial partitioning is pushed out of the loop
        assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy());
        assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys());
    } catch (Exception e) {
        // Fail the test with the original exception attached as the cause: the report then
        // carries the full stack trace, and an exception without a message no longer
        // produces an empty failure description.
        throw new AssertionError("Unexpected exception while compiling/checking the plan: " + e, e);
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple2ToVertexMap(org.apache.flink.graph.utils.Tuple2ToVertexMap) DataSet(org.apache.flink.api.java.DataSet) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) MapFunction(org.apache.flink.api.common.functions.MapFunction) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) FieldList(org.apache.flink.api.common.operators.util.FieldList) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) NullValue(org.apache.flink.types.NullValue) Graph(org.apache.flink.graph.Graph) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Edge(org.apache.flink.graph.Edge) Test(org.junit.Test)

Example 47 with SingleInputPlanNode

use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.

the class RangePartitionRewriter method rewriteRangePartitionChannel.

/**
 * Replaces the direct connection represented by {@code channel} with a sampling based
 * pipeline whose last node declares range-partitioned output:
 * sample per partition -> merge samples in one task -> build range boundaries ->
 * tag records with a range index (boundaries arrive as broadcast input) ->
 * custom partition on that index -> strip the index -> original target.
 *
 * <p>All newly created plan nodes are added to {@code this.plan}; {@code channel} itself is
 * re-sourced to the last new node and switched to a pipelined forward connection.
 *
 * @param channel the channel to rewrite (presumably one using range partitioning; this
 *                method does not check that itself)
 * @return the two new channels that start at the channel's original source node, in the
 *         order: channel to the partition sampler, channel to the range index assigner.
 *         NOTE(review): they are not attached to the source node here, so the caller is
 *         expected to do that - confirm against the call site.
 */
private List<Channel> rewriteRangePartitionChannel(Channel channel) {
    final List<Channel> channelsLeavingProducer = new ArrayList<>();
    final PlanNode producer = channel.getSource();
    final PlanNode consumer = channel.getTarget();
    final int producerParallelism = producer.getParallelism();
    final int consumerParallelism = consumer.getParallelism();
    final Costs noCosts = new Costs(0, 0, 0);
    final TypeComparatorFactory<?> keyComparator =
            Utils.getShipComparator(channel, this.plan.getOriginalPlan().getExecutionConfig());

    // ---- Stage 1: draw a fixed-size sample inside every producer partition ----
    final int samplesTotal = SAMPLES_PER_PARTITION * consumerParallelism;
    final SampleInPartition partitionSampler = new SampleInPartition(false, samplesTotal, SEED);
    final TypeInformation<?> recordType =
            producer.getOptimizerNode().getOperator().getOperatorInfo().getOutputType();
    final TypeInformation<IntermediateSampleData> sampleType =
            TypeExtractor.getForClass(IntermediateSampleData.class);
    final UnaryOperatorInformation partitionSamplerInfo = new UnaryOperatorInformation(recordType, sampleType);
    final MapPartitionOperatorBase partitionSamplerOp =
            new MapPartitionOperatorBase(partitionSampler, partitionSamplerInfo, SIP_NAME);
    final MapPartitionNode partitionSamplerDag = new MapPartitionNode(partitionSamplerOp);
    final Channel intoPartitionSampler = new Channel(producer, TempMode.NONE);
    intoPartitionSampler.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
    final SingleInputPlanNode partitionSamplerPlan =
            new SingleInputPlanNode(partitionSamplerDag, SIP_NAME, intoPartitionSampler, DriverStrategy.MAP_PARTITION);
    partitionSamplerDag.setParallelism(producerParallelism);
    partitionSamplerPlan.setParallelism(producerParallelism);
    partitionSamplerPlan.initProperties(new GlobalProperties(), new LocalProperties());
    partitionSamplerPlan.setCosts(noCosts);
    intoPartitionSampler.setTarget(partitionSamplerPlan);
    this.plan.getAllNodes().add(partitionSamplerPlan);
    channelsLeavingProducer.add(intoPartitionSampler);

    // ---- Stage 2: combine the partial samples in one task (parallelism 1) ----
    final SampleInCoordinator coordinatorSampler = new SampleInCoordinator(false, samplesTotal, SEED);
    final UnaryOperatorInformation coordinatorSamplerInfo = new UnaryOperatorInformation(sampleType, recordType);
    final GroupReduceOperatorBase coordinatorSamplerOp =
            new GroupReduceOperatorBase(coordinatorSampler, coordinatorSamplerInfo, SIC_NAME);
    final GroupReduceNode coordinatorSamplerDag = new GroupReduceNode(coordinatorSamplerOp);
    final Channel intoCoordinatorSampler = new Channel(partitionSamplerPlan, TempMode.NONE);
    intoCoordinatorSampler.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
    final SingleInputPlanNode coordinatorSamplerPlan =
            new SingleInputPlanNode(coordinatorSamplerDag, SIC_NAME, intoCoordinatorSampler, DriverStrategy.ALL_GROUP_REDUCE);
    coordinatorSamplerDag.setParallelism(1);
    coordinatorSamplerPlan.setParallelism(1);
    coordinatorSamplerPlan.initProperties(new GlobalProperties(), new LocalProperties());
    coordinatorSamplerPlan.setCosts(noCosts);
    intoCoordinatorSampler.setTarget(coordinatorSamplerPlan);
    partitionSamplerPlan.addOutgoingChannel(intoCoordinatorSampler);
    this.plan.getAllNodes().add(coordinatorSamplerPlan);

    // ---- Stage 3: turn the merged sample into range boundaries (parallelism 1) ----
    final RangeBoundaryBuilder boundaryBuilder = new RangeBoundaryBuilder(keyComparator, consumerParallelism);
    final TypeInformation<CommonRangeBoundaries> boundariesType =
            TypeExtractor.getForClass(CommonRangeBoundaries.class);
    final UnaryOperatorInformation boundaryBuilderInfo = new UnaryOperatorInformation(recordType, boundariesType);
    final MapPartitionOperatorBase boundaryBuilderOp =
            new MapPartitionOperatorBase(boundaryBuilder, boundaryBuilderInfo, RB_NAME);
    final MapPartitionNode boundaryBuilderDag = new MapPartitionNode(boundaryBuilderOp);
    final Channel intoBoundaryBuilder = new Channel(coordinatorSamplerPlan, TempMode.NONE);
    intoBoundaryBuilder.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
    final SingleInputPlanNode boundaryBuilderPlan =
            new SingleInputPlanNode(boundaryBuilderDag, RB_NAME, intoBoundaryBuilder, DriverStrategy.MAP_PARTITION);
    boundaryBuilderDag.setParallelism(1);
    boundaryBuilderPlan.setParallelism(1);
    boundaryBuilderPlan.initProperties(new GlobalProperties(), new LocalProperties());
    boundaryBuilderPlan.setCosts(noCosts);
    intoBoundaryBuilder.setTarget(boundaryBuilderPlan);
    coordinatorSamplerPlan.addOutgoingChannel(intoBoundaryBuilder);
    this.plan.getAllNodes().add(boundaryBuilderPlan);

    // ---- Stage 4: read the producer's records again and emit (range index, record) ----
    // The range boundaries are supplied to this node as a broadcast input.
    final AssignRangeIndex indexAssigner = new AssignRangeIndex(keyComparator);
    final TypeInformation<Tuple2> indexedRecordType =
            new TupleTypeInfo<>(BasicTypeInfo.INT_TYPE_INFO, recordType);
    final UnaryOperatorInformation indexAssignerInfo = new UnaryOperatorInformation(recordType, indexedRecordType);
    final MapPartitionOperatorBase indexAssignerOp =
            new MapPartitionOperatorBase(indexAssigner, indexAssignerInfo, ARI_NAME);
    final MapPartitionNode indexAssignerDag = new MapPartitionNode(indexAssignerOp);
    final Channel intoIndexAssigner = new Channel(producer, TempMode.NONE);
    // This second connection from the producer uses BATCH exchange to avoid a deadlock.
    intoIndexAssigner.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.BATCH);
    final SingleInputPlanNode indexAssignerPlan =
            new SingleInputPlanNode(indexAssignerDag, ARI_NAME, intoIndexAssigner, DriverStrategy.MAP_PARTITION);
    indexAssignerDag.setParallelism(producerParallelism);
    indexAssignerPlan.setParallelism(producerParallelism);
    indexAssignerPlan.initProperties(new GlobalProperties(), new LocalProperties());
    indexAssignerPlan.setCosts(noCosts);
    intoIndexAssigner.setTarget(indexAssignerPlan);
    this.plan.getAllNodes().add(indexAssignerPlan);
    channelsLeavingProducer.add(intoIndexAssigner);

    final NamedChannel boundariesBroadcast = new NamedChannel("RangeBoundaries", boundaryBuilderPlan);
    boundariesBroadcast.setShipStrategy(ShipStrategyType.BROADCAST, DataExchangeMode.PIPELINED);
    boundariesBroadcast.setTarget(indexAssignerPlan);
    List<NamedChannel> broadcastInputs = new ArrayList<>(1);
    broadcastInputs.add(boundariesBroadcast);
    indexAssignerPlan.setBroadcastInputs(broadcastInputs);

    // ---- Stage 5: partition on the index (field 0) and drop it again ----
    final Channel intoIndexRemover = new Channel(indexAssignerPlan, TempMode.NONE);
    final FieldList indexField = new FieldList(0);
    intoIndexRemover.setShipStrategy(ShipStrategyType.PARTITION_CUSTOM, indexField, idPartitioner, DataExchangeMode.PIPELINED);
    indexAssignerPlan.addOutgoingChannel(intoIndexRemover);
    final RemoveRangeIndex indexRemover = new RemoveRangeIndex();
    final UnaryOperatorInformation indexRemoverInfo = new UnaryOperatorInformation(indexedRecordType, recordType);
    final MapOperatorBase indexRemoverOp = new MapOperatorBase(indexRemover, indexRemoverInfo, PR_NAME);
    final MapNode indexRemoverDag = new MapNode(indexRemoverOp);
    final SingleInputPlanNode indexRemoverPlan =
            new SingleInputPlanNode(indexRemoverDag, PR_NAME, intoIndexRemover, DriverStrategy.MAP);
    intoIndexRemover.setTarget(indexRemoverPlan);
    indexRemoverDag.setParallelism(consumerParallelism);
    indexRemoverPlan.setParallelism(consumerParallelism);
    // Declare the output of the last node as range partitioned (ascending on field 0).
    GlobalProperties rangePartitioned = new GlobalProperties();
    rangePartitioned.setRangePartitioned(new Ordering(0, null, Order.ASCENDING));
    indexRemoverPlan.initProperties(rangePartitioned, new LocalProperties());
    indexRemoverPlan.setCosts(noCosts);
    this.plan.getAllNodes().add(indexRemoverPlan);

    // ---- Stage 6: hook the original channel behind the new pipeline ----
    channel.setSource(indexRemoverPlan);
    channel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
    indexRemoverPlan.addOutgoingChannel(channel);
    return channelsLeavingProducer;
}
Also used : SampleInPartition(org.apache.flink.api.java.functions.SampleInPartition) Costs(org.apache.flink.optimizer.costs.Costs) GroupReduceNode(org.apache.flink.optimizer.dag.GroupReduceNode) ArrayList(java.util.ArrayList) SampleInCoordinator(org.apache.flink.api.java.functions.SampleInCoordinator) MapNode(org.apache.flink.optimizer.dag.MapNode) RangeBoundaryBuilder(org.apache.flink.runtime.operators.udf.RangeBoundaryBuilder) FieldList(org.apache.flink.api.common.operators.util.FieldList) MapOperatorBase(org.apache.flink.api.common.operators.base.MapOperatorBase) IterationPlanNode(org.apache.flink.optimizer.plan.IterationPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) UnaryOperatorInformation(org.apache.flink.api.common.operators.UnaryOperatorInformation) GlobalProperties(org.apache.flink.optimizer.dataproperties.GlobalProperties) RemoveRangeIndex(org.apache.flink.runtime.operators.udf.RemoveRangeIndex) Ordering(org.apache.flink.api.common.operators.Ordering) MapPartitionNode(org.apache.flink.optimizer.dag.MapPartitionNode) MapPartitionOperatorBase(org.apache.flink.api.common.operators.base.MapPartitionOperatorBase) AssignRangeIndex(org.apache.flink.runtime.operators.udf.AssignRangeIndex) Channel(org.apache.flink.optimizer.plan.Channel) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) IntermediateSampleData(org.apache.flink.api.java.sampling.IntermediateSampleData) Tuple2(org.apache.flink.api.java.tuple.Tuple2) GroupReduceOperatorBase(org.apache.flink.api.common.operators.base.GroupReduceOperatorBase) LocalProperties(org.apache.flink.optimizer.dataproperties.LocalProperties) CommonRangeBoundaries(org.apache.flink.api.common.distributions.CommonRangeBoundaries)

Example 48 with SingleInputPlanNode

use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.

the class PlanJSONDumpGenerator method visit.

/**
 * Writes the JSON object describing {@code node} to {@code writer}, after first writing
 * (recursively) all of its predecessors that have not been written yet. For iteration
 * nodes the step function is dumped as a nested array inside the node's object.
 *
 * <p>Each node is assigned a numeric id on first visit (stored in {@code nodeIds}); a node
 * that already has an id is not written again.
 *
 * @param node   the node to dump
 * @param writer the target the JSON text is printed to
 * @param first  {@code true} if no node has been written yet to the JSON array currently
 *               being filled; controls whether a separating comma is emitted before the node
 * @return {@code true} if the node was written, {@code false} if it had been visited before
 * @throws CompilerException if an input uses a ship strategy or local strategy that this
 *                           generator does not know
 */
private boolean visit(DumpableNode<?> node, PrintWriter writer, boolean first) {
    // check for duplicate traversal
    if (this.nodeIds.containsKey(node)) {
        return false;
    }
    // assign an id first
    this.nodeIds.put(node, this.nodeCnt++);
    // then recurse
    for (DumpableNode<?> child : node.getPredecessors()) {
        // once any predecessor has been written, this node is no longer the first entry
        if (visit(child, writer, first)) {
            first = false;
        }
    }
    // the optimizer node carries the operator, estimates and hints used further below
    final OptimizerNode n = node.getOptimizerNode();
    // start a new node and output node id
    if (!first) {
        writer.print(",\n");
    }
    // open the node
    writer.print("\t{\n");
    // recurse, if it is an iteration node
    if (node instanceof BulkIterationNode || node instanceof BulkIterationPlanNode) {
        DumpableNode<?> innerChild = node instanceof BulkIterationNode ? ((BulkIterationNode) node).getNextPartialSolution() : ((BulkIterationPlanNode) node).getRootOfStepFunction();
        DumpableNode<?> begin = node instanceof BulkIterationNode ? ((BulkIterationNode) node).getPartialSolution() : ((BulkIterationPlanNode) node).getPartialSolutionPlanNode();
        writer.print("\t\t\"step_function\": [\n");
        visit(innerChild, writer, true);
        writer.print("\n\t\t],\n");
        writer.print("\t\t\"partial_solution\": " + this.nodeIds.get(begin) + ",\n");
        writer.print("\t\t\"next_partial_solution\": " + this.nodeIds.get(innerChild) + ",\n");
    } else if (node instanceof WorksetIterationNode || node instanceof WorksetIterationPlanNode) {
        DumpableNode<?> worksetRoot = node instanceof WorksetIterationNode ? ((WorksetIterationNode) node).getNextWorkset() : ((WorksetIterationPlanNode) node).getNextWorkSetPlanNode();
        DumpableNode<?> solutionDelta = node instanceof WorksetIterationNode ? ((WorksetIterationNode) node).getSolutionSetDelta() : ((WorksetIterationPlanNode) node).getSolutionSetDeltaPlanNode();
        DumpableNode<?> workset = node instanceof WorksetIterationNode ? ((WorksetIterationNode) node).getWorksetNode() : ((WorksetIterationPlanNode) node).getWorksetPlanNode();
        DumpableNode<?> solutionSet = node instanceof WorksetIterationNode ? ((WorksetIterationNode) node).getSolutionSetNode() : ((WorksetIterationPlanNode) node).getSolutionSetPlanNode();
        writer.print("\t\t\"step_function\": [\n");
        visit(worksetRoot, writer, true);
        visit(solutionDelta, writer, false);
        writer.print("\n\t\t],\n");
        writer.print("\t\t\"workset\": " + this.nodeIds.get(workset) + ",\n");
        writer.print("\t\t\"solution_set\": " + this.nodeIds.get(solutionSet) + ",\n");
        writer.print("\t\t\"next_workset\": " + this.nodeIds.get(worksetRoot) + ",\n");
        writer.print("\t\t\"solution_delta\": " + this.nodeIds.get(solutionDelta) + ",\n");
    }
    // print the id
    writer.print("\t\t\"id\": " + this.nodeIds.get(node));
    final String type;
    String contents;
    if (n instanceof DataSinkNode) {
        type = "sink";
        contents = n.getOperator().toString();
    } else if (n instanceof DataSourceNode) {
        type = "source";
        contents = n.getOperator().toString();
    } else if (n instanceof BulkIterationNode) {
        type = "bulk_iteration";
        contents = n.getOperator().getName();
    } else if (n instanceof WorksetIterationNode) {
        type = "workset_iteration";
        contents = n.getOperator().getName();
    } else if (n instanceof BinaryUnionNode) {
        type = "pact";
        contents = "";
    } else {
        type = "pact";
        contents = n.getOperator().getName();
    }
    contents = StringUtils.showControlCharacters(contents);
    if (encodeForHTML) {
        contents = StringEscapeUtils.escapeHtml4(contents);
        contents = contents.replace("\\", "&#92;");
    }
    String name = n.getOperatorName();
    // a "Reduce" that runs with the sorted-group-combine driver is shown as "Combine"
    if (name.equals("Reduce") && (node instanceof SingleInputPlanNode) && ((SingleInputPlanNode) node).getDriverStrategy() == DriverStrategy.SORTED_GROUP_COMBINE) {
        name = "Combine";
    }
    // output the type identifier
    writer.print(",\n\t\t\"type\": \"" + type + "\"");
    // output node name
    writer.print(",\n\t\t\"pact\": \"" + name + "\"");
    // output node contents
    writer.print(",\n\t\t\"contents\": \"" + contents + "\"");
    // parallelism
    writer.print(",\n\t\t\"parallelism\": \"" + (n.getParallelism() >= 1 ? n.getParallelism() : "default") + "\"");
    // output node predecessors
    Iterator<? extends DumpableConnection<?>> inConns = node.getDumpableInputs().iterator();
    // display names of the first and second input, used in the driver strategy text below
    String child1name = "", child2name = "";
    if (inConns.hasNext()) {
        // start predecessor list
        writer.print(",\n\t\t\"predecessors\": [");
        int inputNum = 0;
        while (inConns.hasNext()) {
            final DumpableConnection<?> inConn = inConns.next();
            final DumpableNode<?> source = inConn.getSource();
            writer.print(inputNum == 0 ? "\n" : ",\n");
            if (inputNum == 0) {
                child1name += child1name.length() > 0 ? ", " : "";
                child1name += source.getOptimizerNode().getOperator().getName() + " (id: " + this.nodeIds.get(source) + ")";
            } else if (inputNum == 1) {
                child2name += child2name.length() > 0 ? ", " : "";
                child2name += source.getOptimizerNode().getOperator().getName() + " (id: " + this.nodeIds.get(source) + ")";
            }
            // output predecessor id
            writer.print("\t\t\t{\"id\": " + this.nodeIds.get(source));
            // output connection side (only when the node has more than one input)
            if (inConns.hasNext() || inputNum > 0) {
                writer.print(", \"side\": \"" + (inputNum == 0 ? "first" : "second") + "\"");
            }
            // output shipping strategy and channel type
            final Channel channel = (inConn instanceof Channel) ? (Channel) inConn : null;
            final ShipStrategyType shipType = channel != null ? channel.getShipStrategy() : inConn.getShipStrategy();
            String shipStrategy = null;
            if (shipType != null) {
                switch(shipType) {
                    case NONE:
                        // nothing
                        break;
                    case FORWARD:
                        shipStrategy = "Forward";
                        break;
                    case BROADCAST:
                        shipStrategy = "Broadcast";
                        break;
                    case PARTITION_HASH:
                        shipStrategy = "Hash Partition";
                        break;
                    case PARTITION_RANGE:
                        shipStrategy = "Range Partition";
                        break;
                    case PARTITION_RANDOM:
                        shipStrategy = "Redistribute";
                        break;
                    case PARTITION_FORCED_REBALANCE:
                        shipStrategy = "Rebalance";
                        break;
                    case PARTITION_CUSTOM:
                        shipStrategy = "Custom Partition";
                        break;
                    default:
                        throw new CompilerException("Unknown ship strategy '" + shipType.name() + "' in JSON generator.");
                }
            }
            // Append the key description only to an actual strategy name; appending to a
            // null string would emit the literal text "null on ...".
            if (shipStrategy != null && channel != null && channel.getShipStrategyKeys() != null && channel.getShipStrategyKeys().size() > 0) {
                shipStrategy += " on " + (channel.getShipStrategySortOrder() == null ? channel.getShipStrategyKeys().toString() : Utils.createOrdering(channel.getShipStrategyKeys(), channel.getShipStrategySortOrder()).toString());
            }
            if (shipStrategy != null) {
                writer.print(", \"ship_strategy\": \"" + shipStrategy + "\"");
            }
            if (channel != null) {
                String localStrategy = null;
                switch(channel.getLocalStrategy()) {
                    case NONE:
                        break;
                    case SORT:
                        localStrategy = "Sort";
                        break;
                    case COMBININGSORT:
                        localStrategy = "Sort (combining)";
                        break;
                    default:
                        throw new CompilerException("Unknown local strategy " + channel.getLocalStrategy().name());
                }
                // Same guard as for the ship strategy: never build "null on ...".
                if (localStrategy != null && channel.getLocalStrategyKeys() != null && channel.getLocalStrategyKeys().size() > 0) {
                    localStrategy += " on " + (channel.getLocalStrategySortOrder() == null ? channel.getLocalStrategyKeys().toString() : Utils.createOrdering(channel.getLocalStrategyKeys(), channel.getLocalStrategySortOrder()).toString());
                }
                if (localStrategy != null) {
                    writer.print(", \"local_strategy\": \"" + localStrategy + "\"");
                }
                if (channel.getTempMode() != TempMode.NONE) {
                    String tempMode = channel.getTempMode().toString();
                    writer.print(", \"temp_mode\": \"" + tempMode + "\"");
                }
                String exchangeMode = channel.getDataExchangeMode().toString();
                writer.print(", \"exchange_mode\": \"" + exchangeMode + "\"");
            }
            writer.print('}');
            inputNum++;
        }
        // finish predecessors
        writer.print("\n\t\t]");
    }
    //---------------------------------------------------------------------------------------
    // the part below here is relevant only to plan nodes with concrete strategies, etc
    //---------------------------------------------------------------------------------------
    final PlanNode p = node.getPlanNode();
    if (p == null) {
        // finish node
        writer.print("\n\t}");
        return true;
    }
    // driver strategy of the node
    String locString = null;
    if (p.getDriverStrategy() != null) {
        switch(p.getDriverStrategy()) {
            case NONE:
            case BINARY_NO_OP:
                break;
            case UNARY_NO_OP:
                locString = "No-Op";
                break;
            case MAP:
                locString = "Map";
                break;
            case FLAT_MAP:
                locString = "FlatMap";
                break;
            case MAP_PARTITION:
                locString = "Map Partition";
                break;
            case ALL_REDUCE:
                locString = "Reduce All";
                break;
            case ALL_GROUP_REDUCE:
            case ALL_GROUP_REDUCE_COMBINE:
                locString = "Group Reduce All";
                break;
            case SORTED_REDUCE:
                locString = "Sorted Reduce";
                break;
            case SORTED_PARTIAL_REDUCE:
                locString = "Sorted Combine/Reduce";
                break;
            case SORTED_GROUP_REDUCE:
                locString = "Sorted Group Reduce";
                break;
            case SORTED_GROUP_COMBINE:
                locString = "Sorted Combine";
                break;
            case HYBRIDHASH_BUILD_FIRST:
                locString = "Hybrid Hash (build: " + child1name + ")";
                break;
            case HYBRIDHASH_BUILD_SECOND:
                locString = "Hybrid Hash (build: " + child2name + ")";
                break;
            case HYBRIDHASH_BUILD_FIRST_CACHED:
                locString = "Hybrid Hash (CACHED) (build: " + child1name + ")";
                break;
            case HYBRIDHASH_BUILD_SECOND_CACHED:
                locString = "Hybrid Hash (CACHED) (build: " + child2name + ")";
                break;
            case NESTEDLOOP_BLOCKED_OUTER_FIRST:
                locString = "Nested Loops (Blocked Outer: " + child1name + ")";
                break;
            case NESTEDLOOP_BLOCKED_OUTER_SECOND:
                locString = "Nested Loops (Blocked Outer: " + child2name + ")";
                break;
            case NESTEDLOOP_STREAMED_OUTER_FIRST:
                locString = "Nested Loops (Streamed Outer: " + child1name + ")";
                break;
            case NESTEDLOOP_STREAMED_OUTER_SECOND:
                locString = "Nested Loops (Streamed Outer: " + child2name + ")";
                break;
            case INNER_MERGE:
                locString = "Merge";
                break;
            case CO_GROUP:
                locString = "Co-Group";
                break;
            default:
                // strategies without a dedicated label fall back to the enum constant name
                locString = p.getDriverStrategy().name();
                break;
        }
        if (locString != null) {
            writer.print(",\n\t\t\"driver_strategy\": \"");
            writer.print(locString);
            writer.print("\"");
        }
    }
    {
        // output node global properties
        final GlobalProperties gp = p.getGlobalProperties();
        writer.print(",\n\t\t\"global_properties\": [\n");
        addProperty(writer, "Partitioning", gp.getPartitioning().name(), true);
        if (gp.getPartitioningFields() != null) {
            addProperty(writer, "Partitioned on", gp.getPartitioningFields().toString(), false);
        }
        if (gp.getPartitioningOrdering() != null) {
            addProperty(writer, "Partitioning Order", gp.getPartitioningOrdering().toString(), false);
        } else {
            addProperty(writer, "Partitioning Order", "(none)", false);
        }
        if (n.getUniqueFields() == null || n.getUniqueFields().size() == 0) {
            addProperty(writer, "Uniqueness", "not unique", false);
        } else {
            addProperty(writer, "Uniqueness", n.getUniqueFields().toString(), false);
        }
        writer.print("\n\t\t]");
    }
    {
        // output node local properties
        LocalProperties lp = p.getLocalProperties();
        writer.print(",\n\t\t\"local_properties\": [\n");
        if (lp.getOrdering() != null) {
            addProperty(writer, "Order", lp.getOrdering().toString(), true);
        } else {
            addProperty(writer, "Order", "(none)", true);
        }
        if (lp.getGroupedFields() != null && lp.getGroupedFields().size() > 0) {
            addProperty(writer, "Grouped on", lp.getGroupedFields().toString(), false);
        } else {
            addProperty(writer, "Grouping", "not grouped", false);
        }
        if (n.getUniqueFields() == null || n.getUniqueFields().size() == 0) {
            addProperty(writer, "Uniqueness", "not unique", false);
        } else {
            addProperty(writer, "Uniqueness", n.getUniqueFields().toString(), false);
        }
        writer.print("\n\t\t]");
    }
    // output node size estimates (-1 is reported as unknown)
    writer.print(",\n\t\t\"estimates\": [\n");
    addProperty(writer, "Est. Output Size", n.getEstimatedOutputSize() == -1 ? "(unknown)" : formatNumber(n.getEstimatedOutputSize(), "B"), true);
    addProperty(writer, "Est. Cardinality", n.getEstimatedNumRecords() == -1 ? "(unknown)" : formatNumber(n.getEstimatedNumRecords()), false);
    writer.print("\t\t]");
    // output node cost
    if (p.getNodeCosts() != null) {
        writer.print(",\n\t\t\"costs\": [\n");
        addProperty(writer, "Network", p.getNodeCosts().getNetworkCost() == -1 ? "(unknown)" : formatNumber(p.getNodeCosts().getNetworkCost(), "B"), true);
        addProperty(writer, "Disk I/O", p.getNodeCosts().getDiskCost() == -1 ? "(unknown)" : formatNumber(p.getNodeCosts().getDiskCost(), "B"), false);
        addProperty(writer, "CPU", p.getNodeCosts().getCpuCost() == -1 ? "(unknown)" : formatNumber(p.getNodeCosts().getCpuCost(), ""), false);
        addProperty(writer, "Cumulative Network", p.getCumulativeCosts().getNetworkCost() == -1 ? "(unknown)" : formatNumber(p.getCumulativeCosts().getNetworkCost(), "B"), false);
        addProperty(writer, "Cumulative Disk I/O", p.getCumulativeCosts().getDiskCost() == -1 ? "(unknown)" : formatNumber(p.getCumulativeCosts().getDiskCost(), "B"), false);
        addProperty(writer, "Cumulative CPU", p.getCumulativeCosts().getCpuCost() == -1 ? "(unknown)" : formatNumber(p.getCumulativeCosts().getCpuCost(), ""), false);
        writer.print("\n\t\t]");
    }
    // output the node compiler hints; a hint equal to the default value is shown as "(none)"
    if (n.getOperator().getCompilerHints() != null) {
        CompilerHints hints = n.getOperator().getCompilerHints();
        CompilerHints defaults = new CompilerHints();
        String size = hints.getOutputSize() == defaults.getOutputSize() ? "(none)" : String.valueOf(hints.getOutputSize());
        String card = hints.getOutputCardinality() == defaults.getOutputCardinality() ? "(none)" : String.valueOf(hints.getOutputCardinality());
        String width = hints.getAvgOutputRecordSize() == defaults.getAvgOutputRecordSize() ? "(none)" : String.valueOf(hints.getAvgOutputRecordSize());
        String filter = hints.getFilterFactor() == defaults.getFilterFactor() ? "(none)" : String.valueOf(hints.getFilterFactor());
        writer.print(",\n\t\t\"compiler_hints\": [\n");
        addProperty(writer, "Output Size (bytes)", size, true);
        addProperty(writer, "Output Cardinality", card, false);
        addProperty(writer, "Avg. Output Record Size (bytes)", width, false);
        addProperty(writer, "Filter Factor", filter, false);
        writer.print("\t\t]");
    }
    // finish node
    writer.print("\n\t}");
    return true;
}
Also used : DataSourceNode(org.apache.flink.optimizer.dag.DataSourceNode) CompilerHints(org.apache.flink.api.common.operators.CompilerHints) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) DataSinkNode(org.apache.flink.optimizer.dag.DataSinkNode) Channel(org.apache.flink.optimizer.plan.Channel) BinaryUnionNode(org.apache.flink.optimizer.dag.BinaryUnionNode) BulkIterationNode(org.apache.flink.optimizer.dag.BulkIterationNode) ShipStrategyType(org.apache.flink.runtime.operators.shipping.ShipStrategyType) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) OptimizerNode(org.apache.flink.optimizer.dag.OptimizerNode) WorksetIterationNode(org.apache.flink.optimizer.dag.WorksetIterationNode) GlobalProperties(org.apache.flink.optimizer.dataproperties.GlobalProperties) CompilerException(org.apache.flink.optimizer.CompilerException) LocalProperties(org.apache.flink.optimizer.dataproperties.LocalProperties) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode)

Example 49 with SingleInputPlanNode

use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.

the class JobGraphGenerator method createSingleInputVertex.

// ------------------------------------------------------------------------
// Methods for creating individual vertices
// ------------------------------------------------------------------------
/**
 * Translates a single-input plan node into its runtime representation.
 *
 * <p>The node is either chained to its predecessor or becomes a stand-alone vertex:
 * <ul>
 *   <li>Chained: a {@code TaskInChain} is registered in {@code chainedTasks} under the node and
 *       {@code null} is returned.</li>
 *   <li>Stand-alone: a new {@code JobVertex} named after the node is created, configured and
 *       returned.</li>
 * </ul>
 * In both cases the task configuration receives the user code wrapper, the stub parameters, the
 * driver strategy, the driver comparators and the driver resources.
 *
 * @param node the single-input plan node to translate
 * @return the newly created vertex, or {@code null} if the node was chained
 * @throws CompilerException declared by the signature; not thrown directly in this method
 */
private JobVertex createSingleInputVertex(SingleInputPlanNode node) throws CompilerException {
    final String taskName = node.getNodeName();
    final DriverStrategy ds = node.getDriverStrategy();
    final Channel input = node.getInput();
    final PlanNode predecessor = input.getSource();

    // check, whether chaining is possible
    boolean chaining = ds.getPushChainDriverClass() != null
            // first op after union is stand-alone, because union is merged
            && !(predecessor instanceof NAryUnionPlanNode)
            // partial solution merges anyways
            && !(predecessor instanceof BulkPartialSolutionPlanNode)
            // workset merges anyways
            && !(predecessor instanceof WorksetPlanNode)
            // cannot chain with iteration heads currently
            && !(predecessor instanceof IterationPlanNode)
            && input.getShipStrategy() == ShipStrategyType.FORWARD
            && input.getLocalStrategy() == LocalStrategy.NONE
            && predecessor.getOutgoingChannels().size() == 1
            && node.getParallelism() == predecessor.getParallelism()
            && node.getBroadcastInputs().isEmpty();

    // inside a workset iteration: a node that has successors of its own is not chained to the
    // node producing the solution set delta or the next workset
    // (instanceof is false for null, so no separate null check is required)
    if (this.currentIteration instanceof WorksetIterationPlanNode && node.getOutgoingChannels().size() > 0) {
        final WorksetIterationPlanNode worksetIteration = (WorksetIterationPlanNode) this.currentIteration;
        if (worksetIteration.getSolutionSetDeltaPlanNode() == predecessor
                || worksetIteration.getNextWorkSetPlanNode() == predecessor) {
            chaining = false;
        }
    }

    // cannot chain the nodes that produce the next workset in a bulk iteration if a termination criterion follows
    if (this.currentIteration instanceof BulkIterationPlanNode) {
        final BulkIterationPlanNode bulkIteration = (BulkIterationPlanNode) this.currentIteration;
        final boolean terminationRootFollowsStepRoot =
                node == bulkIteration.getRootOfTerminationCriterion()
                        && bulkIteration.getRootOfStepFunction() == predecessor;
        if (terminationRootFollowsStepRoot
                || (node.getOutgoingChannels().size() > 0
                        && (bulkIteration.getRootOfStepFunction() == predecessor
                                || bulkIteration.getRootOfTerminationCriterion() == predecessor))) {
            chaining = false;
        }
    }

    final JobVertex vertex;
    final TaskConfig config;
    if (!chaining) {
        // stand-alone task: create the vertex and let the config write into its configuration
        vertex = new JobVertex(taskName);
        vertex.setResources(node.getMinResources(), node.getPreferredResources());
        final boolean onDynamicPathOfIteration = this.currentIteration != null && node.isOnDynamicPath();
        vertex.setInvokableClass(onDynamicPathOfIteration ? IterationIntermediateTask.class : BatchTask.class);
        config = new TaskConfig(vertex.getConfiguration());
        config.setDriver(ds.getDriverClass());
    } else {
        // chained task: no vertex of its own, the config is backed by a fresh configuration
        vertex = null;
        config = new TaskConfig(new Configuration());
        this.chainedTasks.put(node, new TaskInChain(node, ds.getPushChainDriverClass(), config, taskName));
    }

    // set user code
    config.setStubWrapper(node.getProgramOperator().getUserCodeWrapper());
    config.setStubParameters(node.getProgramOperator().getParameters());

    // set the driver strategy together with the comparators it requires
    config.setDriverStrategy(ds);
    for (int comparatorIndex = 0; comparatorIndex < ds.getNumRequiredComparators(); comparatorIndex++) {
        config.setDriverComparator(node.getComparator(comparatorIndex), comparatorIndex);
    }

    // assign memory, file-handles, etc.
    assignDriverResources(node, config);
    return vertex;
}
Also used : Configuration(org.apache.flink.configuration.Configuration) GlobalConfiguration(org.apache.flink.configuration.GlobalConfiguration) BulkPartialSolutionPlanNode(org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) BatchTask(org.apache.flink.runtime.operators.BatchTask) Channel(org.apache.flink.optimizer.plan.Channel) NamedChannel(org.apache.flink.optimizer.plan.NamedChannel) TaskConfig(org.apache.flink.runtime.operators.util.TaskConfig) IterationPlanNode(org.apache.flink.optimizer.plan.IterationPlanNode) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) SolutionSetPlanNode(org.apache.flink.optimizer.plan.SolutionSetPlanNode) IterationPlanNode(org.apache.flink.optimizer.plan.IterationPlanNode) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) WorksetPlanNode(org.apache.flink.optimizer.plan.WorksetPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) BulkPartialSolutionPlanNode(org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) WorksetPlanNode(org.apache.flink.optimizer.plan.WorksetPlanNode) DriverStrategy(org.apache.flink.runtime.operators.DriverStrategy) IterationIntermediateTask(org.apache.flink.runtime.iterative.task.IterationIntermediateTask) 
BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode)

Example 50 with SingleInputPlanNode

use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.

the class GenericFlatTypePostPass method traverse.

/**
 * Recursively walks the plan starting at {@code node}, accumulating schema information per node
 * and, when requested, creating the runtime utilities (serializers, comparators) for nodes and
 * their input channels.
 *
 * <p>The handling depends on the concrete node type: data sinks, data sources, bulk iterations,
 * workset iterations, single-input nodes, dual-input nodes, unions, and the sources of iterative
 * step functions (partial solution, solution set, workset). Any other node type is rejected.
 *
 * <p>Nodes keep their schema in a {@code postPassHelper} field. Each call through a node
 * increments the schema's contribution counter; processing of the node only continues once the
 * counter has reached the number of the node's outgoing channels.
 *
 * @param node the plan node to process
 * @param parentSchema the schema handed down by the caller (the consumer side of the node)
 * @param createUtilities whether serializers and comparators are created during this traversal;
 *        {@code false} yields a schema-only pass
 * @throws CompilerPostPassException if type information is conflicting or missing, or if the
 *         node type is unknown
 * @throws CompilerException if an iteration's step function result is produced by a union node,
 *         or if a schema expected after the first step-function traversal is missing
 */
@SuppressWarnings("unchecked")
protected void traverse(PlanNode node, T parentSchema, boolean createUtilities) {
    // distinguish the node types
    if (node instanceof SinkPlanNode) {
        // data sink: starts with a fresh schema; parentSchema is not used in this branch
        SinkPlanNode sn = (SinkPlanNode) node;
        Channel inchannel = sn.getInput();
        T schema = createEmptySchema();
        sn.postPassHelper = schema;
        // add the sinks information to the schema
        // NOTE(review): the message below contains the typo "infomation" and the caught
        // exception is not attached as cause
        try {
            getSinkSchema(sn, schema);
        } catch (ConflictingFieldTypeInfoException e) {
            throw new CompilerPostPassException("Conflicting type infomation for the data sink '" + sn.getSinkNode().getOperator().getName() + "'.");
        }
        // descend to the input channel
        try {
            propagateToChannel(schema, inchannel, createUtilities);
        } catch (MissingFieldTypeInfoException ex) {
            throw new CompilerPostPassException("Missing type infomation for the channel that inputs to the data sink '" + sn.getSinkNode().getOperator().getName() + "'.");
        }
    } else if (node instanceof SourcePlanNode) {
        // data source: only needs a serializer, built from the schema handed down by the caller
        if (createUtilities) {
            ((SourcePlanNode) node).setSerializer(createSerializer(parentSchema, node));
        // nothing else to be done here. the source has no input and no strategy itself
        }
    } else if (node instanceof BulkIterationPlanNode) {
        BulkIterationPlanNode iterationNode = (BulkIterationPlanNode) node;
        // get the node's current schema, creating and attaching it on the first visit
        T schema;
        if (iterationNode.postPassHelper == null) {
            schema = createEmptySchema();
            iterationNode.postPassHelper = schema;
        } else {
            schema = (T) iterationNode.postPassHelper;
        }
        schema.increaseNumConnectionsThatContributed();
        // add the parent schema to the schema
        if (propagateParentSchemaDown) {
            addSchemaToSchema(parentSchema, schema, iterationNode.getProgramOperator().getName());
        }
        // stop here while fewer connections have contributed than the node has outgoing
        // channels; presumably a later call through another channel resumes the processing
        if (schema.getNumConnectionsThatContributed() < iterationNode.getOutgoingChannels().size()) {
            return;
        }
        if (iterationNode.getRootOfStepFunction() instanceof NAryUnionPlanNode) {
            throw new CompilerException("Optimizer cannot compile an iteration step function where next partial solution is created by a Union node.");
        }
        // traverse the termination criterion for the first time. create schema only, no utilities. Needed in case of intermediate termination criterion
        if (iterationNode.getRootOfTerminationCriterion() != null) {
            // the root of the termination criterion is cast to a single-input node; traversal
            // starts at that node's input source with an empty schema
            SingleInputPlanNode addMapper = (SingleInputPlanNode) iterationNode.getRootOfTerminationCriterion();
            traverse(addMapper.getInput().getSource(), createEmptySchema(), false);
            try {
                addMapper.getInput().setSerializer(createSerializer(createEmptySchema()));
            } catch (MissingFieldTypeInfoException e) {
                throw new RuntimeException(e);
            }
        }
        // traverse the step function for the first time. create schema only, no utilities
        traverse(iterationNode.getRootOfStepFunction(), schema, false);
        // the partial solution node's schema is expected to be set after that first traversal
        T pss = (T) iterationNode.getPartialSolutionPlanNode().postPassHelper;
        if (pss == null) {
            throw new CompilerException("Error in Optimizer Post Pass: Partial solution schema is null after first traversal of the step function.");
        }
        // traverse the step function for the second time, taking the schema of the partial solution
        traverse(iterationNode.getRootOfStepFunction(), pss, createUtilities);
        // second pass over the termination criterion, this time honoring createUtilities
        if (iterationNode.getRootOfTerminationCriterion() != null) {
            SingleInputPlanNode addMapper = (SingleInputPlanNode) iterationNode.getRootOfTerminationCriterion();
            traverse(addMapper.getInput().getSource(), createEmptySchema(), createUtilities);
            try {
                addMapper.getInput().setSerializer(createSerializer(createEmptySchema()));
            } catch (MissingFieldTypeInfoException e) {
                throw new RuntimeException(e);
            }
        }
        // take the schema from the partial solution node and add its fields to the iteration result schema.
        // input and output schema need to be identical, so this is essentially a sanity check
        addSchemaToSchema(pss, schema, iterationNode.getProgramOperator().getName());
        // set the serializer
        if (createUtilities) {
            iterationNode.setSerializerForIterationChannel(createSerializer(pss, iterationNode.getPartialSolutionPlanNode()));
        }
        // done, we can now propagate our info down
        try {
            propagateToChannel(schema, iterationNode.getInput(), createUtilities);
        } catch (MissingFieldTypeInfoException e) {
            throw new CompilerPostPassException("Could not set up runtime strategy for input channel to node '" + iterationNode.getProgramOperator().getName() + "'. Missing type information for key field " + e.getFieldNumber());
        }
    } else if (node instanceof WorksetIterationPlanNode) {
        WorksetIterationPlanNode iterationNode = (WorksetIterationPlanNode) node;
        // get the node's current schema, creating and attaching it on the first visit
        T schema;
        if (iterationNode.postPassHelper == null) {
            schema = createEmptySchema();
            iterationNode.postPassHelper = schema;
        } else {
            schema = (T) iterationNode.postPassHelper;
        }
        schema.increaseNumConnectionsThatContributed();
        // add the parent schema to the schema (which refers to the solution set schema)
        if (propagateParentSchemaDown) {
            addSchemaToSchema(parentSchema, schema, iterationNode.getProgramOperator().getName());
        }
        // stop here while fewer connections have contributed than the node has outgoing
        // channels; presumably a later call through another channel resumes the processing
        if (schema.getNumConnectionsThatContributed() < iterationNode.getOutgoingChannels().size()) {
            return;
        }
        if (iterationNode.getNextWorkSetPlanNode() instanceof NAryUnionPlanNode) {
            throw new CompilerException("Optimizer cannot compile a workset iteration step function where the next workset is produced by a Union node.");
        }
        if (iterationNode.getSolutionSetDeltaPlanNode() instanceof NAryUnionPlanNode) {
            throw new CompilerException("Optimizer cannot compile a workset iteration step function where the solution set delta is produced by a Union node.");
        }
        // traverse the step function
        // pass an empty schema to the next workset and the parent schema to the solution set delta
        // these first traversals are schema only
        traverse(iterationNode.getNextWorkSetPlanNode(), createEmptySchema(), false);
        traverse(iterationNode.getSolutionSetDeltaPlanNode(), schema, false);
        // wss: schema attached to the workset node, sss: schema attached to the solution set node
        T wss = (T) iterationNode.getWorksetPlanNode().postPassHelper;
        T sss = (T) iterationNode.getSolutionSetPlanNode().postPassHelper;
        if (wss == null) {
            throw new CompilerException("Error in Optimizer Post Pass: Workset schema is null after first traversal of the step function.");
        }
        if (sss == null) {
            throw new CompilerException("Error in Optimizer Post Pass: Solution set schema is null after first traversal of the step function.");
        }
        // make the second pass and instantiate the utilities
        traverse(iterationNode.getNextWorkSetPlanNode(), wss, createUtilities);
        traverse(iterationNode.getSolutionSetDeltaPlanNode(), sss, createUtilities);
        // the solution set input and the result must have the same schema, this acts as a sanity check.
        // every (position, type) entry of the solution set schema is added to the result schema;
        // a conflicting entry surfaces as ConflictingFieldTypeInfoException
        try {
            for (Map.Entry<Integer, X> entry : sss) {
                Integer pos = entry.getKey();
                schema.addType(pos, entry.getValue());
            }
        } catch (ConflictingFieldTypeInfoException e) {
            throw new CompilerPostPassException("Conflicting type information for field " + e.getFieldNumber() + " in node '" + iterationNode.getProgramOperator().getName() + "'. Contradicting types between the " + "result of the iteration and the solution set schema: " + e.getPreviousType() + " and " + e.getNewType() + ". Most probable cause: Invalid constant field annotations.");
        }
        // set the serializers and comparators
        if (createUtilities) {
            WorksetIterationNode optNode = iterationNode.getIterationNode();
            iterationNode.setWorksetSerializer(createSerializer(wss, iterationNode.getWorksetPlanNode()));
            iterationNode.setSolutionSetSerializer(createSerializer(sss, iterationNode.getSolutionSetPlanNode()));
            try {
                // comparator over the solution set key fields; null is passed for the sort orders
                iterationNode.setSolutionSetComparator(createComparator(optNode.getSolutionSetKeyFields(), null, sss));
            } catch (MissingFieldTypeInfoException ex) {
                throw new CompilerPostPassException("Could not set up the solution set for workset iteration '" + optNode.getOperator().getName() + "'. Missing type information for key field " + ex.getFieldNumber() + '.');
            }
        }
        // done, we can now propagate our info down
        // the result schema goes to the initial solution set input, the workset schema to the
        // initial workset input
        try {
            propagateToChannel(schema, iterationNode.getInitialSolutionSetInput(), createUtilities);
            propagateToChannel(wss, iterationNode.getInitialWorksetInput(), createUtilities);
        } catch (MissingFieldTypeInfoException ex) {
            throw new CompilerPostPassException("Could not set up runtime strategy for input channel to node '" + iterationNode.getProgramOperator().getName() + "'. Missing type information for key field " + ex.getFieldNumber());
        }
    } else if (node instanceof SingleInputPlanNode) {
        SingleInputPlanNode sn = (SingleInputPlanNode) node;
        // get the node's current schema, creating and attaching it on the first visit
        T schema;
        if (sn.postPassHelper == null) {
            schema = createEmptySchema();
            sn.postPassHelper = schema;
        } else {
            schema = (T) sn.postPassHelper;
        }
        schema.increaseNumConnectionsThatContributed();
        SingleInputNode optNode = sn.getSingleInputNode();
        // add the parent schema to the schema
        if (propagateParentSchemaDown) {
            addSchemaToSchema(parentSchema, schema, optNode, 0);
        }
        // stop here while fewer connections have contributed than the node has outgoing
        // channels; presumably a later call through another channel resumes the processing
        if (schema.getNumConnectionsThatContributed() < sn.getOutgoingChannels().size()) {
            return;
        }
        // add the nodes local information
        try {
            getSingleInputNodeSchema(sn, schema);
        } catch (ConflictingFieldTypeInfoException e) {
            throw new CompilerPostPassException(getConflictingTypeErrorMessage(e, optNode.getOperator().getName()));
        }
        if (createUtilities) {
            // parameterize the node's driver strategy: one comparator per required comparator
            for (int i = 0; i < sn.getDriverStrategy().getNumRequiredComparators(); i++) {
                try {
                    sn.setComparator(createComparator(sn.getKeys(i), sn.getSortOrders(i), schema), i);
                } catch (MissingFieldTypeInfoException e) {
                    throw new CompilerPostPassException("Could not set up runtime strategy for node '" + optNode.getOperator().getName() + "'. Missing type information for key field " + e.getFieldNumber());
                }
            }
        }
        // done, we can now propagate our info down
        try {
            propagateToChannel(schema, sn.getInput(), createUtilities);
        } catch (MissingFieldTypeInfoException e) {
            throw new CompilerPostPassException("Could not set up runtime strategy for input channel to node '" + optNode.getOperator().getName() + "'. Missing type information for field " + e.getFieldNumber());
        }
        // don't forget the broadcast inputs; each one is propagated with its own empty schema
        for (Channel c : sn.getBroadcastInputs()) {
            try {
                propagateToChannel(createEmptySchema(), c, createUtilities);
            } catch (MissingFieldTypeInfoException e) {
                throw new CompilerPostPassException("Could not set up runtime strategy for broadcast channel in node '" + optNode.getOperator().getName() + "'. Missing type information for field " + e.getFieldNumber());
            }
        }
    } else if (node instanceof DualInputPlanNode) {
        DualInputPlanNode dn = (DualInputPlanNode) node;
        // get the node's current schemas (one per input), creating and attaching both on the
        // first visit; only postPassHelper1 is checked to detect the first visit
        T schema1;
        T schema2;
        if (dn.postPassHelper1 == null) {
            schema1 = createEmptySchema();
            schema2 = createEmptySchema();
            dn.postPassHelper1 = schema1;
            dn.postPassHelper2 = schema2;
        } else {
            schema1 = (T) dn.postPassHelper1;
            schema2 = (T) dn.postPassHelper2;
        }
        schema1.increaseNumConnectionsThatContributed();
        schema2.increaseNumConnectionsThatContributed();
        TwoInputNode optNode = dn.getTwoInputNode();
        // add the parent schema to the schema
        if (propagateParentSchemaDown) {
            addSchemaToSchema(parentSchema, schema1, optNode, 0);
            addSchemaToSchema(parentSchema, schema2, optNode, 1);
        }
        // stop here while fewer connections have contributed than the node has outgoing
        // channels; only schema1's counter is consulted, both counters are incremented together
        if (schema1.getNumConnectionsThatContributed() < dn.getOutgoingChannels().size()) {
            return;
        }
        // add the nodes local information
        try {
            getDualInputNodeSchema(dn, schema1, schema2);
        } catch (ConflictingFieldTypeInfoException e) {
            throw new CompilerPostPassException(getConflictingTypeErrorMessage(e, optNode.getOperator().getName()));
        }
        // parameterize the node's driver strategy
        if (createUtilities) {
            if (dn.getDriverStrategy().getNumRequiredComparators() > 0) {
                // set the individual comparators
                try {
                    dn.setComparator1(createComparator(dn.getKeysForInput1(), dn.getSortOrders(), schema1));
                    dn.setComparator2(createComparator(dn.getKeysForInput2(), dn.getSortOrders(), schema2));
                } catch (MissingFieldTypeInfoException e) {
                    throw new CompilerPostPassException("Could not set up runtime strategy for node '" + optNode.getOperator().getName() + "'. Missing type information for field " + e.getFieldNumber());
                }
                // set the pair comparator
                try {
                    dn.setPairComparator(createPairComparator(dn.getKeysForInput1(), dn.getKeysForInput2(), dn.getSortOrders(), schema1, schema2));
                } catch (MissingFieldTypeInfoException e) {
                    throw new CompilerPostPassException("Could not set up runtime strategy for node '" + optNode.getOperator().getName() + "'. Missing type information for field " + e.getFieldNumber());
                }
            }
        }
        // done, we can now propagate our info down
        try {
            propagateToChannel(schema1, dn.getInput1(), createUtilities);
        } catch (MissingFieldTypeInfoException e) {
            throw new CompilerPostPassException("Could not set up runtime strategy for the first input channel to node '" + optNode.getOperator().getName() + "'. Missing type information for field " + e.getFieldNumber());
        }
        try {
            propagateToChannel(schema2, dn.getInput2(), createUtilities);
        } catch (MissingFieldTypeInfoException e) {
            throw new CompilerPostPassException("Could not set up runtime strategy for the second input channel to node '" + optNode.getOperator().getName() + "'. Missing type information for field " + e.getFieldNumber());
        }
        // don't forget the broadcast inputs; each one is propagated with its own empty schema
        for (Channel c : dn.getBroadcastInputs()) {
            try {
                propagateToChannel(createEmptySchema(), c, createUtilities);
            } catch (MissingFieldTypeInfoException e) {
                throw new CompilerPostPassException("Could not set up runtime strategy for broadcast channel in node '" + optNode.getOperator().getName() + "'. Missing type information for field " + e.getFieldNumber());
            }
        }
    } else if (node instanceof NAryUnionPlanNode) {
        // only propagate the info down: the parent schema is handed unchanged to every input
        try {
            for (Channel channel : node.getInputs()) {
                propagateToChannel(parentSchema, channel, createUtilities);
            }
        } catch (MissingFieldTypeInfoException ex) {
            throw new CompilerPostPassException("Could not set up runtime strategy for the input channel to " + " a union node. Missing type information for field " + ex.getFieldNumber());
        }
    } else // catch the sources of the iterative step functions
    if (node instanceof BulkPartialSolutionPlanNode || node instanceof SolutionSetPlanNode || node instanceof WorksetPlanNode) {
        // get the node's current schema (created and attached on the first visit) and a
        // descriptive name that is passed to addSchemaToSchema below
        T schema;
        String name;
        if (node instanceof BulkPartialSolutionPlanNode) {
            BulkPartialSolutionPlanNode psn = (BulkPartialSolutionPlanNode) node;
            if (psn.postPassHelper == null) {
                schema = createEmptySchema();
                psn.postPassHelper = schema;
            } else {
                schema = (T) psn.postPassHelper;
            }
            name = "partial solution of bulk iteration '" + psn.getPartialSolutionNode().getIterationNode().getOperator().getName() + "'";
        } else if (node instanceof SolutionSetPlanNode) {
            SolutionSetPlanNode ssn = (SolutionSetPlanNode) node;
            if (ssn.postPassHelper == null) {
                schema = createEmptySchema();
                ssn.postPassHelper = schema;
            } else {
                schema = (T) ssn.postPassHelper;
            }
            name = "solution set of workset iteration '" + ssn.getSolutionSetNode().getIterationNode().getOperator().getName() + "'";
        } else if (node instanceof WorksetPlanNode) {
            WorksetPlanNode wsn = (WorksetPlanNode) node;
            if (wsn.postPassHelper == null) {
                schema = createEmptySchema();
                wsn.postPassHelper = schema;
            } else {
                schema = (T) wsn.postPassHelper;
            }
            name = "workset of workset iteration '" + wsn.getWorksetNode().getIterationNode().getOperator().getName() + "'";
        } else {
            // not reachable given the enclosing condition, which admits only the three types
            // handled above
            throw new CompilerException();
        }
        schema.increaseNumConnectionsThatContributed();
        // add the parent schema to the schema; these nodes have no input, so nothing is
        // propagated further in this branch
        addSchemaToSchema(parentSchema, schema, name);
    } else {
        throw new CompilerPostPassException("Unknown node type encountered: " + node.getClass().getName());
    }
}
Also used : SingleInputNode(org.apache.flink.optimizer.dag.SingleInputNode) SolutionSetPlanNode(org.apache.flink.optimizer.plan.SolutionSetPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) BulkPartialSolutionPlanNode(org.apache.flink.optimizer.plan.BulkPartialSolutionPlanNode) Channel(org.apache.flink.optimizer.plan.Channel) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) WorksetIterationNode(org.apache.flink.optimizer.dag.WorksetIterationNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) SourcePlanNode(org.apache.flink.optimizer.plan.SourcePlanNode) CompilerException(org.apache.flink.optimizer.CompilerException) WorksetPlanNode(org.apache.flink.optimizer.plan.WorksetPlanNode) CompilerPostPassException(org.apache.flink.optimizer.CompilerPostPassException) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) TwoInputNode(org.apache.flink.optimizer.dag.TwoInputNode)

Aggregations

SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode)104 Test (org.junit.Test)83 OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan)81 Plan (org.apache.flink.api.common.Plan)73 SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode)72 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)71 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)38 Channel (org.apache.flink.optimizer.plan.Channel)32 SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode)32 FieldList (org.apache.flink.api.common.operators.util.FieldList)31 DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode)28 DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat)26 InvalidProgramException (org.apache.flink.api.common.InvalidProgramException)18 JobGraphGenerator (org.apache.flink.optimizer.plantranslate.JobGraphGenerator)16 NAryUnionPlanNode (org.apache.flink.optimizer.plan.NAryUnionPlanNode)14 PlanNode (org.apache.flink.optimizer.plan.PlanNode)14 IdentityGroupReducerCombinable (org.apache.flink.optimizer.testfunctions.IdentityGroupReducerCombinable)14 IdentityMapper (org.apache.flink.optimizer.testfunctions.IdentityMapper)14 GlobalProperties (org.apache.flink.optimizer.dataproperties.GlobalProperties)13 LocalProperties (org.apache.flink.optimizer.dataproperties.LocalProperties)13