Search in sources :

Example 6 with DataSinkNode

use of org.apache.flink.optimizer.dag.DataSinkNode in project flink by apache.

the class GraphCreatingVisitor method preVisit.

/**
 * Creates the optimizer node that corresponds to the given operator, the first time the
 * operator is encountered, and registers it in the operator-to-node map.
 *
 * <p>Data sinks are additionally collected in the list of sinks. Placeholder operators of
 * iterations (partial solution, workset, solution set) are bound to the node of their
 * containing iteration, which is looked up in the parent visitor, and take over that
 * node's parallelism. Any node whose parallelism is still below one afterwards receives
 * the operator's parallelism or the default parallelism.
 *
 * @param c the operator to translate
 * @return {@code false} if the operator has been visited before, {@code true} otherwise
 * @throws InvalidProgramException if an iteration placeholder is found while this visitor has no parent
 * @throws IllegalArgumentException if the operator type is not known
 */
@SuppressWarnings("deprecation")
@Override
public boolean preVisit(Operator<?> c) {
    // operators that already have a node must not be translated a second time
    if (this.con2node.containsKey(c)) {
        return false;
    }

    // the order of the type checks is significant and must not be changed
    final OptimizerNode node;
    if (c instanceof GenericDataSinkBase) {
        final DataSinkNode sinkNode = new DataSinkNode((GenericDataSinkBase<?>) c);
        this.sinks.add(sinkNode);
        node = sinkNode;
    } else if (c instanceof GenericDataSourceBase) {
        node = new DataSourceNode((GenericDataSourceBase<?, ?>) c);
    } else if (c instanceof MapOperatorBase) {
        node = new MapNode((MapOperatorBase<?, ?, ?>) c);
    } else if (c instanceof MapPartitionOperatorBase) {
        node = new MapPartitionNode((MapPartitionOperatorBase<?, ?, ?>) c);
    } else if (c instanceof FlatMapOperatorBase) {
        node = new FlatMapNode((FlatMapOperatorBase<?, ?, ?>) c);
    } else if (c instanceof FilterOperatorBase) {
        node = new FilterNode((FilterOperatorBase<?, ?>) c);
    } else if (c instanceof ReduceOperatorBase) {
        node = new ReduceNode((ReduceOperatorBase<?, ?>) c);
    } else if (c instanceof GroupCombineOperatorBase) {
        node = new GroupCombineNode((GroupCombineOperatorBase<?, ?, ?>) c);
    } else if (c instanceof GroupReduceOperatorBase) {
        node = new GroupReduceNode((GroupReduceOperatorBase<?, ?, ?>) c);
    } else if (c instanceof InnerJoinOperatorBase) {
        node = new JoinNode((InnerJoinOperatorBase<?, ?, ?, ?>) c);
    } else if (c instanceof OuterJoinOperatorBase) {
        node = new OuterJoinNode((OuterJoinOperatorBase<?, ?, ?, ?>) c);
    } else if (c instanceof CoGroupOperatorBase) {
        node = new CoGroupNode((CoGroupOperatorBase<?, ?, ?, ?>) c);
    } else if (c instanceof CoGroupRawOperatorBase) {
        node = new CoGroupRawNode((CoGroupRawOperatorBase<?, ?, ?, ?>) c);
    } else if (c instanceof CrossOperatorBase) {
        node = new CrossNode((CrossOperatorBase<?, ?, ?, ?>) c);
    } else if (c instanceof BulkIterationBase) {
        node = new BulkIterationNode((BulkIterationBase<?>) c);
    } else if (c instanceof DeltaIterationBase) {
        node = new WorksetIterationNode((DeltaIterationBase<?, ?>) c);
    } else if (c instanceof Union) {
        node = new BinaryUnionNode((Union<?>) c);
    } else if (c instanceof PartitionOperatorBase) {
        node = new PartitionNode((PartitionOperatorBase<?>) c);
    } else if (c instanceof SortPartitionOperatorBase) {
        node = new SortPartitionNode((SortPartitionOperatorBase<?>) c);
    } else if (c instanceof BulkIterationBase.PartialSolutionPlaceHolder) {
        // placeholders are only resolvable through the visitor of the enclosing data flow
        if (this.parent == null) {
            throw new InvalidProgramException("It is currently not supported to create data sinks inside iterations.");
        }
        final BulkIterationBase.PartialSolutionPlaceHolder<?> placeholder = (BulkIterationBase.PartialSolutionPlaceHolder<?>) c;
        final BulkIterationBase<?> iteration = placeholder.getContainingBulkIteration();
        final BulkIterationNode iterationNode = (BulkIterationNode) this.parent.con2node.get(iteration);

        final BulkPartialSolutionNode partialSolution = new BulkPartialSolutionNode(placeholder, iterationNode);
        partialSolution.setParallelism(iterationNode.getParallelism());
        node = partialSolution;
    } else if (c instanceof DeltaIterationBase.WorksetPlaceHolder) {
        if (this.parent == null) {
            throw new InvalidProgramException("It is currently not supported to create data sinks inside iterations.");
        }
        final DeltaIterationBase.WorksetPlaceHolder<?> placeholder = (DeltaIterationBase.WorksetPlaceHolder<?>) c;
        final DeltaIterationBase<?, ?> iteration = placeholder.getContainingWorksetIteration();
        final WorksetIterationNode iterationNode = (WorksetIterationNode) this.parent.con2node.get(iteration);

        final WorksetNode workset = new WorksetNode(placeholder, iterationNode);
        workset.setParallelism(iterationNode.getParallelism());
        node = workset;
    } else if (c instanceof DeltaIterationBase.SolutionSetPlaceHolder) {
        if (this.parent == null) {
            throw new InvalidProgramException("It is currently not supported to create data sinks inside iterations.");
        }
        final DeltaIterationBase.SolutionSetPlaceHolder<?> placeholder = (DeltaIterationBase.SolutionSetPlaceHolder<?>) c;
        final DeltaIterationBase<?, ?> iteration = placeholder.getContainingWorksetIteration();
        final WorksetIterationNode iterationNode = (WorksetIterationNode) this.parent.con2node.get(iteration);

        final SolutionSetNode solutionSet = new SolutionSetNode(placeholder, iterationNode);
        solutionSet.setParallelism(iterationNode.getParallelism());
        node = solutionSet;
    } else {
        throw new IllegalArgumentException("Unknown operator type: " + c);
    }

    this.con2node.put(c, node);

    // nodes that did not receive a parallelism above get one assigned here
    if (node.getParallelism() < 1) {
        int parallelism = c.getParallelism();
        if (parallelism <= 0) {
            // the operator does not define a parallelism of its own
            parallelism = this.defaultParallelism;
        } else if (this.forceParallelism && parallelism != this.defaultParallelism) {
            // the operator's own setting is overridden when the parallelism is forced
            parallelism = this.defaultParallelism;
            Optimizer.LOG.warn("The parallelism of nested dataflows (such as step functions in iterations) is " + "currently fixed to the parallelism of the surrounding operator (the iteration).");
        }
        node.setParallelism(parallelism);
    }
    return true;
}
Also used : FlatMapOperatorBase(org.apache.flink.api.common.operators.base.FlatMapOperatorBase) GroupReduceOperatorBase(org.apache.flink.api.common.operators.base.GroupReduceOperatorBase) ReduceOperatorBase(org.apache.flink.api.common.operators.base.ReduceOperatorBase) FilterNode(org.apache.flink.optimizer.dag.FilterNode) CrossOperatorBase(org.apache.flink.api.common.operators.base.CrossOperatorBase) CoGroupOperatorBase(org.apache.flink.api.common.operators.base.CoGroupOperatorBase) BulkPartialSolutionNode(org.apache.flink.optimizer.dag.BulkPartialSolutionNode) DeltaIterationBase(org.apache.flink.api.common.operators.base.DeltaIterationBase) MapPartitionNode(org.apache.flink.optimizer.dag.MapPartitionNode) SortPartitionOperatorBase(org.apache.flink.api.common.operators.base.SortPartitionOperatorBase) MapPartitionOperatorBase(org.apache.flink.api.common.operators.base.MapPartitionOperatorBase) DataSinkNode(org.apache.flink.optimizer.dag.DataSinkNode) JoinNode(org.apache.flink.optimizer.dag.JoinNode) OuterJoinNode(org.apache.flink.optimizer.dag.OuterJoinNode) OuterJoinOperatorBase(org.apache.flink.api.common.operators.base.OuterJoinOperatorBase) FlatMapNode(org.apache.flink.optimizer.dag.FlatMapNode) GroupReduceOperatorBase(org.apache.flink.api.common.operators.base.GroupReduceOperatorBase) OuterJoinNode(org.apache.flink.optimizer.dag.OuterJoinNode) BulkIterationBase(org.apache.flink.api.common.operators.base.BulkIterationBase) CoGroupRawOperatorBase(org.apache.flink.api.common.operators.base.CoGroupRawOperatorBase) GroupReduceNode(org.apache.flink.optimizer.dag.GroupReduceNode) GenericDataSinkBase(org.apache.flink.api.common.operators.GenericDataSinkBase) WorksetNode(org.apache.flink.optimizer.dag.WorksetNode) CoGroupNode(org.apache.flink.optimizer.dag.CoGroupNode) FlatMapNode(org.apache.flink.optimizer.dag.FlatMapNode) MapNode(org.apache.flink.optimizer.dag.MapNode) GroupCombineNode(org.apache.flink.optimizer.dag.GroupCombineNode) 
Union(org.apache.flink.api.common.operators.Union) FlatMapOperatorBase(org.apache.flink.api.common.operators.base.FlatMapOperatorBase) MapOperatorBase(org.apache.flink.api.common.operators.base.MapOperatorBase) SolutionSetNode(org.apache.flink.optimizer.dag.SolutionSetNode) OptimizerNode(org.apache.flink.optimizer.dag.OptimizerNode) WorksetIterationNode(org.apache.flink.optimizer.dag.WorksetIterationNode) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) InnerJoinOperatorBase(org.apache.flink.api.common.operators.base.InnerJoinOperatorBase) DataSourceNode(org.apache.flink.optimizer.dag.DataSourceNode) SortPartitionNode(org.apache.flink.optimizer.dag.SortPartitionNode) MapPartitionOperatorBase(org.apache.flink.api.common.operators.base.MapPartitionOperatorBase) PartitionOperatorBase(org.apache.flink.api.common.operators.base.PartitionOperatorBase) SortPartitionOperatorBase(org.apache.flink.api.common.operators.base.SortPartitionOperatorBase) CoGroupRawNode(org.apache.flink.optimizer.dag.CoGroupRawNode) BinaryUnionNode(org.apache.flink.optimizer.dag.BinaryUnionNode) MapPartitionNode(org.apache.flink.optimizer.dag.MapPartitionNode) PartitionNode(org.apache.flink.optimizer.dag.PartitionNode) SortPartitionNode(org.apache.flink.optimizer.dag.SortPartitionNode) FilterOperatorBase(org.apache.flink.api.common.operators.base.FilterOperatorBase) GroupCombineOperatorBase(org.apache.flink.api.common.operators.base.GroupCombineOperatorBase) BulkIterationNode(org.apache.flink.optimizer.dag.BulkIterationNode) ReduceNode(org.apache.flink.optimizer.dag.ReduceNode) GroupReduceNode(org.apache.flink.optimizer.dag.GroupReduceNode) GenericDataSourceBase(org.apache.flink.api.common.operators.GenericDataSourceBase) CrossNode(org.apache.flink.optimizer.dag.CrossNode)

Example 7 with DataSinkNode

use of org.apache.flink.optimizer.dag.DataSinkNode in project flink by apache.

the class PlanJSONDumpGenerator method visit.

/**
 * Writes the JSON description of the given node to the writer, after recursively writing
 * all of its not yet visited predecessors.
 *
 * <p>For iteration nodes, the nodes of the step function are written as a nested list
 * inside the iteration's JSON object. If the node has no plan node, only the generic part
 * (id, type, name, contents, parallelism, predecessors) is written; otherwise the driver
 * strategy, global and local properties, estimates, costs and compiler hints follow.
 *
 * @param node the node to dump
 * @param writer the writer that receives the JSON text
 * @param first {@code true} if no node has been written yet to the list this node belongs to;
 *        controls whether a separating comma is written before the node
 * @return {@code false} if the node had been visited before and nothing was written,
 *         {@code true} if the node was written
 * @throws CompilerException if a connection uses a ship strategy or local strategy that is not handled here
 */
private boolean visit(DumpableNode<?> node, PrintWriter writer, boolean first) {
    // check for duplicate traversal
    if (this.nodeIds.containsKey(node)) {
        return false;
    }
    // assign an id first
    this.nodeIds.put(node, this.nodeCnt++);
    // then recurse
    for (DumpableNode<?> child : node.getPredecessors()) {
        // as soon as one predecessor has been written, this node is no longer the first in the list
        if (visit(child, writer, first)) {
            first = false;
        }
    }
    // the optimizer node carries the operator, the parallelism and the estimates
    final OptimizerNode n = node.getOptimizerNode();
    // separate this node from the previously written one
    if (!first) {
        writer.print(",\n");
    }
    // open the node
    writer.print("\t{\n");
    // recurse into the step function, if this is an iteration node
    if (node instanceof BulkIterationNode || node instanceof BulkIterationPlanNode) {
        DumpableNode<?> innerChild = node instanceof BulkIterationNode ? ((BulkIterationNode) node).getNextPartialSolution() : ((BulkIterationPlanNode) node).getRootOfStepFunction();
        DumpableNode<?> begin = node instanceof BulkIterationNode ? ((BulkIterationNode) node).getPartialSolution() : ((BulkIterationPlanNode) node).getPartialSolutionPlanNode();
        writer.print("\t\t\"step_function\": [\n");
        visit(innerChild, writer, true);
        writer.print("\n\t\t],\n");
        writer.print("\t\t\"partial_solution\": " + this.nodeIds.get(begin) + ",\n");
        writer.print("\t\t\"next_partial_solution\": " + this.nodeIds.get(innerChild) + ",\n");
    } else if (node instanceof WorksetIterationNode || node instanceof WorksetIterationPlanNode) {
        DumpableNode<?> worksetRoot = node instanceof WorksetIterationNode ? ((WorksetIterationNode) node).getNextWorkset() : ((WorksetIterationPlanNode) node).getNextWorkSetPlanNode();
        DumpableNode<?> solutionDelta = node instanceof WorksetIterationNode ? ((WorksetIterationNode) node).getSolutionSetDelta() : ((WorksetIterationPlanNode) node).getSolutionSetDeltaPlanNode();
        DumpableNode<?> workset = node instanceof WorksetIterationNode ? ((WorksetIterationNode) node).getWorksetNode() : ((WorksetIterationPlanNode) node).getWorksetPlanNode();
        DumpableNode<?> solutionSet = node instanceof WorksetIterationNode ? ((WorksetIterationNode) node).getSolutionSetNode() : ((WorksetIterationPlanNode) node).getSolutionSetPlanNode();
        writer.print("\t\t\"step_function\": [\n");
        visit(worksetRoot, writer, true);
        visit(solutionDelta, writer, false);
        writer.print("\n\t\t],\n");
        writer.print("\t\t\"workset\": " + this.nodeIds.get(workset) + ",\n");
        writer.print("\t\t\"solution_set\": " + this.nodeIds.get(solutionSet) + ",\n");
        writer.print("\t\t\"next_workset\": " + this.nodeIds.get(worksetRoot) + ",\n");
        writer.print("\t\t\"solution_delta\": " + this.nodeIds.get(solutionDelta) + ",\n");
    }
    // print the id
    writer.print("\t\t\"id\": " + this.nodeIds.get(node));
    final String type;
    String contents;
    if (n instanceof DataSinkNode) {
        type = "sink";
        contents = n.getOperator().toString();
    } else if (n instanceof DataSourceNode) {
        type = "source";
        contents = n.getOperator().toString();
    } else if (n instanceof BulkIterationNode) {
        type = "bulk_iteration";
        contents = n.getOperator().getName();
    } else if (n instanceof WorksetIterationNode) {
        type = "workset_iteration";
        contents = n.getOperator().getName();
    } else if (n instanceof BinaryUnionNode) {
        type = "pact";
        contents = "";
    } else {
        type = "pact";
        contents = n.getOperator().getName();
    }
    contents = StringUtils.showControlCharacters(contents);
    if (encodeForHTML) {
        contents = StringEscapeUtils.escapeHtml4(contents);
        contents = contents.replace("\\", "&#92;");
    }
    String name = n.getOperatorName();
    // a reduce that runs with the sorted group combine driver is shown as a combiner
    if (name.equals("Reduce") && (node instanceof SingleInputPlanNode) && ((SingleInputPlanNode) node).getDriverStrategy() == DriverStrategy.SORTED_GROUP_COMBINE) {
        name = "Combine";
    }
    // output the type identifier
    writer.print(",\n\t\t\"type\": \"" + type + "\"");
    // output node name
    writer.print(",\n\t\t\"pact\": \"" + name + "\"");
    // output node contents
    writer.print(",\n\t\t\"contents\": \"" + contents + "\"");
    // parallelism
    writer.print(",\n\t\t\"parallelism\": \"" + (n.getParallelism() >= 1 ? n.getParallelism() : "default") + "\"");
    // output node predecessors
    Iterator<? extends DumpableConnection<?>> inConns = node.getDumpableInputs().iterator();
    // names of the first and second input, used below to describe the driver strategy
    String child1name = "", child2name = "";
    if (inConns != null && inConns.hasNext()) {
        // start predecessor list
        writer.print(",\n\t\t\"predecessors\": [");
        int inputNum = 0;
        while (inConns.hasNext()) {
            final DumpableConnection<?> inConn = inConns.next();
            final DumpableNode<?> source = inConn.getSource();
            writer.print(inputNum == 0 ? "\n" : ",\n");
            if (inputNum == 0) {
                child1name += child1name.length() > 0 ? ", " : "";
                child1name += source.getOptimizerNode().getOperator().getName() + " (id: " + this.nodeIds.get(source) + ")";
            } else if (inputNum == 1) {
                child2name += child2name.length() > 0 ? ", " : "";
                child2name += source.getOptimizerNode().getOperator().getName() + " (id: " + this.nodeIds.get(source) + ")";
            }
            // output predecessor id
            writer.print("\t\t\t{\"id\": " + this.nodeIds.get(source));
            // output connection side, only if the node has more than one input
            if (inConns.hasNext() || inputNum > 0) {
                writer.print(", \"side\": \"" + (inputNum == 0 ? "first" : "second") + "\"");
            }
            // output shipping strategy and channel type
            final Channel channel = (inConn instanceof Channel) ? (Channel) inConn : null;
            final ShipStrategyType shipType = channel != null ? channel.getShipStrategy() : inConn.getShipStrategy();
            String shipStrategy = null;
            if (shipType != null) {
                switch(shipType) {
                    case NONE:
                        // nothing
                        break;
                    case FORWARD:
                        shipStrategy = "Forward";
                        break;
                    case BROADCAST:
                        shipStrategy = "Broadcast";
                        break;
                    case PARTITION_HASH:
                        shipStrategy = "Hash Partition";
                        break;
                    case PARTITION_RANGE:
                        shipStrategy = "Range Partition";
                        break;
                    case PARTITION_RANDOM:
                        shipStrategy = "Redistribute";
                        break;
                    case PARTITION_FORCED_REBALANCE:
                        shipStrategy = "Rebalance";
                        break;
                    case PARTITION_CUSTOM:
                        shipStrategy = "Custom Partition";
                        break;
                    default:
                        throw new CompilerException("Unknown ship strategy '" + shipType.name() + "' in JSON generator.");
                }
            }
            if (shipStrategy != null) {
                // the keys are only appended to an actual strategy name; appending them to a
                // null string would write the literal text "null on ..." into the dump
                if (channel != null && channel.getShipStrategyKeys() != null && channel.getShipStrategyKeys().size() > 0) {
                    shipStrategy += " on " + (channel.getShipStrategySortOrder() == null ? channel.getShipStrategyKeys().toString() : Utils.createOrdering(channel.getShipStrategyKeys(), channel.getShipStrategySortOrder()).toString());
                }
                writer.print(", \"ship_strategy\": \"" + shipStrategy + "\"");
            }
            if (channel != null) {
                String localStrategy = null;
                switch(channel.getLocalStrategy()) {
                    case NONE:
                        break;
                    case SORT:
                        localStrategy = "Sort";
                        break;
                    case COMBININGSORT:
                        localStrategy = "Sort (combining)";
                        break;
                    default:
                        throw new CompilerException("Unknown local strategy " + channel.getLocalStrategy().name());
                }
                if (localStrategy != null) {
                    // same guard as for the ship strategy: never concatenate onto a null name
                    if (channel.getLocalStrategyKeys() != null && channel.getLocalStrategyKeys().size() > 0) {
                        localStrategy += " on " + (channel.getLocalStrategySortOrder() == null ? channel.getLocalStrategyKeys().toString() : Utils.createOrdering(channel.getLocalStrategyKeys(), channel.getLocalStrategySortOrder()).toString());
                    }
                    writer.print(", \"local_strategy\": \"" + localStrategy + "\"");
                }
                if (channel.getTempMode() != TempMode.NONE) {
                    String tempMode = channel.getTempMode().toString();
                    writer.print(", \"temp_mode\": \"" + tempMode + "\"");
                }
                String exchangeMode = channel.getDataExchangeMode().toString();
                writer.print(", \"exchange_mode\": \"" + exchangeMode + "\"");
            }
            writer.print('}');
            inputNum++;
        }
        // finish predecessors
        writer.print("\n\t\t]");
    }
    //---------------------------------------------------------------------------------------
    // the part below here is relevant only to plan nodes with concrete strategies, etc
    //---------------------------------------------------------------------------------------
    final PlanNode p = node.getPlanNode();
    if (p == null) {
        // finish node
        writer.print("\n\t}");
        return true;
    }
    // driver strategy of the node
    String locString = null;
    if (p.getDriverStrategy() != null) {
        switch(p.getDriverStrategy()) {
            case NONE:
            case BINARY_NO_OP:
                break;
            case UNARY_NO_OP:
                locString = "No-Op";
                break;
            case MAP:
                locString = "Map";
                break;
            case FLAT_MAP:
                locString = "FlatMap";
                break;
            case MAP_PARTITION:
                locString = "Map Partition";
                break;
            case ALL_REDUCE:
                locString = "Reduce All";
                break;
            case ALL_GROUP_REDUCE:
            case ALL_GROUP_REDUCE_COMBINE:
                locString = "Group Reduce All";
                break;
            case SORTED_REDUCE:
                locString = "Sorted Reduce";
                break;
            case SORTED_PARTIAL_REDUCE:
                locString = "Sorted Combine/Reduce";
                break;
            case SORTED_GROUP_REDUCE:
                locString = "Sorted Group Reduce";
                break;
            case SORTED_GROUP_COMBINE:
                locString = "Sorted Combine";
                break;
            case HYBRIDHASH_BUILD_FIRST:
                locString = "Hybrid Hash (build: " + child1name + ")";
                break;
            case HYBRIDHASH_BUILD_SECOND:
                locString = "Hybrid Hash (build: " + child2name + ")";
                break;
            case HYBRIDHASH_BUILD_FIRST_CACHED:
                locString = "Hybrid Hash (CACHED) (build: " + child1name + ")";
                break;
            case HYBRIDHASH_BUILD_SECOND_CACHED:
                locString = "Hybrid Hash (CACHED) (build: " + child2name + ")";
                break;
            case NESTEDLOOP_BLOCKED_OUTER_FIRST:
                locString = "Nested Loops (Blocked Outer: " + child1name + ")";
                break;
            case NESTEDLOOP_BLOCKED_OUTER_SECOND:
                locString = "Nested Loops (Blocked Outer: " + child2name + ")";
                break;
            case NESTEDLOOP_STREAMED_OUTER_FIRST:
                locString = "Nested Loops (Streamed Outer: " + child1name + ")";
                break;
            case NESTEDLOOP_STREAMED_OUTER_SECOND:
                locString = "Nested Loops (Streamed Outer: " + child2name + ")";
                break;
            case INNER_MERGE:
                locString = "Merge";
                break;
            case CO_GROUP:
                locString = "Co-Group";
                break;
            default:
                // strategies without a dedicated label are shown by their enum name
                locString = p.getDriverStrategy().name();
                break;
        }
        if (locString != null) {
            writer.print(",\n\t\t\"driver_strategy\": \"");
            writer.print(locString);
            writer.print("\"");
        }
    }
    {
        // output node global properties
        final GlobalProperties gp = p.getGlobalProperties();
        writer.print(",\n\t\t\"global_properties\": [\n");
        addProperty(writer, "Partitioning", gp.getPartitioning().name(), true);
        if (gp.getPartitioningFields() != null) {
            addProperty(writer, "Partitioned on", gp.getPartitioningFields().toString(), false);
        }
        if (gp.getPartitioningOrdering() != null) {
            addProperty(writer, "Partitioning Order", gp.getPartitioningOrdering().toString(), false);
        } else {
            addProperty(writer, "Partitioning Order", "(none)", false);
        }
        if (n.getUniqueFields() == null || n.getUniqueFields().size() == 0) {
            addProperty(writer, "Uniqueness", "not unique", false);
        } else {
            addProperty(writer, "Uniqueness", n.getUniqueFields().toString(), false);
        }
        writer.print("\n\t\t]");
    }
    {
        // output node local properties
        LocalProperties lp = p.getLocalProperties();
        writer.print(",\n\t\t\"local_properties\": [\n");
        if (lp.getOrdering() != null) {
            addProperty(writer, "Order", lp.getOrdering().toString(), true);
        } else {
            addProperty(writer, "Order", "(none)", true);
        }
        if (lp.getGroupedFields() != null && lp.getGroupedFields().size() > 0) {
            addProperty(writer, "Grouped on", lp.getGroupedFields().toString(), false);
        } else {
            addProperty(writer, "Grouping", "not grouped", false);
        }
        if (n.getUniqueFields() == null || n.getUniqueFields().size() == 0) {
            addProperty(writer, "Uniqueness", "not unique", false);
        } else {
            addProperty(writer, "Uniqueness", n.getUniqueFields().toString(), false);
        }
        writer.print("\n\t\t]");
    }
    // output node size estimates; -1 marks an unknown estimate
    writer.print(",\n\t\t\"estimates\": [\n");
    addProperty(writer, "Est. Output Size", n.getEstimatedOutputSize() == -1 ? "(unknown)" : formatNumber(n.getEstimatedOutputSize(), "B"), true);
    addProperty(writer, "Est. Cardinality", n.getEstimatedNumRecords() == -1 ? "(unknown)" : formatNumber(n.getEstimatedNumRecords()), false);
    writer.print("\t\t]");
    // output node cost
    if (p.getNodeCosts() != null) {
        writer.print(",\n\t\t\"costs\": [\n");
        addProperty(writer, "Network", p.getNodeCosts().getNetworkCost() == -1 ? "(unknown)" : formatNumber(p.getNodeCosts().getNetworkCost(), "B"), true);
        addProperty(writer, "Disk I/O", p.getNodeCosts().getDiskCost() == -1 ? "(unknown)" : formatNumber(p.getNodeCosts().getDiskCost(), "B"), false);
        addProperty(writer, "CPU", p.getNodeCosts().getCpuCost() == -1 ? "(unknown)" : formatNumber(p.getNodeCosts().getCpuCost(), ""), false);
        addProperty(writer, "Cumulative Network", p.getCumulativeCosts().getNetworkCost() == -1 ? "(unknown)" : formatNumber(p.getCumulativeCosts().getNetworkCost(), "B"), false);
        addProperty(writer, "Cumulative Disk I/O", p.getCumulativeCosts().getDiskCost() == -1 ? "(unknown)" : formatNumber(p.getCumulativeCosts().getDiskCost(), "B"), false);
        addProperty(writer, "Cumulative CPU", p.getCumulativeCosts().getCpuCost() == -1 ? "(unknown)" : formatNumber(p.getCumulativeCosts().getCpuCost(), ""), false);
        writer.print("\n\t\t]");
    }
    // output the node compiler hints; values equal to those of a fresh CompilerHints are shown as "(none)"
    if (n.getOperator().getCompilerHints() != null) {
        CompilerHints hints = n.getOperator().getCompilerHints();
        CompilerHints defaults = new CompilerHints();
        String size = hints.getOutputSize() == defaults.getOutputSize() ? "(none)" : String.valueOf(hints.getOutputSize());
        String card = hints.getOutputCardinality() == defaults.getOutputCardinality() ? "(none)" : String.valueOf(hints.getOutputCardinality());
        String width = hints.getAvgOutputRecordSize() == defaults.getAvgOutputRecordSize() ? "(none)" : String.valueOf(hints.getAvgOutputRecordSize());
        String filter = hints.getFilterFactor() == defaults.getFilterFactor() ? "(none)" : String.valueOf(hints.getFilterFactor());
        writer.print(",\n\t\t\"compiler_hints\": [\n");
        addProperty(writer, "Output Size (bytes)", size, true);
        addProperty(writer, "Output Cardinality", card, false);
        addProperty(writer, "Avg. Output Record Size (bytes)", width, false);
        addProperty(writer, "Filter Factor", filter, false);
        writer.print("\t\t]");
    }
    // finish node
    writer.print("\n\t}");
    return true;
}
Also used : DataSourceNode(org.apache.flink.optimizer.dag.DataSourceNode) CompilerHints(org.apache.flink.api.common.operators.CompilerHints) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) DataSinkNode(org.apache.flink.optimizer.dag.DataSinkNode) Channel(org.apache.flink.optimizer.plan.Channel) BinaryUnionNode(org.apache.flink.optimizer.dag.BinaryUnionNode) BulkIterationNode(org.apache.flink.optimizer.dag.BulkIterationNode) ShipStrategyType(org.apache.flink.runtime.operators.shipping.ShipStrategyType) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) WorksetIterationPlanNode(org.apache.flink.optimizer.plan.WorksetIterationPlanNode) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) OptimizerNode(org.apache.flink.optimizer.dag.OptimizerNode) WorksetIterationNode(org.apache.flink.optimizer.dag.WorksetIterationNode) GlobalProperties(org.apache.flink.optimizer.dataproperties.GlobalProperties) CompilerException(org.apache.flink.optimizer.CompilerException) LocalProperties(org.apache.flink.optimizer.dataproperties.LocalProperties) BulkIterationPlanNode(org.apache.flink.optimizer.plan.BulkIterationPlanNode)

Example 8 with DataSinkNode

use of org.apache.flink.optimizer.dag.DataSinkNode in project flink by apache.

the class PipelineBreakingTest method testReJoinedBranches.

/**
	 * Verifies that pipeline breakers are placed on the branches of a data flow
	 * that are joined again further downstream.
	 * 
	 * <pre>
	 *                                         /-> (sink)
	 *                                        /
	 *                         /-> (reduce) -+          /-> (flatmap) -> (sink)
	 *                        /               \        /
	 *     (source) -> (map) -                (join) -+-----\
	 *                        \               /              \
	 *                         \-> (filter) -+                \
	 *                                       \                (co group) -> (sink)
	 *                                        \                /
	 *                                         \-> (reduce) - /
	 * </pre>
	 */
@Test
public void testReJoinedBranches() {
    try {
        // assemble the program shown in the diagram above
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Long> numbers = env.fromElements(33L, 44L);
        DataSet<Tuple2<Long, Long>> pairs = numbers.map(new MapFunction<Long, Tuple2<Long, Long>>() {

            @Override
            public Tuple2<Long, Long> map(Long value) {
                return new Tuple2<Long, Long>(value, value);
            }
        });

        // first branch: reduce, which also feeds the first sink
        DataSet<Tuple2<Long, Long>> reducedPairs = pairs.groupBy(0).reduce(new SelectOneReducer<Tuple2<Long, Long>>());
        reducedPairs.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        // second branch: filter
        DataSet<Tuple2<Long, Long>> filteredPairs = pairs.filter(new FilterFunction<Tuple2<Long, Long>>() {

            @Override
            public boolean filter(Tuple2<Long, Long> value) throws Exception {
                return false;
            }
        });

        // the two branches meet again in the join, whose result feeds the flat map and the co-group
        DataSet<Tuple2<Long, Long>> joinedPairs = reducedPairs.join(filteredPairs).where(1).equalTo(1).with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());
        joinedPairs.flatMap(new IdentityFlatMapper<Tuple2<Long, Long>>()).output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        DataSet<Tuple2<Long, Long>> groupReduced = filteredPairs.groupBy(1).reduceGroup(new Top1GroupReducer<Tuple2<Long, Long>>());
        joinedPairs.coGroup(groupReduced).where(0).equalTo(0).with(new DummyCoGroupFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>()).output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>>());

        List<DataSinkNode> sinkNodes = convertPlan(env.createProgramPlan());

        // walk backwards from the three sinks to collect the optimizer DAG nodes
        DataSinkNode reduceSink = sinkNodes.get(0);
        DataSinkNode flatMapSink = sinkNodes.get(1);
        DataSinkNode coGroupSink = sinkNodes.get(2);

        SingleInputNode reducer = (SingleInputNode) reduceSink.getPredecessorNode();
        SingleInputNode mapper = (SingleInputNode) reducer.getPredecessorNode();
        SingleInputNode flatMapper = (SingleInputNode) flatMapSink.getPredecessorNode();
        TwoInputNode joiner = (TwoInputNode) flatMapper.getPredecessorNode();
        SingleInputNode filter = (SingleInputNode) joiner.getSecondPredecessorNode();
        TwoInputNode coGrouper = (TwoInputNode) coGroupSink.getPredecessorNode();
        SingleInputNode groupReducer = (SingleInputNode) coGrouper.getSecondPredecessorNode();

        // sanity checks: the DAG must have the expected shape
        assertEquals(reducer, joiner.getFirstPredecessorNode());
        assertEquals(mapper, filter.getPredecessorNode());
        assertEquals(joiner, coGrouper.getFirstPredecessorNode());
        assertEquals(filter, groupReducer.getPredecessorNode());

        // connections that are expected to stay pipelined
        assertFalse(reduceSink.getInputConnection().isBreakingPipeline());
        assertFalse(flatMapSink.getInputConnection().isBreakingPipeline());
        assertFalse(coGroupSink.getInputConnection().isBreakingPipeline());
        assertFalse(mapper.getIncomingConnection().isBreakingPipeline());
        assertFalse(flatMapper.getIncomingConnection().isBreakingPipeline());
        assertFalse(joiner.getFirstIncomingConnection().isBreakingPipeline());
        assertFalse(coGrouper.getFirstIncomingConnection().isBreakingPipeline());
        assertFalse(coGrouper.getSecondIncomingConnection().isBreakingPipeline());

        // connections that are expected to break the pipeline
        assertTrue(reducer.getIncomingConnection().isBreakingPipeline());
        assertTrue(filter.getIncomingConnection().isBreakingPipeline());
        assertTrue(groupReducer.getIncomingConnection().isBreakingPipeline());
        assertTrue(joiner.getSecondIncomingConnection().isBreakingPipeline());
    } catch (Exception ex) {
        ex.printStackTrace();
        fail(ex.getMessage());
    }
}
Also used : SingleInputNode(org.apache.flink.optimizer.dag.SingleInputNode) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Top1GroupReducer(org.apache.flink.optimizer.testfunctions.Top1GroupReducer) DataSinkNode(org.apache.flink.optimizer.dag.DataSinkNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) IdentityFlatMapper(org.apache.flink.optimizer.testfunctions.IdentityFlatMapper) DummyCoGroupFunction(org.apache.flink.optimizer.testfunctions.DummyCoGroupFunction) TwoInputNode(org.apache.flink.optimizer.dag.TwoInputNode) Test(org.junit.Test)

Aggregations

DataSinkNode (org.apache.flink.optimizer.dag.DataSinkNode)8 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)3 OptimizerNode (org.apache.flink.optimizer.dag.OptimizerNode)3 FilterFunction (org.apache.flink.api.common.functions.FilterFunction)2 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)2 BinaryUnionNode (org.apache.flink.optimizer.dag.BinaryUnionNode)2 BulkIterationNode (org.apache.flink.optimizer.dag.BulkIterationNode)2 DataSourceNode (org.apache.flink.optimizer.dag.DataSourceNode)2 SingleInputNode (org.apache.flink.optimizer.dag.SingleInputNode)2 WorksetIterationNode (org.apache.flink.optimizer.dag.WorksetIterationNode)2 Test (org.junit.Test)2 PrintWriter (java.io.PrintWriter)1 StringWriter (java.io.StringWriter)1 InvalidProgramException (org.apache.flink.api.common.InvalidProgramException)1 CompilerHints (org.apache.flink.api.common.operators.CompilerHints)1 GenericDataSinkBase (org.apache.flink.api.common.operators.GenericDataSinkBase)1 GenericDataSourceBase (org.apache.flink.api.common.operators.GenericDataSourceBase)1 Union (org.apache.flink.api.common.operators.Union)1 BulkIterationBase (org.apache.flink.api.common.operators.base.BulkIterationBase)1 CoGroupOperatorBase (org.apache.flink.api.common.operators.base.CoGroupOperatorBase)1