Search in sources :

Example 1 with Runner

Use of org.apache.flink.graph.Runner in project flink by apache.

The method verifyParallelism of the class TestUtils.

/**
 * Checks that an algorithm driver honors the parallelism requested on the command line.
 *
 * <p>Based on {@code org.apache.flink.graph.generator.TestUtils}.
 *
 * <p>The program is configured with a reduced parallelism while the environment default is
 * raised to twice that value. The optimized plan is then traversed from its sinks, and every
 * operator whose name is not fully matched by an exclusion pattern is asserted to run at no
 * more than the reduced parallelism.
 *
 * @param arguments program arguments
 * @param fullParallelismOperatorNames list of regex strings matching the names of full
 *     parallelism operators
 * @throws Exception if configuring the runner or compiling the program plan fails
 */
static void verifyParallelism(String[] arguments, String... fullParallelismOperatorNames) throws Exception {
    // reduced parallelism requested from the algorithm runner
    final int parallelism = 8;
    final String[] runnerArguments =
            ArrayUtils.addAll(arguments, "--__parallelism", Integer.toString(parallelism));

    // configure the runner but do not execute
    final Runner runner = new Runner(runnerArguments).run();

    // The real DataSink cannot be used since DataSet#writeAsCsv also executes the
    // program, so the resulting DataSet is terminated with a DiscardingOutputFormat.
    final DataSet result = runner.getResult();
    if (result != null) {
        result.output(new DiscardingOutputFormat());
    }

    // raise the default parallelism above the expected parallelism
    final ExecutionEnvironment env = runner.getExecutionEnvironment();
    env.setParallelism(2 * parallelism);

    // Exclusions: first the defaults for the DiscardingOutputFormat sink added above and
    // for any preceding GraphKeyTypeTransform, then the caller-supplied patterns.
    final List<Pattern> exclusions = new ArrayList<>();
    exclusions.add(Pattern.compile("DataSink \\(org\\.apache\\.flink\\.api\\.java\\.io\\.DiscardingOutputFormat@[0-9a-f]{1,8}\\)"));
    exclusions.add(Pattern.compile("FlatMap \\(Translate results IDs\\)"));
    for (String operatorRegex : fullParallelismOperatorNames) {
        exclusions.add(Pattern.compile(operatorRegex));
    }

    final Optimizer optimizer = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
    final OptimizedPlan plan = optimizer.compile(env.createProgramPlan());

    // Walk the job plan from sinks to sources; the list is used as a stack, so the most
    // recently added node is inspected next.
    final List<PlanNode> pending = new ArrayList<>();
    pending.addAll(plan.getDataSinks());
    while (!pending.isEmpty()) {
        final PlanNode node = pending.remove(pending.size() - 1);

        // Operators matching an exclusion pattern are the large-scale operators which
        // run at full parallelism; they are not checked.
        final String operatorName = node.getNodeName();
        boolean excluded = false;
        for (Pattern exclusion : exclusions) {
            if (exclusion.matcher(operatorName).matches()) {
                excluded = true;
                break;
            }
        }

        if (!excluded) {
            // Data sources may have a parallelism of 1, so only verify that the node
            // parallelism was not increased by the raised default parallelism.
            assertTrue("Wrong parallelism for " + node.toString(), node.getParallelism() <= parallelism);
        }

        // continue upstream through every input of this node
        for (Channel input : node.getInputs()) {
            pending.add(input.getSource());
        }
    }
}
Also used : Runner(org.apache.flink.graph.Runner) Pattern(java.util.regex.Pattern) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Configuration(org.apache.flink.configuration.Configuration) DataSet(org.apache.flink.api.java.DataSet) Optimizer(org.apache.flink.optimizer.Optimizer) Channel(org.apache.flink.optimizer.plan.Channel) ArrayList(java.util.ArrayList) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) PlanNode(org.apache.flink.optimizer.plan.PlanNode) DefaultCostEstimator(org.apache.flink.optimizer.costs.DefaultCostEstimator)

Aggregations

ArrayList (java.util.ArrayList)1 Pattern (java.util.regex.Pattern)1 DataSet (org.apache.flink.api.java.DataSet)1 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)1 DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat)1 Configuration (org.apache.flink.configuration.Configuration)1 Runner (org.apache.flink.graph.Runner)1 Optimizer (org.apache.flink.optimizer.Optimizer)1 DefaultCostEstimator (org.apache.flink.optimizer.costs.DefaultCostEstimator)1 Channel (org.apache.flink.optimizer.plan.Channel)1 OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan)1 PlanNode (org.apache.flink.optimizer.plan.PlanNode)1