Use of org.apache.flink.optimizer.costs.DefaultCostEstimator in project flink by apache.
From the class CliFrontend, the method info:
/**
 * Executes the info action.
 *
 * @param args Command line arguments for the info action.
 */
protected int info(String[] args) {
    LOG.info("Running 'info' command.");

    // Parse command line options
    InfoOptions options;
    try {
        options = CliFrontendParser.parseInfoCommand(args);
    } catch (CliArgsException e) {
        return handleArgException(e);
    } catch (Throwable t) {
        return handleError(t);
    }

    // evaluate help flag
    if (options.isPrintHelp()) {
        CliFrontendParser.printHelpForInfo();
        return 0;
    }

    if (options.getJarFilePath() == null) {
        return handleArgException(new CliArgsException("The program JAR file was not specified."));
    }

    // -------- build the packaged program -------------
    PackagedProgram program;
    try {
        LOG.info("Building program from JAR file");
        program = buildProgram(options);
    } catch (Throwable t) {
        return handleError(t);
    }

    try {
        int parallelism = options.getParallelism();

        LOG.info("Creating program plan dump");
        Optimizer compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), config);
        FlinkPlan flinkPlan = ClusterClient.getOptimizedPlan(compiler, program, parallelism);

        String jsonPlan = null;
        if (flinkPlan instanceof OptimizedPlan) {
            jsonPlan = new PlanJSONDumpGenerator().getOptimizerPlanAsJSON((OptimizedPlan) flinkPlan);
        } else if (flinkPlan instanceof StreamingPlan) {
            jsonPlan = ((StreamingPlan) flinkPlan).getStreamingPlanAsJSON();
        }

        if (jsonPlan != null) {
            System.out.println("----------------------- Execution Plan -----------------------");
            System.out.println(jsonPlan);
            System.out.println("--------------------------------------------------------------");
        } else {
            System.out.println("JSON plan could not be generated.");
        }

        String description = program.getDescription();
        if (description != null) {
            System.out.println();
            System.out.println(description);
        } else {
            System.out.println();
            System.out.println("No description provided.");
        }
        return 0;
    } catch (Throwable t) {
        return handleError(t);
    } finally {
        program.deleteExtractedLibraries();
    }
}
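The heart of the plan dump above does not depend on the CLI at all: build an Optimizer from a DataStatistics, a DefaultCostEstimator, and a Configuration, compile a Plan, and hand the result to PlanJSONDumpGenerator. The following is a minimal, self-contained sketch of that path; the trivial source-to-sink job is an assumption standing in for the user's packaged program.

import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.optimizer.DataStatistics;
import org.apache.flink.optimizer.Optimizer;
import org.apache.flink.optimizer.costs.DefaultCostEstimator;
import org.apache.flink.optimizer.plan.OptimizedPlan;
import org.apache.flink.optimizer.plandump.PlanJSONDumpGenerator;

public class PlanDumpSketch {

    public static void main(String[] args) throws Exception {
        // a trivial batch job, standing in for the user's packaged program
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.fromElements(1, 2, 3).output(new DiscardingOutputFormat<>());

        // same optimizer construction as in the info action above
        Optimizer compiler = new Optimizer(
                new DataStatistics(), new DefaultCostEstimator(), new Configuration());
        OptimizedPlan optimized = compiler.compile(env.createProgramPlan());

        System.out.println(new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(optimized));
    }
}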
Use of org.apache.flink.optimizer.costs.DefaultCostEstimator in project flink by apache.
From the class CliFrontendPackageProgramTest, the method testPlanWithExternalClass:
/**
 * Ensures that we never run into the following error again:
 *
 * <pre>
 * org.apache.flink.client.program.ProgramInvocationException: The main method caused an error.
 *     at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:398)
 *     at org.apache.flink.client.program.PackagedProgram.invokeInteractiveModeForExecution(PackagedProgram.java:301)
 *     at org.apache.flink.client.program.Client.getOptimizedPlan(Client.java:140)
 *     at org.apache.flink.client.program.Client.getOptimizedPlanAsJson(Client.java:125)
 *     at org.apache.flink.client.CliFrontend.info(CliFrontend.java:439)
 *     at org.apache.flink.client.CliFrontend.parseParameters(CliFrontend.java:931)
 *     at org.apache.flink.client.CliFrontend.main(CliFrontend.java:951)
 * Caused by: java.io.IOException: java.lang.RuntimeException: java.lang.ClassNotFoundException: org.apache.hadoop.hive.ql.io.RCFileInputFormat
 *     at org.apache.hcatalog.mapreduce.HCatInputFormat.setInput(HCatInputFormat.java:102)
 *     at org.apache.hcatalog.mapreduce.HCatInputFormat.setInput(HCatInputFormat.java:54)
 *     at tlabs.CDR_In_Report.createHCatInputFormat(CDR_In_Report.java:322)
 *     at tlabs.CDR_Out_Report.main(CDR_Out_Report.java:380)
 *     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
 *     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
 *     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
 *     at java.lang.reflect.Method.invoke(Method.java:622)
 *     at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:383)
 * </pre>
 *
 * The test works as follows:
 *
 * <ul>
 * <li>Use the CliFrontend to invoke a JAR file that loads a class which is only available
 * in the JAR file itself (via a custom classloader).
 * <li>Change the user-code classloader of the PackagedProgram to a special classloader for this test.
 * <li>The special classloader accepts the intercepted class name (and returns String.class for it).
 * </ul>
 */
@Test
public void testPlanWithExternalClass() throws CompilerException, ProgramInvocationException {
    // create a final array reference, to be able to change its value later
    final boolean[] callme = { false };
    try {
        String[] arguments = {
                "--classpath", "file:///tmp/foo",
                "--classpath", "file:///tmp/bar",
                "-c", TEST_JAR_CLASSLOADERTEST_CLASS, getTestJarPath(),
                "true", "arg1", "arg2" };
        URL[] classpath = new URL[] { new URL("file:///tmp/foo"), new URL("file:///tmp/bar") };
        String[] reducedArguments = { "true", "arg1", "arg2" };

        RunOptions options = CliFrontendParser.parseRunCommand(arguments);
        assertEquals(getTestJarPath(), options.getJarFilePath());
        assertArrayEquals(classpath, options.getClasspaths().toArray());
        assertEquals(TEST_JAR_CLASSLOADERTEST_CLASS, options.getEntryPointClassName());
        assertArrayEquals(reducedArguments, options.getProgramArgs());

        CliFrontend frontend = new CliFrontend(CliFrontendTestUtils.getConfigDir());
        PackagedProgram prog = spy(frontend.buildProgram(options));

        ClassLoader testClassLoader = new ClassLoader(prog.getUserCodeClassLoader()) {
            @Override
            public Class<?> loadClass(String name) throws ClassNotFoundException {
                if ("org.apache.hadoop.hive.ql.io.RCFileInputFormat".equals(name)) {
                    callme[0] = true;
                    // Intentionally return the wrong class.
                    return String.class;
                } else {
                    return super.loadClass(name);
                }
            }
        };
        when(prog.getUserCodeClassLoader()).thenReturn(testClassLoader);

        assertEquals(TEST_JAR_CLASSLOADERTEST_CLASS, prog.getMainClassName());
        assertArrayEquals(reducedArguments, prog.getArguments());

        Configuration c = new Configuration();
        Optimizer compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), c);

        // we expect this to fail with a "ClassNotFoundException"
        ClusterClient.getOptimizedPlanAsJson(compiler, prog, 666);
        fail("Should have failed with a ClassNotFoundException");
    } catch (ProgramInvocationException e) {
        if (!(e.getCause() instanceof ClassNotFoundException)) {
            e.printStackTrace();
            fail("Program didn't throw ClassNotFoundException");
        }
        assertTrue("Classloader was not called", callme[0]);
    } catch (Exception e) {
        e.printStackTrace();
        fail("Program failed with the wrong exception: " + e.getClass().getName());
    }
}
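The load-bearing trick in this test is the parent-delegating classloader that overrides loadClass to intercept exactly one class name, record that it was called, and hand back a deliberately wrong class. A standalone sketch of the pattern follows; the intercepted class name here is a hypothetical placeholder.

// a delegating classloader that sabotages one specific lookup;
// "com.example.MissingDependency" is a hypothetical placeholder
ClassLoader interceptor = new ClassLoader(Thread.currentThread().getContextClassLoader()) {
    @Override
    public Class<?> loadClass(String name) throws ClassNotFoundException {
        if ("com.example.MissingDependency".equals(name)) {
            // returning an unrelated class simulates a broken dependency
            return String.class;
        }
        return super.loadClass(name);
    }
};

Overriding loadClass rather than findClass matters here: loadClass is the entry point of the delegation chain, so the interception happens before the parent classloader gets a chance to resolve the name.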
Use of org.apache.flink.optimizer.costs.DefaultCostEstimator in project flink by apache.
From the class RemoteExecutor, the method getOptimizerPlanAsJSON:
@Override
public String getOptimizerPlanAsJSON(Plan plan) throws Exception {
    Optimizer opt = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), new Configuration());
    OptimizedPlan optPlan = opt.compile(plan);
    return new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(optPlan);
}
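A minimal usage sketch, under the assumption that the placeholder host and port point at a JobManager; the trivial example job only serves to produce a compilable Plan.

import org.apache.flink.api.common.Plan;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.client.RemoteExecutor;

public class RemotePlanDumpSketch {

    public static void main(String[] args) throws Exception {
        // "localhost" and 6123 are placeholder connection details
        RemoteExecutor executor = new RemoteExecutor("localhost", 6123);

        // any program with at least one sink yields a compilable Plan
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.fromElements("a", "b", "c").output(new DiscardingOutputFormat<>());
        Plan plan = env.createProgramPlan();

        System.out.println(executor.getOptimizerPlanAsJSON(plan));
    }
}

Note that this particular method never needs the remote side at all: as the snippet shows, it compiles the plan locally with a fresh Configuration and dumps the optimizer's result as JSON.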
Use of org.apache.flink.optimizer.costs.DefaultCostEstimator in project flink by apache.
From the class CompilerTestBase, the method setup:
// ------------------------------------------------------------------------
@Before
public void setup() {
    Configuration flinkConf = new Configuration();
    this.dataStats = new DataStatistics();
    this.withStatsCompiler = new Optimizer(this.dataStats, new DefaultCostEstimator(), flinkConf);
    this.withStatsCompiler.setDefaultParallelism(DEFAULT_PARALLELISM);
    this.noStatsCompiler = new Optimizer(null, new DefaultCostEstimator(), flinkConf);
    this.noStatsCompiler.setDefaultParallelism(DEFAULT_PARALLELISM);
}
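A test in a subclass might then compile a small job with one of the prepared compilers. The sketch below is an assumption about typical usage (it relies on the fields initialized above and the usual Plan, OptimizedPlan, DiscardingOutputFormat, and JUnit imports), with a trivial job for illustration:

@Test
public void testCompileTrivialJob() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.fromElements(1L, 2L, 3L).output(new DiscardingOutputFormat<Long>());
    Plan plan = env.createProgramPlan();

    // the stats-free compiler must still produce a valid plan when
    // no statistics have been registered for the inputs
    OptimizedPlan optimized = this.noStatsCompiler.compile(plan);
    assertNotNull(optimized);
}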
Use of org.apache.flink.optimizer.costs.DefaultCostEstimator in project flink by apache.
From the class TestUtils, the method verifyParallelism:
/**
 * Verify algorithm driver parallelism.
 *
 * <p>Based on {@code org.apache.flink.graph.generator.TestUtils}.
 *
 * @param arguments program arguments
 * @param fullParallelismOperatorNames list of regex strings matching the names of full
 *     parallelism operators
 */
static void verifyParallelism(String[] arguments, String... fullParallelismOperatorNames) throws Exception {
    // set a reduced parallelism for the algorithm runner
    final int parallelism = 8;
    arguments = ArrayUtils.addAll(arguments, "--__parallelism", Integer.toString(parallelism));

    // configure the runner but do not execute
    Runner runner = new Runner(arguments).run();

    // we cannot use the actual DataSink since DataSet#writeAsCsv also
    // executes the program; instead, we receive the DataSet and configure
    // it with a DiscardingOutputFormat
    DataSet result = runner.getResult();
    if (result != null) {
        result.output(new DiscardingOutputFormat());
    }

    // set the default parallelism higher than the expected parallelism
    ExecutionEnvironment env = runner.getExecutionEnvironment();
    env.setParallelism(2 * parallelism);

    // add default regex exclusions for the added DiscardingOutputFormat
    // and also for any preceding GraphKeyTypeTransform
    List<Pattern> patterns = new ArrayList<>();
    patterns.add(Pattern.compile("DataSink \\(org\\.apache\\.flink\\.api\\.java\\.io\\.DiscardingOutputFormat@[0-9a-f]{1,8}\\)"));
    patterns.add(Pattern.compile("FlatMap \\(Translate results IDs\\)"));

    // add user regex patterns
    for (String largeOperatorName : fullParallelismOperatorNames) {
        patterns.add(Pattern.compile(largeOperatorName));
    }

    Optimizer compiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
    OptimizedPlan optimizedPlan = compiler.compile(env.createProgramPlan());

    // walk the job plan from sinks to sources
    List<PlanNode> queue = new ArrayList<>();
    queue.addAll(optimizedPlan.getDataSinks());

    while (queue.size() > 0) {
        PlanNode node = queue.remove(queue.size() - 1);

        // skip operators matching an exclusion pattern; these are the
        // large-scale operators which run at full parallelism
        boolean matched = false;
        for (Pattern pattern : patterns) {
            matched |= pattern.matcher(node.getNodeName()).matches();
        }

        if (!matched) {
            // data sources may have a parallelism of 1, so simply check that the node
            // parallelism has not been increased by the higher default parallelism
            assertTrue("Wrong parallelism for " + node.toString(), node.getParallelism() <= parallelism);
        }

        for (Channel channel : node.getInputs()) {
            queue.add(channel.getSource());
        }
    }
}
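A caller looks roughly as follows; the driver arguments are hypothetical and depend on the algorithm under test and the options the Runner accepts:

// hypothetical driver arguments; real tests pass whatever algorithm,
// input, and output options the Runner accepts
String[] arguments = new String[] {
        "--algorithm", "PageRank",
        "--input", "RMatGraph", "--scale", "10",
        "--output", "print" };

// operators whose names match this regex may run at full parallelism
TestUtils.verifyParallelism(arguments, "PageRank");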