Example usage of org.apache.twill.api.ClassAcceptor in project cdap (caskdata): the buildJobJar method of the MapReduceRuntimeService class.
/**
 * Creates a jar that contains everything that is needed for running the MapReduce program by Hadoop.
 *
 * @param job the Hadoop {@link Job} whose configured input/output format classes may be bundled
 * @param tempDir the directory in which the job jar file is created
 * @return a new {@link File} containing the job jar
 * @throws IOException if the jar file cannot be created
 * @throws URISyntaxException if a classpath URL cannot be converted to a URI during bundling
 */
private File buildJobJar(Job job, File tempDir) throws IOException, URISyntaxException {
  File jobJar = new File(tempDir, "job.jar");
  LOG.debug("Creating Job jar: {}", jobJar);

  // For local mode, nothing is needed in the job jar since we use the classloader in the
  // configuration object. try-with-resources guarantees the underlying FileOutputStream is
  // closed even if the JarOutputStream constructor throws (the original code leaked it in
  // that case).
  if (MapReduceTaskContextProvider.isLocal(job.getConfiguration())) {
    try (JarOutputStream output = new JarOutputStream(new FileOutputStream(jobJar))) {
      // Intentionally empty: an empty jar is all that local mode needs.
    }
    return jobJar;
  }

  // Excludes libraries that are for sure not needed.
  // Hadoop - Available from the cluster
  // Spark - MR never uses Spark
  final HadoopClassExcluder hadoopClassExcluder = new HadoopClassExcluder();
  ApplicationBundler appBundler = new ApplicationBundler(new ClassAcceptor() {
    @Override
    public boolean accept(String className, URL classUrl, URL classPathUrl) {
      // Reject Spark classes and anything served from a spark-assembly jar.
      if (className.startsWith("org.apache.spark") || classPathUrl.toString().contains("spark-assembly")) {
        return false;
      }
      return hadoopClassExcluder.accept(className, classUrl, classPathUrl);
    }
  });

  // Seed classes whose transitive dependencies must be traced into the bundle.
  Set<Class<?>> classes = Sets.newHashSet();
  classes.add(MapReduce.class);
  classes.add(MapperWrapper.class);
  classes.add(ReducerWrapper.class);
  classes.add(SLF4JBridgeHandler.class);

  // take over the classloading.
  if (cConf.getBoolean(Constants.AppFabric.MAPREDUCE_INCLUDE_CUSTOM_CLASSES)) {
    try {
      Class<? extends InputFormat<?, ?>> inputFormatClass = job.getInputFormatClass();
      classes.add(inputFormatClass);
      // If it is StreamInputFormat, also add the StreamEventCodec class as well.
      if (MapReduceStreamInputFormat.class.isAssignableFrom(inputFormatClass)) {
        Class<? extends StreamEventDecoder> decoderType =
          MapReduceStreamInputFormat.getDecoderClass(job.getConfiguration());
        if (decoderType != null) {
          classes.add(decoderType);
        }
      }
    } catch (Throwable t) {
      // Ignore: best-effort — the input format may only be resolvable on the cluster.
      LOG.debug("InputFormat class not found: {}", t.getMessage(), t);
    }
    try {
      Class<? extends OutputFormat<?, ?>> outputFormatClass = job.getOutputFormatClass();
      classes.add(outputFormatClass);
    } catch (Throwable t) {
      // Ignore: same best-effort handling as for the input format.
      LOG.debug("OutputFormat class not found: {}", t.getMessage(), t);
    }
  }

  // Add KMS class
  if (SecureStoreUtils.isKMSBacked(cConf) && SecureStoreUtils.isKMSCapable()) {
    classes.add(SecureStoreUtils.getKMSSecureStore());
  }

  Class<? extends HBaseDDLExecutor> ddlExecutorClass = new HBaseDDLExecutorFactory(cConf, hConf).get().getClass();
  try {
    Class<?> hbaseTableUtilClass = HBaseTableUtilFactory.getHBaseTableUtilClass(cConf);
    classes.add(hbaseTableUtilClass);
    classes.add(ddlExecutorClass);
  } catch (ProvisionException e) {
    LOG.warn("Not including HBaseTableUtil classes in submitted Job Jar since they are not available");
  }

  // Bundle with a context classloader that can also see the DDL executor's classloader,
  // restoring the original context classloader afterwards.
  ClassLoader oldClassLoader = ClassLoaders.setContextClassLoader(
    new CombineClassLoader(getClass().getClassLoader(), Collections.singleton(ddlExecutorClass.getClassLoader())));
  try {
    appBundler.createBundle(Locations.toLocation(jobJar), classes);
  } finally {
    ClassLoaders.setContextClassLoader(oldClassLoader);
  }

  LOG.debug("Built MapReduce Job Jar at {}", jobJar.toURI());
  return jobJar;
}
Example usage of org.apache.twill.api.ClassAcceptor in project cdap (caskdata): the setupLaunchConfig method of the DistributedWorkflowProgramRunner class.
@Override
protected void setupLaunchConfig(LaunchConfig launchConfig, Program program, ProgramOptions options, CConfiguration cConf, Configuration hConf, File tempDir) throws IOException {
  WorkflowSpecification workflowSpec = program.getApplicationSpecification().getWorkflows().get(program.getName());

  // Class acceptors collected from the inner program runners; combined into one at the end.
  List<ClassAcceptor> collectedAcceptors = new ArrayList<>();

  // Only MapReduce and Spark nodes matter; one representative node per type is enough.
  Set<SchedulableProgramType> pendingTypes = EnumSet.of(SchedulableProgramType.MAPREDUCE, SchedulableProgramType.SPARK);

  for (WorkflowActionNode actionNode : Iterables.filter(workflowSpec.getNodeIdMap().values(), WorkflowActionNode.class)) {
    ScheduleProgramInfo programInfo = actionNode.getProgram();
    // remove() returns false once a type has already been handled (or is of no interest),
    // so the launch context for each type is set up at most once.
    if (!pendingTypes.remove(programInfo.getProgramType())) {
      continue;
    }
    // Look up the runner for this program type and let it populate the launch config.
    ProgramType innerType = ProgramType.valueOfSchedulableType(programInfo.getProgramType());
    ProgramRunner innerRunner = programRunnerFactory.create(innerType);
    try {
      if (innerRunner instanceof DistributedProgramRunner) {
        ProgramId innerProgramId = program.getId().getParent().program(innerType, programInfo.getProgramName());
        ((DistributedProgramRunner) innerRunner).setupLaunchConfig(
          launchConfig, Programs.create(cConf, program, innerProgramId, innerRunner), options, cConf, hConf, tempDir);
        collectedAcceptors.add(launchConfig.getClassAcceptor());
      }
    } finally {
      // Always release the runner, even if setup failed.
      if (innerRunner instanceof Closeable) {
        Closeables.closeQuietly((Closeable) innerRunner);
      }
    }
  }

  // Combine all collected acceptors into a single acceptor for the workflow launch.
  launchConfig.setClassAcceptor(new AndClassAcceptor(collectedAcceptors));

  // The workflow driver is the only runnable; drop whatever the inner runners registered.
  launchConfig.clearRunnables();
  Resources driverResources = findDriverResources(
    program.getApplicationSpecification().getSpark(), program.getApplicationSpecification().getMapReduce(), workflowSpec);
  driverResources = SystemArguments.getResources(options.getUserArguments(), driverResources);
  launchConfig.addRunnable(workflowSpec.getName(), new WorkflowTwillRunnable(workflowSpec.getName()), driverResources, 1, 0);
}
Example usage of org.apache.twill.api.ClassAcceptor in project cdap (caskdata): the findClassDependencies method of the ClassPathResources class.
/**
 * Finds all resource names that the given set of classes depends on.
 *
 * @param classLoader class loader for looking up .class resources
 * @param classes set of class names that need to trace dependencies from
 * @param result collection to store the resulting resource names
 * @param <T> type of the result collection
 * @return the {@code result} collection, populated with the discovered resource names
 * @throws IOException if fails to load class bytecode during tracing
 */
private static <T extends Collection<String>> T findClassDependencies(final ClassLoader classLoader, Iterable<String> classes, final T result) throws IOException {
  final Set<String> bootstrapPaths = getBootstrapClassPaths();
  final Set<URL> visitedClassPaths = Sets.newHashSet();

  Dependencies.findClassDependencies(classLoader, new ClassAcceptor() {
    @Override
    public boolean accept(String className, URL classUrl, URL classPathUrl) {
      // Skip classes that live on the JVM bootstrap classpath.
      if (bootstrapPaths.contains(classPathUrl.getFile())) {
        return false;
      }
      // Skip SLF4J binding classes; they are visible through the program classloader.
      if (className.startsWith("org.slf4j.impl.")) {
        return false;
      }
      // Each classpath entry only needs to be expanded once; keep tracing either way.
      if (!visitedClassPaths.add(classPathUrl)) {
        return true;
      }
      try {
        // Record every resource contained in this classpath entry.
        for (ClassPath.ResourceInfo resourceInfo : ClassPath.from(classPathUrl.toURI(), classLoader).getResources()) {
          result.add(resourceInfo.getResourceName());
        }
      } catch (Exception e) {
        // Best effort: an unreadable classpath entry is simply skipped.
      }
      return true;
    }
  }, classes);

  return result;
}
Example usage of org.apache.twill.api.ClassAcceptor in project cdap (caskdata): the ensureCoprocessorExists method of the CoprocessorManager class.
/**
 * Gets the location of the coprocessor jar and ensures it exists, optionally overwriting it if
 * it already exists.
 * In distributed mode, the coprocessor jar is loaded onto hdfs by the CoprocessorBuildTool,
 * but in other modes it is still useful to create the jar on demand.
 *
 * @param overwrite whether to overwrite the coprocessor if it already exists
 * @return the location of the coprocessor, or {@code null} if tracing found no dependent
 *         classes to bundle and hence no jar was created
 * @throws IOException if there was an issue accessing the location, building the local jar,
 *         or copying it to the filesystem
 */
public synchronized Location ensureCoprocessorExists(boolean overwrite) throws IOException {
final Location targetPath = jarDir.append(getCoprocessorName());
// Reuse the existing jar unless the caller explicitly asked for a rebuild.
if (!overwrite && targetPath.exists()) {
return targetPath;
}
// ensure the jar directory exists
Locations.mkdirsIfNotExists(jarDir);
// Build a comma-separated list of coprocessor class names purely for the debug log below.
StringBuilder buf = new StringBuilder();
for (Class<? extends Coprocessor> c : coprocessors) {
buf.append(c.getName()).append(", ");
}
LOG.debug("Creating jar file for coprocessor classes: {}", buf.toString());
// Trace each coprocessor's transitive class dependencies, recording class name -> .class URL.
final Map<String, URL> dependentClasses = new HashMap<>();
for (Class<? extends Coprocessor> clz : coprocessors) {
Dependencies.findClassDependencies(clz.getClassLoader(), new ClassAcceptor() {
@Override
public boolean accept(String className, final URL classUrl, URL classPathUrl) {
// Bundle only CDAP (co.cask), fastutil, Tephra and Gson classes; anything
// other than those comes with HBase, Java, fastutil, and gson on the servers.
if (className.startsWith("co.cask") || className.startsWith("it.unimi.dsi.fastutil") || className.startsWith("org.apache.tephra") || className.startsWith("com.google.gson")) {
if (!dependentClasses.containsKey(className)) {
dependentClasses.put(className, classUrl);
}
// Returning true keeps tracing into this class's own dependencies.
return true;
}
return false;
}
}, clz.getName());
}
// Nothing to bundle: signal "no jar" to the caller rather than creating an empty jar.
if (dependentClasses.isEmpty()) {
return null;
}
// create the coprocessor jar on local filesystem
LOG.debug("Adding " + dependentClasses.size() + " classes to jar");
File jarFile = File.createTempFile("coprocessor", ".jar");
byte[] buffer = new byte[4 * 1024];
try (JarOutputStream jarOutput = new JarOutputStream(new FileOutputStream(jarFile))) {
for (Map.Entry<String, URL> entry : dependentClasses.entrySet()) {
// NOTE(review): jar entry names should use '/' regardless of platform;
// File.separatorChar is '\' on Windows — fine on Unix where this runs, but confirm.
jarOutput.putNextEntry(new JarEntry(entry.getKey().replace('.', File.separatorChar) + ".class"));
// Copy the .class bytes from its classloader URL into the jar entry.
try (InputStream inputStream = entry.getValue().openStream()) {
int len = inputStream.read(buffer);
while (len >= 0) {
jarOutput.write(buffer, 0, len);
len = inputStream.read(buffer);
}
}
}
} catch (IOException e) {
// Clean up the partially written temp jar before propagating the failure.
LOG.error("Unable to create temporary local coprocessor jar {}.", jarFile.getAbsolutePath(), e);
if (!jarFile.delete()) {
LOG.warn("Unable to clean up temporary local coprocessor jar {}.", jarFile.getAbsolutePath());
}
throw e;
}
// copy the local jar file to the filesystem (HDFS)
// copies to a tmp location then renames the tmp location to the target location in case
// multiple CoprocessorManagers we called at the same time. This should never be the case in distributed
// mode, as coprocessors should all be loaded beforehand using the CoprocessorBuildTool.
final Location tmpLocation = jarDir.getTempFile(".jar");
try {
// Copy jar file into filesystem (HDFS)
Files.copy(jarFile, new OutputSupplier<OutputStream>() {
@Override
public OutputStream getOutput() throws IOException {
return tmpLocation.getOutputStream();
}
});
} catch (IOException e) {
// On failure, best-effort delete of the partial copy; suppressed failures are attached to e.
LOG.error("Unable to copy local coprocessor jar to filesystem at {}.", tmpLocation, e);
if (tmpLocation.exists()) {
LOG.info("Deleting partially copied coprocessor jar at {}.", tmpLocation);
try {
if (!tmpLocation.delete()) {
LOG.error("Unable to delete partially copied coprocessor jar at {}.", tmpLocation, e);
}
} catch (IOException e1) {
LOG.error("Unable to delete partially copied coprocessor jar at {}.", tmpLocation, e1);
e.addSuppressed(e1);
}
}
throw e;
} finally {
// The local temp jar is no longer needed whether or not the copy succeeded.
if (!jarFile.delete()) {
LOG.warn("Unable to clean up temporary local coprocessor jar {}.", jarFile.getAbsolutePath());
}
}
// NOTE(review): the renameTo result is ignored; a failed rename would go unnoticed here — verify.
tmpLocation.renameTo(targetPath);
return targetPath;
}
Example usage of org.apache.twill.api.ClassAcceptor in project cdap (caskdata): the testExtraClassPath method of the ClassLoaderTest class.
@Test
public void testExtraClassPath() throws IOException, ClassNotFoundException {
  File workDir = TMP_FOLDER.newFolder();

  // Build two bundle jars: one seeded from a Guava class, one from a Gson class.
  ApplicationBundler appBundler = new ApplicationBundler(new ClassAcceptor());
  Location guavaJar = Locations.toLocation(new File(workDir, "guava.jar"));
  appBundler.createBundle(guavaJar, ImmutableList.class);
  Location gsonJar = Locations.toLocation(new File(workDir, "gson.jar"));
  appBundler.createBundle(gsonJar, Gson.class);

  // Expand both jars into separate directories.
  File guavaDir = BundleJarUtil.unJar(guavaJar, TMP_FOLDER.newFolder());
  File gsonDir = BundleJarUtil.unJar(gsonJar, TMP_FOLDER.newFolder());

  // Guava dir is the main directory; the gson dir plus its lib jars form the extra classpath.
  String extraClassPath = gsonDir.getAbsolutePath() + File.pathSeparatorChar + gsonDir.getAbsolutePath() + "/lib/*";
  ClassLoader cl = new DirectoryClassLoader(guavaDir, extraClassPath, null, Arrays.asList("lib"));

  // Classes from both jars must be loadable through the single classloader.
  cl.loadClass(ImmutableList.class.getName());
  cl.loadClass(Gson.class.getName());
}
Aggregations