use of org.apache.twill.internal.ApplicationBundler in project cdap by caskdata.
the class AppJarHelper method createDeploymentJar.
/**
 * Builds a deployment jar for the given class.
 *
 * <p>The class and its dependencies are first bundled by an {@link ApplicationBundler} into an intermediate jar.
 * That jar is then rewritten into the returned jar with the {@code classes/} prefix removed from entry names,
 * a manifest derived from {@code manifest}, and the given embedded jars added under {@code lib/}.
 * The intermediate jar is deleted before this method returns.
 *
 * @param locationFactory factory used to create both the intermediate and the resulting jar locations
 * @param clz the class to bundle; its name is also written as the {@code Main-Class} manifest attribute
 * @param manifest base manifest; it is copied, then {@code Manifest-Version} and {@code Main-Class} are set
 * @param classAcceptor filter passed to the {@link ApplicationBundler}
 * @param bundleEmbeddedJars files to add to the jar as {@code lib/<file name>}
 * @return the location of the deployment jar
 * @throws IOException if bundling or writing the jar fails
 */
public static Location createDeploymentJar(LocationFactory locationFactory, Class<?> clz, Manifest manifest,
                                           ClassAcceptor classAcceptor, File... bundleEmbeddedJars)
  throws IOException {
  // Exclude all classes that are visible from the system to the program classloader.
  ApplicationBundler bundler = new ApplicationBundler(classAcceptor);
  Location jarLocation = locationFactory.create(clz.getName()).getTempFile(".jar");
  try {
    ClassLoader oldClassLoader = ClassLoaders.setContextClassLoader(clz.getClassLoader());
    try {
      bundler.createBundle(jarLocation, clz);
    } finally {
      ClassLoaders.setContextClassLoader(oldClassLoader);
    }

    Location deployJar = locationFactory.create(clz.getName()).getTempFile(".jar");
    Manifest jarManifest = new Manifest(manifest);
    jarManifest.getMainAttributes().put(Attributes.Name.MANIFEST_VERSION, "1.0");
    jarManifest.getMainAttributes().put(Attributes.Name.MAIN_CLASS, clz.getName());

    // Create the program jar for deployment. It removes the "classes/" prefix as that's the convention taken
    // by the ApplicationBundler inside Twill.
    Set<String> seenEntries = new HashSet<>();
    try (JarOutputStream jarOutput = new JarOutputStream(deployJar.getOutputStream(), jarManifest);
         JarInputStream jarInput = new JarInputStream(jarLocation.getInputStream())) {
      for (JarEntry jarEntry = jarInput.getNextJarEntry(); jarEntry != null; jarEntry = jarInput.getNextJarEntry()) {
        boolean isDir = jarEntry.isDirectory();
        String entryName = jarEntry.getName();
        if (entryName.equals("classes/")) {
          // The prefix directory itself has no counterpart in the deployment jar.
          continue;
        }
        if (entryName.startsWith("classes/")) {
          entryName = entryName.substring("classes/".length());
        }
        // The manifest is created programmatically above, so a manifest entry from the bundle would be a duplicate.
        if ("META-INF/MANIFEST.MF".equalsIgnoreCase(entryName)) {
          continue;
        }
        // Stripping the prefix can map two source entries to the same name; only the first one is written.
        if (seenEntries.add(entryName)) {
          jarOutput.putNextEntry(new JarEntry(entryName));
          if (!isDir) {
            ByteStreams.copy(jarInput, jarOutput);
          }
        }
      }

      for (File embeddedJar : bundleEmbeddedJars) {
        String embeddedName = "lib/" + embeddedJar.getName();
        if (seenEntries.add(embeddedName)) {
          jarOutput.putNextEntry(new JarEntry(embeddedName));
          Files.copy(embeddedJar, jarOutput);
        }
      }
    }
    return deployJar;
  } finally {
    // The intermediate bundle is only an input to the rewrite above; remove it so temp jars don't pile up.
    try {
      jarLocation.delete();
    } catch (IOException ignored) {
      // Best-effort cleanup: a failure to delete must not mask the result or the original exception.
    }
  }
}
use of org.apache.twill.internal.ApplicationBundler in project cdap by caskdata.
the class RemoteDatasetFramework method createDeploymentJar.
/**
 * Builds a deployment jar for the given class inside the configured local temp directory.
 *
 * <p>A bundle jar is first produced by an {@link ApplicationBundler}, then copied into a second temp file with
 * the {@code classes/} prefix stripped from entry names. The intermediate bundle file is always deleted.
 *
 * @param clz the class to bundle
 * @return a location pointing at the rewritten jar
 * @throws IOException if creating, reading or writing any of the jar files fails
 */
private Location createDeploymentJar(Class<?> clz) throws IOException {
  String localDataDir = cConf.get(Constants.CFG_LOCAL_DATA_DIR);
  String tempDirName = cConf.get(Constants.AppFabric.TEMP_DIR);
  File workDir = new File(localDataDir, tempDirName).getAbsoluteFile();
  workDir.mkdirs();

  File bundleFile = File.createTempFile(clz.getName(), ".jar", workDir);
  try {
    // Bundle with the class's own classloader as the thread context classloader.
    ClassLoader previous = ClassLoaders.setContextClassLoader(clz.getClassLoader());
    try {
      ApplicationBundler appBundler = new ApplicationBundler(ImmutableList.of("co.cask.cdap.api", "org.apache.hadoop", "org.apache.hbase", "org.apache.hive"));
      appBundler.createBundle(Locations.toLocation(bundleFile), clz);
    } finally {
      ClassLoaders.setContextClassLoader(previous);
    }

    // Rewrite the bundle into the deployment jar. The ApplicationBundler inside Twill places content under
    // "classes/", which is dropped here.
    final String prefix = "classes/";
    File deployFile = File.createTempFile(clz.getName(), ".jar", workDir);
    try (JarOutputStream out = new JarOutputStream(new FileOutputStream(deployFile));
         JarInputStream in = new JarInputStream(new FileInputStream(bundleFile))) {
      Set<String> written = Sets.newHashSet();
      for (JarEntry source = in.getNextJarEntry(); source != null; source = in.getNextJarEntry()) {
        String sourceName = source.getName();
        if (prefix.equals(sourceName)) {
          // The prefix directory itself is not carried over.
          continue;
        }
        boolean directory = source.isDirectory();
        String targetName = sourceName.startsWith(prefix) ? sourceName.substring(prefix.length()) : sourceName;
        if (!written.add(targetName)) {
          // An entry with this name was already written; skip the repeat.
          continue;
        }
        out.putNextEntry(new JarEntry(targetName));
        if (!directory) {
          ByteStreams.copy(in, out);
        }
      }
      return Locations.toLocation(deployFile);
    }
  } finally {
    bundleFile.delete();
  }
}
use of org.apache.twill.internal.ApplicationBundler in project cdap by caskdata.
the class MapReduceRuntimeService method buildJobJar.
/**
 * Creates a jar that contains everything needed for running the MapReduce program by Hadoop.
 *
 * <p>In local mode an empty {@code job.jar} is written, since the classloader in the configuration object is
 * used instead. Otherwise the jar is produced by an {@link ApplicationBundler} from a fixed set of classes plus,
 * depending on configuration, the job's input/output format classes, the KMS secure store class and the
 * HBase table util class.
 *
 * @param job the job whose configuration and input/output format classes are consulted
 * @param tempDir directory in which {@code job.jar} is created
 * @return a new {@link File} containing the job jar
 * @throws IOException if the jar cannot be written
 * @throws URISyntaxException declared by the signature; NOTE(review): no statement in this body visibly throws it
 */
private File buildJobJar(Job job, File tempDir) throws IOException, URISyntaxException {
  File jobJar = new File(tempDir, "job.jar");
  LOG.debug("Creating Job jar: {}", jobJar);

  // For local mode, nothing is needed in the job jar since we use the classloader in the configuration object.
  if (MapReduceTaskContextProvider.isLocal(job.getConfiguration())) {
    // try-with-resources guarantees the file stream is released even if creating or closing the jar stream fails.
    try (FileOutputStream fileOutput = new FileOutputStream(jobJar);
         JarOutputStream jarOutput = new JarOutputStream(fileOutput)) {
      // Intentionally empty: only an empty jar is required.
    }
    return jobJar;
  }

  // Excludes libraries that are for sure not needed.
  // Hadoop - Available from the cluster
  // Spark - MR never uses Spark
  final HadoopClassExcluder hadoopClassExcluder = new HadoopClassExcluder();
  ApplicationBundler appBundler = new ApplicationBundler(new ClassAcceptor() {
    @Override
    public boolean accept(String className, URL classUrl, URL classPathUrl) {
      if (className.startsWith("org.apache.spark") || classPathUrl.toString().contains("spark-assembly")) {
        return false;
      }
      return hadoopClassExcluder.accept(className, classUrl, classPathUrl);
    }
  });

  Set<Class<?>> classes = Sets.newHashSet();
  classes.add(MapReduce.class);
  classes.add(MapperWrapper.class);
  classes.add(ReducerWrapper.class);
  classes.add(SLF4JBridgeHandler.class);

  // When configured, also trace the job's InputFormat/OutputFormat classes so they end up in the job jar.
  if (cConf.getBoolean(Constants.AppFabric.MAPREDUCE_INCLUDE_CUSTOM_CLASSES)) {
    try {
      Class<? extends InputFormat<?, ?>> inputFormatClass = job.getInputFormatClass();
      classes.add(inputFormatClass);
      // If it is StreamInputFormat, also add the StreamEventCodec class as well.
      if (MapReduceStreamInputFormat.class.isAssignableFrom(inputFormatClass)) {
        Class<? extends StreamEventDecoder> decoderType = MapReduceStreamInputFormat.getDecoderClass(job.getConfiguration());
        if (decoderType != null) {
          classes.add(decoderType);
        }
      }
    } catch (Throwable t) {
      // Best effort: a missing InputFormat class only means it is not added to the jar.
      LOG.debug("InputFormat class not found: {}", t.getMessage(), t);
    }
    try {
      Class<? extends OutputFormat<?, ?>> outputFormatClass = job.getOutputFormatClass();
      classes.add(outputFormatClass);
    } catch (Throwable t) {
      // Best effort: a missing OutputFormat class only means it is not added to the jar.
      LOG.debug("OutputFormat class not found: {}", t.getMessage(), t);
    }
  }

  // Add KMS class
  if (SecureStoreUtils.isKMSBacked(cConf) && SecureStoreUtils.isKMSCapable()) {
    classes.add(SecureStoreUtils.getKMSSecureStore());
  }

  try {
    Class<?> hbaseTableUtilClass = HBaseTableUtilFactory.getHBaseTableUtilClass(cConf);
    classes.add(hbaseTableUtilClass);
  } catch (ProvisionException e) {
    LOG.warn("Not including HBaseTableUtil classes in submitted Job Jar since they are not available");
  }

  // Bundle with this class's classloader as the thread context classloader, restoring the previous one afterwards.
  ClassLoader oldClassLoader = ClassLoaders.setContextClassLoader(getClass().getClassLoader());
  try {
    appBundler.createBundle(Locations.toLocation(jobJar), classes);
  } finally {
    ClassLoaders.setContextClassLoader(oldClassLoader);
  }

  LOG.debug("Built MapReduce Job Jar at {}", jobJar.toURI());
  return jobJar;
}
use of org.apache.twill.internal.ApplicationBundler in project cdap by caskdata.
the class ClassLoaderTest method testExtraClassPath.
/**
 * Verifies that a {@link DirectoryClassLoader} can load a class from its main directory as well as a class
 * that is only reachable through the extra classpath.
 */
@Test
public void testExtraClassPath() throws IOException, ClassNotFoundException {
  File jarDir = TMP_FOLDER.newFolder();

  // Bundle guava and gson into two separate jars
  ApplicationBundler appBundler = new ApplicationBundler(new ClassAcceptor());
  File guavaJarFile = new File(jarDir, "guava.jar");
  Location guavaJarLocation = Locations.toLocation(guavaJarFile);
  appBundler.createBundle(guavaJarLocation, ImmutableList.class);
  File gsonJarFile = new File(jarDir, "gson.jar");
  Location gsonJarLocation = Locations.toLocation(gsonJarFile);
  appBundler.createBundle(gsonJarLocation, Gson.class);

  // Expand each jar into its own fresh directory
  File guavaUnpacked = BundleJarUtil.unJar(guavaJarLocation, TMP_FOLDER.newFolder());
  File gsonUnpacked = BundleJarUtil.unJar(gsonJarLocation, TMP_FOLDER.newFolder());

  // The guava directory is the main directory; the gson directory and its lib/* go on the extra classpath
  String gsonPath = gsonUnpacked.getAbsolutePath();
  String extraPath = gsonPath + File.pathSeparatorChar + gsonPath + "/lib/*";
  ClassLoader classLoader = new DirectoryClassLoader(guavaUnpacked, extraPath, null, Arrays.asList("lib"));

  // Both classes must resolve through the single classloader
  String guavaClassName = ImmutableList.class.getName();
  String gsonClassName = Gson.class.getName();
  classLoader.loadClass(guavaClassName);
  classLoader.loadClass(gsonClassName);
}
use of org.apache.twill.internal.ApplicationBundler in project cdap by caskdata.
the class PluginJarHelper method createPluginJar.
/**
 * Builds a plugin jar containing the given classes and their bundled dependencies.
 *
 * <p>The classes are first bundled by an {@link ApplicationBundler} into an intermediate jar. That jar is then
 * rewritten into the returned jar with the {@code classes/} prefix removed from entry names and a manifest
 * derived from {@code manifest}. The intermediate jar is deleted before this method returns.
 *
 * @param locationFactory factory used to create both the intermediate and the resulting jar locations
 * @param manifest base manifest; it is copied and {@code Manifest-Version} is set to {@code 1.0}
 * @param clz the first class to bundle; its name is used to name the jar locations
 * @param classes additional classes to bundle
 * @return the location of the plugin jar
 * @throws IOException if bundling or writing the jar fails
 */
public static Location createPluginJar(LocationFactory locationFactory, Manifest manifest, Class<?> clz, Class<?>... classes) throws IOException {
  // include all packages from the given plugin classes
  // for example, a plugin may use the org.apache.spark.streaming.kafka.KafkaUtils class,
  // which would otherwise get filtered out by the org.apache.spark package filter.
  // NOTE(review): getPackage() is dereferenced without a null check; confirm callers never pass a class
  // for which it returns null.
  Set<String> includePackages = new HashSet<>();
  includePackages.add("org.apache.hadoop.hbase");
  includePackages.add(clz.getPackage().getName());
  for (Class<?> clazz : classes) {
    includePackages.add(clazz.getPackage().getName());
  }

  ApplicationBundler bundler = new ApplicationBundler(ImmutableList.of("co.cask.cdap.api", "org.apache.hadoop", "org.apache.hive", "org.apache.spark"), includePackages);
  Location jarLocation = locationFactory.create(clz.getName()).getTempFile(".jar");
  try {
    ClassLoader oldClassLoader = ClassLoaders.setContextClassLoader(clz.getClassLoader());
    try {
      bundler.createBundle(jarLocation, clz, classes);
    } finally {
      ClassLoaders.setContextClassLoader(oldClassLoader);
    }

    Location deployJar = locationFactory.create(clz.getName()).getTempFile(".jar");
    Manifest jarManifest = new Manifest(manifest);
    jarManifest.getMainAttributes().put(Attributes.Name.MANIFEST_VERSION, "1.0");

    // Create the plugin jar for deployment. It removes the "classes/" prefix as that's the convention taken
    // by the ApplicationBundler inside Twill.
    Set<String> seenEntries = new HashSet<>();
    try (JarOutputStream jarOutput = new JarOutputStream(deployJar.getOutputStream(), jarManifest);
         JarInputStream jarInput = new JarInputStream(jarLocation.getInputStream())) {
      for (JarEntry jarEntry = jarInput.getNextJarEntry(); jarEntry != null; jarEntry = jarInput.getNextJarEntry()) {
        boolean isDir = jarEntry.isDirectory();
        String entryName = jarEntry.getName();
        if (entryName.equals("classes/")) {
          // The prefix directory itself has no counterpart in the plugin jar.
          continue;
        }
        if (entryName.startsWith("classes/")) {
          entryName = entryName.substring("classes/".length());
        }
        // The manifest is created programmatically above, so a manifest entry from the bundle would be a duplicate.
        if ("META-INF/MANIFEST.MF".equalsIgnoreCase(entryName)) {
          continue;
        }
        // Stripping the prefix can map two source entries to the same name, and putNextEntry rejects duplicate
        // names; only the first occurrence is written.
        if (seenEntries.add(entryName)) {
          jarOutput.putNextEntry(new JarEntry(entryName));
          if (!isDir) {
            ByteStreams.copy(jarInput, jarOutput);
          }
        }
      }
    }
    return deployJar;
  } finally {
    // The intermediate bundle is only an input to the rewrite above; remove it so temp jars don't pile up.
    try {
      jarLocation.delete();
    } catch (IOException ignored) {
      // Best-effort cleanup: a failure to delete must not mask the result or the original exception.
    }
  }
}
Aggregations