Search in sources :

Example 1 with ApplicationBundler

use of org.apache.twill.internal.ApplicationBundler in project cdap by caskdata.

the class AppJarHelper method createDeploymentJar.

/**
 * Builds a deployable jar for the given class.
 *
 * <p>The class is first bundled by an {@link ApplicationBundler} into an intermediate jar. That bundle is
 * then rewritten into the deployment jar: the {@code classes/} prefix is stripped from entry names, the
 * bundle's own manifest is replaced by a copy of {@code manifest} (with {@code Manifest-Version} and
 * {@code Main-Class} set), duplicate entry names are skipped, and each embedded jar is appended under
 * {@code lib/}. The intermediate bundle is removed before returning.
 *
 * @param locationFactory factory used to create both the intermediate and the final jar locations
 * @param clz class to bundle; its name is written as the {@code Main-Class} manifest attribute
 * @param manifest base manifest; a copy of it is written to the deployment jar
 * @param classAcceptor handed to the {@link ApplicationBundler} used to create the bundle
 * @param bundleEmbeddedJars files to add to the deployment jar as {@code lib/<file name>}
 * @return the location of the deployment jar
 * @throws IOException if bundling or writing the deployment jar fails
 */
public static Location createDeploymentJar(LocationFactory locationFactory, Class<?> clz, Manifest manifest, ClassAcceptor classAcceptor, File... bundleEmbeddedJars) throws IOException {
    // Exclude all classes that are visible from the system to the program classloader.
    ApplicationBundler bundler = new ApplicationBundler(classAcceptor);
    Location jarLocation = locationFactory.create(clz.getName()).getTempFile(".jar");
    try {
        // The bundle is created with the class's own classloader as the thread context classloader.
        ClassLoader oldClassLoader = ClassLoaders.setContextClassLoader(clz.getClassLoader());
        try {
            bundler.createBundle(jarLocation, clz);
        } finally {
            ClassLoaders.setContextClassLoader(oldClassLoader);
        }
        Location deployJar = locationFactory.create(clz.getName()).getTempFile(".jar");
        Manifest jarManifest = new Manifest(manifest);
        jarManifest.getMainAttributes().put(Attributes.Name.MANIFEST_VERSION, "1.0");
        jarManifest.getMainAttributes().put(Attributes.Name.MAIN_CLASS, clz.getName());
        // Create the program jar for deployment. It removes the "classes/" prefix as that's the convention taken
        // by the ApplicationBundler inside Twill.
        Set<String> seenEntries = new HashSet<>();
        try (JarOutputStream jarOutput = new JarOutputStream(deployJar.getOutputStream(), jarManifest);
            JarInputStream jarInput = new JarInputStream(jarLocation.getInputStream())) {
            for (JarEntry sourceEntry = jarInput.getNextJarEntry(); sourceEntry != null; sourceEntry = jarInput.getNextJarEntry()) {
                String entryName = sourceEntry.getName();
                if (entryName.equals("classes/")) {
                    // The prefix directory itself has no counterpart in the deployment jar.
                    continue;
                }
                String targetName = entryName.startsWith("classes/") ? entryName.substring("classes/".length()) : entryName;
                // The manifest is created programmatically above, so a manifest entry coming from the bundle
                // would be a duplicate.
                if ("META-INF/MANIFEST.MF".equalsIgnoreCase(targetName)) {
                    continue;
                }
                // Only the first occurrence of a name is written; putNextEntry rejects duplicate names.
                if (seenEntries.add(targetName)) {
                    jarOutput.putNextEntry(new JarEntry(targetName));
                    if (!sourceEntry.isDirectory()) {
                        ByteStreams.copy(jarInput, jarOutput);
                    }
                }
            }
            for (File embeddedJar : bundleEmbeddedJars) {
                String targetName = "lib/" + embeddedJar.getName();
                if (seenEntries.add(targetName)) {
                    jarOutput.putNextEntry(new JarEntry(targetName));
                    Files.copy(embeddedJar, jarOutput);
                }
            }
        }
        return deployJar;
    } finally {
        // The intermediate bundle is never handed to the caller, so remove it. Cleanup is best-effort:
        // a failed delete must not mask the outcome of the build.
        try {
            jarLocation.delete();
        } catch (IOException ignored) {
            // Intentionally ignored; see above.
        }
    }
}
Also used : JarInputStream(java.util.jar.JarInputStream) JarOutputStream(java.util.jar.JarOutputStream) Manifest(java.util.jar.Manifest) JarEntry(java.util.jar.JarEntry) ApplicationBundler(org.apache.twill.internal.ApplicationBundler) File(java.io.File) Location(org.apache.twill.filesystem.Location) HashSet(java.util.HashSet)

Example 2 with ApplicationBundler

use of org.apache.twill.internal.ApplicationBundler in project cdap by caskdata.

the class RemoteDatasetFramework method createDeploymentJar.

/**
 * Builds a deployment jar for the given class in the local temp directory.
 *
 * <p>The class is bundled by an {@link ApplicationBundler} into a temporary jar, which is then rewritten
 * into a second jar with the {@code classes/} prefix stripped from entry names and duplicate entry names
 * skipped. The temporary bundle is always deleted; the destination jar is deleted as well if rewriting
 * fails, so no partially written file is left behind.
 *
 * @param clz class to bundle
 * @return the location of the deployment jar
 * @throws IOException if a temp file cannot be created or the jar cannot be written
 */
private Location createDeploymentJar(Class<?> clz) throws IOException {
    File tempDir = new File(cConf.get(Constants.CFG_LOCAL_DATA_DIR), cConf.get(Constants.AppFabric.TEMP_DIR)).getAbsoluteFile();
    // The result is not checked here: createTempFile below throws an IOException if the directory is unusable.
    tempDir.mkdirs();
    File tempFile = File.createTempFile(clz.getName(), ".jar", tempDir);
    try {
        // Create a bundle jar in a temp location
        ClassLoader remembered = ClassLoaders.setContextClassLoader(clz.getClassLoader());
        try {
            // NOTE(review): the package list is assumed to be the bundler's exclusion list - confirm against
            // the ApplicationBundler constructor.
            ApplicationBundler bundler = new ApplicationBundler(ImmutableList.of("co.cask.cdap.api", "org.apache.hadoop", "org.apache.hbase", "org.apache.hive"));
            bundler.createBundle(Locations.toLocation(tempFile), clz);
        } finally {
            ClassLoaders.setContextClassLoader(remembered);
        }
        // Create the program jar for deployment. It removes the "classes/" prefix as that's the convention taken
        // by the ApplicationBundler inside Twill.
        File destination = File.createTempFile(clz.getName(), ".jar", tempDir);
        boolean completed = false;
        try {
            try (JarOutputStream jarOutput = new JarOutputStream(new FileOutputStream(destination));
                JarInputStream jarInput = new JarInputStream(new FileInputStream(tempFile))) {
                Set<String> seen = Sets.newHashSet();
                for (JarEntry sourceEntry = jarInput.getNextJarEntry(); sourceEntry != null; sourceEntry = jarInput.getNextJarEntry()) {
                    String entryName = sourceEntry.getName();
                    if (entryName.equals("classes/")) {
                        // The prefix directory itself has no counterpart in the deployment jar.
                        continue;
                    }
                    String targetName = entryName.startsWith("classes/") ? entryName.substring("classes/".length()) : entryName;
                    // Only the first occurrence of a name is written; putNextEntry rejects duplicate names.
                    if (seen.add(targetName)) {
                        jarOutput.putNextEntry(new JarEntry(targetName));
                        if (!sourceEntry.isDirectory()) {
                            ByteStreams.copy(jarInput, jarOutput);
                        }
                    }
                }
            }
            Location result = Locations.toLocation(destination);
            completed = true;
            return result;
        } finally {
            // Do not leave a truncated jar in the temp directory when repackaging failed.
            if (!completed) {
                destination.delete();
            }
        }
    } finally {
        tempFile.delete();
    }
}
Also used : JarInputStream(java.util.jar.JarInputStream) FileOutputStream(java.io.FileOutputStream) JarOutputStream(java.util.jar.JarOutputStream) JarEntry(java.util.jar.JarEntry) File(java.io.File) ApplicationBundler(org.apache.twill.internal.ApplicationBundler) FileInputStream(java.io.FileInputStream)

Example 3 with ApplicationBundler

use of org.apache.twill.internal.ApplicationBundler in project cdap by caskdata.

the class MapReduceRuntimeService method buildJobJar.

/**
 * Creates a jar that contains everything that is needed for running the MapReduce program by Hadoop.
 *
 * <p>In local mode an empty jar is written. Otherwise an {@link ApplicationBundler} bundles a fixed set of
 * runtime classes plus, depending on configuration and availability, the job's input/output format
 * classes, the KMS secure store class and the HBase table util class.
 *
 * @param job the job whose configuration and input/output formats are inspected
 * @param tempDir directory in which {@code job.jar} is created
 * @return a new {@link File} containing the job jar
 * @throws IOException if the jar cannot be written
 * @throws URISyntaxException declared by the signature; nothing in this method body throws it directly
 */
private File buildJobJar(Job job, File tempDir) throws IOException, URISyntaxException {
    File jobJar = new File(tempDir, "job.jar");
    LOG.debug("Creating Job jar: {}", jobJar);
    // For local mode, nothing is needed in the job jar since we use the classloader in the configuration object.
    if (MapReduceTaskContextProvider.isLocal(job.getConfiguration())) {
        // Opening and closing the stream writes an empty jar; try-with-resources closes it on every path.
        try (JarOutputStream output = new JarOutputStream(new FileOutputStream(jobJar))) {
            return jobJar;
        }
    }
    // Excludes libraries that are for sure not needed.
    // Hadoop - Available from the cluster
    // Spark - MR never uses Spark
    final HadoopClassExcluder hadoopClassExcluder = new HadoopClassExcluder();
    ApplicationBundler appBundler = new ApplicationBundler(new ClassAcceptor() {

        @Override
        public boolean accept(String className, URL classUrl, URL classPathUrl) {
            if (className.startsWith("org.apache.spark") || classPathUrl.toString().contains("spark-assembly")) {
                return false;
            }
            return hadoopClassExcluder.accept(className, classUrl, classPathUrl);
        }
    });
    Set<Class<?>> classes = Sets.newHashSet();
    classes.add(MapReduce.class);
    classes.add(MapperWrapper.class);
    classes.add(ReducerWrapper.class);
    classes.add(SLF4JBridgeHandler.class);
    // When enabled by configuration, also bundle the job's InputFormat and OutputFormat classes.
    if (cConf.getBoolean(Constants.AppFabric.MAPREDUCE_INCLUDE_CUSTOM_CLASSES)) {
        // Failure to resolve either format class is deliberately non-fatal: it is logged at debug level and
        // the class is simply left out of the jar.
        // NOTE(review): Throwable (rather than Exception) is presumably caught to also cover linkage errors
        // such as NoClassDefFoundError - confirm before narrowing.
        try {
            Class<? extends InputFormat<?, ?>> inputFormatClass = job.getInputFormatClass();
            classes.add(inputFormatClass);
            // If it is StreamInputFormat, also add the StreamEventCodec class as well.
            if (MapReduceStreamInputFormat.class.isAssignableFrom(inputFormatClass)) {
                Class<? extends StreamEventDecoder> decoderType = MapReduceStreamInputFormat.getDecoderClass(job.getConfiguration());
                if (decoderType != null) {
                    classes.add(decoderType);
                }
            }
        } catch (Throwable t) {
            LOG.debug("InputFormat class not found: {}", t.getMessage(), t);
        }
        try {
            Class<? extends OutputFormat<?, ?>> outputFormatClass = job.getOutputFormatClass();
            classes.add(outputFormatClass);
        } catch (Throwable t) {
            LOG.debug("OutputFormat class not found: {}", t.getMessage(), t);
        }
    }
    // Add KMS class
    if (SecureStoreUtils.isKMSBacked(cConf) && SecureStoreUtils.isKMSCapable()) {
        classes.add(SecureStoreUtils.getKMSSecureStore());
    }
    try {
        Class<?> hbaseTableUtilClass = HBaseTableUtilFactory.getHBaseTableUtilClass(cConf);
        classes.add(hbaseTableUtilClass);
    } catch (ProvisionException e) {
        LOG.warn("Not including HBaseTableUtil classes in submitted Job Jar since they are not available");
    }
    // The bundle is created with this class's classloader as the thread context classloader; the previous
    // one is restored afterwards.
    ClassLoader oldClassLoader = ClassLoaders.setContextClassLoader(getClass().getClassLoader());
    try {
        appBundler.createBundle(Locations.toLocation(jobJar), classes);
    } finally {
        ClassLoaders.setContextClassLoader(oldClassLoader);
    }
    LOG.debug("Built MapReduce Job Jar at {}", jobJar.toURI());
    return jobJar;
}
Also used : HadoopClassExcluder(co.cask.cdap.common.twill.HadoopClassExcluder) JarOutputStream(java.util.jar.JarOutputStream) ClassAcceptor(org.apache.twill.api.ClassAcceptor) URL(java.net.URL) ProvisionException(com.google.inject.ProvisionException) FileOutputStream(java.io.FileOutputStream) File(java.io.File) JarFile(java.util.jar.JarFile) ApplicationBundler(org.apache.twill.internal.ApplicationBundler)

Example 4 with ApplicationBundler

use of org.apache.twill.internal.ApplicationBundler in project cdap by caskdata.

the class ClassLoaderTest method testExtraClassPath.

/**
 * Checks that a {@link DirectoryClassLoader} built on one unpacked bundle can also load classes that are
 * only reachable through its extra classpath.
 */
@Test
public void testExtraClassPath() throws IOException, ClassNotFoundException {
    File workDir = TMP_FOLDER.newFolder();
    ApplicationBundler appBundler = new ApplicationBundler(new ClassAcceptor());

    // Bundle guava and gson into two separate jars.
    Location guavaBundle = Locations.toLocation(new File(workDir, "guava.jar"));
    appBundler.createBundle(guavaBundle, ImmutableList.class);
    Location gsonBundle = Locations.toLocation(new File(workDir, "gson.jar"));
    appBundler.createBundle(gsonBundle, Gson.class);

    // Expand each bundle into its own fresh folder.
    File guavaUnpacked = BundleJarUtil.unJar(guavaBundle, TMP_FOLDER.newFolder());
    File gsonUnpacked = BundleJarUtil.unJar(gsonBundle, TMP_FOLDER.newFolder());

    // The guava folder is the main directory; the gson folder and its lib/* go on the extra classpath.
    String gsonPath = gsonUnpacked.getAbsolutePath();
    StringBuilder extraClassPath = new StringBuilder(gsonPath);
    extraClassPath.append(File.pathSeparatorChar).append(gsonPath).append("/lib/*");
    ClassLoader loader = new DirectoryClassLoader(guavaUnpacked, extraClassPath.toString(), null, Arrays.asList("lib"));

    // Both classes must resolve; loadClass throws ClassNotFoundException otherwise.
    for (Class<?> expected : new Class<?>[] {ImmutableList.class, Gson.class}) {
        loader.loadClass(expected.getName());
    }
}
Also used : ImmutableList(com.google.common.collect.ImmutableList) Gson(com.google.gson.Gson) ClassAcceptor(org.apache.twill.api.ClassAcceptor) File(java.io.File) ApplicationBundler(org.apache.twill.internal.ApplicationBundler) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Example 5 with ApplicationBundler

use of org.apache.twill.internal.ApplicationBundler in project cdap by caskdata.

the class PluginJarHelper method createPluginJar.

/**
 * Builds a plugin jar containing the given classes.
 *
 * <p>The classes are first bundled by an {@link ApplicationBundler} into an intermediate jar. That bundle
 * is then rewritten into the plugin jar: the {@code classes/} prefix is stripped from entry names, the
 * bundle's own manifest is replaced by a copy of {@code manifest} (with {@code Manifest-Version} set), and
 * duplicate entry names are skipped. The intermediate bundle is removed before returning.
 *
 * @param locationFactory factory used to create both the intermediate and the final jar locations
 * @param manifest base manifest; a copy of it is written to the plugin jar
 * @param clz first class to bundle; its name is also used to name the jar locations
 * @param classes additional classes to bundle
 * @return the location of the plugin jar
 * @throws IOException if bundling or writing the plugin jar fails
 */
public static Location createPluginJar(LocationFactory locationFactory, Manifest manifest, Class<?> clz, Class<?>... classes) throws IOException {
    // include all packages from the given plugin classes
    // for example, a plugin may use the org.apache.spark.streaming.kafka.KafkaUtils class,
    // which would otherwise get filtered out by the org.apache.spark package filter.
    Set<String> includePackages = new HashSet<>();
    includePackages.add("org.apache.hadoop.hbase");
    includePackages.add(clz.getPackage().getName());
    for (Class<?> clazz : classes) {
        includePackages.add(clazz.getPackage().getName());
    }
    ApplicationBundler bundler = new ApplicationBundler(ImmutableList.of("co.cask.cdap.api", "org.apache.hadoop", "org.apache.hive", "org.apache.spark"), includePackages);
    Location jarLocation = locationFactory.create(clz.getName()).getTempFile(".jar");
    try {
        // The bundle is created with the class's own classloader as the thread context classloader.
        ClassLoader oldClassLoader = ClassLoaders.setContextClassLoader(clz.getClassLoader());
        try {
            bundler.createBundle(jarLocation, clz, classes);
        } finally {
            ClassLoaders.setContextClassLoader(oldClassLoader);
        }
        Location deployJar = locationFactory.create(clz.getName()).getTempFile(".jar");
        Manifest jarManifest = new Manifest(manifest);
        jarManifest.getMainAttributes().put(Attributes.Name.MANIFEST_VERSION, "1.0");
        // Create the plugin jar for deployment. It removes the "classes/" prefix as that's the convention taken
        // by the ApplicationBundler inside Twill.
        Set<String> seenEntries = new HashSet<>();
        try (JarOutputStream jarOutput = new JarOutputStream(deployJar.getOutputStream(), jarManifest);
            JarInputStream jarInput = new JarInputStream(jarLocation.getInputStream())) {
            for (JarEntry sourceEntry = jarInput.getNextJarEntry(); sourceEntry != null; sourceEntry = jarInput.getNextJarEntry()) {
                String entryName = sourceEntry.getName();
                if (entryName.equals("classes/")) {
                    // The prefix directory itself has no counterpart in the plugin jar.
                    continue;
                }
                String targetName = entryName.startsWith("classes/") ? entryName.substring("classes/".length()) : entryName;
                // The manifest is created programmatically above, so a manifest entry coming from the bundle
                // would be a duplicate.
                if ("META-INF/MANIFEST.MF".equalsIgnoreCase(targetName)) {
                    continue;
                }
                // Stripping the prefix can map two source entries to the same name; putNextEntry rejects
                // duplicate names with a ZipException, so only the first occurrence is written.
                if (seenEntries.add(targetName)) {
                    jarOutput.putNextEntry(new JarEntry(targetName));
                    if (!sourceEntry.isDirectory()) {
                        ByteStreams.copy(jarInput, jarOutput);
                    }
                }
            }
        }
        return deployJar;
    } finally {
        // The intermediate bundle is never handed to the caller, so remove it. Cleanup is best-effort:
        // a failed delete must not mask the outcome of the build.
        try {
            jarLocation.delete();
        } catch (IOException ignored) {
            // Intentionally ignored; see above.
        }
    }
}
Also used : JarInputStream(java.util.jar.JarInputStream) JarOutputStream(java.util.jar.JarOutputStream) Manifest(java.util.jar.Manifest) JarEntry(java.util.jar.JarEntry) ApplicationBundler(org.apache.twill.internal.ApplicationBundler) HashSet(java.util.HashSet) Location(org.apache.twill.filesystem.Location)

Aggregations

ApplicationBundler (org.apache.twill.internal.ApplicationBundler)5 File (java.io.File)4 JarOutputStream (java.util.jar.JarOutputStream)4 JarEntry (java.util.jar.JarEntry)3 JarInputStream (java.util.jar.JarInputStream)3 Location (org.apache.twill.filesystem.Location)3 FileOutputStream (java.io.FileOutputStream)2 HashSet (java.util.HashSet)2 Manifest (java.util.jar.Manifest)2 ClassAcceptor (org.apache.twill.api.ClassAcceptor)2 HadoopClassExcluder (co.cask.cdap.common.twill.HadoopClassExcluder)1 ImmutableList (com.google.common.collect.ImmutableList)1 Gson (com.google.gson.Gson)1 ProvisionException (com.google.inject.ProvisionException)1 FileInputStream (java.io.FileInputStream)1 URL (java.net.URL)1 JarFile (java.util.jar.JarFile)1 Test (org.junit.Test)1