Example 1 with ClassAcceptor

Use of org.apache.twill.api.ClassAcceptor in project cdap by caskdata.

The class MapReduceRuntimeService, method buildJobJar.

/**
   * Creates a jar that contains everything needed by Hadoop to run the MapReduce program.
   *
   * @return a new {@link File} containing the job jar
   */
private File buildJobJar(Job job, File tempDir) throws IOException, URISyntaxException {
    File jobJar = new File(tempDir, "job.jar");
    LOG.debug("Creating Job jar: {}", jobJar);
    // For local mode, nothing is needed in the job jar since we use the classloader in the configuration object.
    if (MapReduceTaskContextProvider.isLocal(job.getConfiguration())) {
        try (JarOutputStream output = new JarOutputStream(new FileOutputStream(jobJar))) {
            // Write nothing: an empty job jar is sufficient in local mode.
        }
        return jobJar;
    }
    // Exclude libraries that are definitely not needed:
    // - Hadoop: available from the cluster
    // - Spark: MapReduce never uses Spark
    final HadoopClassExcluder hadoopClassExcluder = new HadoopClassExcluder();
    ApplicationBundler appBundler = new ApplicationBundler(new ClassAcceptor() {

        @Override
        public boolean accept(String className, URL classUrl, URL classPathUrl) {
            if (className.startsWith("org.apache.spark") || classPathUrl.toString().contains("spark-assembly")) {
                return false;
            }
            return hadoopClassExcluder.accept(className, classUrl, classPathUrl);
        }
    });
    Set<Class<?>> classes = Sets.newHashSet();
    classes.add(MapReduce.class);
    classes.add(MapperWrapper.class);
    classes.add(ReducerWrapper.class);
    classes.add(SLF4JBridgeHandler.class);
    // Optionally include the job's custom InputFormat/OutputFormat classes, since the MR framework
    // instantiates them directly before CDAP's wrappers take over the classloading.
    if (cConf.getBoolean(Constants.AppFabric.MAPREDUCE_INCLUDE_CUSTOM_CLASSES)) {
        try {
            Class<? extends InputFormat<?, ?>> inputFormatClass = job.getInputFormatClass();
            classes.add(inputFormatClass);
            // If it is a StreamInputFormat, also add the StreamEventDecoder class.
            if (MapReduceStreamInputFormat.class.isAssignableFrom(inputFormatClass)) {
                Class<? extends StreamEventDecoder> decoderType = MapReduceStreamInputFormat.getDecoderClass(job.getConfiguration());
                if (decoderType != null) {
                    classes.add(decoderType);
                }
            }
        } catch (Throwable t) {
            // Ignore
            LOG.debug("InputFormat class not found: {}", t.getMessage(), t);
        }
        try {
            Class<? extends OutputFormat<?, ?>> outputFormatClass = job.getOutputFormatClass();
            classes.add(outputFormatClass);
        } catch (Throwable t) {
            // Ignore
            LOG.debug("OutputFormat class not found: {}", t.getMessage(), t);
        }
    }
    // Add KMS class
    if (SecureStoreUtils.isKMSBacked(cConf) && SecureStoreUtils.isKMSCapable()) {
        classes.add(SecureStoreUtils.getKMSSecureStore());
    }
    Class<? extends HBaseDDLExecutor> ddlExecutorClass = new HBaseDDLExecutorFactory(cConf, hConf).get().getClass();
    try {
        Class<?> hbaseTableUtilClass = HBaseTableUtilFactory.getHBaseTableUtilClass(cConf);
        classes.add(hbaseTableUtilClass);
        classes.add(ddlExecutorClass);
    } catch (ProvisionException e) {
        LOG.warn("Not including HBaseTableUtil classes in submitted Job Jar since they are not available");
    }
    ClassLoader oldClassLoader = ClassLoaders.setContextClassLoader(new CombineClassLoader(getClass().getClassLoader(), Collections.singleton(ddlExecutorClass.getClassLoader())));
    try {
        appBundler.createBundle(Locations.toLocation(jobJar), classes);
    } finally {
        ClassLoaders.setContextClassLoader(oldClassLoader);
    }
    LOG.debug("Built MapReduce Job Jar at {}", jobJar.toURI());
    return jobJar;
}
Also used: HadoopClassExcluder (co.cask.cdap.common.twill.HadoopClassExcluder), JarOutputStream (java.util.jar.JarOutputStream), ClassAcceptor (org.apache.twill.api.ClassAcceptor), URL (java.net.URL), CombineClassLoader (co.cask.cdap.common.lang.CombineClassLoader), ProvisionException (com.google.inject.ProvisionException), FileOutputStream (java.io.FileOutputStream), HBaseDDLExecutorFactory (co.cask.cdap.data2.util.hbase.HBaseDDLExecutorFactory), WeakReferenceDelegatorClassLoader (co.cask.cdap.common.lang.WeakReferenceDelegatorClassLoader), File (java.io.File), JarFile (java.util.jar.JarFile), ApplicationBundler (org.apache.twill.internal.ApplicationBundler)
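
As background for this and the following examples: ApplicationBundler traces the dependencies of the classes it is given and packages every accepted class into the jar, and returning false from ClassAcceptor.accept excludes a class and prunes traversal through it. A minimal standalone sketch (the com.example.provided package prefix is a hypothetical placeholder, and Gson is used only as a conveniently available root class):

import java.io.File;
import java.net.URL;
import org.apache.twill.api.ClassAcceptor;
import org.apache.twill.filesystem.LocalLocationFactory;
import org.apache.twill.filesystem.Location;
import org.apache.twill.internal.ApplicationBundler;

public class BundleSketch {
    public static void main(String[] args) throws Exception {
        ApplicationBundler bundler = new ApplicationBundler(new ClassAcceptor() {
            @Override
            public boolean accept(String className, URL classUrl, URL classPathUrl) {
                // Returning false excludes the class and stops traversal through its
                // dependencies; "com.example.provided" is a hypothetical package that
                // the target environment is assumed to provide.
                return !className.startsWith("com.example.provided.");
            }
        });
        // Bundle Gson and everything it transitively needs into ./bundle.jar
        Location jar = new LocalLocationFactory(new File(".")).create("bundle.jar");
        bundler.createBundle(jar, com.google.gson.Gson.class);
    }
}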

Example 2 with ClassAcceptor

Use of org.apache.twill.api.ClassAcceptor in project cdap by caskdata.

The class DistributedWorkflowProgramRunner, method setupLaunchConfig.

@Override
protected void setupLaunchConfig(LaunchConfig launchConfig, Program program, ProgramOptions options, CConfiguration cConf, Configuration hConf, File tempDir) throws IOException {
    WorkflowSpecification spec = program.getApplicationSpecification().getWorkflows().get(program.getName());
    List<ClassAcceptor> acceptors = new ArrayList<>();
    // Only interested in MapReduce and Spark nodes
    Set<SchedulableProgramType> runnerTypes = EnumSet.of(SchedulableProgramType.MAPREDUCE, SchedulableProgramType.SPARK);
    for (WorkflowActionNode node : Iterables.filter(spec.getNodeIdMap().values(), WorkflowActionNode.class)) {
        // For each type, we only need one node to set up the launch context
        ScheduleProgramInfo programInfo = node.getProgram();
        if (!runnerTypes.remove(programInfo.getProgramType())) {
            continue;
        }
        // Find the ProgramRunner of the given type and setup the launch context
        ProgramType programType = ProgramType.valueOfSchedulableType(programInfo.getProgramType());
        ProgramRunner runner = programRunnerFactory.create(programType);
        try {
            if (runner instanceof DistributedProgramRunner) {
                // Call setupLaunchConfig with the corresponding program
                ProgramId programId = program.getId().getParent().program(programType, programInfo.getProgramName());
                ((DistributedProgramRunner) runner).setupLaunchConfig(launchConfig, Programs.create(cConf, program, programId, runner), options, cConf, hConf, tempDir);
                acceptors.add(launchConfig.getClassAcceptor());
            }
        } finally {
            if (runner instanceof Closeable) {
                Closeables.closeQuietly((Closeable) runner);
            }
        }
    }
    // Set the class acceptor
    launchConfig.setClassAcceptor(new AndClassAcceptor(acceptors));
    // Clear and set the runnable for the workflow driver
    launchConfig.clearRunnables();
    Resources resources = findDriverResources(program.getApplicationSpecification().getSpark(), program.getApplicationSpecification().getMapReduce(), spec);
    resources = SystemArguments.getResources(options.getUserArguments(), resources);
    launchConfig.addRunnable(spec.getName(), new WorkflowTwillRunnable(spec.getName()), resources, 1, 0);
}
Also used: WorkflowActionNode (co.cask.cdap.api.workflow.WorkflowActionNode), Closeable (java.io.Closeable), ArrayList (java.util.ArrayList), ClassAcceptor (org.apache.twill.api.ClassAcceptor), ProgramId (co.cask.cdap.proto.id.ProgramId), WorkflowSpecification (co.cask.cdap.api.workflow.WorkflowSpecification), SchedulableProgramType (co.cask.cdap.api.schedule.SchedulableProgramType), ProgramType (co.cask.cdap.proto.ProgramType), Resources (co.cask.cdap.api.Resources), ScheduleProgramInfo (co.cask.cdap.api.workflow.ScheduleProgramInfo), ProgramRunner (co.cask.cdap.app.runtime.ProgramRunner)
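
Note that AndClassAcceptor is a CDAP helper rather than part of the Twill API: it accepts a class only when every collected acceptor does, so the workflow bundle honors the exclusion rules of all of its inner MapReduce and Spark programs. A sketch of such a composite (an assumption about its shape, not the actual CDAP source) might look like:

import java.net.URL;
import java.util.List;
import org.apache.twill.api.ClassAcceptor;

// Composite acceptor: a class is bundled only if every delegate agrees.
// Sketch only; the real co.cask.cdap class may differ in details.
public class AndClassAcceptor extends ClassAcceptor {
    private final List<ClassAcceptor> acceptors;

    public AndClassAcceptor(List<ClassAcceptor> acceptors) {
        this.acceptors = acceptors;
    }

    @Override
    public boolean accept(String className, URL classUrl, URL classPathUrl) {
        for (ClassAcceptor acceptor : acceptors) {
            if (!acceptor.accept(className, classUrl, classPathUrl)) {
                return false;
            }
        }
        return true;
    }
}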

Example 3 with ClassAcceptor

Use of org.apache.twill.api.ClassAcceptor in project cdap by caskdata.

The class ClassPathResources, method findClassDependencies.

/**
   * Finds all resource names that the given set of classes depends on.
   *
   * @param classLoader class loader for looking up .class resources
   * @param classes set of class names from which to trace dependencies
   * @param result collection to store the resulting resource names
   * @param <T> type of the result collection
   * @throws IOException if it fails to load class bytecode during tracing
   */
private static <T extends Collection<String>> T findClassDependencies(final ClassLoader classLoader, Iterable<String> classes, final T result) throws IOException {
    final Set<String> bootstrapClassPaths = getBootstrapClassPaths();
    final Set<URL> classPathSeen = Sets.newHashSet();
    Dependencies.findClassDependencies(classLoader, new ClassAcceptor() {

        @Override
        public boolean accept(String className, URL classUrl, URL classPathUrl) {
            // Ignore bootstrap classes
            if (bootstrapClassPaths.contains(classPathUrl.getFile())) {
                return false;
            }
            // Ignore SLF4J implementation classes so that the logging binding remains the one
            // visible through the program classloader.
            if (className.startsWith("org.slf4j.impl.")) {
                return false;
            }
            if (!classPathSeen.add(classPathUrl)) {
                return true;
            }
            // Add all resources in the given class path
            try {
                ClassPath classPath = ClassPath.from(classPathUrl.toURI(), classLoader);
                for (ClassPath.ResourceInfo resourceInfo : classPath.getResources()) {
                    result.add(resourceInfo.getResourceName());
                }
            } catch (Exception e) {
                // If we fail to get classes/resources from this classpath, ignore it.
            }
            return true;
        }
    }, classes);
    return result;
}
Also used: ClassPath (co.cask.cdap.common.internal.guava.ClassPath), ResourceInfo (co.cask.cdap.common.internal.guava.ClassPath.ResourceInfo), ClassAcceptor (org.apache.twill.api.ClassAcceptor), URL (java.net.URL), MalformedURLException (java.net.MalformedURLException), URISyntaxException (java.net.URISyntaxException), IOException (java.io.IOException)
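
Here Dependencies.findClassDependencies (Twill's internal dependency tracer, assumed to be org.apache.twill.internal.utils.Dependencies as in the snippets on this page) loads each class's bytecode through the given classloader, invokes the acceptor, and keeps recursing while the acceptor returns true. A minimal sketch that merely collects the names of non-JDK classes reachable from Gson:

import java.net.URL;
import java.util.HashSet;
import java.util.Set;
import org.apache.twill.api.ClassAcceptor;
import org.apache.twill.internal.utils.Dependencies;

public class TraceSketch {
    public static void main(String[] args) throws Exception {
        final Set<String> classNames = new HashSet<>();
        Dependencies.findClassDependencies(
            TraceSketch.class.getClassLoader(),
            new ClassAcceptor() {
                @Override
                public boolean accept(String className, URL classUrl, URL classPathUrl) {
                    // Skip JDK classes; record everything else and keep walking.
                    if (className.startsWith("java.") || className.startsWith("javax.")) {
                        return false;
                    }
                    classNames.add(className);
                    return true;
                }
            },
            "com.google.gson.Gson");
        System.out.println("Traced " + classNames.size() + " classes");
    }
}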

Example 4 with ClassAcceptor

Use of org.apache.twill.api.ClassAcceptor in project cdap by caskdata.

The class CoprocessorManager, method ensureCoprocessorExists.

/**
   * Gets the location of the coprocessor jar and ensures it exists, optionally overwriting it.
   * In distributed mode, the coprocessor jar is loaded onto HDFS by the CoprocessorBuildTool,
   * but in other modes it is still useful to create the jar on demand.
   *
   * @param overwrite whether to overwrite the coprocessor if it already exists
   * @return the location of the coprocessor jar, or null if there are no dependent classes to bundle
   * @throws IOException if there was an issue accessing the location
   */
public synchronized Location ensureCoprocessorExists(boolean overwrite) throws IOException {
    final Location targetPath = jarDir.append(getCoprocessorName());
    if (!overwrite && targetPath.exists()) {
        return targetPath;
    }
    // ensure the jar directory exists
    Locations.mkdirsIfNotExists(jarDir);
    StringBuilder buf = new StringBuilder();
    for (Class<? extends Coprocessor> c : coprocessors) {
        if (buf.length() > 0) {
            buf.append(", ");
        }
        buf.append(c.getName());
    }
    LOG.debug("Creating jar file for coprocessor classes: {}", buf);
    final Map<String, URL> dependentClasses = new HashMap<>();
    for (Class<? extends Coprocessor> clz : coprocessors) {
        Dependencies.findClassDependencies(clz.getClassLoader(), new ClassAcceptor() {

            @Override
            public boolean accept(String className, final URL classUrl, URL classPathUrl) {
                // Bundle only CDAP, fastutil, Tephra, and gson classes; everything else is
                // assumed to come with HBase or Java on the region servers.
                if (className.startsWith("co.cask") || className.startsWith("it.unimi.dsi.fastutil") || className.startsWith("org.apache.tephra") || className.startsWith("com.google.gson")) {
                    if (!dependentClasses.containsKey(className)) {
                        dependentClasses.put(className, classUrl);
                    }
                    return true;
                }
                return false;
            }
        }, clz.getName());
    }
    if (dependentClasses.isEmpty()) {
        return null;
    }
    // create the coprocessor jar on local filesystem
    LOG.debug("Adding " + dependentClasses.size() + " classes to jar");
    File jarFile = File.createTempFile("coprocessor", ".jar");
    byte[] buffer = new byte[4 * 1024];
    try (JarOutputStream jarOutput = new JarOutputStream(new FileOutputStream(jarFile))) {
        for (Map.Entry<String, URL> entry : dependentClasses.entrySet()) {
            // Jar entry names always use '/' as the separator, never File.separatorChar.
            jarOutput.putNextEntry(new JarEntry(entry.getKey().replace('.', '/') + ".class"));
            try (InputStream inputStream = entry.getValue().openStream()) {
                int len = inputStream.read(buffer);
                while (len >= 0) {
                    jarOutput.write(buffer, 0, len);
                    len = inputStream.read(buffer);
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Unable to create temporary local coprocessor jar {}.", jarFile.getAbsolutePath(), e);
        if (!jarFile.delete()) {
            LOG.warn("Unable to clean up temporary local coprocessor jar {}.", jarFile.getAbsolutePath());
        }
        throw e;
    }
    // Copy the local jar file to the filesystem (HDFS).
    // Copy to a tmp location first, then rename to the target location, in case multiple
    // CoprocessorManagers were called at the same time. This should never happen in distributed
    // mode, as coprocessors should all be loaded beforehand using the CoprocessorBuildTool.
    final Location tmpLocation = jarDir.getTempFile(".jar");
    try {
        // Copy jar file into filesystem (HDFS)
        Files.copy(jarFile, new OutputSupplier<OutputStream>() {

            @Override
            public OutputStream getOutput() throws IOException {
                return tmpLocation.getOutputStream();
            }
        });
    } catch (IOException e) {
        LOG.error("Unable to copy local coprocessor jar to filesystem at {}.", tmpLocation, e);
        if (tmpLocation.exists()) {
            LOG.info("Deleting partially copied coprocessor jar at {}.", tmpLocation);
            try {
                if (!tmpLocation.delete()) {
                    LOG.error("Unable to delete partially copied coprocessor jar at {}.", tmpLocation, e);
                }
            } catch (IOException e1) {
                LOG.error("Unable to delete partially copied coprocessor jar at {}.", tmpLocation, e1);
                e.addSuppressed(e1);
            }
        }
        throw e;
    } finally {
        if (!jarFile.delete()) {
            LOG.warn("Unable to clean up temporary local coprocessor jar {}.", jarFile.getAbsolutePath());
        }
    }
    tmpLocation.renameTo(targetPath);
    return targetPath;
}
Also used: HashMap (java.util.HashMap), InputStream (java.io.InputStream), JarOutputStream (java.util.jar.JarOutputStream), OutputStream (java.io.OutputStream), FileOutputStream (java.io.FileOutputStream), ClassAcceptor (org.apache.twill.api.ClassAcceptor), IOException (java.io.IOException), JarEntry (java.util.jar.JarEntry), URL (java.net.URL), File (java.io.File), Map (java.util.Map), Location (org.apache.twill.filesystem.Location)
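
As a side note on the entry-writing loop above: Guava's ByteStreams.copy can replace the manual buffer (Guava's Files is already used for the HDFS copy), and jar entry names must use '/' regardless of platform. A sketch of the same step under those assumptions, with the dependentClasses map as input:

import com.google.common.io.ByteStreams;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.Map;
import java.util.jar.JarEntry;
import java.util.jar.JarOutputStream;

public final class JarWriteSketch {
    // Write each class's bytecode into the jar; entry names always use '/'.
    static void writeClasses(Map<String, URL> dependentClasses, File jarFile) throws IOException {
        try (JarOutputStream jarOutput = new JarOutputStream(new FileOutputStream(jarFile))) {
            for (Map.Entry<String, URL> entry : dependentClasses.entrySet()) {
                jarOutput.putNextEntry(new JarEntry(entry.getKey().replace('.', '/') + ".class"));
                try (InputStream in = entry.getValue().openStream()) {
                    ByteStreams.copy(in, jarOutput);
                }
                jarOutput.closeEntry();
            }
        }
    }
}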

Example 5 with ClassAcceptor

Use of org.apache.twill.api.ClassAcceptor in project cdap by caskdata.

The class ClassLoaderTest, method testExtraClassPath.

@Test
public void testExtraClassPath() throws IOException, ClassNotFoundException {
    File tmpDir = TMP_FOLDER.newFolder();
    // Create two jars, one with guava, one with gson
    ApplicationBundler bundler = new ApplicationBundler(new ClassAcceptor());
    Location guavaJar = Locations.toLocation(new File(tmpDir, "guava.jar"));
    bundler.createBundle(guavaJar, ImmutableList.class);
    Location gsonJar = Locations.toLocation(new File(tmpDir, "gson.jar"));
    bundler.createBundle(gsonJar, Gson.class);
    // Unpack them
    File guavaDir = BundleJarUtil.unJar(guavaJar, TMP_FOLDER.newFolder());
    File gsonDir = BundleJarUtil.unJar(gsonJar, TMP_FOLDER.newFolder());
    // Create a DirectoryClassLoader using guava dir as the main directory, with the gson dir in the extra classpath
    String extraClassPath = gsonDir.getAbsolutePath() + File.pathSeparatorChar + gsonDir.getAbsolutePath() + "/lib/*";
    ClassLoader cl = new DirectoryClassLoader(guavaDir, extraClassPath, null, Arrays.asList("lib"));
    // Should be able to load both guava and gson class from the class loader
    cl.loadClass(ImmutableList.class.getName());
    cl.loadClass(Gson.class.getName());
}
Also used: ImmutableList (com.google.common.collect.ImmutableList), Gson (com.google.gson.Gson), ClassAcceptor (org.apache.twill.api.ClassAcceptor), File (java.io.File), ApplicationBundler (org.apache.twill.internal.ApplicationBundler), Location (org.apache.twill.filesystem.Location), Test (org.junit.Test)

Aggregations

ClassAcceptor (org.apache.twill.api.ClassAcceptor): 5
File (java.io.File): 3
URL (java.net.URL): 3
FileOutputStream (java.io.FileOutputStream): 2
IOException (java.io.IOException): 2
JarOutputStream (java.util.jar.JarOutputStream): 2
Location (org.apache.twill.filesystem.Location): 2
ApplicationBundler (org.apache.twill.internal.ApplicationBundler): 2
Resources (co.cask.cdap.api.Resources): 1
SchedulableProgramType (co.cask.cdap.api.schedule.SchedulableProgramType): 1
ScheduleProgramInfo (co.cask.cdap.api.workflow.ScheduleProgramInfo): 1
WorkflowActionNode (co.cask.cdap.api.workflow.WorkflowActionNode): 1
WorkflowSpecification (co.cask.cdap.api.workflow.WorkflowSpecification): 1
ProgramRunner (co.cask.cdap.app.runtime.ProgramRunner): 1
ClassPath (co.cask.cdap.common.internal.guava.ClassPath): 1
ResourceInfo (co.cask.cdap.common.internal.guava.ClassPath.ResourceInfo): 1
CombineClassLoader (co.cask.cdap.common.lang.CombineClassLoader): 1
WeakReferenceDelegatorClassLoader (co.cask.cdap.common.lang.WeakReferenceDelegatorClassLoader): 1
HadoopClassExcluder (co.cask.cdap.common.twill.HadoopClassExcluder): 1
HBaseDDLExecutorFactory (co.cask.cdap.data2.util.hbase.HBaseDDLExecutorFactory): 1