Search in sources :

Example 6 with LocalizeResource

Use of co.cask.cdap.internal.app.runtime.distributed.LocalizeResource in the project cdap by caskdata.

From the class SparkRuntimeService, method startUp:

@Override
protected void startUp() throws Exception {
    // Additional Spark job initialization at run-time.
    // This context is for calling initialize and onFinish on the Spark program.
    // Fields injection for the Spark program: it has to be done in here instead of in SparkProgramRunner
    // for the @UseDataset injection, since the dataset cache being used in Spark is a MultiThreadDatasetCache.
    // The AbstractExecutionThreadService guarantees that startUp(), run() and shutDown() all happen in the same thread.
    Reflections.visit(spark, spark.getClass(), new PropertyFieldSetter(runtimeContext.getSparkSpecification().getProperties()), new DataSetFieldSetter(runtimeContext.getDatasetCache()), new MetricsFieldSetter(runtimeContext));
    // Creates a temporary directory locally for storing all generated files.
    File tempDir = DirUtils.createTempDir(new File(cConf.get(Constants.CFG_LOCAL_DATA_DIR), cConf.get(Constants.AppFabric.TEMP_DIR)).getAbsoluteFile());
    tempDir.mkdirs();
    this.cleanupTask = createCleanupTask(tempDir, System.getProperties());
    try {
        initialize();
        SparkRuntimeContextConfig contextConfig = new SparkRuntimeContextConfig(runtimeContext.getConfiguration());
        final File jobJar = generateJobJar(tempDir, contextConfig.isLocal(), cConf);
        final List<LocalizeResource> localizeResources = new ArrayList<>();
        String metricsConfPath;
        String classpath = "";
        if (contextConfig.isLocal()) {
            // In local mode, always copy (or link if local) user requested resources
            copyUserResources(context.getLocalizeResources(), tempDir);
            File metricsConf = SparkMetricsSink.writeConfig(new File(tempDir, CDAP_METRICS_PROPERTIES));
            metricsConfPath = metricsConf.getAbsolutePath();
        } else {
            // Localize all user requested files in distributed mode
            distributedUserResources(context.getLocalizeResources(), localizeResources);
            // Localize program jar and the expanding program jar
            File programJar = Locations.linkOrCopy(runtimeContext.getProgram().getJarLocation(), new File(tempDir, SparkRuntimeContextProvider.PROGRAM_JAR_NAME));
            File expandedProgramJar = Locations.linkOrCopy(runtimeContext.getProgram().getJarLocation(), new File(tempDir, SparkRuntimeContextProvider.PROGRAM_JAR_EXPANDED_NAME));
            // Localize both the unexpanded and expanded program jar
            localizeResources.add(new LocalizeResource(programJar));
            localizeResources.add(new LocalizeResource(expandedProgramJar, true));
            // Localize plugins
            if (pluginArchive != null) {
                localizeResources.add(new LocalizeResource(pluginArchive, true));
            }
            // Create and localize the launcher jar, which is for setting up services and classloader for spark containers
            localizeResources.add(new LocalizeResource(createLauncherJar(tempDir)));
            // Create metrics conf file in the current directory since
            // the same value for the "spark.metrics.conf" config needs to be used for both driver and executor processes.
            // Also localize the metrics conf file to the executor nodes.
            File metricsConf = SparkMetricsSink.writeConfig(new File(CDAP_METRICS_PROPERTIES));
            metricsConfPath = metricsConf.getName();
            localizeResources.add(new LocalizeResource(metricsConf));
            // Localize the cConf file
            localizeResources.add(new LocalizeResource(saveCConf(cConf, tempDir)));
            // Preserves and localize runtime information in the hConf
            Configuration hConf = contextConfig.set(runtimeContext, pluginArchive).getConfiguration();
            localizeResources.add(new LocalizeResource(saveHConf(hConf, tempDir)));
            // Joiner for creating classpath for spark containers
            Joiner joiner = Joiner.on(File.pathSeparator).skipNulls();
            // Localize the spark.jar archive, which contains all CDAP and dependency jars
            File sparkJar = new File(tempDir, CDAP_SPARK_JAR);
            classpath = joiner.join(Iterables.transform(buildDependencyJar(sparkJar), new Function<String, String>() {

                @Override
                public String apply(String name) {
                    // Entries resolve relative to the container's working directory ($PWD) at run-time
                    return Paths.get("$PWD", CDAP_SPARK_JAR, name).toString();
                }
            }));
            localizeResources.add(new LocalizeResource(sparkJar, true));
            // Localize logback if there is one. It is placed at the beginning of the classpath
            File logbackJar = ProgramRunners.createLogbackJar(new File(tempDir, "logback.xml.jar"));
            if (logbackJar != null) {
                localizeResources.add(new LocalizeResource(logbackJar));
                classpath = joiner.join(Paths.get("$PWD", logbackJar.getName()), classpath);
            }
            // Localize extra jars and append to the end of the classpath
            List<String> extraJars = new ArrayList<>();
            for (URI jarURI : CConfigurationUtil.getExtraJars(cConf)) {
                extraJars.add(Paths.get("$PWD", LocalizationUtils.getLocalizedName(jarURI)).toString());
                localizeResources.add(new LocalizeResource(jarURI, false));
            }
            classpath = joiner.join(classpath, joiner.join(extraJars));
        }
        final Map<String, String> configs = createSubmitConfigs(tempDir, metricsConfPath, classpath, context.getLocalizeResources(), contextConfig.isLocal());
        submitSpark = new Callable<ListenableFuture<RunId>>() {

            @Override
            public ListenableFuture<RunId> call() throws Exception {
                // This happens when stop() was called while starting
                if (!isRunning()) {
                    return immediateCancelledFuture();
                }
                return sparkSubmitter.submit(runtimeContext, configs, localizeResources, jobJar, runtimeContext.getRunId());
            }
        };
    } catch (LinkageError e) {
        // Wrap LinkageError so that listeners of this service are still notified when the initialization
        // of the user program is missing dependencies (CDAP-2543).
        // Also run the cleanup task here: this catch rethrows directly, so the Throwable handler below
        // is never reached from it, and a failed startUp() won't trigger shutDown() — without this the
        // temp directory would be leaked.
        cleanupTask.run();
        throw new Exception(e.getMessage(), e);
    } catch (Throwable t) {
        // Remove all temporary files created above before propagating the failure
        cleanupTask.run();
        throw t;
    }
}
Also used : Joiner(com.google.common.base.Joiner) CConfiguration(co.cask.cdap.common.conf.CConfiguration) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) URI(java.net.URI) DataSetFieldSetter(co.cask.cdap.internal.app.runtime.DataSetFieldSetter) URISyntaxException(java.net.URISyntaxException) UnsupportedTypeException(co.cask.common.internal.io.UnsupportedTypeException) IOException(java.io.IOException) PropertyFieldSetter(co.cask.cdap.common.lang.PropertyFieldSetter) MetricsFieldSetter(co.cask.cdap.internal.app.runtime.MetricsFieldSetter) LocalizeResource(co.cask.cdap.internal.app.runtime.distributed.LocalizeResource) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) File(java.io.File)

Example 7 with LocalizeResource

Use of co.cask.cdap.internal.app.runtime.distributed.LocalizeResource in the project cdap by caskdata.

From the class SparkPackageUtils, method prepareSparkResources:

/**
   * Prepares the resources that need to be localized to the Spark client container.
   *
   * @param sparkCompat the spark version to prepare for
   * @param locationFactory the location factory for uploading files
   * @param tempDir a temporary directory for file creation
   * @param localizeResources A map from localized name to {@link LocalizeResource} for this method to update
   * @param env the environment map to update
   * @throws IOException if failed to prepare the spark resources
   */
public static void prepareSparkResources(SparkCompat sparkCompat, LocationFactory locationFactory, File tempDir, Map<String, LocalizeResource> localizeResources, Map<String, String> env) throws IOException {
    Properties sparkConf = getSparkDefaultConf();
    // Localize the spark framework
    SparkFramework framework = prepareSparkFramework(sparkCompat, locationFactory, tempDir);
    framework.addLocalizeResource(localizeResources);
    framework.updateSparkConf(sparkConf);
    framework.updateSparkEnv(env);
    // Localize the spark-defaults.conf file
    File sparkDefaultConfFile = saveSparkDefaultConf(sparkConf, File.createTempFile(SPARK_DEFAULTS_CONF, null, tempDir));
    localizeResources.put(SPARK_DEFAULTS_CONF, new LocalizeResource(sparkDefaultConfFile));
    // Shallow copy all files under directory defined by $HADOOP_CONF_DIR
    // If $HADOOP_CONF_DIR is not defined, use the location of "yarn-site.xml" to determine the directory
    // This is part of workaround for CDAP-5019 (SPARK-13441).
    File hadoopConfDir = null;
    if (System.getenv().containsKey(ApplicationConstants.Environment.HADOOP_CONF_DIR.key())) {
        hadoopConfDir = new File(System.getenv(ApplicationConstants.Environment.HADOOP_CONF_DIR.key()));
    } else {
        URL yarnSiteLocation = SparkPackageUtils.class.getClassLoader().getResource("yarn-site.xml");
        if (yarnSiteLocation != null) {
            try {
                hadoopConfDir = new File(yarnSiteLocation.toURI()).getParentFile();
            } catch (URISyntaxException e) {
                // Shouldn't happen; log with the cause so the failure is diagnosable instead of swallowed
                LOG.warn("Failed to derive HADOOP_CONF_DIR from yarn-site.xml", e);
            }
        }
    }
    if (hadoopConfDir != null && hadoopConfDir.isDirectory()) {
        try {
            final File targetFile = File.createTempFile(LOCALIZED_CONF_DIR, ".zip", tempDir);
            try (ZipOutputStream zipOutput = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(targetFile)))) {
                for (File file : DirUtils.listFiles(hadoopConfDir)) {
                    // Shallow copy of files under the hadoop conf dir. Ignore files that cannot be read
                    if (file.isFile() && file.canRead()) {
                        zipOutput.putNextEntry(new ZipEntry(file.getName()));
                        Files.copy(file.toPath(), zipOutput);
                    }
                }
            }
            localizeResources.put(LOCALIZED_CONF_DIR, new LocalizeResource(targetFile, true));
        } catch (IOException e) {
            // Localizing the hadoop conf dir is best-effort; don't fail the whole preparation
            LOG.warn("Failed to create archive from {}", hadoopConfDir, e);
        }
    }
}
Also used : ZipOutputStream(java.util.zip.ZipOutputStream) LocalizeResource(co.cask.cdap.internal.app.runtime.distributed.LocalizeResource) FileOutputStream(java.io.FileOutputStream) ZipEntry(java.util.zip.ZipEntry) URISyntaxException(java.net.URISyntaxException) IOException(java.io.IOException) Properties(java.util.Properties) File(java.io.File) BufferedOutputStream(java.io.BufferedOutputStream) URL(java.net.URL)

Example 8 with LocalizeResource

Use of co.cask.cdap.internal.app.runtime.distributed.LocalizeResource in the project cdap by caskdata.

From the class MapReduceRuntimeService, method localizeUserResources:

/**
   * Localizes resources requested by users in the MapReduce Program's beforeSubmit phase.
   * In Local mode, also copies resources to a temporary directory.
   *
   * @param job the {@link Job} for this MapReduce program
   * @param targetDir in local mode, a temporary directory to copy the resources to
   * @return a {@link Map} of resource name to the resource path. The resource path will be absolute in local mode,
   * while it will just contain the file name in distributed mode.
   * @throws IOException if localizing a resource fails in local mode
   */
private Map<String, String> localizeUserResources(Job job, File targetDir) throws IOException {
    Map<String, String> localizedResources = new HashMap<>();
    Map<String, LocalizeResource> resourcesToLocalize = context.getResourcesToLocalize();
    // The job configuration and the local/distributed decision do not change per resource,
    // so evaluate them once instead of on every loop iteration.
    Configuration mapredConf = job.getConfiguration();
    boolean isLocalMode = MapReduceTaskContextProvider.isLocal(mapredConf);
    for (Map.Entry<String, LocalizeResource> entry : resourcesToLocalize.entrySet()) {
        String localizedFilePath;
        String name = entry.getKey();
        if (isLocalMode) {
            // in local mode, also add localize resources in a temporary directory
            localizedFilePath = LocalizationUtils.localizeResource(entry.getKey(), entry.getValue(), targetDir).getAbsolutePath();
        } else {
            URI uri = entry.getValue().getURI();
            // in distributed mode, use the MapReduce Job object to localize resources
            URI actualURI;
            try {
                // Rebuild the URI with the requested name as the fragment so the file is localized under that name
                actualURI = new URI(uri.getScheme(), uri.getAuthority(), uri.getPath(), uri.getQuery(), name);
            } catch (URISyntaxException e) {
                // This shouldn't happen since the URI is constructed from an already-valid URI.
                // If it does though, there is nothing that clients can do to recover, so not propagating a checked exception.
                throw Throwables.propagate(e);
            }
            if (entry.getValue().isArchive()) {
                job.addCacheArchive(actualURI);
            } else {
                job.addCacheFile(actualURI);
            }
            localizedFilePath = name;
        }
        LOG.debug("MapReduce Localizing file {} {}", entry.getKey(), entry.getValue());
        localizedResources.put(name, localizedFilePath);
    }
    return localizedResources;
}
Also used : CConfiguration(co.cask.cdap.common.conf.CConfiguration) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) HashMap(java.util.HashMap) LocalizeResource(co.cask.cdap.internal.app.runtime.distributed.LocalizeResource) URISyntaxException(java.net.URISyntaxException) Map(java.util.Map) HashMap(java.util.HashMap) AbstractMap(java.util.AbstractMap) URI(java.net.URI)

Example 9 with LocalizeResource

Use of co.cask.cdap.internal.app.runtime.distributed.LocalizeResource in the project cdap by caskdata.

From the class MasterTwillApplication, method prepareExploreResources:

/**
   * Prepares resources to be localized to the explore container.
   *
   * @param tempDir a temporary directory for creating intermediate files
   * @param hConf the Hadoop configuration, used to read the YARN application classpath
   * @param localizeResources map from localized name to {@link LocalizeResource}, updated by this method
   * @param extraClassPath collection of container classpath entries, updated by this method
   * @throws IOException if creating the archive or patching a jar fails
   */
private void prepareExploreResources(Path tempDir, Configuration hConf, Map<String, LocalizeResource> localizeResources, Collection<String> extraClassPath) throws IOException {
    // Find the jars in the yarn application classpath
    String yarnAppClassPath = Joiner.on(File.pathSeparatorChar).join(hConf.getTrimmedStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH, YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH));
    final Set<File> yarnAppJarFiles = new LinkedHashSet<>();
    Iterables.addAll(yarnAppJarFiles, ExploreUtils.getClasspathJarFiles(yarnAppClassPath));
    // Filter out jar files that are already in the yarn application classpath, as those
    // are already available in the Explore container.
    Iterable<File> exploreFiles = Iterables.filter(ExploreUtils.getExploreClasspathJarFiles("tgz", "gz"), new Predicate<File>() {

        @Override
        public boolean apply(File file) {
            return !yarnAppJarFiles.contains(file);
        }
    });
    // Create a zip file that contains all explore jar files.
    // Uploading and localizing one big file is faster than many small ones.
    String exploreArchiveName = "explore.archive.zip";
    Path exploreArchive = Files.createTempFile(tempDir, "explore.archive", ".zip");
    Set<String> addedJar = new HashSet<>();
    try (ZipOutputStream zos = new ZipOutputStream(Files.newOutputStream(exploreArchive))) {
        // No compression: the archive is for bundling only, and jars are already compressed
        zos.setLevel(Deflater.NO_COMPRESSION);
        for (File file : exploreFiles) {
            if (file.getName().endsWith(".tgz") || file.getName().endsWith(".gz")) {
                // It's an archive, hence localize it as an archive so that it will be expanded to a directory on the container
                localizeResources.put(file.getName(), new LocalizeResource(file, true));
                // Includes the expanded directory, jars under that directory and jars under the "lib" to classpath
                extraClassPath.add(file.getName());
                extraClassPath.add(file.getName() + "/*");
                extraClassPath.add(file.getName() + "/lib/*");
            } else {
                // For jar file, add it to explore archive.
                // Prefix with a timestamp to avoid name collisions between patched copies in tempDir.
                File targetFile = tempDir.resolve(System.currentTimeMillis() + "-" + file.getName()).toFile();
                File resultFile = ExploreServiceUtils.patchHiveClasses(file, targetFile);
                // NOTE(review): resultFile == targetFile appears to mean the jar was rewritten (patched copy
                // written to targetFile); presumably patchHiveClasses returns the original file when no
                // patching was needed — confirm against ExploreServiceUtils.
                if (resultFile == targetFile) {
                    LOG.info("Rewritten HiveAuthFactory from jar file {} to jar file {}", file, resultFile);
                }
                // don't add duplicate jar
                if (addedJar.add(resultFile.getName())) {
                    zos.putNextEntry(new ZipEntry(resultFile.getName()));
                    Files.copy(resultFile.toPath(), zos);
                    extraClassPath.add(exploreArchiveName + File.separator + resultFile.getName());
                }
            }
        }
    }
    // Only localize the archive if at least one jar was bundled into it
    if (!addedJar.isEmpty()) {
        localizeResources.put(exploreArchiveName, new LocalizeResource(exploreArchive.toFile(), true));
    }
    // Explore also depends on MR, hence adding MR jars to the classpath.
    // Depending on how the cluster is configured, we might need to localize the MR framework tgz as well.
    MapReduceContainerHelper.localizeFramework(hConf, localizeResources);
    MapReduceContainerHelper.addMapReduceClassPath(hConf, extraClassPath);
    LOG.trace("Jars in extra classpath after adding jars in explore classpath: {}", extraClassPath);
}
Also used : LinkedHashSet(java.util.LinkedHashSet) Path(java.nio.file.Path) ZipEntry(java.util.zip.ZipEntry) ZipOutputStream(java.util.zip.ZipOutputStream) LocalizeResource(co.cask.cdap.internal.app.runtime.distributed.LocalizeResource) File(java.io.File) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Example 10 with LocalizeResource

Use of co.cask.cdap.internal.app.runtime.distributed.LocalizeResource in the project cdap by caskdata.

From the class MasterTwillApplication, method prepareLogSaverResources:

/**
   * Prepares resources that need to be localized to the log saver container.
   *
   * @param tempDir a temporary directory for creating intermediate files
   * @param containerCConf the container CConfiguration, updated by this method
   * @param localizeResources map from localized name to {@link LocalizeResource}, updated by this method
   * @param extraClassPath collection of container classpath entries, updated by this method
   * @throws IOException if creating one of the jar files fails
   */
private void prepareLogSaverResources(Path tempDir, CConfiguration containerCConf, Map<String, LocalizeResource> localizeResources, Collection<String> extraClassPath) throws IOException {
    String configJarName = "log.config.jar";
    String libJarName = "log.lib.jar";
    // Localize log config files
    List<File> configFiles = DirUtils.listFiles(new File(cConf.get(Constants.Logging.PIPELINE_CONFIG_DIR)), "xml");
    if (!configFiles.isEmpty()) {
        Path configJar = Files.createTempFile(tempDir, "log.config", ".jar");
        try (JarOutputStream jarOutput = new JarOutputStream(Files.newOutputStream(configJar))) {
            for (File configFile : configFiles) {
                jarOutput.putNextEntry(new JarEntry(configFile.getName()));
                Files.copy(configFile.toPath(), jarOutput);
                jarOutput.closeEntry();
            }
        }
        localizeResources.put(configJarName, new LocalizeResource(configJar.toUri(), true));
    }
    // It's ok to set to a non-existing directory in case there is no config files
    containerCConf.set(Constants.Logging.PIPELINE_CONFIG_DIR, configJarName);
    // Localize log lib jars
    // First collect jars under each of the configured lib directory
    List<File> libJars = LoggingUtil.getExtensionJars(cConf);
    if (!libJars.isEmpty()) {
        // Create the lib jar inside the managed tempDir (not the system temp dir) so it is
        // cleaned up together with the other intermediate files, matching the config jar above.
        Path libJar = Files.createTempFile(tempDir, "log.lib", ".jar");
        try (JarOutputStream jarOutput = new JarOutputStream(Files.newOutputStream(libJar))) {
            for (File jarFile : libJars) {
                jarOutput.putNextEntry(new JarEntry(jarFile.getName()));
                Files.copy(jarFile.toPath(), jarOutput);
                jarOutput.closeEntry();
                // Add the log lib jar to the container classpath
                extraClassPath.add(libJarName + File.separator + jarFile.getName());
            }
        }
        localizeResources.put(libJarName, new LocalizeResource(libJar.toUri(), true));
    }
    // Set it to empty value since we don't use this in the container.
    // All jars are already added as part of container classpath.
    containerCConf.set(Constants.Logging.PIPELINE_LIBRARY_DIR, "");
}
Also used : Path(java.nio.file.Path) LocalizeResource(co.cask.cdap.internal.app.runtime.distributed.LocalizeResource) JarOutputStream(java.util.jar.JarOutputStream) JarEntry(java.util.jar.JarEntry) File(java.io.File)

Aggregations

LocalizeResource (co.cask.cdap.internal.app.runtime.distributed.LocalizeResource)17 File (java.io.File)11 URI (java.net.URI)8 URISyntaxException (java.net.URISyntaxException)5 ZipOutputStream (java.util.zip.ZipOutputStream)4 Test (org.junit.Test)4 CConfiguration (co.cask.cdap.common.conf.CConfiguration)3 BufferedOutputStream (java.io.BufferedOutputStream)3 FileOutputStream (java.io.FileOutputStream)3 Path (java.nio.file.Path)3 HashMap (java.util.HashMap)3 ZipEntry (java.util.zip.ZipEntry)3 IOException (java.io.IOException)2 OutputStream (java.io.OutputStream)2 ArrayList (java.util.ArrayList)2 Map (java.util.Map)2 Configuration (org.apache.hadoop.conf.Configuration)2 Location (org.apache.twill.filesystem.Location)2 Resources (co.cask.cdap.api.Resources)1 ApplicationSpecification (co.cask.cdap.api.app.ApplicationSpecification)1