
Example 1 with LocalizeResource

Use of io.cdap.cdap.internal.app.runtime.distributed.LocalizeResource in project cdap by caskdata.

The class LocalizationUtilsTest, method testZip.

@Test
public void testZip() throws IOException {
    String zipFileName = "target";
    File directory = TEMP_FOLDER.newFolder("zip");
    File file1 = File.createTempFile("file1", ".txt", directory);
    File file2 = File.createTempFile("file2", ".txt", directory);
    // Zip up the directory using the test's helper method.
    File zipFile = createZipFile(zipFileName, directory, false);
    File localizationDir = TEMP_FOLDER.newFolder("localZip");
    // archive = true, so localization expands the zip into a directory.
    File localizedResource = LocalizationUtils.localizeResource(zipFileName, new LocalizeResource(zipFile, true), localizationDir);
    Assert.assertTrue(localizedResource.isDirectory());
    File[] files = localizedResource.listFiles();
    Assert.assertNotNull(files);
    Assert.assertEquals(2, files.length);
    // Directory listing order is not guaranteed, so accept either ordering.
    if (file1.getName().equals(files[0].getName())) {
        Assert.assertEquals(file2.getName(), files[1].getName());
    } else {
        Assert.assertEquals(file1.getName(), files[1].getName());
        Assert.assertEquals(file2.getName(), files[0].getName());
    }
}
Also used: LocalizeResource(io.cdap.cdap.internal.app.runtime.distributed.LocalizeResource) File(java.io.File) Test(org.junit.Test)
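
A minimal companion sketch, assuming the same test fixtures (TEMP_FOLDER and the LocalizationUtils API shown above): localizing a plain file with archive = false, in which case the resource should be copied as-is rather than expanded. The test name and file names here are hypothetical.

@Test
public void testPlainFile() throws IOException {
    File directory = TEMP_FOLDER.newFolder("plain");
    File file = File.createTempFile("data", ".txt", directory);
    File localizationDir = TEMP_FOLDER.newFolder("localFile");
    // archive = false: the resource is localized as a regular file, not expanded.
    File localizedResource = LocalizationUtils.localizeResource("data", new LocalizeResource(file, false), localizationDir);
    Assert.assertTrue(localizedResource.isFile());
}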

Example 2 with LocalizeResource

Use of io.cdap.cdap.internal.app.runtime.distributed.LocalizeResource in project cdap by caskdata.

The class SparkRuntimeService, method prepareHBaseDDLExecutorResources.

/**
 * Prepares the {@link HBaseDDLExecutor} implementation for localization.
 */
private void prepareHBaseDDLExecutorResources(File tempDir, CConfiguration cConf, List<LocalizeResource> localizeResources) throws IOException {
    String ddlExecutorExtensionDir = cConf.get(Constants.HBaseDDLExecutor.EXTENSIONS_DIR);
    if (ddlExecutorExtensionDir == null) {
        // Nothing to localize
        return;
    }
    final File target = new File(tempDir, "hbaseddlext.jar");
    BundleJarUtil.createJar(new File(ddlExecutorExtensionDir), target);
    localizeResources.add(new LocalizeResource(target, true));
    cConf.set(Constants.HBaseDDLExecutor.EXTENSIONS_DIR, target.getName());
}
Also used: LocalizeResource(io.cdap.cdap.internal.app.runtime.distributed.LocalizeResource) File(java.io.File)
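
The method above is an instance of a general bundle-and-localize pattern: jar up a local extension directory, mark the jar for expansion, and rewrite the config key to the localized name so code in the container finds the expanded directory. A hedged sketch of that pattern, reusing only the APIs shown above; the method name, config key parameter, and jar name are hypothetical:

private void prepareExtensionResources(File tempDir, CConfiguration cConf, String extensionsDirKey,
                                       String jarName, List<LocalizeResource> localizeResources) throws IOException {
    String extensionDir = cConf.get(extensionsDirKey);
    if (extensionDir == null) {
        // Nothing to localize
        return;
    }
    File target = new File(tempDir, jarName);
    // Bundle the extension directory into a single jar.
    BundleJarUtil.createJar(new File(extensionDir), target);
    // archive = true: the jar is expanded into a directory in the container.
    localizeResources.add(new LocalizeResource(target, true));
    // Rewrite the config so the container resolves the localized copy.
    cConf.set(extensionsDirKey, target.getName());
}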

Example 3 with LocalizeResource

Use of io.cdap.cdap.internal.app.runtime.distributed.LocalizeResource in project cdap by caskdata.

The class AbstractSparkSubmitter, method createSubmitArguments.

/**
 * Creates the list of arguments that will be used for calling {@link SparkSubmit#main(String[])}.
 *
 * @param runtimeContext the {@link SparkRuntimeContext} for the spark program
 * @param configs set of Spark configurations
 * @param resources list of resources that need to be localized to Spark containers
 * @param jobFile the job file for Spark
 * @return a list of arguments
 * @throws Exception if there is an error while creating submit arguments
 */
private List<String> createSubmitArguments(SparkRuntimeContext runtimeContext, Map<String, String> configs, List<LocalizeResource> resources, URI jobFile) throws Exception {
    SparkSpecification spec = runtimeContext.getSparkSpecification();
    ImmutableList.Builder<String> builder = ImmutableList.builder();
    Iterable<LocalizeResource> archivesIterable = getArchives(resources);
    Iterable<LocalizeResource> filesIterable = getFiles(resources);
    addMaster(configs, builder);
    builder.add("--conf").add("spark.app.name=" + spec.getName());
    // Merge generated submit configs, then emit everything as --conf key=value pairs.
    configs.putAll(generateSubmitConf());
    BiConsumer<String, String> confAdder = (k, v) -> builder.add("--conf").add(k + "=" + v);
    configs.forEach(confAdder);
    String archives = Joiner.on(',').join(Iterables.transform(archivesIterable, RESOURCE_TO_PATH));
    String files = Joiner.on(',').join(Iterables.transform(filesIterable, RESOURCE_TO_PATH));
    if (!Strings.isNullOrEmpty(archives)) {
        builder.add("--archives").add(archives);
    }
    if (!Strings.isNullOrEmpty(files)) {
        builder.add("--files").add(files);
    }
    // Prefer an alternative job file if one is provided.
    URI newJobFile = getJobFile();
    if (newJobFile != null) {
        jobFile = newJobFile;
    }
    boolean isPySpark = jobFile.getPath().endsWith(".py");
    if (isPySpark) {
        // For Python, add extra py library files
        String pyFiles = configs.get("spark.submit.pyFiles");
        if (pyFiles != null) {
            builder.add("--py-files").add(pyFiles);
        }
    } else {
        builder.add("--class").add(SparkMainWrapper.class.getName());
    }
    if ("file".equals(jobFile.getScheme())) {
        builder.add(jobFile.getPath());
    } else {
        builder.add(jobFile.toString());
    }
    if (!isPySpark) {
        // Add extra arguments for easily identifying the program from the command line.
        // Arguments to the user program always come from the runtime arguments.
        builder.add("--cdap.spark.program=" + runtimeContext.getProgramRunId().toString());
        builder.add("--cdap.user.main.class=" + spec.getMainClassName());
    }
    return builder.build();
}
Also used: ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) Iterables(com.google.common.collect.Iterables) Arrays(java.util.Arrays) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) SparkMainWrapper(io.cdap.cdap.app.runtime.spark.SparkMainWrapper) LoggerFactory(org.slf4j.LoggerFactory) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Strings(com.google.common.base.Strings) Future(java.util.concurrent.Future) ImmutableList(com.google.common.collect.ImmutableList) Map(java.util.Map) BiConsumer(java.util.function.BiConsumer) Predicates(com.google.common.base.Predicates) URI(java.net.URI) ExecutorService(java.util.concurrent.ExecutorService) Nullable(javax.annotation.Nullable) Function(com.google.common.base.Function) Uninterruptibles(com.google.common.util.concurrent.Uninterruptibles) SparkRuntimeContext(io.cdap.cdap.app.runtime.spark.SparkRuntimeContext) Logger(org.slf4j.Logger) ClassLoaders(io.cdap.cdap.common.lang.ClassLoaders) SparkSubmit(org.apache.spark.deploy.SparkSubmit) SparkSpecification(io.cdap.cdap.api.spark.SparkSpecification) Executors(java.util.concurrent.Executors) CountDownLatch(java.util.concurrent.CountDownLatch) List(java.util.List) Predicate(com.google.common.base.Predicate) LocalizeResource(io.cdap.cdap.internal.app.runtime.distributed.LocalizeResource) Collections(java.util.Collections) AbstractFuture(com.google.common.util.concurrent.AbstractFuture) Joiner(com.google.common.base.Joiner)
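
To make the method's output concrete, here is roughly what the built argument list could look like for a hypothetical non-PySpark run with one archive and one file to localize. Every value below is illustrative, and the exact master flags produced by addMaster are assumed:

// Illustrative only; actual values depend on the runtime context and configs.
List<String> args = ImmutableList.of(
    "--master", "yarn",                       // from addMaster(...); exact shape assumed
    "--conf", "spark.app.name=MySparkProgram",
    "--conf", "spark.executor.memory=2g",     // one of the merged configs
    "--archives", "/tmp/localized-conf.zip",
    "--files", "/tmp/lookup.txt",
    "--class", "io.cdap.cdap.app.runtime.spark.SparkMainWrapper",
    "/tmp/program.jar",
    "--cdap.spark.program=<program-run-id>",  // identifies the run on the command line
    "--cdap.user.main.class=com.example.MyMainClass");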

Example 4 with LocalizeResource

Use of io.cdap.cdap.internal.app.runtime.distributed.LocalizeResource in project cdap by caskdata.

The class SparkPackageUtils, method prepareSparkResources.

/**
 * Prepares the resources that need to be localized to the Spark client container.
 *
 * @param sparkCompat the spark version to prepare for
 * @param locationFactory the location factory for uploading files
 * @param tempDir a temporary directory for file creation
 * @param localizeResources a map from localized name to {@link LocalizeResource} for this method to update
 * @param env the environment map to update
 * @throws IOException if failed to prepare the spark resources
 */
public static void prepareSparkResources(SparkCompat sparkCompat, LocationFactory locationFactory, File tempDir, Map<String, LocalizeResource> localizeResources, Map<String, String> env) throws IOException {
    Properties sparkConf = getSparkDefaultConf();
    // Localize the spark framework
    SparkFramework framework = prepareSparkFramework(sparkCompat, locationFactory, tempDir);
    framework.addLocalizeResource(localizeResources);
    framework.updateSparkConf(sparkConf);
    framework.updateSparkEnv(env);
    // Localize PySpark.
    List<String> pySparkArchives = new ArrayList<>();
    for (File archive : getLocalPySparkArchives(sparkCompat)) {
        localizeResources.put(archive.getName(), new LocalizeResource(archive));
        pySparkArchives.add(archive.getName());
    }
    // Set the PYSPARK_ARCHIVES_PATH environment variable in the YARN container.
    env.put(PYSPARK_ARCHIVES_PATH, Joiner.on(",").join(pySparkArchives));
    // Localize the spark-defaults.conf file
    File sparkDefaultConfFile = saveSparkDefaultConf(sparkConf, File.createTempFile(SPARK_DEFAULTS_CONF, null, tempDir));
    localizeResources.put(SPARK_DEFAULTS_CONF, new LocalizeResource(sparkDefaultConfFile));
    env.putAll(getSparkClientEnv());
    // Shallow-copy all files under the directory defined by $HADOOP_CONF_DIR and the explore conf directory.
    // If $HADOOP_CONF_DIR is not defined, use the location of "yarn-site.xml" to determine the directory.
    // This is part of a workaround for CDAP-5019 (SPARK-13441) and CDAP-12330.
    List<File> configDirs = new ArrayList<>();
    if (System.getenv().containsKey(ApplicationConstants.Environment.HADOOP_CONF_DIR.key())) {
        configDirs.add(new File(System.getenv(ApplicationConstants.Environment.HADOOP_CONF_DIR.key())));
    } else {
        URL yarnSiteLocation = SparkPackageUtils.class.getClassLoader().getResource("yarn-site.xml");
        if (yarnSiteLocation == null || !"file".equals(yarnSiteLocation.getProtocol())) {
            LOG.warn("Failed to derive HADOOP_CONF_DIR from yarn-site.xml location: {}", yarnSiteLocation);
        } else {
            configDirs.add(new File(yarnSiteLocation.getPath()).getParentFile());
        }
    }
    // Include the explore config dirs as well
    Splitter splitter = Splitter.on(File.pathSeparatorChar).omitEmptyStrings();
    for (String dir : splitter.split(System.getProperty(EXPLORE_CONF_DIRS, ""))) {
        configDirs.add(new File(dir));
    }
    if (!configDirs.isEmpty()) {
        File targetFile = File.createTempFile(LOCALIZED_CONF_DIR, ".zip", tempDir);
        Set<String> entries = new HashSet<>();
        try (ZipOutputStream output = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(targetFile)))) {
            for (File configDir : configDirs) {
                try {
                    LOG.debug("Adding files from {} to {}.zip", configDir, LOCALIZED_CONF_DIR);
                    addConfigFiles(configDir, entries, output);
                } catch (IOException e) {
                    LOG.warn("Failed to create archive from {}", configDir, e);
                }
            }
        }
        localizeResources.put(LOCALIZED_CONF_DIR, new LocalizeResource(targetFile, true));
        env.put("YARN_CONF_DIR", "$PWD/" + LOCALIZED_CONF_DIR);
    }
}
Also used: Splitter(com.google.common.base.Splitter) ArrayList(java.util.ArrayList) IOException(java.io.IOException) Properties(java.util.Properties) URL(java.net.URL) ZipOutputStream(java.util.zip.ZipOutputStream) LocalizeResource(io.cdap.cdap.internal.app.runtime.distributed.LocalizeResource) FileOutputStream(java.io.FileOutputStream) File(java.io.File) BufferedOutputStream(java.io.BufferedOutputStream) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)
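
addConfigFiles is referenced above but not shown on this page. Given the "shallow-copy" comment and the de-duplicating entries set, a plausible sketch follows; the real implementation may filter files or handle symlinks differently:

private static void addConfigFiles(File configDir, Set<String> entries, ZipOutputStream output) throws IOException {
    File[] files = configDir.listFiles();
    if (files == null) {
        // Not a readable directory; nothing to add.
        return;
    }
    for (File file : files) {
        // Shallow copy: skip sub-directories and names already written from an earlier directory.
        if (!file.isFile() || !entries.add(file.getName())) {
            continue;
        }
        output.putNextEntry(new ZipEntry(file.getName()));
        Files.copy(file.toPath(), output);
        output.closeEntry();
    }
}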

Example 5 with LocalizeResource

Use of io.cdap.cdap.internal.app.runtime.distributed.LocalizeResource in project cdap by caskdata.

The class SparkPackageUtils, method prepareSparkFramework.

/**
 * Prepares the Spark framework archive on the given location.
 *
 * @param sparkCompat the spark version to prepare for
 * @param sparkConf the spark configuration
 * @param locationFactory the {@link LocationFactory} for saving the spark framework jar
 * @param tempDir directory for temporary file creation
 * @return a {@link SparkFramework} containing information about the spark framework in the localization context
 * @throws IOException if failed to prepare the framework
 */
private static SparkFramework prepareSparkFramework(SparkCompat sparkCompat, Properties sparkConf, LocationFactory locationFactory, File tempDir) throws IOException {
    String sparkYarnArchive = sparkConf.getProperty(SPARK_YARN_ARCHIVE);
    if (sparkYarnArchive != null) {
        URI sparkYarnArchiveURI = URI.create(sparkYarnArchive);
        // Reuse the archive only if it is on the same filesystem as the location factory.
        if (locationFactory.getHomeLocation().toURI().getScheme().equals(sparkYarnArchiveURI.getScheme())) {
            Location frameworkLocation = locationFactory.create(URI.create(sparkYarnArchive));
            if (frameworkLocation.exists()) {
                return new SparkFramework(new LocalizeResource(resolveURI(frameworkLocation), true), SPARK_YARN_ARCHIVE);
            }
            LOG.warn("The location {} set by '{}' does not exist.", frameworkLocation, SPARK_YARN_ARCHIVE);
        }
    }
    // If spark.yarn.archive is not defined or doesn't exist, build an archive zip from the local FS and upload it
    String sparkVersion = System.getenv(SPARK_VERSION);
    sparkVersion = sparkVersion == null ? sparkCompat.getCompat() : sparkVersion;
    String archiveName = "spark.archive-" + sparkVersion + "-" + VersionInfo.getVersion() + ".zip";
    Location frameworkDir = locationFactory.create("/framework/spark");
    Location frameworkLocation = frameworkDir.append(archiveName);
    if (!frameworkLocation.exists()) {
        File archive = new File(tempDir, archiveName);
        try {
            try (ZipOutputStream zipOutput = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(archive)))) {
                zipOutput.setLevel(Deflater.NO_COMPRESSION);
                for (File file : getLocalSparkLibrary(sparkCompat)) {
                    zipOutput.putNextEntry(new ZipEntry(file.getName()));
                    Files.copy(file.toPath(), zipOutput);
                    zipOutput.closeEntry();
                }
            }
            // Upload spark archive to the framework location
            frameworkDir.mkdirs("755");
            try (OutputStream os = frameworkLocation.getOutputStream("644")) {
                Files.copy(archive.toPath(), os);
            }
        } finally {
            archive.delete();
        }
    }
    return new SparkFramework(new LocalizeResource(resolveURI(frameworkLocation), true), SPARK_YARN_ARCHIVE);
}
Also used: ZipOutputStream(java.util.zip.ZipOutputStream) LocalizeResource(io.cdap.cdap.internal.app.runtime.distributed.LocalizeResource) FileOutputStream(java.io.FileOutputStream) ZipEntry(java.util.zip.ZipEntry) BufferedOutputStream(java.io.BufferedOutputStream) OutputStream(java.io.OutputStream) URI(java.net.URI) File(java.io.File) Location(org.apache.twill.filesystem.Location)
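
SparkFramework itself is not shown on this page. Based on the calls in Examples 4 and 5 (the two-argument constructor, addLocalizeResource, updateSparkConf, updateSparkEnv), a hedged sketch of the holder type; the localized archive name and the conf/env wiring are assumptions:

private static final class SparkFramework {
    // Hypothetical name under which the framework archive is localized.
    private static final String LOCALIZED_FRAMEWORK = "spark.framework.zip";

    private final LocalizeResource localizeResource;
    private final String configKey;   // e.g. SPARK_YARN_ARCHIVE

    SparkFramework(LocalizeResource localizeResource, String configKey) {
        this.localizeResource = localizeResource;
        this.configKey = configKey;
    }

    void addLocalizeResource(Map<String, LocalizeResource> resources) {
        resources.put(LOCALIZED_FRAMEWORK, localizeResource);
    }

    void updateSparkConf(Properties sparkConf) {
        // Point the archive config key at the localized copy; the real
        // implementation may instead keep the original remote URI.
        sparkConf.setProperty(configKey, LOCALIZED_FRAMEWORK);
    }

    void updateSparkEnv(Map<String, String> env) {
        // Placeholder: the real implementation may export additional
        // environment variables for the Spark client container.
    }
}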

Aggregations

LocalizeResource (io.cdap.cdap.internal.app.runtime.distributed.LocalizeResource): 19
File (java.io.File): 12
URI (java.net.URI): 8
URISyntaxException (java.net.URISyntaxException): 4
ArrayList (java.util.ArrayList): 4
Test (org.junit.Test): 4
CConfiguration (io.cdap.cdap.common.conf.CConfiguration): 3
Path (java.nio.file.Path): 3
HashMap (java.util.HashMap): 3
Map (java.util.Map): 3
ZipOutputStream (java.util.zip.ZipOutputStream): 3
Function (com.google.common.base.Function): 2
Joiner (com.google.common.base.Joiner): 2
ListenableFuture (com.google.common.util.concurrent.ListenableFuture): 2
SparkSpecification (io.cdap.cdap.api.spark.SparkSpecification): 2
BufferedOutputStream (java.io.BufferedOutputStream): 2
FileOutputStream (java.io.FileOutputStream): 2
IOException (java.io.IOException): 2
HashSet (java.util.HashSet): 2
LinkedHashSet (java.util.LinkedHashSet): 2