Use of io.cdap.cdap.internal.app.runtime.distributed.LocalizeResource in project cdap by caskdata.
The class LocalizationUtilsTest, method testZip.
@Test
public void testZip() throws IOException {
  String zipFileName = "target";
  File directory = TEMP_FOLDER.newFolder("zip");
  File file1 = File.createTempFile("file1", ".txt", directory);
  File file2 = File.createTempFile("file2", ".txt", directory);
  File zipFile = createZipFile(zipFileName, directory, false);
  File localizationDir = TEMP_FOLDER.newFolder("localZip");
  File localizedResource = LocalizationUtils.localizeResource(zipFileName, new LocalizeResource(zipFile, true),
                                                              localizationDir);
  Assert.assertTrue(localizedResource.isDirectory());
  File[] files = localizedResource.listFiles();
  Assert.assertNotNull(files);
  Assert.assertEquals(2, files.length);
  if (file1.getName().equals(files[0].getName())) {
    Assert.assertEquals(file2.getName(), files[1].getName());
  } else {
    Assert.assertEquals(file1.getName(), files[1].getName());
    Assert.assertEquals(file2.getName(), files[0].getName());
  }
}
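The createZipFile helper is not part of this excerpt. A minimal sketch of what it might look like, inferred from the call above (String, File, boolean): it zips the files directly under the given directory into <zipFileName>.zip inside the JUnit temporary folder. The nestInsideDir behavior shown here is an assumption.

// Hypothetical sketch, not copied from the CDAP source.
// Needs java.util.zip.ZipEntry, java.util.zip.ZipOutputStream and java.nio.file.Files.
private File createZipFile(String zipFileName, File directory, boolean nestInsideDir) throws IOException {
  File zipFile = TEMP_FOLDER.newFile(zipFileName + ".zip");
  File[] files = directory.listFiles();
  Assert.assertNotNull(files);
  try (ZipOutputStream output = new ZipOutputStream(new FileOutputStream(zipFile))) {
    for (File file : files) {
      // Optionally nest entries under the directory name
      String entryName = nestInsideDir ? directory.getName() + "/" + file.getName() : file.getName();
      output.putNextEntry(new ZipEntry(entryName));
      Files.copy(file.toPath(), output);
      output.closeEntry();
    }
  }
  return zipFile;
}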
Use of io.cdap.cdap.internal.app.runtime.distributed.LocalizeResource in project cdap by caskdata.
The class SparkRuntimeService, method prepareHBaseDDLExecutorResources.
/**
 * Prepares the {@link HBaseDDLExecutor} implementation for localization.
 */
private void prepareHBaseDDLExecutorResources(File tempDir, CConfiguration cConf,
                                              List<LocalizeResource> localizeResources) throws IOException {
  String ddlExecutorExtensionDir = cConf.get(Constants.HBaseDDLExecutor.EXTENSIONS_DIR);
  if (ddlExecutorExtensionDir == null) {
    // Nothing to localize
    return;
  }
  final File target = new File(tempDir, "hbaseddlext.jar");
  BundleJarUtil.createJar(new File(ddlExecutorExtensionDir), target);
  localizeResources.add(new LocalizeResource(target, true));
  cConf.set(Constants.HBaseDDLExecutor.EXTENSIONS_DIR, target.getName());
}
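For context, a hypothetical call site (the variable names are illustrative, not from the CDAP source): the method jars up the extension directory, queues the jar for localization as an expandable archive, and rewrites the cConf key to the localized name so containers resolve it relative to their working directory.

// Hypothetical usage sketch: collect the resources to ship with the container.
List<LocalizeResource> localizeResources = new ArrayList<>();
File tempDir = java.nio.file.Files.createTempDirectory("launcher").toFile();  // illustrative temp dir
prepareHBaseDDLExecutorResources(tempDir, cConf, localizeResources);
// If the extensions dir was configured, localizeResources now contains
// "hbaseddlext.jar" marked as an archive (expand = true), and cConf's
// EXTENSIONS_DIR points at that name inside the container's working directory.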
Use of io.cdap.cdap.internal.app.runtime.distributed.LocalizeResource in project cdap by caskdata.
The class AbstractSparkSubmitter, method createSubmitArguments.
/**
 * Creates the list of arguments that will be used for calling {@link SparkSubmit#main(String[])}.
 *
 * @param runtimeContext the {@link SparkRuntimeContext} for the Spark program
 * @param configs set of Spark configurations
 * @param resources list of resources that need to be localized to Spark containers
 * @param jobFile the job file for Spark
 * @return a list of arguments
 * @throws Exception if there is an error while creating the submit arguments
 */
private List<String> createSubmitArguments(SparkRuntimeContext runtimeContext, Map<String, String> configs,
                                           List<LocalizeResource> resources, URI jobFile) throws Exception {
  SparkSpecification spec = runtimeContext.getSparkSpecification();
  ImmutableList.Builder<String> builder = ImmutableList.builder();
  Iterable<LocalizeResource> archivesIterable = getArchives(resources);
  Iterable<LocalizeResource> filesIterable = getFiles(resources);

  addMaster(configs, builder);
  builder.add("--conf").add("spark.app.name=" + spec.getName());

  configs.putAll(generateSubmitConf());
  BiConsumer<String, String> confAdder = (k, v) -> builder.add("--conf").add(k + "=" + v);
  configs.forEach(confAdder);

  String archives = Joiner.on(',').join(Iterables.transform(archivesIterable, RESOURCE_TO_PATH));
  String files = Joiner.on(',').join(Iterables.transform(filesIterable, RESOURCE_TO_PATH));

  if (!Strings.isNullOrEmpty(archives)) {
    builder.add("--archives").add(archives);
  }
  if (!Strings.isNullOrEmpty(files)) {
    builder.add("--files").add(files);
  }

  URI newJobFile = getJobFile();
  if (newJobFile != null) {
    jobFile = newJobFile;
  }

  boolean isPySpark = jobFile.getPath().endsWith(".py");
  if (isPySpark) {
    // For python, add extra py library files
    String pyFiles = configs.get("spark.submit.pyFiles");
    if (pyFiles != null) {
      builder.add("--py-files").add(pyFiles);
    }
  } else {
    builder.add("--class").add(SparkMainWrapper.class.getName());
  }

  if ("file".equals(jobFile.getScheme())) {
    builder.add(jobFile.getPath());
  } else {
    builder.add(jobFile.toString());
  }

  if (!isPySpark) {
    // Add extra arguments to identify the program easily from the command line.
    // Arguments to the user program always come from the runtime arguments.
    builder.add("--cdap.spark.program=" + runtimeContext.getProgramRunId().toString());
    builder.add("--cdap.user.main.class=" + spec.getMainClassName());
  }

  return builder.build();
}
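The getArchives, getFiles, and RESOURCE_TO_PATH helpers are referenced above but not shown in this excerpt. A sketch of plausible shapes, assuming LocalizeResource exposes isArchive() and getURI() (both exist on the CDAP class); the bodies here are illustrative, not copied from the source.

// RESOURCE_TO_PATH is a com.google.common.base.Function, since it is used with Iterables.transform above.
private static final Function<LocalizeResource, String> RESOURCE_TO_PATH =
  resource -> resource.getURI().toString();

private Iterable<LocalizeResource> getArchives(List<LocalizeResource> resources) {
  // Archives are expanded on localization and passed to spark-submit via --archives
  return Iterables.filter(resources, LocalizeResource::isArchive);
}

private Iterable<LocalizeResource> getFiles(List<LocalizeResource> resources) {
  // Plain files are passed via --files
  return Iterables.filter(resources, resource -> !resource.isArchive());
}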
Use of io.cdap.cdap.internal.app.runtime.distributed.LocalizeResource in project cdap by caskdata.
The class SparkPackageUtils, method prepareSparkResources.
/**
 * Prepares the resources that need to be localized to the Spark client container.
 *
 * @param sparkCompat the Spark version to prepare for
 * @param locationFactory the location factory for uploading files
 * @param tempDir a temporary directory for file creation
 * @param localizeResources a map from localized name to {@link LocalizeResource} for this method to update
 * @param env the environment map to update
 * @throws IOException if failed to prepare the Spark resources
 */
public static void prepareSparkResources(SparkCompat sparkCompat, LocationFactory locationFactory, File tempDir,
                                         Map<String, LocalizeResource> localizeResources,
                                         Map<String, String> env) throws IOException {
  Properties sparkConf = getSparkDefaultConf();

  // Localize the Spark framework
  SparkFramework framework = prepareSparkFramework(sparkCompat, sparkConf, locationFactory, tempDir);
  framework.addLocalizeResource(localizeResources);
  framework.updateSparkConf(sparkConf);
  framework.updateSparkEnv(env);

  // Localize PySpark
  List<String> pySparkArchives = new ArrayList<>();
  for (File archive : getLocalPySparkArchives(sparkCompat)) {
    localizeResources.put(archive.getName(), new LocalizeResource(archive));
    pySparkArchives.add(archive.getName());
  }
  // Set the PYSPARK_ARCHIVES_PATH environment variable in the YARN container.
  env.put(PYSPARK_ARCHIVES_PATH, Joiner.on(",").join(pySparkArchives));

  // Localize the spark-defaults.conf file
  File sparkDefaultConfFile = saveSparkDefaultConf(sparkConf,
                                                   File.createTempFile(SPARK_DEFAULTS_CONF, null, tempDir));
  localizeResources.put(SPARK_DEFAULTS_CONF, new LocalizeResource(sparkDefaultConfFile));

  env.putAll(getSparkClientEnv());

  // Shallow copy all files under the directory defined by $HADOOP_CONF_DIR and the explore conf directory.
  // If $HADOOP_CONF_DIR is not defined, use the location of "yarn-site.xml" to determine the directory.
  // This is part of the workaround for CDAP-5019 (SPARK-13441) and CDAP-12330.
  List<File> configDirs = new ArrayList<>();

  if (System.getenv().containsKey(ApplicationConstants.Environment.HADOOP_CONF_DIR.key())) {
    configDirs.add(new File(System.getenv(ApplicationConstants.Environment.HADOOP_CONF_DIR.key())));
  } else {
    URL yarnSiteLocation = SparkPackageUtils.class.getClassLoader().getResource("yarn-site.xml");
    if (yarnSiteLocation == null || !"file".equals(yarnSiteLocation.getProtocol())) {
      LOG.warn("Failed to derive HADOOP_CONF_DIR from yarn-site.xml location: {}", yarnSiteLocation);
    } else {
      configDirs.add(new File(yarnSiteLocation.getPath()).getParentFile());
    }
  }

  // Include the explore config dirs as well
  Splitter splitter = Splitter.on(File.pathSeparatorChar).omitEmptyStrings();
  for (String dir : splitter.split(System.getProperty(EXPLORE_CONF_DIRS, ""))) {
    configDirs.add(new File(dir));
  }

  if (!configDirs.isEmpty()) {
    File targetFile = File.createTempFile(LOCALIZED_CONF_DIR, ".zip", tempDir);
    Set<String> entries = new HashSet<>();
    try (ZipOutputStream output = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(targetFile)))) {
      for (File configDir : configDirs) {
        try {
          LOG.debug("Adding files from {} to {}.zip", configDir, LOCALIZED_CONF_DIR);
          addConfigFiles(configDir, entries, output);
        } catch (IOException e) {
          LOG.warn("Failed to create archive from {}", configDir, e);
        }
      }
    }
    localizeResources.put(LOCALIZED_CONF_DIR, new LocalizeResource(targetFile, true));
    env.put("YARN_CONF_DIR", "$PWD/" + LOCALIZED_CONF_DIR);
  }
}
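The addConfigFiles helper is not shown in this excerpt. A minimal sketch under the assumption that it performs the shallow copy described in the comment above, skipping entry names already added so that earlier directories win on conflicts (the real CDAP implementation may do more):

// Hypothetical sketch, not copied from the CDAP source: shallow-copies the
// regular files in configDir into the zip, deduplicating by entry name.
private static void addConfigFiles(File configDir, Set<String> entries, ZipOutputStream output) throws IOException {
  File[] files = configDir.listFiles();
  if (files == null) {
    return;
  }
  for (File file : files) {
    // entries.add returns false for duplicates, so the first directory wins
    if (file.isFile() && entries.add(file.getName())) {
      output.putNextEntry(new ZipEntry(file.getName()));
      Files.copy(file.toPath(), output);
      output.closeEntry();
    }
  }
}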
Use of io.cdap.cdap.internal.app.runtime.distributed.LocalizeResource in project cdap by caskdata.
The class SparkPackageUtils, method prepareSparkFramework.
/**
 * Prepares the Spark framework on the location.
 *
 * @param sparkCompat the Spark compat version to prepare for
 * @param sparkConf the Spark configuration
 * @param locationFactory the {@link LocationFactory} for saving the spark framework jar
 * @param tempDir directory for temporary file creation
 * @return a {@link SparkFramework} containing information about the Spark framework in the localization context
 * @throws IOException if failed to prepare the framework
 */
private static SparkFramework prepareSparkFramework(SparkCompat sparkCompat, Properties sparkConf,
                                                    LocationFactory locationFactory, File tempDir) throws IOException {
  String sparkYarnArchive = sparkConf.getProperty(SPARK_YARN_ARCHIVE);

  if (sparkYarnArchive != null) {
    URI sparkYarnArchiveURI = URI.create(sparkYarnArchive);
    if (locationFactory.getHomeLocation().toURI().getScheme().equals(sparkYarnArchiveURI.getScheme())) {
      Location frameworkLocation = locationFactory.create(URI.create(sparkYarnArchive));
      if (frameworkLocation.exists()) {
        return new SparkFramework(new LocalizeResource(resolveURI(frameworkLocation), true), SPARK_YARN_ARCHIVE);
      }
      LOG.warn("The location {} set by '{}' does not exist.", frameworkLocation, SPARK_YARN_ARCHIVE);
    }
  }

  // If spark.yarn.archive is not defined or doesn't exist, build an archive zip from the local FS and upload it
  String sparkVersion = System.getenv(SPARK_VERSION);
  sparkVersion = sparkVersion == null ? sparkCompat.getCompat() : sparkVersion;

  String archiveName = "spark.archive-" + sparkVersion + "-" + VersionInfo.getVersion() + ".zip";
  Location frameworkDir = locationFactory.create("/framework/spark");
  Location frameworkLocation = frameworkDir.append(archiveName);

  if (!frameworkLocation.exists()) {
    File archive = new File(tempDir, archiveName);
    try {
      try (ZipOutputStream zipOutput = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(archive)))) {
        zipOutput.setLevel(Deflater.NO_COMPRESSION);
        for (File file : getLocalSparkLibrary(sparkCompat)) {
          zipOutput.putNextEntry(new ZipEntry(file.getName()));
          Files.copy(file.toPath(), zipOutput);
          zipOutput.closeEntry();
        }
      }

      // Upload the Spark archive to the framework location
      frameworkDir.mkdirs("755");
      try (OutputStream os = frameworkLocation.getOutputStream("644")) {
        Files.copy(archive.toPath(), os);
      }
    } finally {
      archive.delete();
    }
  }

  return new SparkFramework(new LocalizeResource(resolveURI(frameworkLocation), true), SPARK_YARN_ARCHIVE);
}
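The resolveURI helper is also outside this excerpt. A simplified sketch, assuming its job is to return a URI with an explicit scheme so YARN can localize the location; the real CDAP implementation handles more cases:

// Simplified, hypothetical sketch of resolveURI, not the actual CDAP implementation:
// ensure the URI carries an explicit scheme, defaulting local paths to "file".
private static URI resolveURI(Location location) {
  URI uri = location.toURI();
  return uri.getScheme() == null ? new File(uri.getPath()).toURI() : uri;
}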