use of co.cask.cdap.internal.app.runtime.distributed.LocalizeResource in project cdap by caskdata.
the class SparkRuntimeService method startUp.
@Override
protected void startUp() throws Exception {
  // Additional Spark job initialization at run-time.
  // This context is for calling initialize() and onFinish() on the Spark program.
  // Fields injection for the Spark program: it has to be done in here instead of in
  // SparkProgramRunner for the @UseDataSet injection, since the dataset cache being used
  // in Spark is a MultiThreadDatasetCache, and AbstractExecutionThreadService guarantees
  // that startUp(), run() and shutDown() all happen in the same thread.
  Reflections.visit(spark, spark.getClass(),
                    new PropertyFieldSetter(runtimeContext.getSparkSpecification().getProperties()),
                    new DataSetFieldSetter(runtimeContext.getDatasetCache()),
                    new MetricsFieldSetter(runtimeContext));

  // Create a temporary directory locally for storing all generated files.
  File tempDir = DirUtils.createTempDir(new File(cConf.get(Constants.CFG_LOCAL_DATA_DIR),
                                                 cConf.get(Constants.AppFabric.TEMP_DIR)).getAbsoluteFile());
  tempDir.mkdirs();
  this.cleanupTask = createCleanupTask(tempDir, System.getProperties());
  try {
    initialize();
    SparkRuntimeContextConfig contextConfig = new SparkRuntimeContextConfig(runtimeContext.getConfiguration());
    final File jobJar = generateJobJar(tempDir, contextConfig.isLocal(), cConf);
    final List<LocalizeResource> localizeResources = new ArrayList<>();
    String metricsConfPath;
    String classpath = "";
    if (contextConfig.isLocal()) {
      // In local mode, always copy (or link if local) user-requested resources
      copyUserResources(context.getLocalizeResources(), tempDir);
      File metricsConf = SparkMetricsSink.writeConfig(new File(tempDir, CDAP_METRICS_PROPERTIES));
      metricsConfPath = metricsConf.getAbsolutePath();
    } else {
      // Localize all user-requested files in distributed mode
      distributedUserResources(context.getLocalizeResources(), localizeResources);

      // Localize both the unexpanded and the expanded program jar
      File programJar = Locations.linkOrCopy(runtimeContext.getProgram().getJarLocation(),
                                             new File(tempDir, SparkRuntimeContextProvider.PROGRAM_JAR_NAME));
      File expandedProgramJar = Locations.linkOrCopy(runtimeContext.getProgram().getJarLocation(),
                                                     new File(tempDir, SparkRuntimeContextProvider.PROGRAM_JAR_EXPANDED_NAME));
      localizeResources.add(new LocalizeResource(programJar));
      localizeResources.add(new LocalizeResource(expandedProgramJar, true));

      // Localize plugins
      if (pluginArchive != null) {
        localizeResources.add(new LocalizeResource(pluginArchive, true));
      }

      // Create and localize the launcher jar, which sets up services and the classloader for Spark containers
      localizeResources.add(new LocalizeResource(createLauncherJar(tempDir)));

      // Create the metrics conf file in the current directory, since the same value for the
      // "spark.metrics.conf" config needs to be used for both driver and executor processes.
      // Also localize the metrics conf file to the executor nodes.
      File metricsConf = SparkMetricsSink.writeConfig(new File(CDAP_METRICS_PROPERTIES));
      metricsConfPath = metricsConf.getName();
      localizeResources.add(new LocalizeResource(metricsConf));

      // Localize the cConf file
      localizeResources.add(new LocalizeResource(saveCConf(cConf, tempDir)));

      // Preserve and localize runtime information in the hConf
      Configuration hConf = contextConfig.set(runtimeContext, pluginArchive).getConfiguration();
      localizeResources.add(new LocalizeResource(saveHConf(hConf, tempDir)));

      // Joiner for creating the classpath for Spark containers
      Joiner joiner = Joiner.on(File.pathSeparator).skipNulls();

      // Localize the spark.jar archive, which contains all CDAP and dependency jars
      File sparkJar = new File(tempDir, CDAP_SPARK_JAR);
      classpath = joiner.join(Iterables.transform(buildDependencyJar(sparkJar), new Function<String, String>() {
        @Override
        public String apply(String name) {
          return Paths.get("$PWD", CDAP_SPARK_JAR, name).toString();
        }
      }));
      localizeResources.add(new LocalizeResource(sparkJar, true));

      // Localize logback if there is one. It is placed at the beginning of the classpath
      File logbackJar = ProgramRunners.createLogbackJar(new File(tempDir, "logback.xml.jar"));
      if (logbackJar != null) {
        localizeResources.add(new LocalizeResource(logbackJar));
        classpath = joiner.join(Paths.get("$PWD", logbackJar.getName()), classpath);
      }

      // Localize extra jars and append them to the end of the classpath
      List<String> extraJars = new ArrayList<>();
      for (URI jarURI : CConfigurationUtil.getExtraJars(cConf)) {
        extraJars.add(Paths.get("$PWD", LocalizationUtils.getLocalizedName(jarURI)).toString());
        localizeResources.add(new LocalizeResource(jarURI, false));
      }
      classpath = joiner.join(classpath, joiner.join(extraJars));
    }

    final Map<String, String> configs = createSubmitConfigs(tempDir, metricsConfPath, classpath,
                                                            context.getLocalizeResources(), contextConfig.isLocal());
    submitSpark = new Callable<ListenableFuture<RunId>>() {
      @Override
      public ListenableFuture<RunId> call() throws Exception {
        // This happens when stop() was called while the service was still starting
        if (!isRunning()) {
          return immediateCancelledFuture();
        }
        return sparkSubmitter.submit(runtimeContext, configs, localizeResources, jobJar, runtimeContext.getRunId());
      }
    };
  } catch (LinkageError e) {
    // Wrap LinkageError; otherwise listeners of this Guava Service may not be called if the initialization
    // of the user program is missing dependencies (CDAP-2543)
    throw new Exception(e.getMessage(), e);
  } catch (Throwable t) {
    cleanupTask.run();
    throw t;
  }
}
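The snippet distinguishes plain files from archives: the single-argument LocalizeResource constructor localizes a file as-is, while passing true as the second argument asks YARN to expand the archive into a directory on the container. Below is a minimal sketch of that decision, using only the constructors and accessors that appear in the snippets on this page; the helper name and the extension-based check are illustrative assumptions, not CDAP code.

import java.io.File;
import java.util.ArrayList;
import java.util.List;

import co.cask.cdap.internal.app.runtime.distributed.LocalizeResource;

public final class LocalizeResourceExample {

  // Hypothetical helper: localize zips/tarballs as archives so YARN expands them
  // into a directory on the container; localize everything else as plain files.
  static LocalizeResource toLocalizeResource(File file) {
    boolean archive = file.getName().endsWith(".zip") || file.getName().endsWith(".tgz");
    return new LocalizeResource(file, archive);
  }

  public static void main(String[] args) {
    List<LocalizeResource> resources = new ArrayList<>();
    resources.add(toLocalizeResource(new File("program.jar")));   // localized as a plain file
    resources.add(toLocalizeResource(new File("plugins.zip")));   // expanded on the container
    for (LocalizeResource resource : resources) {
      System.out.println(resource.getURI() + " archive=" + resource.isArchive());
    }
  }
}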
use of co.cask.cdap.internal.app.runtime.distributed.LocalizeResource in project cdap by caskdata.
the class SparkPackageUtils method prepareSparkResources.
/**
* Prepares the resources that need to be localized to the Spark client container.
*
* @param sparkCompat the spark version to prepare for
* @param locationFactory the location factory for uploading files
* @param tempDir a temporary directory for file creation
* @param localizeResources A map from localized name to {@link LocalizeResource} for this method to update
* @param env the environment map to update
* @throws IOException if failed to prepare the spark resources
*/
public static void prepareSparkResources(SparkCompat sparkCompat, LocationFactory locationFactory, File tempDir,
                                         Map<String, LocalizeResource> localizeResources,
                                         Map<String, String> env) throws IOException {
  Properties sparkConf = getSparkDefaultConf();

  // Localize the Spark framework
  SparkFramework framework = prepareSparkFramework(sparkCompat, locationFactory, tempDir);
  framework.addLocalizeResource(localizeResources);
  framework.updateSparkConf(sparkConf);
  framework.updateSparkEnv(env);

  // Localize the spark-defaults.conf file
  File sparkDefaultConfFile = saveSparkDefaultConf(sparkConf,
                                                   File.createTempFile(SPARK_DEFAULTS_CONF, null, tempDir));
  localizeResources.put(SPARK_DEFAULTS_CONF, new LocalizeResource(sparkDefaultConfFile));

  // Shallow-copy all files under the directory defined by $HADOOP_CONF_DIR.
  // If $HADOOP_CONF_DIR is not defined, use the location of "yarn-site.xml" to determine the directory.
  // This is part of the workaround for CDAP-5019 (SPARK-13441).
  File hadoopConfDir = null;
  if (System.getenv().containsKey(ApplicationConstants.Environment.HADOOP_CONF_DIR.key())) {
    hadoopConfDir = new File(System.getenv(ApplicationConstants.Environment.HADOOP_CONF_DIR.key()));
  } else {
    URL yarnSiteLocation = SparkPackageUtils.class.getClassLoader().getResource("yarn-site.xml");
    if (yarnSiteLocation != null) {
      try {
        hadoopConfDir = new File(yarnSiteLocation.toURI()).getParentFile();
      } catch (URISyntaxException e) {
        // Shouldn't happen
        LOG.warn("Failed to derive HADOOP_CONF_DIR from yarn-site.xml");
      }
    }
  }
  if (hadoopConfDir != null && hadoopConfDir.isDirectory()) {
    try {
      final File targetFile = File.createTempFile(LOCALIZED_CONF_DIR, ".zip", tempDir);
      try (ZipOutputStream zipOutput = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(targetFile)))) {
        for (File file : DirUtils.listFiles(hadoopConfDir)) {
          // Shallow copy of files under the hadoop conf dir. Ignore files that cannot be read.
          if (file.isFile() && file.canRead()) {
            zipOutput.putNextEntry(new ZipEntry(file.getName()));
            Files.copy(file.toPath(), zipOutput);
          }
        }
      }
      localizeResources.put(LOCALIZED_CONF_DIR, new LocalizeResource(targetFile, true));
    } catch (IOException e) {
      LOG.warn("Failed to create archive from {}", hadoopConfDir, e);
    }
  }
}
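The shallow copy of the Hadoop conf directory is a self-contained technique: zip only the top-level readable files, skipping subdirectories, so the archive can be localized and expanded under a fixed name on the container. A standalone sketch using just the JDK follows; the class and method names are illustrative, not part of CDAP.

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

public final class ShallowConfZip {

  // Zips the immediate, readable files of a directory; subdirectories are skipped,
  // mirroring the "shallow copy" of $HADOOP_CONF_DIR in the method above.
  static void zipTopLevelFiles(File confDir, File targetZip) throws IOException {
    try (ZipOutputStream zipOutput =
           new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(targetZip)))) {
      File[] files = confDir.listFiles();
      if (files == null) {
        return;  // not a directory, or not readable
      }
      for (File file : files) {
        if (file.isFile() && file.canRead()) {
          zipOutput.putNextEntry(new ZipEntry(file.getName()));
          Files.copy(file.toPath(), zipOutput);
          zipOutput.closeEntry();
        }
      }
    }
  }
}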
use of co.cask.cdap.internal.app.runtime.distributed.LocalizeResource in project cdap by caskdata.
the class MapReduceRuntimeService method localizeUserResources.
/**
* Localizes resources requested by users in the MapReduce Program's beforeSubmit phase.
* In Local mode, also copies resources to a temporary directory.
*
* @param job the {@link Job} for this MapReduce program
* @param targetDir in local mode, a temporary directory to copy the resources to
* @return a {@link Map} of resource name to the resource path. The resource path will be absolute in local mode,
* while it will just contain the file name in distributed mode.
*/
private Map<String, String> localizeUserResources(Job job, File targetDir) throws IOException {
  Map<String, String> localizedResources = new HashMap<>();
  Map<String, LocalizeResource> resourcesToLocalize = context.getResourcesToLocalize();
  for (Map.Entry<String, LocalizeResource> entry : resourcesToLocalize.entrySet()) {
    String localizedFilePath;
    String name = entry.getKey();
    Configuration mapredConf = job.getConfiguration();
    if (MapReduceTaskContextProvider.isLocal(mapredConf)) {
      // In local mode, also copy the resources to localize into a temporary directory
      localizedFilePath = LocalizationUtils.localizeResource(entry.getKey(), entry.getValue(), targetDir).getAbsolutePath();
    } else {
      URI uri = entry.getValue().getURI();
      // In distributed mode, use the MapReduce Job object to localize resources.
      // Rebuild the URI with the requested name as its fragment so the file is localized under that name.
      URI actualURI;
      try {
        actualURI = new URI(uri.getScheme(), uri.getAuthority(), uri.getPath(), uri.getQuery(), name);
      } catch (URISyntaxException e) {
        // This shouldn't happen, since the original URI is valid. If it does though, there is nothing
        // that clients can do to recover, so not propagating a checked exception.
        throw Throwables.propagate(e);
      }
      if (entry.getValue().isArchive()) {
        job.addCacheArchive(actualURI);
      } else {
        job.addCacheFile(actualURI);
      }
      localizedFilePath = name;
    }
    LOG.debug("MapReduce Localizing file {} {}", entry.getKey(), entry.getValue());
    localizedResources.put(name, localizedFilePath);
  }
  return localizedResources;
}
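The distributed branch leans on a YARN distributed-cache convention: the fragment of a cache-file URI determines the name the file is localized under in the container, so rebuilding the URI with the user-supplied name as its fragment is effectively a rename. A minimal sketch of just that step, using only the JDK (the class name and sample paths are illustrative):

import java.net.URI;
import java.net.URISyntaxException;

public final class CacheUriRename {

  // Returns a copy of the URI whose fragment is the desired localized name.
  // YARN localizes hdfs://host/path/file.jar#alias.jar under the name "alias.jar".
  static URI withLocalizedName(URI uri, String name) throws URISyntaxException {
    return new URI(uri.getScheme(), uri.getAuthority(), uri.getPath(), uri.getQuery(), name);
  }

  public static void main(String[] args) throws URISyntaxException {
    URI source = URI.create("hdfs://namenode/apps/libs/deps-1.0.jar");
    System.out.println(withLocalizedName(source, "deps.jar"));
    // prints hdfs://namenode/apps/libs/deps-1.0.jar#deps.jar
  }
}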
use of co.cask.cdap.internal.app.runtime.distributed.LocalizeResource in project cdap by caskdata.
the class MasterTwillApplication method prepareExploreResources.
/**
* Prepares resources to be localized to the explore container.
*/
private void prepareExploreResources(Path tempDir, Configuration hConf,
                                     Map<String, LocalizeResource> localizeResources,
                                     Collection<String> extraClassPath) throws IOException {
  // Find the jars in the yarn application classpath
  String yarnAppClassPath = Joiner.on(File.pathSeparatorChar).join(
    hConf.getTrimmedStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
                            YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH));
  final Set<File> yarnAppJarFiles = new LinkedHashSet<>();
  Iterables.addAll(yarnAppJarFiles, ExploreUtils.getClasspathJarFiles(yarnAppClassPath));

  // Filter out jar files that are already in the yarn application classpath,
  // as those are already available in the Explore container.
  Iterable<File> exploreFiles = Iterables.filter(ExploreUtils.getExploreClasspathJarFiles("tgz", "gz"),
                                                 new Predicate<File>() {
    @Override
    public boolean apply(File file) {
      return !yarnAppJarFiles.contains(file);
    }
  });

  // Create a zip file that contains all explore jar files.
  // Uploading and localizing one big file is faster than localizing many small ones.
  String exploreArchiveName = "explore.archive.zip";
  Path exploreArchive = Files.createTempFile(tempDir, "explore.archive", ".zip");
  Set<String> addedJar = new HashSet<>();
  try (ZipOutputStream zos = new ZipOutputStream(Files.newOutputStream(exploreArchive))) {
    zos.setLevel(Deflater.NO_COMPRESSION);
    for (File file : exploreFiles) {
      if (file.getName().endsWith(".tgz") || file.getName().endsWith(".gz")) {
        // It's an archive, so localize it as an archive so that it gets expanded to a directory on the container
        localizeResources.put(file.getName(), new LocalizeResource(file, true));
        // Add the expanded directory, the jars under it, and the jars under its "lib" directory to the classpath
        extraClassPath.add(file.getName());
        extraClassPath.add(file.getName() + "/*");
        extraClassPath.add(file.getName() + "/lib/*");
      } else {
        // For a jar file, add it to the explore archive
        File targetFile = tempDir.resolve(System.currentTimeMillis() + "-" + file.getName()).toFile();
        File resultFile = ExploreServiceUtils.patchHiveClasses(file, targetFile);
        if (resultFile == targetFile) {
          LOG.info("Rewritten HiveAuthFactory from jar file {} to jar file {}", file, resultFile);
        }
        // Don't add duplicate jars
        if (addedJar.add(resultFile.getName())) {
          zos.putNextEntry(new ZipEntry(resultFile.getName()));
          Files.copy(resultFile.toPath(), zos);
          extraClassPath.add(exploreArchiveName + File.separator + resultFile.getName());
        }
      }
    }
  }
  if (!addedJar.isEmpty()) {
    localizeResources.put(exploreArchiveName, new LocalizeResource(exploreArchive.toFile(), true));
  }

  // Explore also depends on MR, hence adding the MR jars to the classpath.
  // Depending on how the cluster is configured, we might need to localize the MR framework tgz as well.
  MapReduceContainerHelper.localizeFramework(hConf, localizeResources);
  MapReduceContainerHelper.addMapReduceClassPath(hConf, extraClassPath);
  LOG.trace("Jars in extra classpath after adding jars in explore classpath: {}", extraClassPath);
}
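Note the Deflater.NO_COMPRESSION setting: jar files are already deflated internally, so re-compressing them wastes CPU, while the point of the archive is only to turn many small uploads into one. A standalone sketch of bundling jars into such a stored-speed zip, JDK-only, with illustrative names:

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.zip.Deflater;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

public final class JarBundler {

  // Bundles jars into one zip with compression turned off: their contents are
  // already deflated, so level 0 skips double-compression work while still
  // yielding a single archive that is cheap to upload and localize.
  static void bundleJars(Iterable<File> jars, Path targetZip) throws IOException {
    try (ZipOutputStream zos = new ZipOutputStream(Files.newOutputStream(targetZip))) {
      zos.setLevel(Deflater.NO_COMPRESSION);
      for (File jar : jars) {
        zos.putNextEntry(new ZipEntry(jar.getName()));
        Files.copy(jar.toPath(), zos);
        zos.closeEntry();
      }
    }
  }
}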
use of co.cask.cdap.internal.app.runtime.distributed.LocalizeResource in project cdap by caskdata.
the class MasterTwillApplication method prepareLogSaverResources.
/**
* Prepares resources that need to be localized to the log saver container.
*/
private void prepareLogSaverResources(Path tempDir, CConfiguration containerCConf,
                                      Map<String, LocalizeResource> localizeResources,
                                      Collection<String> extraClassPath) throws IOException {
  String configJarName = "log.config.jar";
  String libJarName = "log.lib.jar";

  // Localize log config files
  List<File> configFiles = DirUtils.listFiles(new File(cConf.get(Constants.Logging.PIPELINE_CONFIG_DIR)), "xml");
  if (!configFiles.isEmpty()) {
    Path configJar = Files.createTempFile(tempDir, "log.config", ".jar");
    try (JarOutputStream jarOutput = new JarOutputStream(Files.newOutputStream(configJar))) {
      for (File configFile : configFiles) {
        jarOutput.putNextEntry(new JarEntry(configFile.getName()));
        Files.copy(configFile.toPath(), jarOutput);
        jarOutput.closeEntry();
      }
    }
    localizeResources.put(configJarName, new LocalizeResource(configJar.toUri(), true));
  }
  // It's ok to point at a non-existing directory in case there are no config files
  containerCConf.set(Constants.Logging.PIPELINE_CONFIG_DIR, configJarName);

  // Localize log lib jars.
  // First collect the jars under each configured lib directory.
  List<File> libJars = LoggingUtil.getExtensionJars(cConf);
  if (!libJars.isEmpty()) {
    Path libJar = Files.createTempFile(tempDir, "log.lib", ".jar");
    try (JarOutputStream jarOutput = new JarOutputStream(Files.newOutputStream(libJar))) {
      for (File jarFile : libJars) {
        jarOutput.putNextEntry(new JarEntry(jarFile.getName()));
        Files.copy(jarFile.toPath(), jarOutput);
        jarOutput.closeEntry();
        // Add the log lib jar to the container classpath
        extraClassPath.add(libJarName + File.separator + jarFile.getName());
      }
    }
    localizeResources.put(libJarName, new LocalizeResource(libJar.toUri(), true));
  }
  // Set it to an empty value since we don't use it in the container;
  // all jars are already added as part of the container classpath.
  containerCConf.set(Constants.Logging.PIPELINE_LIBRARY_DIR, "");
}
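Both jars here are localized with the archive flag set, so on the container each is expanded into a directory named after its localized name; that is why the classpath entries are built as libJarName plus separator plus jar name rather than pointing at the jar itself. A minimal sketch of that naming convention (class and sample names are illustrative):

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public final class ArchiveClassPath {

  // For an archive localized under archiveName and expanded into a directory of
  // the same name, each bundled jar is reachable at archiveName/<jar name>.
  static List<String> classPathFor(String archiveName, List<String> jarNames) {
    List<String> entries = new ArrayList<>();
    for (String jarName : jarNames) {
      entries.add(archiveName + File.separator + jarName);
    }
    return entries;
  }

  public static void main(String[] args) {
    System.out.println(classPathFor("log.lib.jar", Arrays.asList("ext-a.jar", "ext-b.jar")));
    // [log.lib.jar/ext-a.jar, log.lib.jar/ext-b.jar] on Unix
  }
}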