Use of co.cask.cdap.internal.app.runtime.batch.MapReduceClassLoader in project cdap by caskdata.
The class DynamicPartitioningOutputCommitter, method commitJob.
@Override
public void commitJob(JobContext context) throws IOException {
  Configuration configuration = context.getConfiguration();
  MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(configuration);
  BasicMapReduceTaskContext taskContext = classLoader.getTaskContextProvider().get(this.taskContext);
  String outputDatasetName = configuration.get(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET);
  outputDataset = taskContext.getDataset(outputDatasetName);
  DynamicPartitioner.PartitionWriteOption partitionWriteOption =
    DynamicPartitioner.PartitionWriteOption.valueOf(
      configuration.get(PartitionedFileSetArguments.DYNAMIC_PARTITIONER_WRITE_OPTION));
  Partitioning partitioning = outputDataset.getPartitioning();
  partitionsToAdd = new HashMap<>();

  // Go over all files in the temporary directory and keep track of partitions to add for them
  FileStatus[] allCommittedTaskPaths = getAllCommittedTaskPaths(context);
  for (FileStatus committedTaskPath : allCommittedTaskPaths) {
    FileSystem fs = committedTaskPath.getPath().getFileSystem(configuration);
    RemoteIterator<LocatedFileStatus> fileIter = fs.listFiles(committedTaskPath.getPath(), true);
    while (fileIter.hasNext()) {
      Path path = fileIter.next().getPath();
      String relativePath = getRelative(committedTaskPath.getPath(), path);

      int lastPathSepIdx = relativePath.lastIndexOf(Path.SEPARATOR);
      if (lastPathSepIdx == -1) {
        // this shouldn't happen, because each relative path should consist of at least one partition key
        // and the output file name
        LOG.warn("Skipping path '{}'. Its relative path '{}' has fewer than two parts", path, relativePath);
        continue;
      }
      // relativePath = "../key1/key2/part-m-00000"
      // relativeDir  = "../key1/key2"
      // fileName     = "part-m-00000"
      String relativeDir = relativePath.substring(0, lastPathSepIdx);

      Path finalDir = new Path(FileOutputFormat.getOutputPath(context), relativeDir);
      if (partitionWriteOption == DynamicPartitioner.PartitionWriteOption.CREATE) {
        if (fs.exists(finalDir)) {
          throw new FileAlreadyExistsException("Final output path already exists: " + finalDir);
        }
      }
      PartitionKey partitionKey = getPartitionKey(partitioning, relativeDir);
      partitionsToAdd.put(relativeDir, partitionKey);
    }
  }

  // Remove any existing partitions before moving the temporary content to the final output
  if (partitionWriteOption == DynamicPartitioner.PartitionWriteOption.CREATE_OR_OVERWRITE) {
    for (Map.Entry<String, PartitionKey> entry : partitionsToAdd.entrySet()) {
      if (outputDataset.getPartition(entry.getValue()) != null) {
        // Dropping the partition allows the existing files to be reinstated if there is a rollback.
        // The alternative is to simply remove the files within the partition's location; the upside
        // of that is easily avoiding Explore operations, but the downside is that the partition's
        // metadata would not be removed.
        outputDataset.dropPartition(entry.getValue());
      }
    }
  }

  // We need to copy to the parent of the FileOutputFormat's outputDir, since we added a
  // _temporary_jobId suffix to the original outputDir.
  Path finalOutput = FileOutputFormat.getOutputPath(context);
  FileContext fc = FileContext.getFileContext(configuration);
  // the finalOutput path doesn't have a scheme or authority (but 'from' does)
  finalOutput = fc.makeQualified(finalOutput);
  for (FileStatus from : getAllCommittedTaskPaths(context)) {
    mergePaths(fc, from, finalOutput);
  }

  // Compute the metadata to be written to every output partition
  Map<String, String> metadata = ConfigurationUtil.getNamedConfigurations(
    this.taskContext.getConfiguration(), PartitionedFileSetArguments.OUTPUT_PARTITION_METADATA_PREFIX);
  boolean allowAppend = partitionWriteOption == DynamicPartitioner.PartitionWriteOption.CREATE_OR_APPEND;

  // Create all the necessary partitions
  for (Map.Entry<String, PartitionKey> entry : partitionsToAdd.entrySet()) {
    outputDataset.addPartition(entry.getValue(), entry.getKey(), metadata, true, allowAppend);
  }

  // Delete the job-specific _temporary folder
  cleanupJob(context);

  // Mark all the final output paths with a _SUCCESS file, if configured to do so (default = true)
  if (configuration.getBoolean(SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, true)) {
    for (String relativePath : partitionsToAdd.keySet()) {
      Path pathToMark = new Path(finalOutput, relativePath);
      createOrUpdate(fc, new Path(pathToMark, SUCCEEDED_FILE_NAME));
      // also create a _SUCCESS-<RunId> file, if appending is allowed
      if (allowAppend) {
        createOrUpdate(fc, new Path(pathToMark, SUCCEEDED_FILE_NAME + "-" + taskContext.getProgramRunId().getRun()));
      }
    }
  }
}
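
The commitJob method calls several private helpers that this snippet does not show: getRelative, getPartitionKey, createOrUpdate, and mergePaths. The following minimal sketches, inferred from the call sites above, illustrate what the first three could look like inside the same class; the exact signatures and bodies are assumptions based on how they are used, not the project's actual implementations.

  // Sketches of the unshown helpers, reconstructed from how they are called above.
  // Assumed additional imports: java.util.EnumSet, org.apache.hadoop.fs.CreateFlag.

  // Returns 'path' relative to 'base', e.g. base="/tmp/out/_temporary_job1/task1" and
  // path="/tmp/out/_temporary_job1/task1/key1/part-m-00000" yields "key1/part-m-00000".
  private String getRelative(Path base, Path path) {
    return base.toUri().relativize(path.toUri()).getPath();
  }

  // Builds a PartitionKey by mapping each element of the relative directory onto the
  // partitioning's fields in declared order, parsing each element to the field's type.
  private PartitionKey getPartitionKey(Partitioning partitioning, String relativePath) {
    String[] pathParts = relativePath.split(Path.SEPARATOR);
    if (pathParts.length != partitioning.getFields().size()) {
      throw new IllegalArgumentException(
        String.format("Relative path '%s' does not have one element per partitioning field", relativePath));
    }
    PartitionKey.Builder builder = PartitionKey.builder();
    int i = 0;
    for (Map.Entry<String, Partitioning.FieldType> field : partitioning.getFields().entrySet()) {
      builder.addField(field.getKey(), field.getValue().parse(pathParts[i++]));
    }
    return builder.build();
  }

  // Creates the _SUCCESS marker file, overwriting (and thereby refreshing) it if it already exists.
  private void createOrUpdate(FileContext fc, Path markerPath) throws IOException {
    fc.create(markerPath, EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE)).close();
  }

mergePaths recursively moves the committed task files into the final output directory, replacing files that already exist there; it is not sketched here because its behavior depends on the rename semantics of the underlying FileContext.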
Use of co.cask.cdap.internal.app.runtime.batch.MapReduceClassLoader in project cdap by caskdata.
The class MapReduceContainerLauncher, method launch.
/**
 * Launches the given main class. The main class will be loaded through the {@link MapReduceClassLoader}.
 *
 * @param mainClassName the main class to launch
 * @param args arguments for the main class
 */
@SuppressWarnings("unused")
public static void launch(String mainClassName, String[] args) throws Exception {
  Thread.setDefaultUncaughtExceptionHandler(new UncaughtExceptionHandler());
  ClassLoader systemClassLoader = ClassLoader.getSystemClassLoader();
  List<URL> urls = ClassLoaders.getClassLoaderURLs(systemClassLoader, new ArrayList<URL>());

  // Remove the URL that contains the given main classname to avoid infinite recursion.
  // This is needed because we generate a class with the same main classname in order to intercept
  // the main() method call from the container launch script.
  URL resource = systemClassLoader.getResource(mainClassName.replace('.', '/') + ".class");
  if (resource == null) {
    throw new IllegalStateException("Failed to find resource for main class " + mainClassName);
  }
  if (!urls.remove(ClassLoaders.getClassPathURL(mainClassName, resource))) {
    throw new IllegalStateException("Failed to remove main class resource " + resource);
  }

  // Create a MainClassLoader for dataset rewrite
  URL[] classLoaderUrls = urls.toArray(new URL[urls.size()]);
  ClassLoader mainClassLoader = new MainClassLoader(classLoaderUrls, systemClassLoader.getParent());

  // Install the JUL-to-SLF4J bridge
  try {
    mainClassLoader.loadClass(SLF4JBridgeHandler.class.getName()).getDeclaredMethod("install").invoke(null);
  } catch (Exception e) {
    // Log the error and continue
    LOG.warn("Failed to invoke SLF4JBridgeHandler.install() required for jul-to-slf4j bridge", e);
  }

  ClassLoaders.setContextClassLoader(mainClassLoader);

  // Creates the MapReduceClassLoader. It has to be loaded from the MainClassLoader.
  try {
    final ClassLoader classLoader =
      (ClassLoader) mainClassLoader.loadClass(MapReduceClassLoader.class.getName()).newInstance();
    Runtime.getRuntime().addShutdownHook(new Thread() {
      @Override
      public void run() {
        if (classLoader instanceof AutoCloseable) {
          try {
            ((AutoCloseable) classLoader).close();
          } catch (Exception e) {
            System.err.println("Failed to close ClassLoader " + classLoader);
            e.printStackTrace();
          }
        }
      }
    });
    Thread.currentThread().setContextClassLoader(classLoader);

    // Set up logging and stdout/stderr redirection:
    // invoke MapReduceClassLoader.getTaskContextProvider()
    classLoader.getClass().getDeclaredMethod("getTaskContextProvider").invoke(classLoader);
    // invoke StandardOutErrorRedirector.redirectToLogger()
    classLoader.loadClass("co.cask.cdap.common.logging.StandardOutErrorRedirector")
      .getDeclaredMethod("redirectToLogger", String.class)
      .invoke(null, mainClassName);

    Class<?> mainClass = classLoader.loadClass(mainClassName);
    Method mainMethod = mainClass.getMethod("main", String[].class);
    mainMethod.setAccessible(true);

    LOG.info("Launch main class {}.main({})", mainClassName, Arrays.toString(args));
    mainMethod.invoke(null, new Object[] { args });
    LOG.info("Main method of {} returned", mainClassName);
  } catch (Throwable t) {
    // Log the exception, since it will be propagated back to the JVM and kill the main thread
    // (and hence the JVM process). If we don't log it here as ERROR, it will only be logged
    // by the UncaughtExceptionHandler at DEBUG level.
    LOG.error("Exception raised when calling {}.main(String[]) method", mainClassName, t);
    throw t;
  }
}
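
The "generated class with the same main classname" mentioned in the comments above is what makes this interception work: CDAP places a generated class ahead of the container's real main class on the classpath, so the container launch script invokes the generated class, which simply delegates to launch(). Below is a minimal sketch of that shape, assuming the intercepted main class is org.apache.hadoop.mapred.YarnChild; the real class is generated as bytecode, and the name and form here are illustrative only.

  // Illustrative sketch only: the actual class is generated as bytecode by CDAP.
  // It shares the fully-qualified name of the container's real main class, so the
  // container launch script invokes it; it then delegates to MapReduceContainerLauncher,
  // which loads and runs the real main class through the MapReduceClassLoader.
  package org.apache.hadoop.mapred;

  public final class YarnChild {
    public static void main(String[] args) throws Exception {
      MapReduceContainerLauncher.launch("org.apache.hadoop.mapred.YarnChild", args);
    }
  }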