Use of org.apache.twill.api.LocalFile in project cdap by cdapio.
The class RemoteExecutionTwillPreparer, method localizeFiles.
/**
* Localize files to the target host.
*/
private void localizeFiles(SSHSession session, Map<String, LocalFile> localFiles,
                           String targetPath, RuntimeSpecification runtimeSpec) throws IOException {
  // A map to remember which URI has already been uploaded to which target path.
  // This helps reduce bandwidth when the same file is localized to multiple target paths.
  Map<URI, String> localizedFiles = new HashMap<>();
  String localizedDir = targetPath + "/.localized";
  session.executeAndWait("mkdir -p " + localizedDir);

  for (LocalFile localFile : Iterables.concat(localFiles.values(), runtimeSpec.getLocalFiles())) {
    URI uri = localFile.getURI();

    // If not yet uploaded, upload it
    String localizedFile = localizedFiles.get(uri);
    if (localizedFile == null) {
      String fileName = Hashing.md5().hashString(uri.toString()).toString() + "-" + getFileName(uri);
      localizedFile = localizedDir + "/" + fileName;
      try (InputStream inputStream = openURI(uri)) {
        LOG.debug("Upload file {} to {}@{}:{}", uri, session.getUsername(), session.getAddress(), localizedFile);
        //noinspection OctalInteger
        session.copy(inputStream, localizedDir, fileName, localFile.getSize(), 0644,
                     localFile.getLastModified(), localFile.getLastModified());
      }
      localizedFiles.put(uri, localizedFile);
    }

    // If it is an archive, expand it; if it is a plain file, create a hard link.
    if (localFile.isArchive()) {
      String expandedDir = targetPath + "/" + localFile.getName();
      LOG.debug("Expanding archive {} on host {} to {}", localizedFile, session.getAddress().getHostName(), expandedDir);
      session.executeAndWait("mkdir -p " + expandedDir, "cd " + expandedDir, String.format("jar xf %s", localizedFile));
    } else {
      LOG.debug("Create hardlink {} on host {} to {}/{}", localizedFile, session.getAddress().getHostName(), targetPath, localFile.getName());
      session.executeAndWait(String.format("ln %s %s/%s", localizedFile, targetPath, localFile.getName()));
    }
  }
}
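For context, the LocalFile entries handed to localizeFiles are plain value objects. Below is a minimal sketch of building one, assuming Twill's internal DefaultLocalFile implementation and its (name, uri, lastModified, size, archive, pattern) constructor order; the file path is illustrative:

import java.io.File;
import org.apache.twill.api.LocalFile;
import org.apache.twill.internal.DefaultLocalFile;

File jar = new File("/tmp/program.jar"); // illustrative path
LocalFile localFile = new DefaultLocalFile(
    jar.getName(),      // name used on the target host
    jar.toURI(),        // source URI; openURI(uri) above streams from it
    jar.lastModified(), // timestamp preserved by session.copy(...)
    jar.length(),       // size used for the SCP-style transfer
    false,              // not an archive, so the method hard-links it instead of expanding
    null);              // no archive pattern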
Use of org.apache.twill.api.LocalFile in project cdap by cdapio.
The class AbstractRuntimeTwillPreparer, method populateRunnableLocalFiles.
/**
* Based on the given {@link TwillSpecification}, copies each runnable's files to the local filesystem.
* @param spec the {@link TwillSpecification} used to populate the resources
* @return a map from runnable name to its resolved {@link LocalFile}s
*/
private Map<String, Collection<LocalFile>> populateRunnableLocalFiles(TwillSpecification spec,
                                                                      Path stagingDir) throws IOException {
  Map<String, Collection<LocalFile>> localFiles = new HashMap<>();
  LOG.debug("Populating Runnable LocalFiles");
  for (Map.Entry<String, RuntimeSpecification> entry : spec.getRunnables().entrySet()) {
    String runnableName = entry.getKey();
    for (LocalFile localFile : entry.getValue().getLocalFiles()) {
      LocalFile resolvedLocalFile = resolveLocalFile(localFile, stagingDir);
      localFiles.computeIfAbsent(runnableName, s -> new ArrayList<>()).add(resolvedLocalFile);
      LOG.debug("Added file {}", resolvedLocalFile.getURI());
    }
  }
  LOG.debug("Done Runnable LocalFiles");
  return localFiles;
}
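The resolveLocalFile helper is not shown in this snippet. The following is a hypothetical sketch of its likely shape, staging non-local content into the staging directory and returning a LocalFile that points at the staged copy; the actual CDAP implementation may differ:

import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import org.apache.twill.api.LocalFile;
import org.apache.twill.internal.DefaultLocalFile;

private LocalFile resolveLocalFile(LocalFile localFile, Path stagingDir) throws IOException {
  URI uri = localFile.getURI();
  // A plain local file can be used as-is.
  if ("file".equals(uri.getScheme())) {
    return localFile;
  }
  // Otherwise copy the content into the staging directory.
  Path target = stagingDir.resolve(localFile.getName());
  try (InputStream is = uri.toURL().openStream()) {
    Files.copy(is, target, StandardCopyOption.REPLACE_EXISTING);
  }
  return new DefaultLocalFile(localFile.getName(), target.toUri(), localFile.getLastModified(),
                              Files.size(target), localFile.isArchive(), localFile.getPattern());
}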
Use of org.apache.twill.api.LocalFile in project cdap by cdapio.
The class AbstractRuntimeTwillPreparer, method saveSpecification.
private TwillRuntimeSpecification saveSpecification(TwillSpecification spec, Path targetFile,
                                                    Path stagingDir) throws IOException {
  final Map<String, Collection<LocalFile>> runnableLocalFiles = populateRunnableLocalFiles(spec, stagingDir);

  // Rewrite LocalFiles inside twillSpec
  Map<String, RuntimeSpecification> runtimeSpec = spec.getRunnables().entrySet().stream()
    .collect(Collectors.toMap(Map.Entry::getKey, e -> {
      RuntimeSpecification value = e.getValue();
      return new DefaultRuntimeSpecification(value.getName(), value.getRunnableSpecification(),
                                             value.getResourceSpecification(),
                                             runnableLocalFiles.getOrDefault(e.getKey(), Collections.emptyList()));
    }));

  // Serialize into a local temp file.
  LOG.debug("Creating {}", targetFile);
  try (Writer writer = Files.newBufferedWriter(targetFile, StandardCharsets.UTF_8)) {
    EventHandlerSpecification eventHandler = spec.getEventHandler();
    if (eventHandler == null) {
      eventHandler = new LogOnlyEventHandler().configure();
    }
    TwillSpecification newTwillSpec = new DefaultTwillSpecification(spec.getName(), runtimeSpec, spec.getOrders(),
                                                                    spec.getPlacementPolicies(), eventHandler);
    Map<String, String> configMap = Maps.newHashMap();
    for (Map.Entry<String, String> entry : hConf) {
      if (entry.getKey().startsWith("twill.")) {
        configMap.put(entry.getKey(), entry.getValue());
      }
    }
    TwillRuntimeSpecification twillRuntimeSpec = new TwillRuntimeSpecification(newTwillSpec, "", URI.create("."), "",
                                                                               RunIds.fromString(programRunId.getRun()),
                                                                               twillSpec.getName(), null, logLevels,
                                                                               maxRetries, configMap, runnableConfigs);
    TwillRuntimeSpecificationAdapter.create().toJson(twillRuntimeSpec, writer);
    LOG.debug("Done {}", targetFile);
    return twillRuntimeSpec;
  }
}
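The spec serialized here can be read back with the same adapter. A small round-trip sketch, assuming the adapter exposes a matching fromJson(Reader) overload; the file path is illustrative:

import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import org.apache.twill.internal.TwillRuntimeSpecification;
import org.apache.twill.internal.json.TwillRuntimeSpecificationAdapter;

Path specFile = Paths.get("/tmp/twillSpec.json"); // illustrative path
try (Reader reader = Files.newBufferedReader(specFile, StandardCharsets.UTF_8)) {
  TwillRuntimeSpecification restored = TwillRuntimeSpecificationAdapter.create().fromJson(reader);
  // Runnable names and their rewritten LocalFiles survive the round trip.
  restored.getTwillSpecification().getRunnables()
      .forEach((name, runtime) -> runtime.getLocalFiles()
          .forEach(f -> System.out.println(name + " -> " + f.getURI())));
}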
Use of org.apache.twill.api.LocalFile in project cdap by cdapio.
The class TetheringRuntimeJobManager, method createLaunchPayload.
/**
* Adds select LocalFiles and cConf entries to the control message payload.
*/
private TetheringLaunchMessage createLaunchPayload(Collection<? extends LocalFile> localFiles) throws IOException {
  TetheringLaunchMessage.Builder builder = new TetheringLaunchMessage.Builder()
    .addFileNames(DistributedProgramRunner.LOGBACK_FILE_NAME)
    .addFileNames(DistributedProgramRunner.PROGRAM_OPTIONS_FILE_NAME)
    .addFileNames(DistributedProgramRunner.APP_SPEC_FILE_NAME);

  for (String fileName : builder.getFileNames()) {
    LocalFile localFile = localFiles.stream()
      .filter(file -> file.getName().equals(fileName))
      .findFirst()
      .orElseThrow(() -> new IllegalStateException("Cannot find file " + fileName));
    builder.addLocalizeFiles(fileName, getLocalFileAsCompressedBytes(localFile));
  }

  for (String prefix : SELECT_CCONF_FIELDS) {
    String prefixRegex = "^" + prefix.replace(".", "\\.");
    builder.addCConfEntries(cConf.getValByRegex(prefixRegex));
  }
  return builder.build();
}
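getLocalFileAsCompressedBytes is used above but not shown. Below is a hypothetical sketch that gzip-compresses the file body so it fits in a control message; the real CDAP helper may differ:

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.zip.GZIPOutputStream;
import org.apache.twill.api.LocalFile;

private byte[] getLocalFileAsCompressedBytes(LocalFile localFile) throws IOException {
  ByteArrayOutputStream bos = new ByteArrayOutputStream();
  try (InputStream is = localFile.getURI().toURL().openStream();
       GZIPOutputStream gzip = new GZIPOutputStream(bos)) {
    byte[] buffer = new byte[8192];
    int len;
    while ((len = is.read(buffer)) != -1) {
      gzip.write(buffer, 0, len);
    }
  }
  return bos.toByteArray();
}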
Use of org.apache.twill.api.LocalFile in project cdap by cdapio.
The class DataprocRuntimeJobManager, method launch.
@Override
public void launch(RuntimeJobInfo runtimeJobInfo) throws Exception {
  String bucket = DataprocUtils.getBucketName(this.bucket);
  ProgramRunInfo runInfo = runtimeJobInfo.getProgramRunInfo();
  LOG.debug("Launching run {} with following configurations: cluster {}, project {}, region {}, bucket {}.",
            runInfo.getRun(), clusterName, projectId, region, bucket);

  // TODO: CDAP-16408 use fixed directory for caching twill, application, artifact jars
  File tempDir = Files.createTempDirectory("dataproc.launcher").toFile();

  // On the dataproc bucket the run root will be <bucket>/cdap-job/<runid>/.
  // All the files for this run will be copied under that base dir.
  String runRootPath = getPath(DataprocUtils.CDAP_GCS_ROOT, runInfo.getRun());
  try {
    // step 1: build twill.jar and launcher.jar and add them to the files to be copied to gcs
    List<LocalFile> localFiles = getRuntimeLocalFiles(runtimeJobInfo.getLocalizeFiles(), tempDir);

    // step 2: upload all the necessary files to gcs so that they are available to the dataproc job
    List<Future<LocalFile>> uploadFutures = new ArrayList<>();
    for (LocalFile fileToUpload : localFiles) {
      String targetFilePath = getPath(runRootPath, fileToUpload.getName());
      uploadFutures.add(
        provisionerContext.execute(() -> uploadFile(bucket, targetFilePath, fileToUpload)).toCompletableFuture());
    }
    List<LocalFile> uploadedFiles = new ArrayList<>();
    for (Future<LocalFile> uploadFuture : uploadFutures) {
      uploadedFiles.add(uploadFuture.get());
    }

    // step 3: build the hadoop job request to be submitted to dataproc
    SubmitJobRequest request = getSubmitJobRequest(runtimeJobInfo, uploadedFiles);

    // step 4: submit the hadoop job to dataproc
    try {
      Job job = getJobControllerClient().submitJob(request);
      LOG.debug("Successfully submitted hadoop job {} to cluster {}.", job.getReference().getJobId(), clusterName);
    } catch (AlreadyExistsException ex) {
      // the job id already exists; ignore the resubmission
      LOG.warn("The dataproc job {} already exists. Ignoring resubmission of the job.",
               request.getJob().getReference().getJobId());
    }
    DataprocUtils.emitMetric(provisionerContext, region, "provisioner.submitJob.response.count");
  } catch (Exception e) {
    // delete all uploaded gcs files in case of exception
    DataprocUtils.deleteGCSPath(getStorageClient(), bucket, runRootPath);
    DataprocUtils.emitMetric(provisionerContext, region, "provisioner.submitJob.response.count", e);
    throw new Exception(String.format("Error while launching job %s on cluster %s", getJobId(runInfo), clusterName), e);
  } finally {
    // delete the local temp directory
    deleteDirectoryContents(tempDir);
  }
}
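The getPath helper used above is not shown either. A plausible sketch that joins path segments with "/" while avoiding duplicate separators; the actual CDAP utility may differ in details:

private static String getPath(String... segments) {
  StringBuilder sb = new StringBuilder();
  for (String segment : segments) {
    if (sb.length() > 0 && sb.charAt(sb.length() - 1) != '/') {
      sb.append('/');
    }
    sb.append(segment);
  }
  return sb.toString();
}

// e.g. getPath(DataprocUtils.CDAP_GCS_ROOT, runInfo.getRun()) -> "cdap-job/<runid>"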