Search in sources :

Example 11 with LocalFile

use of org.apache.twill.api.LocalFile in project cdap by cdapio.

the class RemoteExecutionTwillPreparer method localizeFiles.

/**
 * Uploads the given files to the remote host and exposes each one under {@code targetPath}.
 *
 * <p>Every distinct URI is copied once into {@code targetPath/.localized}. Afterwards an archive is
 * expanded with {@code jar xf} into {@code targetPath/<name>}, while a plain file is hard-linked to
 * {@code targetPath/<name>}.
 *
 * @param session SSH session used to copy files and to run the remote shell commands
 * @param localFiles files to localize; only the map values are read here
 * @param targetPath directory on the remote host that receives the files
 * @param runtimeSpec specification whose local files are localized in addition to {@code localFiles}
 * @throws IOException propagated from the stream and session calls made by this method
 */
private void localizeFiles(SSHSession session, Map<String, LocalFile> localFiles, String targetPath, RuntimeSpecification runtimeSpec) throws IOException {
    // Remote location of every URI uploaded so far. The same source may be requested under several
    // names; remembering it here means it is transferred over the wire only once.
    Map<URI, String> uploadedByUri = new HashMap<>();
    String stagingDir = targetPath + "/.localized";
    session.executeAndWait("mkdir -p " + stagingDir);

    for (LocalFile file : Iterables.concat(localFiles.values(), runtimeSpec.getLocalFiles())) {
        URI source = file.getURI();

        // Upload the source unless an earlier iteration already did so.
        String remoteCopy = uploadedByUri.get(source);
        if (remoteCopy == null) {
            // Prefix with a hash of the URI so that equal file names from different URIs do not clash.
            String remoteName = Hashing.md5().hashString(source.toString()).toString() + "-" + getFileName(source);
            remoteCopy = stagingDir + "/" + remoteName;
            try (InputStream in = openURI(source)) {
                LOG.debug("Upload file {} to {}@{}:{}", source, session.getUsername(), session.getAddress(), remoteCopy);
                // noinspection OctalInteger
                session.copy(in, stagingDir, remoteName, file.getSize(), 0644, file.getLastModified(), file.getLastModified());
            }
            uploadedByUri.put(source, remoteCopy);
        }

        // A plain file only needs a hardlink from the uploaded copy to its final name.
        if (!file.isArchive()) {
            String linkName = file.getName();
            LOG.debug("Create hardlink {} on host {} to {}/{}", remoteCopy, session.getAddress().getHostName(), targetPath, linkName);
            session.executeAndWait("ln " + remoteCopy + " " + targetPath + "/" + linkName);
            continue;
        }

        // An archive is expanded into a directory named after the local file.
        String expandedDir = targetPath + "/" + file.getName();
        LOG.debug("Expanding archive {} on host {} to {}", remoteCopy, session.getAddress().getHostName(), expandedDir);
        session.executeAndWait("mkdir -p " + expandedDir, "cd " + expandedDir, "jar xf " + remoteCopy);
    }
}
Also used : LocalFile(org.apache.twill.api.LocalFile) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ByteArrayInputStream(java.io.ByteArrayInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) URI(java.net.URI)

Example 12 with LocalFile

use of org.apache.twill.api.LocalFile in project cdap by cdapio.

the class AbstractRuntimeTwillPreparer method populateRunnableLocalFiles.

/**
 * Resolves the local files of every runnable declared in the given {@link TwillSpecification}.
 *
 * <p>Each file is passed through {@code resolveLocalFile(LocalFile, Path)} together with
 * {@code stagingDir} (presumably that helper copies the file to the local filesystem; confirm
 * against its implementation).
 *
 * @param spec The {@link TwillSpecification} for populating resource.
 * @param stagingDir directory handed to {@code resolveLocalFile} for each file
 * @return resolved files keyed by runnable name; a runnable without local files has no entry
 * @throws IOException propagated from {@code resolveLocalFile}
 */
private Map<String, Collection<LocalFile>> populateRunnableLocalFiles(TwillSpecification spec, Path stagingDir) throws IOException {
    LOG.debug("Populating Runnable LocalFiles");
    Map<String, Collection<LocalFile>> filesByRunnable = new HashMap<>();
    for (Map.Entry<String, RuntimeSpecification> runnable : spec.getRunnables().entrySet()) {
        String name = runnable.getKey();
        for (LocalFile declared : runnable.getValue().getLocalFiles()) {
            // Resolve first: if this throws, no entry is created for the runnable.
            LocalFile resolved = resolveLocalFile(declared, stagingDir);
            Collection<LocalFile> bucket = filesByRunnable.get(name);
            if (bucket == null) {
                bucket = new ArrayList<>();
                filesByRunnable.put(name, bucket);
            }
            bucket.add(resolved);
            LOG.debug("Added file {}", resolved.getURI());
        }
    }
    LOG.debug("Done Runnable LocalFiles");
    return filesByRunnable;
}
Also used : ArrayListMultimap(com.google.common.collect.ArrayListMultimap) Arrays(java.util.Arrays) ListMultimap(com.google.common.collect.ListMultimap) URL(java.net.URL) TwillPreparer(org.apache.twill.api.TwillPreparer) LoggerFactory(org.slf4j.LoggerFactory) TimeoutException(java.util.concurrent.TimeoutException) SecureStore(org.apache.twill.api.SecureStore) DirectoryStream(java.nio.file.DirectoryStream) DefaultLocalFile(org.apache.twill.internal.DefaultLocalFile) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Cancellable(org.apache.twill.common.Cancellable) URI(java.net.URI) Path(java.nio.file.Path) EventHandlerSpecification(org.apache.twill.api.EventHandlerSpecification) RuntimeSpecification(org.apache.twill.api.RuntimeSpecification) TwillController(org.apache.twill.api.TwillController) LocalFile(org.apache.twill.api.LocalFile) Collection(java.util.Collection) DefaultRuntimeSpecification(org.apache.twill.internal.DefaultRuntimeSpecification) Set(java.util.Set) LoggingContextAccessor(io.cdap.cdap.common.logging.LoggingContextAccessor) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) LocationCache(org.apache.twill.internal.io.LocationCache) StandardCharsets(java.nio.charset.StandardCharsets) LoggingContext(io.cdap.cdap.common.logging.LoggingContext) Objects(java.util.Objects) List(java.util.List) ArgumentsCodec(org.apache.twill.internal.json.ArgumentsCodec) JvmOptions(org.apache.twill.internal.JvmOptions) Writer(java.io.Writer) DirUtils(io.cdap.cdap.common.utils.DirUtils) LogHandler(org.apache.twill.api.logging.LogHandler) Joiner(com.google.common.base.Joiner) LogEntry(org.apache.twill.api.logging.LogEntry) Iterables(com.google.common.collect.Iterables) Location(org.apache.twill.filesystem.Location) Paths(org.apache.twill.internal.utils.Paths) Hashing(com.google.common.hash.Hashing) HashMap(java.util.HashMap) Callable(java.util.concurrent.Callable) StandardCopyOption(java.nio.file.StandardCopyOption) 
ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) JarEntry(java.util.jar.JarEntry) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId) TwillRuntimeSpecification(org.apache.twill.internal.TwillRuntimeSpecification) ProgramOptions(io.cdap.cdap.app.runtime.ProgramOptions) TwillRuntimeSpecificationAdapter(org.apache.twill.internal.json.TwillRuntimeSpecificationAdapter) Locations(io.cdap.cdap.common.io.Locations) Constants(org.apache.twill.internal.Constants) Hasher(com.google.common.hash.Hasher) ApplicationBundler(org.apache.twill.internal.ApplicationBundler) JarOutputStream(java.util.jar.JarOutputStream) LoggingContextHelper(io.cdap.cdap.logging.context.LoggingContextHelper) ClassAcceptor(org.apache.twill.api.ClassAcceptor) Logger(org.slf4j.Logger) Files(java.nio.file.Files) RunIds(io.cdap.cdap.common.app.RunIds) IOException(java.io.IOException) LocationFactory(org.apache.twill.filesystem.LocationFactory) BasicFileAttributes(java.nio.file.attribute.BasicFileAttributes) Maps(com.google.common.collect.Maps) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) CConfiguration(io.cdap.cdap.common.conf.CConfiguration) LogOnlyEventHandler(org.apache.twill.internal.LogOnlyEventHandler) DefaultTwillSpecification(org.apache.twill.internal.DefaultTwillSpecification) Preconditions(com.google.common.base.Preconditions) TwillSpecification(org.apache.twill.api.TwillSpecification) Arguments(org.apache.twill.internal.Arguments) Collections(java.util.Collections) InputStream(java.io.InputStream) DefaultLocalFile(org.apache.twill.internal.DefaultLocalFile) LocalFile(org.apache.twill.api.LocalFile) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) Collection(java.util.Collection) RuntimeSpecification(org.apache.twill.api.RuntimeSpecification) DefaultRuntimeSpecification(org.apache.twill.internal.DefaultRuntimeSpecification) TwillRuntimeSpecification(org.apache.twill.internal.TwillRuntimeSpecification) 
Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap)

Example 13 with LocalFile

use of org.apache.twill.api.LocalFile in project cdap by cdapio.

the class AbstractRuntimeTwillPreparer method saveSpecification.

/**
 * Builds the {@link TwillRuntimeSpecification} for the given specification and writes it as JSON
 * to {@code targetFile}.
 *
 * <p>The local files of every runnable are replaced by the ones returned from
 * {@code populateRunnableLocalFiles}. When the specification has no event handler, a
 * {@link LogOnlyEventHandler} is used. Only {@code hConf} entries whose key starts with
 * {@code "twill."} are carried into the runtime specification.
 *
 * @param spec the specification to rewrite and serialize
 * @param targetFile file that receives the UTF-8 encoded JSON
 * @param stagingDir directory passed on to {@code populateRunnableLocalFiles}
 * @return the runtime specification that was written
 * @throws IOException if populating the local files or writing the target file fails
 */
private TwillRuntimeSpecification saveSpecification(TwillSpecification spec, Path targetFile, Path stagingDir) throws IOException {
    final Map<String, Collection<LocalFile>> resolvedFiles = populateRunnableLocalFiles(spec, stagingDir);

    // Rebuild each runnable's specification so that it points at the resolved local files.
    Map<String, RuntimeSpecification> rewrittenRunnables = new HashMap<>();
    for (Map.Entry<String, RuntimeSpecification> runnable : spec.getRunnables().entrySet()) {
        RuntimeSpecification original = runnable.getValue();
        Collection<LocalFile> files = resolvedFiles.getOrDefault(runnable.getKey(), Collections.emptyList());
        rewrittenRunnables.put(runnable.getKey(), new DefaultRuntimeSpecification(original.getName(), original.getRunnableSpecification(), original.getResourceSpecification(), files));
    }

    // Serialize into a local temp file.
    LOG.debug("Creating {}", targetFile);
    try (Writer out = Files.newBufferedWriter(targetFile, StandardCharsets.UTF_8)) {
        // Fall back to a log-only handler when the specification does not declare one.
        EventHandlerSpecification declaredHandler = spec.getEventHandler();
        EventHandlerSpecification handler = declaredHandler != null ? declaredHandler : new LogOnlyEventHandler().configure();
        TwillSpecification rewrittenSpec = new DefaultTwillSpecification(spec.getName(), rewrittenRunnables, spec.getOrders(), spec.getPlacementPolicies(), handler);

        // Keep only the Twill-specific configuration entries.
        Map<String, String> twillConfigs = new HashMap<>();
        for (Map.Entry<String, String> conf : hConf) {
            String key = conf.getKey();
            if (!key.startsWith("twill.")) {
                continue;
            }
            twillConfigs.put(key, conf.getValue());
        }

        // NOTE(review): the name argument below comes from the twillSpec field, not the spec parameter - confirm this is intended.
        TwillRuntimeSpecification result = new TwillRuntimeSpecification(rewrittenSpec, "", URI.create("."), "", RunIds.fromString(programRunId.getRun()), twillSpec.getName(), null, logLevels, maxRetries, twillConfigs, runnableConfigs);
        TwillRuntimeSpecificationAdapter.create().toJson(result, out);
        LOG.debug("Done {}", targetFile);
        return result;
    }
}
Also used : ArrayListMultimap(com.google.common.collect.ArrayListMultimap) Arrays(java.util.Arrays) ListMultimap(com.google.common.collect.ListMultimap) URL(java.net.URL) TwillPreparer(org.apache.twill.api.TwillPreparer) LoggerFactory(org.slf4j.LoggerFactory) TimeoutException(java.util.concurrent.TimeoutException) SecureStore(org.apache.twill.api.SecureStore) DirectoryStream(java.nio.file.DirectoryStream) DefaultLocalFile(org.apache.twill.internal.DefaultLocalFile) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Cancellable(org.apache.twill.common.Cancellable) URI(java.net.URI) Path(java.nio.file.Path) EventHandlerSpecification(org.apache.twill.api.EventHandlerSpecification) RuntimeSpecification(org.apache.twill.api.RuntimeSpecification) TwillController(org.apache.twill.api.TwillController) LocalFile(org.apache.twill.api.LocalFile) Collection(java.util.Collection) DefaultRuntimeSpecification(org.apache.twill.internal.DefaultRuntimeSpecification) Set(java.util.Set) LoggingContextAccessor(io.cdap.cdap.common.logging.LoggingContextAccessor) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) LocationCache(org.apache.twill.internal.io.LocationCache) StandardCharsets(java.nio.charset.StandardCharsets) LoggingContext(io.cdap.cdap.common.logging.LoggingContext) Objects(java.util.Objects) List(java.util.List) ArgumentsCodec(org.apache.twill.internal.json.ArgumentsCodec) JvmOptions(org.apache.twill.internal.JvmOptions) Writer(java.io.Writer) DirUtils(io.cdap.cdap.common.utils.DirUtils) LogHandler(org.apache.twill.api.logging.LogHandler) Joiner(com.google.common.base.Joiner) LogEntry(org.apache.twill.api.logging.LogEntry) Iterables(com.google.common.collect.Iterables) Location(org.apache.twill.filesystem.Location) Paths(org.apache.twill.internal.utils.Paths) Hashing(com.google.common.hash.Hashing) HashMap(java.util.HashMap) Callable(java.util.concurrent.Callable) StandardCopyOption(java.nio.file.StandardCopyOption) 
ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) JarEntry(java.util.jar.JarEntry) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId) TwillRuntimeSpecification(org.apache.twill.internal.TwillRuntimeSpecification) ProgramOptions(io.cdap.cdap.app.runtime.ProgramOptions) TwillRuntimeSpecificationAdapter(org.apache.twill.internal.json.TwillRuntimeSpecificationAdapter) Locations(io.cdap.cdap.common.io.Locations) Constants(org.apache.twill.internal.Constants) Hasher(com.google.common.hash.Hasher) ApplicationBundler(org.apache.twill.internal.ApplicationBundler) JarOutputStream(java.util.jar.JarOutputStream) LoggingContextHelper(io.cdap.cdap.logging.context.LoggingContextHelper) ClassAcceptor(org.apache.twill.api.ClassAcceptor) Logger(org.slf4j.Logger) Files(java.nio.file.Files) RunIds(io.cdap.cdap.common.app.RunIds) IOException(java.io.IOException) LocationFactory(org.apache.twill.filesystem.LocationFactory) BasicFileAttributes(java.nio.file.attribute.BasicFileAttributes) Maps(com.google.common.collect.Maps) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) CConfiguration(io.cdap.cdap.common.conf.CConfiguration) LogOnlyEventHandler(org.apache.twill.internal.LogOnlyEventHandler) DefaultTwillSpecification(org.apache.twill.internal.DefaultTwillSpecification) Preconditions(com.google.common.base.Preconditions) TwillSpecification(org.apache.twill.api.TwillSpecification) Arguments(org.apache.twill.internal.Arguments) Collections(java.util.Collections) InputStream(java.io.InputStream) DefaultRuntimeSpecification(org.apache.twill.internal.DefaultRuntimeSpecification) DefaultTwillSpecification(org.apache.twill.internal.DefaultTwillSpecification) EventHandlerSpecification(org.apache.twill.api.EventHandlerSpecification) RuntimeSpecification(org.apache.twill.api.RuntimeSpecification) DefaultRuntimeSpecification(org.apache.twill.internal.DefaultRuntimeSpecification) TwillRuntimeSpecification(org.apache.twill.internal.TwillRuntimeSpecification) 
DefaultTwillSpecification(org.apache.twill.internal.DefaultTwillSpecification) TwillSpecification(org.apache.twill.api.TwillSpecification) LogOnlyEventHandler(org.apache.twill.internal.LogOnlyEventHandler) TwillRuntimeSpecification(org.apache.twill.internal.TwillRuntimeSpecification) Collection(java.util.Collection) Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Writer(java.io.Writer)

Example 14 with LocalFile

use of org.apache.twill.api.LocalFile in project cdap by cdapio.

the class TetheringRuntimeJobManager method createLaunchPayload.

/**
 * Builds the control message payload from selected local files and cConf entries.
 *
 * <p>The logback, program options and application specification files are looked up by name in
 * {@code localFiles} and added in compressed form. For every prefix in {@code SELECT_CCONF_FIELDS},
 * the cConf entries whose key matches that prefix (dots treated literally, anchored at the start)
 * are added as well.
 *
 * @param localFiles candidate files; must contain one file for each required file name
 * @return the launch message carrying the selected files and configuration entries
 * @throws IllegalStateException if a required file name is not present in {@code localFiles}
 * @throws IOException propagated from compressing a local file
 */
private TetheringLaunchMessage createLaunchPayload(Collection<? extends LocalFile> localFiles) throws IOException {
    TetheringLaunchMessage.Builder builder = new TetheringLaunchMessage.Builder().addFileNames(DistributedProgramRunner.LOGBACK_FILE_NAME).addFileNames(DistributedProgramRunner.PROGRAM_OPTIONS_FILE_NAME).addFileNames(DistributedProgramRunner.APP_SPEC_FILE_NAME);
    for (String fileName : builder.getFileNames()) {
        // The message previously lacked a separator and rendered as "Cannot find file<name>".
        LocalFile localFile = localFiles.stream().filter(file -> file.getName().equals(fileName)).findFirst().orElseThrow(() -> new IllegalStateException("Cannot find file " + fileName));
        builder.addLocalizeFiles(fileName, getLocalFileAsCompressedBytes(localFile));
    }
    for (String prefix : SELECT_CCONF_FIELDS) {
        // Escape the dots so that they match literally, and anchor the pattern at the key start.
        String prefixRegex = "^" + prefix.replace(".", "\\.");
        builder.addCConfEntries(cConf.getValByRegex(prefixRegex));
    }
    return builder.build();
}
Also used : LocalFile(org.apache.twill.api.LocalFile) TetheringLaunchMessage(io.cdap.cdap.internal.tethering.proto.v1.TetheringLaunchMessage)

Example 15 with LocalFile

use of org.apache.twill.api.LocalFile in project cdap by cdapio.

the class DataprocRuntimeJobManager method launch.

/**
 * Stages the runtime files for a program run and submits the corresponding job.
 *
 * <p>On any failure the files already uploaded under the run's root path are deleted and the
 * failure is rethrown wrapped in an {@link Exception}. The local temporary directory is cleaned
 * up in every case.
 *
 * @param runtimeJobInfo describes the program run and the files to localize
 * @throws Exception wrapping whatever failed while staging the files or submitting the job
 */
@Override
public void launch(RuntimeJobInfo runtimeJobInfo) throws Exception {
    String bucketName = DataprocUtils.getBucketName(this.bucket);
    ProgramRunInfo runInfo = runtimeJobInfo.getProgramRunInfo();
    LOG.debug("Launching run {} with following configurations: cluster {}, project {}, region {}, bucket {}.", runInfo.getRun(), clusterName, projectId, region, bucketName);
    // TODO: CDAP-16408 use fixed directory for caching twill, application, artifact jars
    File localStagingDir = Files.createTempDirectory("dataproc.launcher").toFile();
    // Everything belonging to this run is copied below <bucket>/cdap-job/<runid>/ on the
    // dataproc bucket.
    String gcsRunRoot = getPath(DataprocUtils.CDAP_GCS_ROOT, runInfo.getRun());
    try {
        // step 1: build twill.jar and launcher.jar and collect them with the other files to copy
        List<LocalFile> filesToStage = getRuntimeLocalFiles(runtimeJobInfo.getLocalizeFiles(), localStagingDir);

        // step 2: start all uploads first, then wait for each one, so the files are available to
        // the dataproc job
        List<Future<LocalFile>> pendingUploads = new ArrayList<>();
        for (LocalFile candidate : filesToStage) {
            String gcsTarget = getPath(gcsRunRoot, candidate.getName());
            pendingUploads.add(provisionerContext.execute(() -> uploadFile(bucketName, gcsTarget, candidate)).toCompletableFuture());
        }
        List<LocalFile> stagedFiles = new ArrayList<>();
        for (Future<LocalFile> pending : pendingUploads) {
            stagedFiles.add(pending.get());
        }

        // step 3: build the hadoop job request to be submitted to dataproc
        SubmitJobRequest submitRequest = getSubmitJobRequest(runtimeJobInfo, stagedFiles);

        // step 4: submit hadoop job to dataproc
        try {
            Job submitted = getJobControllerClient().submitJob(submitRequest);
            LOG.debug("Successfully submitted hadoop job {} to cluster {}.", submitted.getReference().getJobId(), clusterName);
        } catch (AlreadyExistsException ex) {
            // A job with this id is already there; treat the resubmission as a no-op.
            LOG.warn("The dataproc job {} already exists. Ignoring resubmission of the job.", submitRequest.getJob().getReference().getJobId());
        }
        DataprocUtils.emitMetric(provisionerContext, region, "provisioner.submitJob.response.count");
    } catch (Exception e) {
        // Remove whatever was uploaded for this run before reporting the failure.
        DataprocUtils.deleteGCSPath(getStorageClient(), bucketName, gcsRunRoot);
        DataprocUtils.emitMetric(provisionerContext, region, "provisioner.submitJob.response.count", e);
        throw new Exception(String.format("Error while launching job %s on cluster %s", getJobId(runInfo), clusterName), e);
    } finally {
        // The local temp directory is no longer needed, whether or not the launch succeeded.
        deleteDirectoryContents(localStagingDir);
    }
}
Also used : AlreadyExistsException(com.google.api.gax.rpc.AlreadyExistsException) ArrayList(java.util.ArrayList) SubmitJobRequest(com.google.cloud.dataproc.v1beta2.SubmitJobRequest) AlreadyExistsException(com.google.api.gax.rpc.AlreadyExistsException) IOException(java.io.IOException) ApiException(com.google.api.gax.rpc.ApiException) StorageException(com.google.cloud.storage.StorageException) DefaultLocalFile(org.apache.twill.internal.DefaultLocalFile) LocalFile(org.apache.twill.api.LocalFile) Future(java.util.concurrent.Future) HadoopJob(com.google.cloud.dataproc.v1beta2.HadoopJob) Job(com.google.cloud.dataproc.v1beta2.Job) DefaultLocalFile(org.apache.twill.internal.DefaultLocalFile) LocalFile(org.apache.twill.api.LocalFile) File(java.io.File) ProgramRunInfo(io.cdap.cdap.runtime.spi.ProgramRunInfo)

Aggregations

LocalFile (org.apache.twill.api.LocalFile): 30; DefaultLocalFile (org.apache.twill.internal.DefaultLocalFile): 22; File (java.io.File): 18; URI (java.net.URI): 16; Location (org.apache.twill.filesystem.Location): 14; InputStream (java.io.InputStream): 12; ArrayList (java.util.ArrayList): 12; HashMap (java.util.HashMap): 12; Map (java.util.Map): 12; Path (java.nio.file.Path): 10; Collection (java.util.Collection): 10; LinkedHashMap (java.util.LinkedHashMap): 10; TwillRuntimeSpecification (org.apache.twill.internal.TwillRuntimeSpecification): 10; IOException (java.io.IOException): 8; Hashing (com.google.common.hash.Hashing): 6; Writer (java.io.Writer): 6; URL (java.net.URL): 6; StandardCharsets (java.nio.charset.StandardCharsets): 6; DirectoryStream (java.nio.file.DirectoryStream): 6; Files (java.nio.file.Files): 6