Search in sources :

Example 91 with Location

use of org.apache.twill.filesystem.Location in project cdap by caskdata.

the class StreamTailer method main.

/**
 * Command-line entry point that tails the event files of a stream in the default namespace and
 * prints every event to standard output as {@code <timestamp> <body decoded as UTF-8>}.
 *
 * @param args command-line arguments; {@code args[0]} is the stream name. When no argument is
 *             given, a usage message is printed and the method returns.
 * @throws Exception if the stream configuration cannot be looked up or reading the stream fails
 */
public static void main(String[] args) throws Exception {
    if (args.length < 1) {
        System.out.println(String.format("Usage: java %s [streamName]", StreamTailer.class.getName()));
        return;
    }
    String streamName = args[0];
    CConfiguration cConf = CConfiguration.create();
    Configuration hConf = new Configuration();
    String txClientId = StreamTailer.class.getName();
    Injector injector = Guice.createInjector(
        new ConfigModule(cConf, hConf),
        new DataFabricModules(txClientId).getDistributedModules(),
        new DataSetsModules().getDistributedModules(),
        new LocationRuntimeModule().getDistributedModules(),
        new ExploreClientModule(),
        new ViewAdminModules().getDistributedModules(),
        new StreamAdminModules().getDistributedModules(),
        new AuthorizationEnforcementModule().getDistributedModules(),
        new AuthenticationContextModules().getMasterModule(),
        new NotificationFeedClientModule());
    StreamAdmin streamAdmin = injector.getInstance(StreamAdmin.class);
    //TODO: get namespace from commandline arguments
    StreamId streamId = NamespaceId.DEFAULT.stream(streamName);
    StreamConfig streamConfig = streamAdmin.getConfig(streamId);
    Location streamLocation = streamConfig.getLocation();
    // Collect the event files found one level down, inside each partition directory of the stream.
    List<Location> eventFiles = Lists.newArrayList();
    for (Location partition : streamLocation.list()) {
        if (!partition.isDirectory()) {
            continue;
        }
        for (Location file : partition.list()) {
            if (StreamFileType.EVENT.isMatched(file.getName())) {
                eventFiles.add(file);
            }
        }
    }
    int generation = StreamUtils.getGeneration(streamConfig);
    MultiLiveStreamFileReader reader = new MultiLiveStreamFileReader(streamConfig, ImmutableList.copyOf(Iterables.transform(eventFiles, createOffsetConverter(generation))));
    try {
        List<StreamEvent> events = Lists.newArrayList();
        // Keep polling in small batches; the loop ends once read() reports a negative count
        // (presumably end of stream -- confirm against the reader's contract).
        while (reader.read(events, 10, 100, TimeUnit.MILLISECONDS) >= 0) {
            for (StreamEvent event : events) {
                System.out.println(event.getTimestamp() + " " + Charsets.UTF_8.decode(event.getBody()));
            }
            // The same list is reused for the next batch.
            events.clear();
        }
    } finally {
        // Always release the reader, even when reading or printing fails part-way through.
        reader.close();
    }
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) CConfiguration(co.cask.cdap.common.conf.CConfiguration) Configuration(org.apache.hadoop.conf.Configuration) ConfigModule(co.cask.cdap.common.guice.ConfigModule) AuthenticationContextModules(co.cask.cdap.security.auth.context.AuthenticationContextModules) DataSetsModules(co.cask.cdap.data.runtime.DataSetsModules) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) LocationRuntimeModule(co.cask.cdap.common.guice.LocationRuntimeModule) StreamConfig(co.cask.cdap.data2.transaction.stream.StreamConfig) CConfiguration(co.cask.cdap.common.conf.CConfiguration) ViewAdminModules(co.cask.cdap.data.view.ViewAdminModules) StreamAdmin(co.cask.cdap.data2.transaction.stream.StreamAdmin) ExploreClientModule(co.cask.cdap.explore.guice.ExploreClientModule) Injector(com.google.inject.Injector) NotificationFeedClientModule(co.cask.cdap.notifications.feeds.client.NotificationFeedClientModule) DataFabricModules(co.cask.cdap.data.runtime.DataFabricModules) AuthorizationEnforcementModule(co.cask.cdap.security.authorization.AuthorizationEnforcementModule) Location(org.apache.twill.filesystem.Location)

Example 92 with Location

use of org.apache.twill.filesystem.Location in project cdap by caskdata.

the class MapReduceRuntimeService method copyFileToLocation.

/**
   * Copies a file to a newly generated temporary {@code .jar} location under the target directory.
   *
   * @param file the local file to copy; may be {@code null}
   * @param targetDir directory where the file should be copied to.
   * @return {@link Location} to the file or {@code null} if given file is {@code null}.
   * @throws IOException if the temporary location cannot be created or the copy fails
   */
private Location copyFileToLocation(File file, Location targetDir) throws IOException {
    // Honour the documented contract: nothing to copy means no resulting location.
    if (file == null) {
        return null;
    }
    Location targetLocation = targetDir.append(file.getName()).getTempFile(".jar");
    Files.copy(file, Locations.newOutputSupplier(targetLocation));
    return targetLocation;
}
Also used : Location(org.apache.twill.filesystem.Location)

Example 93 with Location

use of org.apache.twill.filesystem.Location in project cdap by caskdata.

the class MapReduceRuntimeService method startUp.

/**
 * Prepares and submits the MapReduce job: creates the job and its class loader, localizes the
 * resources and jars the job needs, builds the container classpath (distributed mode only),
 * starts a long-running transaction and finally submits the job.
 *
 * <p>If any step fails, the accumulated cleanup task is run before the failure is propagated,
 * so that temporary files and the class loader created so far are released.
 *
 * @throws Exception if the job cannot be prepared or submitted
 */
@Override
protected void startUp() throws Exception {
    // Creates a temporary directory locally for storing all generated files.
    File tempDir = createTempDirectory();
    cleanupTask = createCleanupTask(tempDir);
    try {
        Job job = createJob(new File(tempDir, "mapreduce"));
        Configuration mapredConf = job.getConfiguration();
        classLoader = new MapReduceClassLoader(injector, cConf, mapredConf, context.getProgram().getClassLoader(), context.getApplicationSpecification().getPlugins(), context.getPluginInstantiator());
        cleanupTask = createCleanupTask(cleanupTask, classLoader);
        mapredConf.setClassLoader(new WeakReferenceDelegatorClassLoader(classLoader));
        ClassLoaders.setContextClassLoader(mapredConf.getClassLoader());
        context.setJob(job);
        beforeSubmit(job);
        // Localize additional resources that users have requested via BasicMapReduceContext.localize methods
        Map<String, String> localizedUserResources = localizeUserResources(job, tempDir);
        // Override user-defined job name, since we set it and depend on the name.
        // https://issues.cask.co/browse/CDAP-2441
        String jobName = job.getJobName();
        if (!jobName.isEmpty()) {
            LOG.warn("Job name {} is being overridden.", jobName);
        }
        job.setJobName(getJobName(context));
        // Create a temporary location for storing all generated files through the LocationFactory.
        Location tempLocation = createTempLocationDirectory();
        cleanupTask = createCleanupTask(cleanupTask, tempLocation);
        // For local mode, everything is in the configuration classloader already, hence no need to create new jar
        if (!MapReduceTaskContextProvider.isLocal(mapredConf)) {
            // After calling initialize, we know what plugins are needed for the program, hence construct the proper
            // ClassLoader from here and use it for setting up the job
            Location pluginArchive = createPluginArchive(tempLocation);
            if (pluginArchive != null) {
                job.addCacheArchive(pluginArchive.toURI());
                mapredConf.set(Constants.Plugin.ARCHIVE, pluginArchive.getName());
            }
        }
        // set resources for the job
        TaskType.MAP.setResources(mapredConf, context.getMapperResources());
        TaskType.REDUCE.setResources(mapredConf, context.getReducerResources());
        // replace user's Mapper, Reducer, Partitioner, and Comparator classes with our wrappers in job config
        MapperWrapper.wrap(job);
        ReducerWrapper.wrap(job);
        PartitionerWrapper.wrap(job);
        RawComparatorWrapper.CombinerGroupComparatorWrapper.wrap(job);
        RawComparatorWrapper.GroupComparatorWrapper.wrap(job);
        RawComparatorWrapper.KeyComparatorWrapper.wrap(job);
        // packaging job jar which includes cdap classes with dependencies
        File jobJar = buildJobJar(job, tempDir);
        job.setJar(jobJar.toURI().toString());
        Location programJar = programJarLocation;
        if (!MapReduceTaskContextProvider.isLocal(mapredConf)) {
            // Copy and localize the program jar in distributed mode
            programJar = copyProgramJar(tempLocation);
            job.addCacheFile(programJar.toURI());
            // Generate and localize the launcher jar to control the classloader of MapReduce containers processes
            Location launcherJar = createLauncherJar(tempLocation);
            job.addCacheFile(launcherJar.toURI());
            // Launcher.jar should be the first one in the classpath
            List<String> classpath = new ArrayList<>();
            classpath.add(launcherJar.getName());
            // Localize logback.xml
            Location logbackLocation = ProgramRunners.createLogbackJar(tempLocation.append("logback.xml.jar"));
            if (logbackLocation != null) {
                job.addCacheFile(logbackLocation.toURI());
                classpath.add(logbackLocation.getName());
                mapredConf.set("yarn.app.mapreduce.am.env", "CDAP_LOG_DIR=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR);
                mapredConf.set("mapreduce.map.env", "CDAP_LOG_DIR=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR);
                mapredConf.set("mapreduce.reduce.env", "CDAP_LOG_DIR=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR);
            }
            // Get all the jars in jobJar and sort them lexically before adding to the classpath
            // This allows CDAP classes to be picked up first before the Twill classes
            List<String> jarFiles = new ArrayList<>();
            try (JarFile jobJarFile = new JarFile(jobJar)) {
                Enumeration<JarEntry> entries = jobJarFile.entries();
                while (entries.hasMoreElements()) {
                    JarEntry entry = entries.nextElement();
                    if (entry.getName().startsWith("lib/") && entry.getName().endsWith(".jar")) {
                        jarFiles.add("job.jar/" + entry.getName());
                    }
                }
            }
            Collections.sort(jarFiles);
            classpath.addAll(jarFiles);
            classpath.add("job.jar/classes");
            // Add extra jars set in cConf
            for (URI jarURI : CConfigurationUtil.getExtraJars(cConf)) {
                if ("file".equals(jarURI.getScheme())) {
                    // Local files are copied to the temp location first, and the copy is localized
                    Location extraJarLocation = copyFileToLocation(new File(jarURI.getPath()), tempLocation);
                    job.addCacheFile(extraJarLocation.toURI());
                } else {
                    job.addCacheFile(jarURI);
                }
                classpath.add(LocalizationUtils.getLocalizedName(jarURI));
            }
            // Add the mapreduce application classpath at last
            MapReduceContainerHelper.addMapReduceClassPath(mapredConf, classpath);
            mapredConf.set(MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH, Joiner.on(",").join(classpath));
            mapredConf.set(YarnConfiguration.YARN_APPLICATION_CLASSPATH, Joiner.on(",").join(classpath));
        }
        MapReduceContextConfig contextConfig = new MapReduceContextConfig(mapredConf);
        // We start long-running tx to be used by mapreduce job tasks.
        Transaction tx = txClient.startLong();
        try {
            // We remember tx, so that we can re-use it in mapreduce tasks
            contextConfig.set(context, cConf, tx, programJar.toURI(), localizedUserResources);
            // submits job and returns immediately. Shouldn't need to set context ClassLoader.
            job.submit();
            // log after the job.submit(), because the jobId is not assigned before then
            LOG.debug("Submitted MapReduce Job: {}.", context);
            this.job = job;
            this.transaction = tx;
        } catch (Throwable t) {
            // The job never got to use the transaction, so invalidate it before propagating the failure
            Transactions.invalidateQuietly(txClient, tx);
            throw t;
        }
    } catch (LinkageError e) {
        // Release everything created so far, exactly as for any other failure below;
        // otherwise the temp directory, class loader and temp location would be leaked.
        cleanupTask.run();
        // Wrap the LinkageError in an Exception. This happens, for instance, when the initialization
        // of the user program is missing dependencies (CDAP-2543)
        throw new Exception(e.getMessage(), e);
    } catch (Throwable t) {
        cleanupTask.run();
        // don't log the error. It will be logged by the ProgramControllerServiceAdapter.failed()
        if (t instanceof TransactionFailureException) {
            throw Transactions.propagate((TransactionFailureException) t, Exception.class);
        }
        throw t;
    }
}
Also used : CConfiguration(co.cask.cdap.common.conf.CConfiguration) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) ArrayList(java.util.ArrayList) JarFile(java.util.jar.JarFile) JarEntry(java.util.jar.JarEntry) URI(java.net.URI) CConfiguration(co.cask.cdap.common.conf.CConfiguration) ProvisionException(com.google.inject.ProvisionException) IOException(java.io.IOException) TransactionFailureException(org.apache.tephra.TransactionFailureException) URISyntaxException(java.net.URISyntaxException) TransactionConflictException(org.apache.tephra.TransactionConflictException) WeakReferenceDelegatorClassLoader(co.cask.cdap.common.lang.WeakReferenceDelegatorClassLoader) TransactionFailureException(org.apache.tephra.TransactionFailureException) Transaction(org.apache.tephra.Transaction) Job(org.apache.hadoop.mapreduce.Job) File(java.io.File) JarFile(java.util.jar.JarFile) Location(org.apache.twill.filesystem.Location)

Example 94 with Location

use of org.apache.twill.filesystem.Location in project cdap by caskdata.

the class ArtifactStore method getPluginsInArtifact.

/**
 * Looks up the plugin classes contained in the given artifact itself that match the filter.
 *
 * @param table the table the artifact data is read from
 * @param artifactId the artifact whose own plugin classes are inspected
 * @param filter predicate selecting which plugin classes to include
 * @return {@code null} if no data is stored for the artifact; otherwise a sorted map that is empty
 *         when no plugin class passes the filter, or holds a single entry mapping the artifact's
 *         descriptor to the matching plugin classes
 */
private SortedMap<ArtifactDescriptor, Set<PluginClass>> getPluginsInArtifact(Table table, Id.Artifact artifactId, Predicate<PluginClass> filter) {
    // Bail out early when the artifact itself is not present in the table
    ArtifactCell cell = new ArtifactCell(artifactId);
    byte[] rawData = table.get(cell.rowkey, cell.column);
    if (rawData == null) {
        return null;
    }

    SortedMap<ArtifactDescriptor, Set<PluginClass>> pluginsByArtifact = new TreeMap<>();

    // Only the plugin classes packaged inside the artifact itself are considered here
    ArtifactData artifactData = GSON.fromJson(Bytes.toString(rawData), ArtifactData.class);
    Set<PluginClass> matching = Sets.newLinkedHashSet(Iterables.filter(artifactData.meta.getClasses().getPlugins(), filter));
    if (matching.isEmpty()) {
        return pluginsByArtifact;
    }

    Location artifactLocation = Locations.getLocationFromAbsolutePath(locationFactory, artifactData.getLocationPath());
    pluginsByArtifact.put(new ArtifactDescriptor(artifactId.toArtifactId(), artifactLocation), matching);
    return pluginsByArtifact;
}
Also used : Set(java.util.Set) TreeMap(java.util.TreeMap) PluginClass(co.cask.cdap.api.plugin.PluginClass) Location(org.apache.twill.filesystem.Location)

Example 95 with Location

use of org.apache.twill.filesystem.Location in project cdap by caskdata.

the class MapReduceTaskContextProvider method createProgram.

/**
   * Builds a {@link Program} from the program id, application specification and program jar
   * described by the {@link MapReduceContextConfig}, backed by the supplied program ClassLoader.
   *
   * @param contextConfig source of the program id, application specification and program jar
   * @param programClassLoader the ClassLoader handed to the created program
   * @return a new {@link DefaultProgram} whose location points to the program jar
   */
private Program createProgram(MapReduceContextConfig contextConfig, ClassLoader programClassLoader) {
    // A local location factory is used in both modes. It's for temp usage only as the program
    // location is always absolute.
    LocationFactory localFactory = new LocalLocationFactory();

    // Local mode: use the program jar URI as-is.
    // Distributed mode: the program jar is localized to the container, so resolve it by name.
    Location jarLocation = isLocal(contextConfig.getHConf())
        ? localFactory.create(contextConfig.getProgramJarURI())
        : localFactory.create(new File(contextConfig.getProgramJarName()).getAbsoluteFile().toURI());

    ProgramDescriptor descriptor = new ProgramDescriptor(contextConfig.getProgramId(), contextConfig.getApplicationSpecification());
    return new DefaultProgram(descriptor, jarLocation, programClassLoader);
}
Also used : DefaultProgram(co.cask.cdap.app.program.DefaultProgram) ProgramDescriptor(co.cask.cdap.app.program.ProgramDescriptor) LocalLocationFactory(org.apache.twill.filesystem.LocalLocationFactory) File(java.io.File) Location(org.apache.twill.filesystem.Location) LocalLocationFactory(org.apache.twill.filesystem.LocalLocationFactory) LocationFactory(org.apache.twill.filesystem.LocationFactory)

Aggregations

Location (org.apache.twill.filesystem.Location)246 Test (org.junit.Test)104 IOException (java.io.IOException)57 File (java.io.File)39 LocalLocationFactory (org.apache.twill.filesystem.LocalLocationFactory)29 LocationFactory (org.apache.twill.filesystem.LocationFactory)29 FileSet (co.cask.cdap.api.dataset.lib.FileSet)28 StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent)27 PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet)23 CConfiguration (co.cask.cdap.common.conf.CConfiguration)19 NamespaceId (co.cask.cdap.proto.id.NamespaceId)19 Manifest (java.util.jar.Manifest)18 HashMap (java.util.HashMap)17 StreamId (co.cask.cdap.proto.id.StreamId)16 OutputStream (java.io.OutputStream)15 DatasetFramework (co.cask.cdap.data2.dataset2.DatasetFramework)13 TimePartitionedFileSet (co.cask.cdap.api.dataset.lib.TimePartitionedFileSet)11 StreamConfig (co.cask.cdap.data2.transaction.stream.StreamConfig)10 ArrayList (java.util.ArrayList)9 StreamAdmin (co.cask.cdap.data2.transaction.stream.StreamAdmin)8