use of org.apache.twill.filesystem.Location in project cdap by caskdata.
the class StreamTailer method main.
/**
 * Command line entry point that prints the events of a stream in the default namespace to standard output.
 * Each event is printed as its timestamp followed by its body decoded as UTF-8.
 *
 * <p>Every event file found in the partition directories directly under the stream location is read through
 * a single {@link MultiLiveStreamFileReader}. Reading continues until the reader reports a negative count.
 *
 * @param args command line arguments; {@code args[0]} is the stream name. If no argument is given,
 *             a usage message is printed and the method returns.
 * @throws Exception if looking up the stream configuration, listing locations or reading events fails
 */
public static void main(String[] args) throws Exception {
  if (args.length < 1) {
    System.out.println(String.format("Usage: java %s [streamName]", StreamTailer.class.getName()));
    return;
  }
  String streamName = args[0];
  CConfiguration cConf = CConfiguration.create();
  Configuration hConf = new Configuration();
  String txClientId = StreamTailer.class.getName();
  Injector injector = Guice.createInjector(
    new ConfigModule(cConf, hConf),
    new DataFabricModules(txClientId).getDistributedModules(),
    new DataSetsModules().getDistributedModules(),
    new LocationRuntimeModule().getDistributedModules(),
    new ExploreClientModule(),
    new ViewAdminModules().getDistributedModules(),
    new StreamAdminModules().getDistributedModules(),
    new AuthorizationEnforcementModule().getDistributedModules(),
    new AuthenticationContextModules().getMasterModule(),
    new NotificationFeedClientModule());
  StreamAdmin streamAdmin = injector.getInstance(StreamAdmin.class);
  //TODO: get namespace from commandline arguments
  StreamId streamId = NamespaceId.DEFAULT.stream(streamName);
  StreamConfig streamConfig = streamAdmin.getConfig(streamId);
  Location streamLocation = streamConfig.getLocation();

  // Collect the event files from every partition directory under the stream location.
  List<Location> eventFiles = Lists.newArrayList();
  for (Location partition : streamLocation.list()) {
    if (!partition.isDirectory()) {
      continue;
    }
    for (Location file : partition.list()) {
      if (StreamFileType.EVENT.isMatched(file.getName())) {
        eventFiles.add(file);
      }
    }
  }

  int generation = StreamUtils.getGeneration(streamConfig);
  MultiLiveStreamFileReader reader = new MultiLiveStreamFileReader(
    streamConfig, ImmutableList.copyOf(Iterables.transform(eventFiles, createOffsetConverter(generation))));
  try {
    List<StreamEvent> events = Lists.newArrayList();
    // Read in batches of up to 10 events, waiting at most 100 ms per call.
    while (reader.read(events, 10, 100, TimeUnit.MILLISECONDS) >= 0) {
      for (StreamEvent event : events) {
        System.out.println(event.getTimestamp() + " " + Charsets.UTF_8.decode(event.getBody()));
      }
      events.clear();
    }
  } finally {
    // Always release the reader, even if reading or printing fails.
    reader.close();
  }
}
use of org.apache.twill.filesystem.Location in project cdap by caskdata.
the class MapReduceRuntimeService method copyFileToLocation.
/**
 * Copies a file to the target location.
 *
 * <p>The destination is a temporary location with a {@code .jar} suffix, derived from the file's name
 * under {@code targetDir}.
 *
 * @param file the local file to copy; may be {@code null}.
 * @param targetDir directory where the file should be copied to.
 * @return {@link Location} to the file or {@code null} if given file is {@code null}.
 * @throws IOException if creating the target location or copying the content fails.
 */
private Location copyFileToLocation(File file, Location targetDir) throws IOException {
  // Honor the documented contract: a null file yields a null location instead of a NullPointerException.
  if (file == null) {
    return null;
  }
  Location targetLocation = targetDir.append(file.getName()).getTempFile(".jar");
  Files.copy(file, Locations.newOutputSupplier(targetLocation));
  return targetLocation;
}
use of org.apache.twill.filesystem.Location in project cdap by caskdata.
the class MapReduceRuntimeService method startUp.
/**
 * Prepares and submits the MapReduce job.
 *
 * <p>The method creates a local temporary directory and a temporary {@link Location}, builds the {@link Job},
 * installs the MapReduce class loader, calls {@code beforeSubmit}, localizes user resources, wraps the user's
 * classes, builds the job jar and, when not running in local mode, localizes the program jar, launcher jar,
 * logback jar and extra jars and sets the container classpath. It then starts a long transaction, records the
 * context in the job configuration and submits the job without waiting for completion.
 *
 * <p>Resources created along the way are registered in {@code cleanupTask}. If anything fails, the cleanup
 * task is run before the failure is propagated; if the failure happens after the transaction was started,
 * the transaction is invalidated first.
 *
 * @throws Exception if any step of the preparation or the job submission fails
 */
@Override
protected void startUp() throws Exception {
  // Creates a temporary directory locally for storing all generated files.
  File tempDir = createTempDirectory();
  cleanupTask = createCleanupTask(tempDir);
  try {
    Job job = createJob(new File(tempDir, "mapreduce"));
    Configuration mapredConf = job.getConfiguration();
    classLoader = new MapReduceClassLoader(injector, cConf, mapredConf, context.getProgram().getClassLoader(),
                                           context.getApplicationSpecification().getPlugins(),
                                           context.getPluginInstantiator());
    cleanupTask = createCleanupTask(cleanupTask, classLoader);
    mapredConf.setClassLoader(new WeakReferenceDelegatorClassLoader(classLoader));
    ClassLoaders.setContextClassLoader(mapredConf.getClassLoader());
    context.setJob(job);
    beforeSubmit(job);
    // Localize additional resources that users have requested via BasicMapReduceContext.localize methods
    Map<String, String> localizedUserResources = localizeUserResources(job, tempDir);
    // Override user-defined job name, since we set it and depend on the name.
    // https://issues.cask.co/browse/CDAP-2441
    String jobName = job.getJobName();
    if (!jobName.isEmpty()) {
      LOG.warn("Job name {} is being overridden.", jobName);
    }
    job.setJobName(getJobName(context));
    // Create a temporary location for storing all generated files through the LocationFactory.
    Location tempLocation = createTempLocationDirectory();
    cleanupTask = createCleanupTask(cleanupTask, tempLocation);
    // For local mode, everything is in the configuration classloader already, hence no need to create new jar
    if (!MapReduceTaskContextProvider.isLocal(mapredConf)) {
      // After calling initialize, we know what plugins are needed for the program, hence construct the proper
      // ClassLoader from here and use it for setting up the job
      Location pluginArchive = createPluginArchive(tempLocation);
      if (pluginArchive != null) {
        job.addCacheArchive(pluginArchive.toURI());
        mapredConf.set(Constants.Plugin.ARCHIVE, pluginArchive.getName());
      }
    }
    // set resources for the job
    TaskType.MAP.setResources(mapredConf, context.getMapperResources());
    TaskType.REDUCE.setResources(mapredConf, context.getReducerResources());
    // replace user's Mapper, Reducer, Partitioner, and Comparator classes with our wrappers in job config
    MapperWrapper.wrap(job);
    ReducerWrapper.wrap(job);
    PartitionerWrapper.wrap(job);
    RawComparatorWrapper.CombinerGroupComparatorWrapper.wrap(job);
    RawComparatorWrapper.GroupComparatorWrapper.wrap(job);
    RawComparatorWrapper.KeyComparatorWrapper.wrap(job);
    // packaging job jar which includes cdap classes with dependencies
    File jobJar = buildJobJar(job, tempDir);
    job.setJar(jobJar.toURI().toString());
    Location programJar = programJarLocation;
    if (!MapReduceTaskContextProvider.isLocal(mapredConf)) {
      // Copy and localize the program jar in distributed mode
      programJar = copyProgramJar(tempLocation);
      job.addCacheFile(programJar.toURI());
      // Generate and localize the launcher jar to control the classloader of MapReduce containers processes
      Location launcherJar = createLauncherJar(tempLocation);
      job.addCacheFile(launcherJar.toURI());
      // Launcher.jar should be the first one in the classpath
      List<String> classpath = new ArrayList<>();
      classpath.add(launcherJar.getName());
      // Localize logback.xml
      Location logbackLocation = ProgramRunners.createLogbackJar(tempLocation.append("logback.xml.jar"));
      if (logbackLocation != null) {
        job.addCacheFile(logbackLocation.toURI());
        classpath.add(logbackLocation.getName());
        mapredConf.set("yarn.app.mapreduce.am.env", "CDAP_LOG_DIR=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR);
        mapredConf.set("mapreduce.map.env", "CDAP_LOG_DIR=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR);
        mapredConf.set("mapreduce.reduce.env", "CDAP_LOG_DIR=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR);
      }
      // Get all the jars in jobJar and sort them lexically before adding to the classpath
      // This allows CDAP classes to be picked up first before the Twill classes
      List<String> jarFiles = new ArrayList<>();
      try (JarFile jobJarFile = new JarFile(jobJar)) {
        Enumeration<JarEntry> entries = jobJarFile.entries();
        while (entries.hasMoreElements()) {
          JarEntry entry = entries.nextElement();
          if (entry.getName().startsWith("lib/") && entry.getName().endsWith(".jar")) {
            jarFiles.add("job.jar/" + entry.getName());
          }
        }
      }
      Collections.sort(jarFiles);
      classpath.addAll(jarFiles);
      classpath.add("job.jar/classes");
      // Add extra jars set in cConf
      for (URI jarURI : CConfigurationUtil.getExtraJars(cConf)) {
        if ("file".equals(jarURI.getScheme())) {
          Location extraJarLocation = copyFileToLocation(new File(jarURI.getPath()), tempLocation);
          job.addCacheFile(extraJarLocation.toURI());
        } else {
          job.addCacheFile(jarURI);
        }
        classpath.add(LocalizationUtils.getLocalizedName(jarURI));
      }
      // Add the mapreduce application classpath at last
      MapReduceContainerHelper.addMapReduceClassPath(mapredConf, classpath);
      mapredConf.set(MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH, Joiner.on(",").join(classpath));
      mapredConf.set(YarnConfiguration.YARN_APPLICATION_CLASSPATH, Joiner.on(",").join(classpath));
    }
    MapReduceContextConfig contextConfig = new MapReduceContextConfig(mapredConf);
    // We start long-running tx to be used by mapreduce job tasks.
    Transaction tx = txClient.startLong();
    try {
      // We remember tx, so that we can re-use it in mapreduce tasks
      contextConfig.set(context, cConf, tx, programJar.toURI(), localizedUserResources);
      // submits job and returns immediately. Shouldn't need to set context ClassLoader.
      job.submit();
      // log after the job.submit(), because the jobId is not assigned before then
      LOG.debug("Submitted MapReduce Job: {}.", context);
      this.job = job;
      this.transaction = tx;
    } catch (Throwable t) {
      // The transaction must not stay open if the job could not be submitted.
      Transactions.invalidateQuietly(txClient, tx);
      throw t;
    }
  } catch (LinkageError e) {
    // Release the temporary resources created so far, same as for any other failure.
    cleanupTask.run();
    // Wrap the LinkageError in an Exception before propagating it. The error is raised when the
    // user program is missing dependencies (CDAP-2543).
    throw new Exception(e.getMessage(), e);
  } catch (Throwable t) {
    cleanupTask.run();
    // don't log the error. It will be logged by the ProgramControllerServiceAdapter.failed()
    if (t instanceof TransactionFailureException) {
      throw Transactions.propagate((TransactionFailureException) t, Exception.class);
    }
    throw t;
  }
}
use of org.apache.twill.filesystem.Location in project cdap by caskdata.
the class ArtifactStore method getPluginsInArtifact.
/**
 * Looks up the plugin classes that are inside the given artifact itself and that pass the given filter.
 *
 * @param table the table to read the artifact data from
 * @param artifactId the artifact whose own plugin classes should be returned
 * @param filter predicate selecting which plugin classes to include
 * @return {@code null} if the table holds no data for the artifact; otherwise a sorted map that is empty
 *         when no plugin class passes the filter, or contains a single entry mapping the artifact's
 *         descriptor to the plugin classes that passed
 */
private SortedMap<ArtifactDescriptor, Set<PluginClass>> getPluginsInArtifact(Table table, Id.Artifact artifactId,
                                                                             Predicate<PluginClass> filter) {
  // Make sure the artifact exists
  ArtifactCell artifactCell = new ArtifactCell(artifactId);
  byte[] serializedArtifact = table.get(artifactCell.rowkey, artifactCell.column);
  if (serializedArtifact == null) {
    return null;
  }

  // Only the plugin classes declared by the artifact itself are considered here.
  ArtifactData artifactData = GSON.fromJson(Bytes.toString(serializedArtifact), ArtifactData.class);
  Set<PluginClass> declaredPlugins = artifactData.meta.getClasses().getPlugins();
  Set<PluginClass> matchingPlugins = Sets.newLinkedHashSet(Iterables.filter(declaredPlugins, filter));

  SortedMap<ArtifactDescriptor, Set<PluginClass>> pluginsByArtifact = new TreeMap<>();
  if (matchingPlugins.isEmpty()) {
    return pluginsByArtifact;
  }

  Location artifactLocation =
    Locations.getLocationFromAbsolutePath(locationFactory, artifactData.getLocationPath());
  pluginsByArtifact.put(new ArtifactDescriptor(artifactId.toArtifactId(), artifactLocation), matchingPlugins);
  return pluginsByArtifact;
}
use of org.apache.twill.filesystem.Location in project cdap by caskdata.
the class MapReduceTaskContextProvider method createProgram.
/**
 * Builds a {@link Program} from the details carried by the given {@link MapReduceContextConfig}, backed by
 * the given program ClassLoader.
 *
 * @param contextConfig supplies the Hadoop configuration, program id, application specification and
 *                      the program jar URI or name
 * @param programClassLoader the ClassLoader to associate with the created program
 * @return a new {@link DefaultProgram} pointing at the program jar location
 */
private Program createProgram(MapReduceContextConfig contextConfig, ClassLoader programClassLoader) {
  // A local location factory is used in both modes; it only serves to resolve the program jar location.
  LocationFactory localFactory = new LocalLocationFactory();

  // Local mode: use the program jar URI from the config.
  // Distributed mode: the program jar is localized to the container, so resolve it by name as a local file.
  Location jarLocation = isLocal(contextConfig.getHConf())
    ? localFactory.create(contextConfig.getProgramJarURI())
    : localFactory.create(new File(contextConfig.getProgramJarName()).getAbsoluteFile().toURI());

  ProgramDescriptor descriptor = new ProgramDescriptor(contextConfig.getProgramId(),
                                                       contextConfig.getApplicationSpecification());
  return new DefaultProgram(descriptor, jarLocation, programClassLoader);
}
Aggregations