Use of org.apache.hadoop.mapreduce.JobID in project tez by apache.
The class TestMRInputHelpers, method verifyLocationHints.
private void verifyLocationHints(Path inputSplitsDir, List<TaskLocationHint> actual) throws Exception {
  // Placeholder job id used when reading the persisted split meta info
  JobID jobId = new JobID("dummy", 1);
  JobSplit.TaskSplitMetaInfo[] splitsInfo =
      SplitMetaInfoReader.readSplitMetaInfo(jobId, remoteFs, conf, inputSplitsDir);
  int splitsCount = splitsInfo.length;
  // Rebuild the expected location hints from the split locations and compare with the actual ones
  List<TaskLocationHint> locationHints = new ArrayList<TaskLocationHint>(splitsCount);
  for (int i = 0; i < splitsCount; ++i) {
    locationHints.add(TaskLocationHint.createTaskLocationHint(
        new HashSet<String>(Arrays.asList(splitsInfo[i].getLocations())), null));
  }
  Assert.assertEquals(locationHints, actual);
}
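The dummy JobID above exists only so that SplitMetaInfoReader can be called; its two-argument constructor takes a cluster identifier and a sequence number. A minimal, standalone sketch of constructing a JobID and round-tripping it through its string form (the "dummy" identifier is made up for illustration):

import org.apache.hadoop.mapreduce.JobID;

public class JobIdSketch {
  public static void main(String[] args) {
    // "dummy" is the cluster/jobtracker identifier, 1 is the job sequence number
    JobID jobId = new JobID("dummy", 1);
    // Prints something like "job_dummy_0001"; forName() parses the string back into a JobID
    System.out.println(jobId);
    JobID parsed = JobID.forName(jobId.toString());
    System.out.println(jobId.equals(parsed)); // true
  }
}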
Use of org.apache.hadoop.mapreduce.JobID in project carbondata by apache.
The class CarbonStreamInputFormatTest, method setUp.
@Override
protected void setUp() throws Exception {
  tablePath = new File("target/stream_input").getCanonicalPath();
  String dbName = "default";
  String tableName = "stream_table_input";
  identifier = AbsoluteTableIdentifier.from(tablePath,
      new CarbonTableIdentifier(dbName, tableName, UUID.randomUUID().toString()));
  // Build the id hierarchy JobID -> TaskID -> TaskAttemptID for a single map attempt
  JobID jobId = CarbonInputFormatUtil.getJobId(new Date(), 0);
  TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
  taskAttemptId = new TaskAttemptID(taskId, 0);
  hadoopConf = new Configuration();
  taskAttemptContext = new TaskAttemptContextImpl(hadoopConf, taskAttemptId);
}
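This setUp wires a TaskAttemptContext from the id hierarchy JobID -> TaskID -> TaskAttemptID. A minimal sketch of the same wiring using only Hadoop classes, without the CarbonData helpers (the "local" identifier is made up for illustration):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskAttemptContextSketch {
  public static void main(String[] args) {
    // Any unique identifier works for a local test; "local" and 0 are arbitrary here
    JobID jobId = new JobID("local", 0);
    TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
    TaskAttemptID attemptId = new TaskAttemptID(taskId, 0);
    // The context carries the configuration plus the attempt id, as in the test above
    TaskAttemptContextImpl context = new TaskAttemptContextImpl(new Configuration(), attemptId);
    System.out.println(context.getTaskAttemptID());
  }
}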
Use of org.apache.hadoop.mapreduce.JobID in project carbondata by apache.
The class CarbonStreamOutputFormatTest, method setUp.
@Override
protected void setUp() throws Exception {
  super.setUp();
  JobID jobId = CarbonInputFormatUtil.getJobId(new Date(), 0);
  TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
  taskAttemptId = new TaskAttemptID(taskId, 0);
  hadoopConf = new Configuration();
  // Mirror the new-API ids into the legacy mapred.* keys read by old-API code paths
  hadoopConf.set("mapred.job.id", jobId.toString());
  hadoopConf.set("mapred.tip.id", taskAttemptId.getTaskID().toString());
  hadoopConf.set("mapred.task.id", taskAttemptId.toString());
  hadoopConf.setBoolean("mapred.task.is.map", true);
  hadoopConf.setInt("mapred.task.partition", 0);
  tablePath = new File("target/stream_output").getCanonicalPath();
  String dbName = "default";
  String tableName = "stream_table_output";
  AbsoluteTableIdentifier identifier = AbsoluteTableIdentifier.from(tablePath,
      new CarbonTableIdentifier(dbName, tableName, UUID.randomUUID().toString()));
  CarbonTable table = StoreCreator.createTable(identifier);
  String factFilePath = new File("../hadoop/src/test/resources/data.csv").getCanonicalPath();
  carbonLoadModel = StoreCreator.buildCarbonLoadModel(table, factFilePath, identifier);
}
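Besides building the ids, this test copies them into the legacy mapred.* configuration keys that old-API code still reads. A small sketch of that round trip (the property names are taken from the test above; the identifier values are illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;

public class LegacyIdKeysSketch {
  public static void main(String[] args) {
    JobID jobId = new JobID("local", 0);
    TaskAttemptID attemptId = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 0), 0);
    Configuration conf = new Configuration();
    // Store the string forms under the legacy keys, as the test does
    conf.set("mapred.job.id", jobId.toString());
    conf.set("mapred.task.id", attemptId.toString());
    // Old-API readers parse the ids back out of the configuration
    System.out.println(JobID.forName(conf.get("mapred.job.id")));
    System.out.println(TaskAttemptID.forName(conf.get("mapred.task.id")));
  }
}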
Use of org.apache.hadoop.mapreduce.JobID in project hive by apache.
The class TempletonControllerJob, method run.
/**
 * Enqueue the job and print out the job id for later collection.
 * @see org.apache.hive.hcatalog.templeton.CompleteDelegator
 */
@Override
public int run(String[] args)
    throws IOException, InterruptedException, ClassNotFoundException, TException {
  if (LOG.isDebugEnabled()) {
    LOG.debug("Preparing to submit job: " + Arrays.toString(args));
  }
  Configuration conf = getConf();
  conf.set(JAR_ARGS_NAME, TempletonUtils.encodeArray(args));
  String memoryMb = appConf.mapperMemoryMb();
  if (memoryMb != null && memoryMb.length() != 0) {
    conf.set(AppConfig.HADOOP_MAP_MEMORY_MB, memoryMb);
  }
  String amMemoryMB = appConf.amMemoryMb();
  if (amMemoryMB != null && !amMemoryMB.isEmpty()) {
    conf.set(AppConfig.HADOOP_MR_AM_MEMORY_MB, amMemoryMB);
  }
  String amJavaOpts = appConf.controllerAMChildOpts();
  if (amJavaOpts != null && !amJavaOpts.isEmpty()) {
    conf.set(AppConfig.HADOOP_MR_AM_JAVA_OPTS, amJavaOpts);
  }
  String user = UserGroupInformation.getCurrentUser().getShortUserName();
  conf.set("user.name", user);
  job = new Job(conf);
  job.setJarByClass(LaunchMapper.class);
  job.setJobName(TempletonControllerJob.class.getSimpleName());
  job.setMapperClass(LaunchMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setInputFormatClass(SingleInputFormat.class);
  NullOutputFormat<NullWritable, NullWritable> of = new NullOutputFormat<NullWritable, NullWritable>();
  job.setOutputFormatClass(of.getClass());
  job.setNumReduceTasks(0);
  JobClient jc = new JobClient(new JobConf(job.getConfiguration()));
  if (UserGroupInformation.isSecurityEnabled()) {
    Token<DelegationTokenIdentifier> mrdt = jc.getDelegationToken(new Text("mr token"));
    job.getCredentials().addToken(new Text("mr token"), mrdt);
  }
  LauncherDelegator.JobType jobType = LauncherDelegator.JobType.valueOf(conf.get(JOB_TYPE));
  String tokenStrForm = null;
  if (jobType == LauncherDelegator.JobType.HIVE) {
    tokenStrForm = addToken(job, user, SecureProxySupport.HIVE_SERVICE);
  } else {
    tokenStrForm = addToken(job, user, SecureProxySupport.HCAT_SERVICE);
  }
  job.submit();
  JobID submittedJobId = job.getJobID();
  if (tokenStrForm != null) {
    // so that it can be cancelled later from CompleteDelegator
    DelegationTokenCache.getStringFormTokenCache()
        .storeDelegationToken(submittedJobId.toString(), tokenStrForm);
    LOG.debug("Added delegation token for jobId=" + submittedJobId.toString() + " user=" + user);
  }
  return 0;
}
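After job.submit(), the JobID's string form becomes the handle that is stored alongside the delegation token and used later (for example by CompleteDelegator) to find the job again. Below is a hedged sketch of looking a job back up from that string with the new-API Cluster client; it illustrates the Hadoop API only and is not how Templeton itself performs the lookup:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Cluster;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobID;

public class JobLookupSketch {
  // Looks a submitted job back up from the id string that was stored at submit time
  static void printState(Configuration conf, String jobIdString) throws Exception {
    Cluster cluster = new Cluster(conf);
    try {
      Job job = cluster.getJob(JobID.forName(jobIdString));
      if (job != null) {
        System.out.println(jobIdString + " -> " + job.getStatus().getState());
      }
    } finally {
      cluster.close();
    }
  }
}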
Use of org.apache.hadoop.mapreduce.JobID in project hive by apache.
The class HiveIcebergOutputCommitter, method abortJob.
/**
 * Removes the generated data files if there is a commit file already generated for them.
 * The cleanup at the end removes the temporary directories as well.
 * @param originalContext The job context
 * @param status The status of the job
 * @throws IOException if there is a failure deleting the files
 */
@Override
public void abortJob(JobContext originalContext, int status) throws IOException {
  JobContext jobContext = TezUtil.enrichContextWithVertexId(originalContext);
  JobConf jobConf = jobContext.getJobConf();
  LOG.info("Job {} is aborted. Data file cleaning started", jobContext.getJobID());
  Collection<String> outputs = HiveIcebergStorageHandler.outputTables(jobContext.getJobConf());
  Collection<String> jobLocations = new ConcurrentLinkedQueue<>();
  ExecutorService fileExecutor = fileExecutor(jobConf);
  ExecutorService tableExecutor = tableExecutor(jobConf, outputs.size());
  try {
    // Cleans up the changes for the output tables in parallel
    Tasks.foreach(outputs)
        .suppressFailureWhenFinished()
        .executeWith(tableExecutor)
        .onFailure((output, exc) -> LOG.warn("Failed cleanup table {} on abort job", output, exc))
        .run(output -> {
          LOG.info("Cleaning job for jobID: {}, table: {}", jobContext.getJobID(), output);
          Table table = HiveIcebergStorageHandler.table(jobConf, output);
          String jobLocation = generateJobLocation(table.location(), jobConf, jobContext.getJobID());
          jobLocations.add(jobLocation);
          // list jobLocation to get number of forCommit files
          // we do this because map/reduce num in jobConf is unreliable and we have no access to vertex status info
          int numTasks = listForCommits(jobConf, jobLocation).size();
          Collection<DataFile> dataFiles =
              dataFiles(numTasks, fileExecutor, table.location(), jobContext, table.io(), false);
          // Check if we have files already committed and remove data files if there are any
          if (dataFiles.size() > 0) {
            Tasks.foreach(dataFiles)
                .retry(3)
                .suppressFailureWhenFinished()
                .executeWith(fileExecutor)
                .onFailure((file, exc) -> LOG.warn("Failed to remove data file {} on abort job", file.path(), exc))
                .run(file -> table.io().deleteFile(file.path().toString()));
          }
        }, IOException.class);
  } finally {
    fileExecutor.shutdown();
    if (tableExecutor != null) {
      tableExecutor.shutdown();
    }
  }
  LOG.info("Job {} is aborted. Data file cleaning finished", jobContext.getJobID());
  cleanup(jobContext, jobLocations);
}
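All of the cleanup above is keyed off jobContext.getJobID(), which generateJobLocation combines with the table location to find the job's temporary output. A hypothetical sketch of such a per-job path follows; the real generateJobLocation in HiveIcebergOutputCommitter may use a different layout, so this is only an illustration:

import org.apache.hadoop.mapreduce.JobID;

public class JobLocationSketch {
  // Hypothetical helper: derives a per-job temp directory from the table location and the JobID.
  // The actual layout used by HiveIcebergOutputCommitter.generateJobLocation() may differ.
  static String jobLocation(String tableLocation, JobID jobId) {
    return tableLocation + "/temp/" + jobId.toString();
  }

  public static void main(String[] args) {
    System.out.println(jobLocation("/warehouse/db/tbl", new JobID("example", 7)));
  }
}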