
Example 66 with JobID

use of org.apache.hadoop.mapreduce.JobID in project tez by apache.

the class TestMRInputHelpers method verifyLocationHints.

private void verifyLocationHints(Path inputSplitsDir, List<TaskLocationHint> actual) throws Exception {
    JobID jobId = new JobID("dummy", 1);
    JobSplit.TaskSplitMetaInfo[] splitsInfo = SplitMetaInfoReader.readSplitMetaInfo(jobId, remoteFs, conf, inputSplitsDir);
    int splitsCount = splitsInfo.length;
    List<TaskLocationHint> locationHints = new ArrayList<TaskLocationHint>(splitsCount);
    for (int i = 0; i < splitsCount; ++i) {
        locationHints.add(TaskLocationHint.createTaskLocationHint(new HashSet<String>(Arrays.asList(splitsInfo[i].getLocations())), null));
    }
    Assert.assertEquals(locationHints, actual);
}
Also used : TaskLocationHint(org.apache.tez.dag.api.TaskLocationHint) ArrayList(java.util.ArrayList) JobID(org.apache.hadoop.mapreduce.JobID) HashSet(java.util.HashSet)
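
The test above constructs a JobID directly from a jobtracker identifier and a numeric id. A minimal sketch of that constructor and the string round-trip follows; the identifier "20240101120000" and id 7 are illustrative values, not taken from the example.

import org.apache.hadoop.mapreduce.JobID;

public class JobIdSketch {
    public static void main(String[] args) {
        // jtIdentifier + sequence number, as in new JobID("dummy", 1) above
        JobID jobId = new JobID("20240101120000", 7);
        String text = jobId.toString();           // "job_20240101120000_0007"
        JobID parsed = JobID.forName(text);       // parses the string form back into a JobID
        System.out.println(jobId.equals(parsed)); // true
    }
}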

Example 67 with JobID

use of org.apache.hadoop.mapreduce.JobID in project carbondata by apache.

the class CarbonStreamInputFormatTest method setUp.

@Override
protected void setUp() throws Exception {
    tablePath = new File("target/stream_input").getCanonicalPath();
    String dbName = "default";
    String tableName = "stream_table_input";
    identifier = AbsoluteTableIdentifier.from(tablePath, new CarbonTableIdentifier(dbName, tableName, UUID.randomUUID().toString()));
    JobID jobId = CarbonInputFormatUtil.getJobId(new Date(), 0);
    TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
    taskAttemptId = new TaskAttemptID(taskId, 0);
    hadoopConf = new Configuration();
    taskAttemptContext = new TaskAttemptContextImpl(hadoopConf, taskAttemptId);
}
Also used : TaskID(org.apache.hadoop.mapreduce.TaskID) CarbonTableIdentifier(org.apache.carbondata.core.metadata.CarbonTableIdentifier) Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) File(java.io.File) JobID(org.apache.hadoop.mapreduce.JobID) Date(java.util.Date)
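
The setup above derives a TaskAttemptID and a TaskAttemptContext from a JobID. A minimal sketch of that ID hierarchy (JobID, then TaskID, then TaskAttemptID) is shown below; the identifier "local1234" is illustrative, and the Carbon-specific CarbonInputFormatUtil.getJobId call is replaced by the plain JobID constructor.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskAttemptContextSketch {
    public static void main(String[] args) {
        JobID jobId = new JobID("local1234", 0);
        TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);      // map task 0 of the job
        TaskAttemptID attemptId = new TaskAttemptID(taskId, 0);  // first attempt of that task
        TaskAttemptContextImpl context =
            new TaskAttemptContextImpl(new Configuration(), attemptId);
        System.out.println(context.getTaskAttemptID()); // attempt_local1234_0000_m_000000_0
    }
}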

Example 68 with JobID

use of org.apache.hadoop.mapreduce.JobID in project carbondata by apache.

the class CarbonStreamOutputFormatTest method setUp.

@Override
protected void setUp() throws Exception {
    super.setUp();
    JobID jobId = CarbonInputFormatUtil.getJobId(new Date(), 0);
    TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
    taskAttemptId = new TaskAttemptID(taskId, 0);
    hadoopConf = new Configuration();
    hadoopConf.set("mapred.job.id", jobId.toString());
    hadoopConf.set("mapred.tip.id", taskAttemptId.getTaskID().toString());
    hadoopConf.set("mapred.task.id", taskAttemptId.toString());
    hadoopConf.setBoolean("mapred.task.is.map", true);
    hadoopConf.setInt("mapred.task.partition", 0);
    tablePath = new File("target/stream_output").getCanonicalPath();
    String dbName = "default";
    String tableName = "stream_table_output";
    AbsoluteTableIdentifier identifier = AbsoluteTableIdentifier.from(tablePath, new CarbonTableIdentifier(dbName, tableName, UUID.randomUUID().toString()));
    CarbonTable table = StoreCreator.createTable(identifier);
    String factFilePath = new File("../hadoop/src/test/resources/data.csv").getCanonicalPath();
    carbonLoadModel = StoreCreator.buildCarbonLoadModel(table, factFilePath, identifier);
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) TaskID(org.apache.hadoop.mapreduce.TaskID) Configuration(org.apache.hadoop.conf.Configuration) CarbonTableIdentifier(org.apache.carbondata.core.metadata.CarbonTableIdentifier) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) File(java.io.File) JobID(org.apache.hadoop.mapreduce.JobID) Date(java.util.Date)
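
This setUp() pushes the string forms of the IDs into the legacy "mapred.*" configuration keys. A minimal sketch of that pattern, including reading the keys back into typed IDs with the forName helpers, is below; the identifier "local5678" is illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;

public class LegacyIdKeysSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        JobID jobId = new JobID("local5678", 0);
        TaskAttemptID attemptId = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 0), 0);

        // The setUp() above stores the string forms under the legacy keys:
        conf.set("mapred.job.id", jobId.toString());
        conf.set("mapred.task.id", attemptId.toString());

        // They can be recovered as typed IDs later:
        JobID parsedJob = JobID.forName(conf.get("mapred.job.id"));
        TaskAttemptID parsedAttempt = TaskAttemptID.forName(conf.get("mapred.task.id"));
        System.out.println(parsedJob + " / " + parsedAttempt.getTaskID());
    }
}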

Example 69 with JobID

use of org.apache.hadoop.mapreduce.JobID in project hive by apache.

the class TempletonControllerJob method run.

/**
 * Enqueue the job and print out the job id for later collection.
 * @see org.apache.hive.hcatalog.templeton.CompleteDelegator
 */
@Override
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException, TException {
    if (LOG.isDebugEnabled()) {
        LOG.debug("Preparing to submit job: " + Arrays.toString(args));
    }
    Configuration conf = getConf();
    conf.set(JAR_ARGS_NAME, TempletonUtils.encodeArray(args));
    String memoryMb = appConf.mapperMemoryMb();
    if (memoryMb != null && memoryMb.length() != 0) {
        conf.set(AppConfig.HADOOP_MAP_MEMORY_MB, memoryMb);
    }
    String amMemoryMB = appConf.amMemoryMb();
    if (amMemoryMB != null && !amMemoryMB.isEmpty()) {
        conf.set(AppConfig.HADOOP_MR_AM_MEMORY_MB, amMemoryMB);
    }
    String amJavaOpts = appConf.controllerAMChildOpts();
    if (amJavaOpts != null && !amJavaOpts.isEmpty()) {
        conf.set(AppConfig.HADOOP_MR_AM_JAVA_OPTS, amJavaOpts);
    }
    String user = UserGroupInformation.getCurrentUser().getShortUserName();
    conf.set("user.name", user);
    job = new Job(conf);
    job.setJarByClass(LaunchMapper.class);
    job.setJobName(TempletonControllerJob.class.getSimpleName());
    job.setMapperClass(LaunchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormatClass(SingleInputFormat.class);
    NullOutputFormat<NullWritable, NullWritable> of = new NullOutputFormat<NullWritable, NullWritable>();
    job.setOutputFormatClass(of.getClass());
    job.setNumReduceTasks(0);
    JobClient jc = new JobClient(new JobConf(job.getConfiguration()));
    if (UserGroupInformation.isSecurityEnabled()) {
        Token<DelegationTokenIdentifier> mrdt = jc.getDelegationToken(new Text("mr token"));
        job.getCredentials().addToken(new Text("mr token"), mrdt);
    }
    LauncherDelegator.JobType jobType = LauncherDelegator.JobType.valueOf(conf.get(JOB_TYPE));
    String tokenStrForm = null;
    if (jobType == LauncherDelegator.JobType.HIVE) {
        tokenStrForm = addToken(job, user, SecureProxySupport.HIVE_SERVICE);
    } else {
        tokenStrForm = addToken(job, user, SecureProxySupport.HCAT_SERVICE);
    }
    job.submit();
    JobID submittedJobId = job.getJobID();
    if (tokenStrForm != null) {
        // so that it can be cancelled later from CompleteDelegator
        DelegationTokenCache.getStringFormTokenCache().storeDelegationToken(submittedJobId.toString(), tokenStrForm);
        LOG.debug("Added delegation token for jobId=" + submittedJobId.toString() + " user=" + user);
    }
    return 0;
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) DelegationTokenIdentifier(org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier) LauncherDelegator(org.apache.hive.hcatalog.templeton.LauncherDelegator) Text(org.apache.hadoop.io.Text) NullWritable(org.apache.hadoop.io.NullWritable) JobClient(org.apache.hadoop.mapred.JobClient) Job(org.apache.hadoop.mapreduce.Job) JobConf(org.apache.hadoop.mapred.JobConf) NullOutputFormat(org.apache.hadoop.mapreduce.lib.output.NullOutputFormat) JobID(org.apache.hadoop.mapreduce.JobID)
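
The core of run() is asynchronous submission followed by retrieving the JobID as a key for later bookkeeping (here, caching the delegation token). A minimal sketch of just that submission pattern is shown below; the class name and job name are illustrative, and mapper, input/output formats, and credentials are elided.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobID;

public class SubmitAndTrackSketch {
    public static JobID submit(Configuration conf) throws Exception {
        Job job = Job.getInstance(conf, "controller-job");
        job.setNumReduceTasks(0);
        // ... mapper, input/output formats, and credentials would be configured here ...
        job.submit();                          // returns immediately, unlike waitForCompletion()
        JobID submittedJobId = job.getJobID(); // only available after submit()
        return submittedJobId;                 // e.g. store tokens or state keyed by this id
    }
}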

Example 70 with JobID

use of org.apache.hadoop.mapreduce.JobID in project hive by apache.

the class HiveIcebergOutputCommitter method abortJob.

/**
 * Removes the generated data files if there is a commit file already generated for them.
 * The cleanup at the end removes the temporary directories as well.
 * @param originalContext The job context
 * @param status The status of the job
 * @throws IOException if there is a failure deleting the files
 */
@Override
public void abortJob(JobContext originalContext, int status) throws IOException {
    JobContext jobContext = TezUtil.enrichContextWithVertexId(originalContext);
    JobConf jobConf = jobContext.getJobConf();
    LOG.info("Job {} is aborted. Data file cleaning started", jobContext.getJobID());
    Collection<String> outputs = HiveIcebergStorageHandler.outputTables(jobContext.getJobConf());
    Collection<String> jobLocations = new ConcurrentLinkedQueue<>();
    ExecutorService fileExecutor = fileExecutor(jobConf);
    ExecutorService tableExecutor = tableExecutor(jobConf, outputs.size());
    try {
        // Cleans up the changes for the output tables in parallel
        Tasks.foreach(outputs).suppressFailureWhenFinished().executeWith(tableExecutor).onFailure((output, exc) -> LOG.warn("Failed cleanup table {} on abort job", output, exc)).run(output -> {
            LOG.info("Cleaning job for jobID: {}, table: {}", jobContext.getJobID(), output);
            Table table = HiveIcebergStorageHandler.table(jobConf, output);
            String jobLocation = generateJobLocation(table.location(), jobConf, jobContext.getJobID());
            jobLocations.add(jobLocation);
            // list jobLocation to get number of forCommit files
            // we do this because map/reduce num in jobConf is unreliable and we have no access to vertex status info
            int numTasks = listForCommits(jobConf, jobLocation).size();
            Collection<DataFile> dataFiles = dataFiles(numTasks, fileExecutor, table.location(), jobContext, table.io(), false);
            // Check if we have files already committed and remove data files if there are any
            if (dataFiles.size() > 0) {
                Tasks.foreach(dataFiles).retry(3).suppressFailureWhenFinished().executeWith(fileExecutor).onFailure((file, exc) -> LOG.warn("Failed to remove data file {} on abort job", file.path(), exc)).run(file -> table.io().deleteFile(file.path().toString()));
            }
        }, IOException.class);
    } finally {
        fileExecutor.shutdown();
        if (tableExecutor != null) {
            tableExecutor.shutdown();
        }
    }
    LOG.info("Job {} is aborted. Data file cleaning finished", jobContext.getJobID());
    cleanup(jobContext, jobLocations);
}
Also used : NotFoundException(org.apache.iceberg.exceptions.NotFoundException) Arrays(java.util.Arrays) ImmutableMap(org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap) FileSystem(org.apache.hadoop.fs.FileSystem) Catalogs(org.apache.iceberg.mr.Catalogs) ObjectInputStream(java.io.ObjectInputStream) LoggerFactory(org.slf4j.LoggerFactory) AppendFiles(org.apache.iceberg.AppendFiles) OutputFile(org.apache.iceberg.io.OutputFile) FileStatus(org.apache.hadoop.fs.FileStatus) TaskType(org.apache.hadoop.mapreduce.TaskType) OutputCommitter(org.apache.hadoop.mapred.OutputCommitter) TaskAttemptContext(org.apache.hadoop.mapred.TaskAttemptContext) Map(java.util.Map) Configuration(org.apache.hadoop.conf.Configuration) Path(org.apache.hadoop.fs.Path) ObjectOutputStream(java.io.ObjectOutputStream) TaskAttemptID(org.apache.hadoop.mapred.TaskAttemptID) JobID(org.apache.hadoop.mapreduce.JobID) DataFile(org.apache.iceberg.DataFile) ExecutorService(java.util.concurrent.ExecutorService) Properties(java.util.Properties) Logger(org.slf4j.Logger) Table(org.apache.iceberg.Table) Collection(java.util.Collection) HiveConf(org.apache.hadoop.hive.conf.HiveConf) InputFormatConfig(org.apache.iceberg.mr.InputFormatConfig) ThreadFactoryBuilder(org.apache.iceberg.relocated.com.google.common.util.concurrent.ThreadFactoryBuilder) Set(java.util.Set) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) Executors(java.util.concurrent.Executors) JobConf(org.apache.hadoop.mapred.JobConf) Util(org.apache.iceberg.hadoop.Util) JobContext(org.apache.hadoop.mapred.JobContext) ReplacePartitions(org.apache.iceberg.ReplacePartitions) Tasks(org.apache.iceberg.util.Tasks) Optional(java.util.Optional) SessionStateUtil(org.apache.hadoop.hive.ql.session.SessionStateUtil) Expressions(org.apache.iceberg.expressions.Expressions) FileIO(org.apache.iceberg.io.FileIO) VisibleForTesting(org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue)
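
abortJob() uses the JobID to locate and clean up per-job staging data. A minimal sketch of that idea, assuming a hypothetical job-scoped layout of "<tableLocation>/temp/<jobId>", is shown below; the real path comes from the committer's generateJobLocation() and may differ, and the Iceberg-specific parallel cleanup with Tasks.foreach is omitted.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobID;

public class AbortCleanupSketch {
    static void cleanJobLocation(Configuration conf, String tableLocation, JobID jobId) throws IOException {
        Path jobLocation = new Path(tableLocation, "temp/" + jobId); // hypothetical layout
        FileSystem fs = jobLocation.getFileSystem(conf);
        if (fs.exists(jobLocation)) {
            fs.delete(jobLocation, true); // recursively remove the per-job staging directory
        }
    }
}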

Aggregations

JobID (org.apache.hadoop.mapreduce.JobID): 79
Test (org.junit.Test): 33
IOException (java.io.IOException): 25
Configuration (org.apache.hadoop.conf.Configuration): 20
TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID): 20
TaskID (org.apache.hadoop.mapreduce.TaskID): 18
JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId): 17
Path (org.apache.hadoop.fs.Path): 14
ArrayList (java.util.ArrayList): 11
Job (org.apache.hadoop.mapreduce.Job): 11
JobConf (org.apache.hadoop.mapred.JobConf): 10
FileSystem (org.apache.hadoop.fs.FileSystem): 9
JobContext (org.apache.hadoop.mapreduce.JobContext): 7
JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl): 7
EventHandler (org.apache.hadoop.yarn.event.EventHandler): 7
HashMap (java.util.HashMap): 6
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 6
TaskAttemptInfo (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo): 6
OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter): 5
Event (org.apache.hadoop.mapreduce.jobhistory.Event): 5