
Example 66 with TaskAttemptContext

use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hive by apache.

the class TestE2EScenarios method copyTable.

private void copyTable(String in, String out) throws IOException, InterruptedException {
    Job ijob = new Job();
    Job ojob = new Job();
    HCatInputFormat inpy = new HCatInputFormat();
    inpy.setInput(ijob, null, in);
    HCatOutputFormat oupy = new HCatOutputFormat();
    oupy.setOutput(ojob, OutputJobInfo.create(null, out, new HashMap<String, String>()));
    // Test HCatContext
    System.err.println("HCatContext INSTANCE is present : " + HCatContext.INSTANCE.getConf().isPresent());
    if (HCatContext.INSTANCE.getConf().isPresent()) {
        System.err.println("HCatContext tinyint->int promotion says " + HCatContext.INSTANCE.getConf().get().getBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT));
    }
    HCatSchema tableSchema = inpy.getTableSchema(ijob.getConfiguration());
    System.err.println("Copying from [" + in + "] to [" + out + "] with schema : " + tableSchema.toString());
    oupy.setSchema(ojob, tableSchema);
    oupy.checkOutputSpecs(ojob);
    OutputCommitter oc = oupy.getOutputCommitter(createTaskAttemptContext(ojob.getConfiguration()));
    oc.setupJob(ojob);
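    // Copy split-by-split: each split gets its own read-side and write-side
    // task-attempt contexts, and each simulated task is committed before the
    // next split is processed.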
    for (InputSplit split : inpy.getSplits(ijob)) {
        TaskAttemptContext rtaskContext = createTaskAttemptContext(ijob.getConfiguration());
        TaskAttemptContext wtaskContext = createTaskAttemptContext(ojob.getConfiguration());
        RecordReader<WritableComparable, HCatRecord> rr = inpy.createRecordReader(split, rtaskContext);
        rr.initialize(split, rtaskContext);
        OutputCommitter taskOc = oupy.getOutputCommitter(wtaskContext);
        taskOc.setupTask(wtaskContext);
        RecordWriter<WritableComparable<?>, HCatRecord> rw = oupy.getRecordWriter(wtaskContext);
        while (rr.nextKeyValue()) {
            rw.write(rr.getCurrentKey(), rr.getCurrentValue());
        }
        rw.close(wtaskContext);
        taskOc.commitTask(wtaskContext);
        rr.close();
    }
    oc.commitJob(ojob);
}
Also used: OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter), HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema), HashMap (java.util.HashMap), WritableComparable (org.apache.hadoop.io.WritableComparable), HCatOutputFormat (org.apache.hive.hcatalog.mapreduce.HCatOutputFormat), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), Job (org.apache.hadoop.mapreduce.Job), InputSplit (org.apache.hadoop.mapreduce.InputSplit), HCatInputFormat (org.apache.hive.hcatalog.mapreduce.HCatInputFormat), HCatRecord (org.apache.hive.hcatalog.data.HCatRecord)
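
The createTaskAttemptContext(...) helper called above is not part of this excerpt; in the Hive test it presumably goes through the Hadoop shims, much as Example 67 does. A minimal sketch on Hadoop 2, assuming TaskAttemptContextImpl and a fresh TaskAttemptID are acceptable stand-ins:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

// Hypothetical helper: wraps a Configuration in a TaskAttemptContext with a
// synthetic (empty) TaskAttemptID, which is enough for local test scenarios.
private TaskAttemptContext createTaskAttemptContext(Configuration conf) {
    return new TaskAttemptContextImpl(new Configuration(conf), new TaskAttemptID());
}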

Example 67 with TaskAttemptContext

use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hive by apache.

the class HCatOutputFormatWriter method write.

@Override
public void write(Iterator<HCatRecord> recordItr) throws HCatException {
    int id = sp.getId();
    setVarsInConf(id);
    HCatOutputFormat outFormat = new HCatOutputFormat();
    TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(conf, new TaskAttemptID(ShimLoader.getHadoopShims().getHCatShim().createTaskID(), id));
    OutputCommitter committer = null;
    RecordWriter<WritableComparable<?>, HCatRecord> writer;
    try {
        committer = outFormat.getOutputCommitter(cntxt);
        committer.setupTask(cntxt);
        writer = outFormat.getRecordWriter(cntxt);
        while (recordItr.hasNext()) {
            HCatRecord rec = recordItr.next();
            writer.write(null, rec);
        }
        writer.close(cntxt);
        if (committer.needsTaskCommit(cntxt)) {
            committer.commitTask(cntxt);
        }
    } catch (IOException e) {
        if (null != committer) {
            try {
                committer.abortTask(cntxt);
            } catch (IOException e1) {
                throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1);
            }
        }
        throw new HCatException("Failed while writing", e);
    } catch (InterruptedException e) {
        if (null != committer) {
            try {
                committer.abortTask(cntxt);
            } catch (IOException e1) {
                throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1);
            }
        }
        throw new HCatException("Failed while writing", e);
    }
}
Also used: OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter), WritableComparable (org.apache.hadoop.io.WritableComparable), TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), HCatException (org.apache.hive.hcatalog.common.HCatException), HCatOutputFormat (org.apache.hive.hcatalog.mapreduce.HCatOutputFormat), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), IOException (java.io.IOException), HCatRecord (org.apache.hive.hcatalog.data.HCatRecord)
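
The two catch blocks above differ only in the exception type they handle. A sketch of the same abort-and-rethrow logic using Java 7 multi-catch (an equivalent restructuring, not the actual Hive source):

try {
    committer = outFormat.getOutputCommitter(cntxt);
    committer.setupTask(cntxt);
    writer = outFormat.getRecordWriter(cntxt);
    while (recordItr.hasNext()) {
        writer.write(null, recordItr.next());
    }
    writer.close(cntxt);
    if (committer.needsTaskCommit(cntxt)) {
        committer.commitTask(cntxt);
    }
} catch (IOException | InterruptedException e) {
    // Best-effort abort; wrap any secondary failure so it is not lost silently.
    if (committer != null) {
        try {
            committer.abortTask(cntxt);
        } catch (IOException e1) {
            throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1);
        }
    }
    throw new HCatException("Failed while writing", e);
}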

Example 68 with TaskAttemptContext

use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hive by apache.

the class DynamicPartitionFileRecordWriterContainer method close.

@Override
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
    Reporter reporter = InternalUtil.createReporter(context);
    for (RecordWriter<? super WritableComparable<?>, ? super Writable> bwriter : baseDynamicWriters.values()) {
        // We are in RecordWriter.close(), so it makes sense that the context
        // here is a TaskInputOutputContext.
        bwriter.close(reporter);
    }
    TaskCommitContextRegistry.getInstance().register(context, new TaskCommitContextRegistry.TaskCommitterProxy() {

        @Override
        public void abortTask(TaskAttemptContext context) throws IOException {
            for (Map.Entry<String, OutputJobInfo> outputJobInfoEntry : dynamicOutputJobInfo.entrySet()) {
                String dynKey = outputJobInfoEntry.getKey();
                OutputJobInfo outputJobInfo = outputJobInfoEntry.getValue();
                LOG.info("Aborting task-attempt for " + outputJobInfo.getLocation());
                baseDynamicCommitters.get(dynKey).abortTask(dynamicContexts.get(dynKey));
            }
        }

        @Override
        public void commitTask(TaskAttemptContext context) throws IOException {
            for (Map.Entry<String, OutputJobInfo> outputJobInfoEntry : dynamicOutputJobInfo.entrySet()) {
                String dynKey = outputJobInfoEntry.getKey();
                OutputJobInfo outputJobInfo = outputJobInfoEntry.getValue();
                LOG.info("Committing task-attempt for " + outputJobInfo.getLocation());
                TaskAttemptContext dynContext = dynamicContexts.get(dynKey);
                OutputCommitter dynCommitter = baseDynamicCommitters.get(dynKey);
                if (dynCommitter.needsTaskCommit(dynContext)) {
                    dynCommitter.commitTask(dynContext);
                } else {
                    LOG.info("Skipping commitTask() for " + outputJobInfo.getLocation());
                }
            }
        }
    });
}
Also used: FileOutputCommitter (org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter), OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter), Reporter (org.apache.hadoop.mapred.Reporter), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), IOException (java.io.IOException)
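
The proxy registered here is only exercised later, when the task attempt is actually committed or aborted. A hedged sketch of that consuming side, assuming the registry exposes commitTask/abortTask lookups keyed by task attempt (an assumption inferred from the register() call above, not verified against the registry's source):

// Hypothetical call site inside the enclosing output committer: defer to
// whatever TaskCommitterProxy was registered for this task attempt.
public void commitTask(TaskAttemptContext context) throws IOException {
    TaskCommitContextRegistry.getInstance().commitTask(context);
}

public void abortTask(TaskAttemptContext context) throws IOException {
    TaskCommitContextRegistry.getInstance().abortTask(context);
}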

Example 69 with TaskAttemptContext

use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hive by apache.

the class HCatInputFormatReader method read.

@Override
public Iterator<HCatRecord> read() throws HCatException {
    HCatInputFormat inpFmt = new HCatInputFormat();
    RecordReader<WritableComparable, HCatRecord> rr;
    try {
        TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(conf, new TaskAttemptID());
        rr = inpFmt.createRecordReader(split, cntxt);
        rr.initialize(split, cntxt);
    } catch (IOException e) {
        throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
    } catch (InterruptedException e) {
        throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
    }
    return new HCatRecordItr(rr);
}
Also used: WritableComparable (org.apache.hadoop.io.WritableComparable), TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), HCatException (org.apache.hive.hcatalog.common.HCatException), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), IOException (java.io.IOException), HCatInputFormat (org.apache.hive.hcatalog.mapreduce.HCatInputFormat), HCatRecord (org.apache.hive.hcatalog.data.HCatRecord)
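
HCatRecordItr, returned above, is not shown in this excerpt. A minimal sketch of such an adapter, assuming it does nothing more than drive the RecordReader from Iterator calls and rethrow checked exceptions unchecked:

import java.io.IOException;
import java.util.Iterator;
import java.util.NoSuchElementException;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hive.hcatalog.data.HCatRecord;

// Hypothetical adapter: exposes a RecordReader as a one-pass Iterator.
class HCatRecordItr implements Iterator<HCatRecord> {
    private final RecordReader<WritableComparable, HCatRecord> rr;
    private Boolean advanced; // null = reader not yet probed for the next record

    HCatRecordItr(RecordReader<WritableComparable, HCatRecord> rr) {
        this.rr = rr;
    }

    @Override
    public boolean hasNext() {
        if (advanced == null) {
            try {
                advanced = rr.nextKeyValue();
            } catch (IOException | InterruptedException e) {
                throw new RuntimeException(e);
            }
        }
        return advanced;
    }

    @Override
    public HCatRecord next() {
        if (!hasNext()) {
            throw new NoSuchElementException();
        }
        advanced = null; // force a fresh advance on the next probe
        try {
            return rr.getCurrentValue();
        } catch (IOException | InterruptedException e) {
            throw new RuntimeException(e);
        }
    }
}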

Example 70 with TaskAttemptContext

use of org.apache.hadoop.mapreduce.TaskAttemptContext in project crunch by cloudera.

the class CrunchMultipleOutputs method getContext.

    // Create a TaskAttemptContext for the named output, with the output
    // format and output key/value types set in the context's configuration.
private TaskAttemptContext getContext(String nameOutput) throws IOException {
    TaskAttemptContext taskContext = taskContexts.get(nameOutput);
    if (taskContext != null) {
        return taskContext;
    }
    // The following trick leverages the instantiation of a record writer via
    // the job, thus supporting arbitrary output formats.
    Job job = new Job(context.getConfiguration());
    job.getConfiguration().set("crunch.namedoutput", nameOutput);
    job.setOutputFormatClass(getNamedOutputFormatClass(context, nameOutput));
    job.setOutputKeyClass(getNamedOutputKeyClass(context, nameOutput));
    job.setOutputValueClass(getNamedOutputValueClass(context, nameOutput));
    taskContext = TaskAttemptContextFactory.create(job.getConfiguration(), context.getTaskAttemptID());
    taskContexts.put(nameOutput, taskContext);
    return taskContext;
}
Also used: TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), Job (org.apache.hadoop.mapreduce.Job)
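
The per-output context cached by getContext() is presumably consumed when a record writer for the named output is needed, mirroring Hadoop's own MultipleOutputs. A hedged sketch of that consuming side (the method name getRecordWriter and its generics are illustrative, not Crunch's actual code):

import java.io.IOException;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.util.ReflectionUtils;

// Hypothetical consumer of getContext(): instantiate the named output's
// format through the per-output context and obtain its record writer.
@SuppressWarnings("unchecked")
private <K, V> RecordWriter<K, V> getRecordWriter(String nameOutput)
        throws IOException, InterruptedException {
    TaskAttemptContext taskContext = getContext(nameOutput);
    OutputFormat<K, V> format;
    try {
        format = (OutputFormat<K, V>) ReflectionUtils.newInstance(
                taskContext.getOutputFormatClass(), taskContext.getConfiguration());
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
    return format.getRecordWriter(taskContext);
}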

Aggregations

TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 110 usages
Configuration (org.apache.hadoop.conf.Configuration): 58
Job (org.apache.hadoop.mapreduce.Job): 44
Path (org.apache.hadoop.fs.Path): 39
TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl): 38
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 36
Test (org.junit.Test): 35
TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID): 33
JobContext (org.apache.hadoop.mapreduce.JobContext): 28
IOException (java.io.IOException): 27
File (java.io.File): 22
LongWritable (org.apache.hadoop.io.LongWritable): 22
JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl): 21
RecordWriter (org.apache.hadoop.mapreduce.RecordWriter): 19
MapContextImpl (org.apache.hadoop.mapreduce.task.MapContextImpl): 17
FileSystem (org.apache.hadoop.fs.FileSystem): 16
OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter): 12
ArrayList (java.util.ArrayList): 11
BytesWritable (org.apache.hadoop.io.BytesWritable): 10
MapFile (org.apache.hadoop.io.MapFile): 10