Search in sources :

Example 1 with HCatOutputFormat

use of org.apache.hive.hcatalog.mapreduce.HCatOutputFormat in project hive by apache.

Method prepareWrite of class HCatOutputFormatWriter.

/**
 * Sets up the output job and returns a context carrying its configuration.
 *
 * <p>Builds an {@link OutputJobInfo} from the database name, table name and
 * partition key/values exposed by {@code we}, registers it and the table
 * schema on a fresh {@link Job} created from {@code conf}, validates the
 * output specification and runs the output committer's {@code setupJob}.
 *
 * @return a {@code WriterContextImpl} holding the configuration of the
 *         prepared job
 * @throws HCatException with {@code ErrorType.ERROR_NOT_INITIALIZED} if any
 *         setup step fails with an {@link IOException} or the thread is
 *         interrupted; in the latter case the thread's interrupt status is
 *         restored before the exception is thrown
 */
@Override
public WriterContext prepareWrite() throws HCatException {
    OutputJobInfo jobInfo = OutputJobInfo.create(we.getDbName(), we.getTableName(), we.getPartitionKVs());
    Job job;
    try {
        job = new Job(conf);
        HCatOutputFormat.setOutput(job, jobInfo);
        // The schema is read back from the job configuration that setOutput just populated.
        HCatOutputFormat.setSchema(job, HCatOutputFormat.getTableSchema(job.getConfiguration()));
        HCatOutputFormat outFormat = new HCatOutputFormat();
        outFormat.checkOutputSpecs(job);
        // A task attempt context is only needed here to obtain the committer; the job-level setup is what matters.
        outFormat.getOutputCommitter(ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(job.getConfiguration(), ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID())).setupJob(job);
    } catch (IOException e) {
        throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
    } catch (InterruptedException e) {
        // Wrapping the exception would otherwise hide the interruption from callers; restore the flag.
        Thread.currentThread().interrupt();
        throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
    }
    WriterContextImpl cntxt = new WriterContextImpl();
    cntxt.setConf(job.getConfiguration());
    return cntxt;
}
Also used : HCatException(org.apache.hive.hcatalog.common.HCatException) OutputJobInfo(org.apache.hive.hcatalog.mapreduce.OutputJobInfo) HCatOutputFormat(org.apache.hive.hcatalog.mapreduce.HCatOutputFormat) IOException(java.io.IOException) Job(org.apache.hadoop.mapreduce.Job)

Example 2 with HCatOutputFormat

use of org.apache.hive.hcatalog.mapreduce.HCatOutputFormat in project hive by apache.

Method copyTable of class TestE2EScenarios.

/**
 * Copies every record of table {@code in} into table {@code out} by driving
 * the HCatalog input and output formats by hand, one input split at a time.
 *
 * <p>For each split a record reader and a record writer are created, all
 * key/value pairs are streamed from one to the other, and the task is
 * committed. The job is committed once all splits have been processed.
 *
 * @param in  name of the table to read from (passed with a {@code null} database name)
 * @param out name of the table to write to (passed with a {@code null} database name)
 * @throws IOException          if reading, writing or committing fails
 * @throws InterruptedException if one of the underlying calls is interrupted
 */
private void copyTable(String in, String out) throws IOException, InterruptedException {
    Job ijob = new Job();
    Job ojob = new Job();
    HCatInputFormat inpy = new HCatInputFormat();
    inpy.setInput(ijob, null, in);
    HCatOutputFormat oupy = new HCatOutputFormat();
    // setOutput/setSchema are static; call them on the class rather than through the instance.
    HCatOutputFormat.setOutput(ojob, OutputJobInfo.create(null, out, new HashMap<String, String>()));
    // Test HCatContext
    System.err.println("HCatContext INSTANCE is present : " + HCatContext.INSTANCE.getConf().isPresent());
    if (HCatContext.INSTANCE.getConf().isPresent()) {
        System.err.println("HCatContext tinyint->int promotion says " + HCatContext.INSTANCE.getConf().get().getBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT));
    }
    HCatSchema tableSchema = inpy.getTableSchema(ijob.getConfiguration());
    System.err.println("Copying from [" + in + "] to [" + out + "] with schema : " + tableSchema.toString());
    HCatOutputFormat.setSchema(ojob, tableSchema);
    oupy.checkOutputSpecs(ojob);
    OutputCommitter oc = oupy.getOutputCommitter(createTaskAttemptContext(ojob.getConfiguration()));
    oc.setupJob(ojob);
    for (InputSplit split : inpy.getSplits(ijob)) {
        TaskAttemptContext rtaskContext = createTaskAttemptContext(ijob.getConfiguration());
        TaskAttemptContext wtaskContext = createTaskAttemptContext(ojob.getConfiguration());
        RecordReader<WritableComparable, HCatRecord> rr = inpy.createRecordReader(split, rtaskContext);
        try {
            rr.initialize(split, rtaskContext);
            OutputCommitter taskOc = oupy.getOutputCommitter(wtaskContext);
            taskOc.setupTask(wtaskContext);
            RecordWriter<WritableComparable<?>, HCatRecord> rw = oupy.getRecordWriter(wtaskContext);
            while (rr.nextKeyValue()) {
                rw.write(rr.getCurrentKey(), rr.getCurrentValue());
            }
            rw.close(wtaskContext);
            taskOc.commitTask(wtaskContext);
        } finally {
            // Release the reader even when the copy of this split fails part-way through.
            rr.close();
        }
    }
    oc.commitJob(ojob);
}
Also used : OutputCommitter(org.apache.hadoop.mapreduce.OutputCommitter) HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) HashMap(java.util.HashMap) WritableComparable(org.apache.hadoop.io.WritableComparable) HCatOutputFormat(org.apache.hive.hcatalog.mapreduce.HCatOutputFormat) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) Job(org.apache.hadoop.mapreduce.Job) InputSplit(org.apache.hadoop.mapreduce.InputSplit) HCatInputFormat(org.apache.hive.hcatalog.mapreduce.HCatInputFormat) HCatRecord(org.apache.hive.hcatalog.data.HCatRecord)

Example 3 with HCatOutputFormat

use of org.apache.hive.hcatalog.mapreduce.HCatOutputFormat in project hive by apache.

Method write of class HCatOutputFormatWriter.

/**
 * Writes all records supplied by {@code recordItr} through an
 * {@link HCatOutputFormat} record writer and commits the task.
 *
 * <p>The id obtained from {@code sp.getId()} is passed to
 * {@code setVarsInConf} and used to build the task attempt id for this
 * write. Each record is written with a {@code null} key. After the writer
 * is closed the task is committed only if the committer reports that a
 * commit is needed.
 *
 * <p>If writing fails with an {@link IOException} or the thread is
 * interrupted, the task is aborted (when a committer was obtained) and the
 * failure is rethrown as an {@link HCatException}.
 *
 * @param recordItr source of the records to write
 * @throws HCatException with message "Failed while writing" if the write
 *         fails, or with {@code ErrorType.ERROR_INTERNAL_EXCEPTION} if the
 *         subsequent task abort itself fails; when the cause was an
 *         interruption the thread's interrupt status is restored
 */
@Override
public void write(Iterator<HCatRecord> recordItr) throws HCatException {
    int id = sp.getId();
    setVarsInConf(id);
    HCatOutputFormat outFormat = new HCatOutputFormat();
    TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(conf, new TaskAttemptID(ShimLoader.getHadoopShims().getHCatShim().createTaskID(), id));
    OutputCommitter committer = null;
    Exception failure = null;
    boolean interrupted = false;
    try {
        committer = outFormat.getOutputCommitter(cntxt);
        committer.setupTask(cntxt);
        RecordWriter<WritableComparable<?>, HCatRecord> writer = outFormat.getRecordWriter(cntxt);
        while (recordItr.hasNext()) {
            HCatRecord rec = recordItr.next();
            writer.write(null, rec);
        }
        writer.close(cntxt);
        if (committer.needsTaskCommit(cntxt)) {
            committer.commitTask(cntxt);
        }
    } catch (IOException e) {
        failure = e;
    } catch (InterruptedException e) {
        failure = e;
        interrupted = true;
    }
    if (failure == null) {
        return;
    }
    // Single failure path shared by both exception types: abort first, then report the original cause.
    try {
        // committer stays null when getOutputCommitter itself failed; nothing to abort then.
        if (null != committer) {
            committer.abortTask(cntxt);
        }
    } catch (IOException e1) {
        throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1);
    } finally {
        // Restore the flag only after the abort attempt so the cleanup is not run on an interrupted thread.
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }
    throw new HCatException("Failed while writing", failure);
}
Also used : OutputCommitter(org.apache.hadoop.mapreduce.OutputCommitter) WritableComparable(org.apache.hadoop.io.WritableComparable) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) HCatException(org.apache.hive.hcatalog.common.HCatException) HCatOutputFormat(org.apache.hive.hcatalog.mapreduce.HCatOutputFormat) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) IOException(java.io.IOException) HCatRecord(org.apache.hive.hcatalog.data.HCatRecord)

Aggregations

HCatOutputFormat (org.apache.hive.hcatalog.mapreduce.HCatOutputFormat)3 IOException (java.io.IOException)2 WritableComparable (org.apache.hadoop.io.WritableComparable)2 Job (org.apache.hadoop.mapreduce.Job)2 OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter)2 TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext)2 HCatException (org.apache.hive.hcatalog.common.HCatException)2 HCatRecord (org.apache.hive.hcatalog.data.HCatRecord)2 HashMap (java.util.HashMap)1 InputSplit (org.apache.hadoop.mapreduce.InputSplit)1 TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID)1 HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema)1 HCatInputFormat (org.apache.hive.hcatalog.mapreduce.HCatInputFormat)1 OutputJobInfo (org.apache.hive.hcatalog.mapreduce.OutputJobInfo)1