Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hive by apache.
The class TestE2EScenarios, method copyTable:
private void copyTable(String in, String out) throws IOException, InterruptedException {
  Job ijob = new Job();
  Job ojob = new Job();
  HCatInputFormat inpy = new HCatInputFormat();
  inpy.setInput(ijob, null, in);
  HCatOutputFormat oupy = new HCatOutputFormat();
  oupy.setOutput(ojob, OutputJobInfo.create(null, out, new HashMap<String, String>()));
  // Test HCatContext
  System.err.println("HCatContext INSTANCE is present : " + HCatContext.INSTANCE.getConf().isPresent());
  if (HCatContext.INSTANCE.getConf().isPresent()) {
    System.err.println("HCatContext tinyint->int promotion says " + HCatContext.INSTANCE.getConf().get().getBoolean(
        HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT));
  }
  HCatSchema tableSchema = inpy.getTableSchema(ijob.getConfiguration());
  System.err.println("Copying from [" + in + "] to [" + out + "] with schema : " + tableSchema.toString());
  oupy.setSchema(ojob, tableSchema);
  oupy.checkOutputSpecs(ojob);
  OutputCommitter oc = oupy.getOutputCommitter(createTaskAttemptContext(ojob.getConfiguration()));
  oc.setupJob(ojob);
  for (InputSplit split : inpy.getSplits(ijob)) {
    TaskAttemptContext rtaskContext = createTaskAttemptContext(ijob.getConfiguration());
    TaskAttemptContext wtaskContext = createTaskAttemptContext(ojob.getConfiguration());
    RecordReader<WritableComparable, HCatRecord> rr = inpy.createRecordReader(split, rtaskContext);
    rr.initialize(split, rtaskContext);
    OutputCommitter taskOc = oupy.getOutputCommitter(wtaskContext);
    taskOc.setupTask(wtaskContext);
    RecordWriter<WritableComparable<?>, HCatRecord> rw = oupy.getRecordWriter(wtaskContext);
    while (rr.nextKeyValue()) {
      rw.write(rr.getCurrentKey(), rr.getCurrentValue());
    }
    rw.close(wtaskContext);
    taskOc.commitTask(wtaskContext);
    rr.close();
  }
  oc.commitJob(ojob);
}
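
The createTaskAttemptContext(...) helper called above is not part of this snippet. A minimal sketch of what such a helper can look like on Hadoop 2, assuming a plain TaskAttemptContextImpl is acceptable (the Hive test itself goes through the HCatalog shim layer, so the exact construction may differ):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

// Hypothetical helper: wraps a Configuration in a TaskAttemptContext with a fresh, empty
// attempt id, which is enough for single-attempt, local test scenarios like the copy above.
private static TaskAttemptContext createTaskAttemptContext(Configuration conf) {
  return new TaskAttemptContextImpl(new Configuration(conf), new TaskAttemptID());
}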
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hive by apache.
The class HCatOutputFormatWriter, method write:
@Override
public void write(Iterator<HCatRecord> recordItr) throws HCatException {
  int id = sp.getId();
  setVarsInConf(id);
  HCatOutputFormat outFormat = new HCatOutputFormat();
  TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(
      conf, new TaskAttemptID(ShimLoader.getHadoopShims().getHCatShim().createTaskID(), id));
  OutputCommitter committer = null;
  RecordWriter<WritableComparable<?>, HCatRecord> writer;
  try {
    committer = outFormat.getOutputCommitter(cntxt);
    committer.setupTask(cntxt);
    writer = outFormat.getRecordWriter(cntxt);
    while (recordItr.hasNext()) {
      HCatRecord rec = recordItr.next();
      writer.write(null, rec);
    }
    writer.close(cntxt);
    if (committer.needsTaskCommit(cntxt)) {
      committer.commitTask(cntxt);
    }
  } catch (IOException e) {
    if (null != committer) {
      try {
        committer.abortTask(cntxt);
      } catch (IOException e1) {
        throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1);
      }
    }
    throw new HCatException("Failed while writing", e);
  } catch (InterruptedException e) {
    if (null != committer) {
      try {
        committer.abortTask(cntxt);
      } catch (IOException e1) {
        throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1);
      }
    }
    throw new HCatException("Failed while writing", e);
  }
}
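
For context, a hedged usage sketch of the write path above: building a DefaultHCatRecord and handing an iterator of records to an HCatWriter. The writer construction (DataTransferFactory, WriteEntity, the sp slave context) is omitted, and the variable name hcatWriter is assumed:

// Minimal sketch; 'hcatWriter' stands in for an already-configured HCatWriter instance.
List<HCatRecord> records = new ArrayList<HCatRecord>();
DefaultHCatRecord rec = new DefaultHCatRecord(2);  // a two-column record
rec.set(0, 1);             // e.g. an int column
rec.set(1, "first-row");   // e.g. a string column
records.add(rec);
hcatWriter.write(records.iterator());  // drives the write(Iterator<HCatRecord>) shown above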
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hive by apache.
The class DynamicPartitionFileRecordWriterContainer, method close:
@Override
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
  Reporter reporter = InternalUtil.createReporter(context);
  for (RecordWriter<? super WritableComparable<?>, ? super Writable> bwriter : baseDynamicWriters.values()) {
    // We are in RecordWriter.close(), so it makes sense that the context would be TaskInputOutput.
    bwriter.close(reporter);
  }
  TaskCommitContextRegistry.getInstance().register(context, new TaskCommitContextRegistry.TaskCommitterProxy() {

    @Override
    public void abortTask(TaskAttemptContext context) throws IOException {
      for (Map.Entry<String, OutputJobInfo> outputJobInfoEntry : dynamicOutputJobInfo.entrySet()) {
        String dynKey = outputJobInfoEntry.getKey();
        OutputJobInfo outputJobInfo = outputJobInfoEntry.getValue();
        LOG.info("Aborting task-attempt for " + outputJobInfo.getLocation());
        baseDynamicCommitters.get(dynKey).abortTask(dynamicContexts.get(dynKey));
      }
    }

    @Override
    public void commitTask(TaskAttemptContext context) throws IOException {
      for (Map.Entry<String, OutputJobInfo> outputJobInfoEntry : dynamicOutputJobInfo.entrySet()) {
        String dynKey = outputJobInfoEntry.getKey();
        OutputJobInfo outputJobInfo = outputJobInfoEntry.getValue();
        LOG.info("Committing task-attempt for " + outputJobInfo.getLocation());
        TaskAttemptContext dynContext = dynamicContexts.get(dynKey);
        OutputCommitter dynCommitter = baseDynamicCommitters.get(dynKey);
        if (dynCommitter.needsTaskCommit(dynContext)) {
          dynCommitter.commitTask(dynContext);
        } else {
          LOG.info("Skipping commitTask() for " + outputJobInfo.getLocation());
        }
      }
    }
  });
}
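
Note that close() only registers the proxy; the actual commit or abort of the dynamic-partition writers happens later, when the output committer consults the registry for this task attempt. A hedged sketch of that hand-off, assuming the registry exposes commitTask/abortTask lookups keyed by the TaskAttemptContext (call sites and names assumed, not taken from this snippet):

// Sketch of the committer side replaying what was registered in close() above.
TaskCommitContextRegistry registry = TaskCommitContextRegistry.getInstance();
try {
  registry.commitTask(context);   // invokes the per-partition commitTask() registered above
} catch (IOException e) {
  registry.abortTask(context);    // falls back to the registered abortTask() on failure
  throw e;
}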
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project hive by apache.
The class HCatInputFormatReader, method read:
@Override
public Iterator<HCatRecord> read() throws HCatException {
  HCatInputFormat inpFmt = new HCatInputFormat();
  RecordReader<WritableComparable, HCatRecord> rr;
  try {
    TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(conf, new TaskAttemptID());
    rr = inpFmt.createRecordReader(split, cntxt);
    rr.initialize(split, cntxt);
  } catch (IOException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  } catch (InterruptedException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  }
  return new HCatRecordItr(rr);
}
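
A short usage sketch for consuming the iterator returned by read(); here hcatReader stands in for an HCatReader obtained on the slave side (its construction from the serialized ReaderContext/split is not shown):

// Iterate the records produced by read(); positional column access is illustrative.
Iterator<HCatRecord> it = hcatReader.read();
while (it.hasNext()) {
  HCatRecord record = it.next();
  System.out.println("first column = " + record.get(0));
}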
Use of org.apache.hadoop.mapreduce.TaskAttemptContext in project crunch by cloudera.
The class CrunchMultipleOutputs, method getContext:
// Create a taskAttemptContext for the named output with
// output format and output key/value types put in the context
private TaskAttemptContext getContext(String nameOutput) throws IOException {
  TaskAttemptContext taskContext = taskContexts.get(nameOutput);
  if (taskContext != null) {
    return taskContext;
  }
  // The following trick leverages the instantiation of a record writer via
  // the job thus supporting arbitrary output formats.
  Job job = new Job(context.getConfiguration());
  job.getConfiguration().set("crunch.namedoutput", nameOutput);
  job.setOutputFormatClass(getNamedOutputFormatClass(context, nameOutput));
  job.setOutputKeyClass(getNamedOutputKeyClass(context, nameOutput));
  job.setOutputValueClass(getNamedOutputValueClass(context, nameOutput));
  taskContext = TaskAttemptContextFactory.create(job.getConfiguration(), context.getTaskAttemptID());
  taskContexts.put(nameOutput, taskContext);
  return taskContext;
}
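
What the cached per-output context buys you is the ability to instantiate the named output's format reflectively and open a record writer against it. A minimal sketch of that idea (not CrunchMultipleOutputs' actual code; the helper name writerFor is assumed):

// Sketch: open a RecordWriter for a named output using the cached per-output context.
private RecordWriter<?, ?> writerFor(String nameOutput)
    throws IOException, InterruptedException, ClassNotFoundException {
  TaskAttemptContext taskContext = getContext(nameOutput);
  OutputFormat<?, ?> outputFormat = ReflectionUtils.newInstance(
      taskContext.getOutputFormatClass(), taskContext.getConfiguration());
  return outputFormat.getRecordWriter(taskContext);
}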