use of org.apache.hadoop.mapreduce.OutputCommitter in project flink by apache.
the class HadoopOutputFormatTest method testCloseWithNeedsTaskCommitTrue.
@Test
public void testCloseWithNeedsTaskCommitTrue() throws Exception {
    RecordWriter<String, Long> recordWriter = Mockito.mock(DummyRecordWriter.class);
    OutputCommitter outputCommitter = setupOutputCommitter(true);
    HadoopOutputFormat<String, Long> hadoopOutputFormat =
            setupHadoopOutputFormat(new DummyOutputFormat(), Job.getInstance(), recordWriter, outputCommitter, new Configuration());
    hadoopOutputFormat.close();
    verify(outputCommitter, times(1)).commitTask(any(TaskAttemptContext.class));
    verify(recordWriter, times(1)).close(any(TaskAttemptContext.class));
}
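The setupOutputCommitter(boolean) helper is not reproduced on this page. A minimal sketch of what it presumably does, assuming it only mocks OutputCommitter so that needsTaskCommit answers with the given flag (a hypothetical reconstruction, not the verbatim Flink helper):

// Hypothetical reconstruction of the test helper: a Mockito mock whose
// needsTaskCommit(...) answer is controlled by the caller.
private OutputCommitter setupOutputCommitter(boolean needsTaskCommit) throws IOException {
    OutputCommitter outputCommitter = Mockito.mock(OutputCommitter.class);
    Mockito.when(outputCommitter.needsTaskCommit(Mockito.any(TaskAttemptContext.class)))
            .thenReturn(needsTaskCommit);
    Mockito.doNothing().when(outputCommitter).commitTask(Mockito.any(TaskAttemptContext.class));
    return outputCommitter;
}

With such a mock, close() sees needsTaskCommit == true, so the test expects exactly one commitTask call and exactly one close of the record writer, which is what the two verify statements assert.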
use of org.apache.hadoop.mapreduce.OutputCommitter in project flink by apache.
the class HadoopOutputFormatTest method testOpen.
@Test
public void testOpen() throws Exception {
    OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
    OutputCommitter outputCommitter = setupOutputCommitter(true);
    when(dummyOutputFormat.getOutputCommitter(any(TaskAttemptContext.class))).thenReturn(outputCommitter);
    HadoopOutputFormat<String, Long> hadoopOutputFormat =
            setupHadoopOutputFormat(dummyOutputFormat, Job.getInstance(), new DummyRecordWriter(), setupOutputCommitter(true), new Configuration());
    hadoopOutputFormat.open(1, 4);
    verify(hadoopOutputFormat.outputCommitter, times(1)).setupJob(any(JobContext.class));
    verify(hadoopOutputFormat.mapreduceOutputFormat, times(1)).getRecordWriter(any(TaskAttemptContext.class));
}
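setupHadoopOutputFormat is likewise a helper of the test class that the page omits. A sketch of what it plausibly does, assuming it constructs the wrapper and injects the pre-built collaborators into its package-visible fields (hypothetical reconstruction; the field names are taken from the verify calls above):

// Hypothetical reconstruction: wire the wrapper with pre-built mocks so that
// open()/close() can be exercised without running a real Hadoop job.
private HadoopOutputFormat<String, Long> setupHadoopOutputFormat(
        OutputFormat<String, Long> outputFormat,
        Job job,
        RecordWriter<String, Long> recordWriter,
        OutputCommitter outputCommitter,
        Configuration configuration) {
    HadoopOutputFormat<String, Long> hadoopOutputFormat = new HadoopOutputFormat<>(outputFormat, job);
    hadoopOutputFormat.recordWriter = recordWriter;
    hadoopOutputFormat.outputCommitter = outputCommitter;
    hadoopOutputFormat.configuration = configuration;
    return hadoopOutputFormat;
}

The getOutputCommitter stub is there because open() is expected to fetch the committer from the wrapped OutputFormat again, so the setupJob verification runs against the mock returned by that stub rather than the committer passed to the helper.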
use of org.apache.hadoop.mapreduce.OutputCommitter in project hive by apache.
the class TestE2EScenarios method copyTable.
private void copyTable(String in, String out) throws IOException, InterruptedException {
    Job ijob = new Job();
    Job ojob = new Job();
    HCatInputFormat inpy = new HCatInputFormat();
    inpy.setInput(ijob, null, in);
    HCatOutputFormat oupy = new HCatOutputFormat();
    oupy.setOutput(ojob, OutputJobInfo.create(null, out, new HashMap<String, String>()));

    // Test HCatContext
    System.err.println("HCatContext INSTANCE is present : " + HCatContext.INSTANCE.getConf().isPresent());
    if (HCatContext.INSTANCE.getConf().isPresent()) {
        System.err.println("HCatContext tinyint->int promotion says "
                + HCatContext.INSTANCE.getConf().get().getBoolean(
                        HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION,
                        HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT));
    }

    HCatSchema tableSchema = inpy.getTableSchema(ijob.getConfiguration());
    System.err.println("Copying from [" + in + "] to [" + out + "] with schema : " + tableSchema.toString());
    oupy.setSchema(ojob, tableSchema);
    oupy.checkOutputSpecs(ojob);
    OutputCommitter oc = oupy.getOutputCommitter(createTaskAttemptContext(ojob.getConfiguration()));
    oc.setupJob(ojob);

    for (InputSplit split : inpy.getSplits(ijob)) {
        TaskAttemptContext rtaskContext = createTaskAttemptContext(ijob.getConfiguration());
        TaskAttemptContext wtaskContext = createTaskAttemptContext(ojob.getConfiguration());

        RecordReader<WritableComparable, HCatRecord> rr = inpy.createRecordReader(split, rtaskContext);
        rr.initialize(split, rtaskContext);

        OutputCommitter taskOc = oupy.getOutputCommitter(wtaskContext);
        taskOc.setupTask(wtaskContext);
        RecordWriter<WritableComparable<?>, HCatRecord> rw = oupy.getRecordWriter(wtaskContext);

        while (rr.nextKeyValue()) {
            rw.write(rr.getCurrentKey(), rr.getCurrentValue());
        }
        rw.close(wtaskContext);
        taskOc.commitTask(wtaskContext);
        rr.close();
    }
    oc.commitJob(ojob);
}
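createTaskAttemptContext(Configuration) is a private helper of the same test class that the page does not show; the real one goes through Hive's shim utilities, but a rough, assumption-based equivalent using the plain mapreduce API would look like this:

// Hypothetical stand-in for the test helper: wrap the given configuration in a
// TaskAttemptContext with a synthetic task-attempt id.
private TaskAttemptContext createTaskAttemptContext(Configuration conf) {
    return new org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl(
            conf, new org.apache.hadoop.mapreduce.TaskAttemptID());
}

Whatever its exact shape, the point for the copy loop is that each split gets a read-side context built from the input job's configuration and a write-side context built from the output job's configuration, and the OutputCommitter lifecycle (setupJob, then per-split setupTask/commitTask, then commitJob) is driven entirely against the write-side contexts.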
use of org.apache.hadoop.mapreduce.OutputCommitter in project hive by apache.
the class HCatOutputFormatWriter method write.
@Override
public void write(Iterator<HCatRecord> recordItr) throws HCatException {
    int id = sp.getId();
    setVarsInConf(id);
    HCatOutputFormat outFormat = new HCatOutputFormat();
    TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(
            conf, new TaskAttemptID(ShimLoader.getHadoopShims().getHCatShim().createTaskID(), id));
    OutputCommitter committer = null;
    RecordWriter<WritableComparable<?>, HCatRecord> writer;
    try {
        committer = outFormat.getOutputCommitter(cntxt);
        committer.setupTask(cntxt);
        writer = outFormat.getRecordWriter(cntxt);
        while (recordItr.hasNext()) {
            HCatRecord rec = recordItr.next();
            writer.write(null, rec);
        }
        writer.close(cntxt);
        if (committer.needsTaskCommit(cntxt)) {
            committer.commitTask(cntxt);
        }
    } catch (IOException e) {
        if (null != committer) {
            try {
                committer.abortTask(cntxt);
            } catch (IOException e1) {
                throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1);
            }
        }
        throw new HCatException("Failed while writing", e);
    } catch (InterruptedException e) {
        if (null != committer) {
            try {
                committer.abortTask(cntxt);
            } catch (IOException e1) {
                throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1);
            }
        }
        throw new HCatException("Failed while writing", e);
    }
}
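This is only the task-side half of the commit protocol: each parallel writer sets up its own task attempt, writes, and then commits or aborts that attempt. A matching job-level commit still has to run exactly once on the coordinating side after all writers have finished. A minimal, hypothetical sketch of that step (the method name and wiring here are illustrative, not the actual HCatOutputFormatWriter API):

// Hypothetical job-side counterpart: once every task attempt has committed,
// ask the same output format's committer to finalize the whole job.
void commitWholeJob(Configuration conf) throws IOException, InterruptedException {
    Job job = Job.getInstance(conf);
    TaskAttemptContext ctx = new org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl(
            conf, new org.apache.hadoop.mapreduce.TaskAttemptID());
    OutputCommitter committer = new HCatOutputFormat().getOutputCommitter(ctx);
    committer.commitJob(job);
}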
use of org.apache.hadoop.mapreduce.OutputCommitter in project hive by apache.
the class DynamicPartitionFileRecordWriterContainer method close.
@Override
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
    Reporter reporter = InternalUtil.createReporter(context);
    for (RecordWriter<? super WritableComparable<?>, ? super Writable> bwriter : baseDynamicWriters.values()) {
        // We are in RecordWriter.close(), so it makes sense that the context would be
        // a TaskInputOutputContext.
        bwriter.close(reporter);
    }

    TaskCommitContextRegistry.getInstance().register(context, new TaskCommitContextRegistry.TaskCommitterProxy() {
        @Override
        public void abortTask(TaskAttemptContext context) throws IOException {
            for (Map.Entry<String, OutputJobInfo> outputJobInfoEntry : dynamicOutputJobInfo.entrySet()) {
                String dynKey = outputJobInfoEntry.getKey();
                OutputJobInfo outputJobInfo = outputJobInfoEntry.getValue();
                LOG.info("Aborting task-attempt for " + outputJobInfo.getLocation());
                baseDynamicCommitters.get(dynKey).abortTask(dynamicContexts.get(dynKey));
            }
        }

        @Override
        public void commitTask(TaskAttemptContext context) throws IOException {
            for (Map.Entry<String, OutputJobInfo> outputJobInfoEntry : dynamicOutputJobInfo.entrySet()) {
                String dynKey = outputJobInfoEntry.getKey();
                OutputJobInfo outputJobInfo = outputJobInfoEntry.getValue();
                LOG.info("Committing task-attempt for " + outputJobInfo.getLocation());
                TaskAttemptContext dynContext = dynamicContexts.get(dynKey);
                OutputCommitter dynCommitter = baseDynamicCommitters.get(dynKey);
                if (dynCommitter.needsTaskCommit(dynContext)) {
                    dynCommitter.commitTask(dynContext);
                } else {
                    LOG.info("Skipping commitTask() for " + outputJobInfo.getLocation());
                }
            }
        }
    });
}
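Note that close() itself never commits or aborts the per-partition committers; it only registers a proxy with TaskCommitContextRegistry. The assumption is that the corresponding OutputCommitter container later looks that proxy up by task-attempt and delegates to it, roughly as sketched below (a sketch under that assumption; the registry's exact method names should be checked against the Hive source):

// Assumed hand-off inside the matching OutputCommitter container:
// task commit/abort is delegated back to whatever proxy close() registered.
@Override
public void commitTask(TaskAttemptContext context) throws IOException {
    TaskCommitContextRegistry.getInstance().commitTask(context);
}

@Override
public void abortTask(TaskAttemptContext context) throws IOException {
    TaskCommitContextRegistry.getInstance().abortTask(context);
}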