Use of org.apache.hadoop.mapred.JobContextImpl in project hive by apache.
Class TestHiveIcebergOutputCommitter, method testSuccessfulMultipleTasksPartitionedWrite.
@Test
public void testSuccessfulMultipleTasksPartitionedWrite() throws IOException {
  HiveIcebergOutputCommitter committer = new HiveIcebergOutputCommitter();
  Table table = table(temp.getRoot().getPath(), true);
  JobConf conf = jobConf(table, 2);
  List<Record> expected = writeRecords(table.name(), 2, 0, true, false, conf);
  committer.commitJob(new JobContextImpl(conf, JOB_ID));

  // Expecting 6 files with the fanout writer, 8 with ClusteredWriter where writing to already
  // completed partitions is allowed.
  HiveIcebergTestUtils.validateFiles(table, conf, JOB_ID, 8);
  HiveIcebergTestUtils.validateData(table, expected, 0);
}
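The test relies on helpers (table(), jobConf(), writeRecords()) and a JOB_ID constant defined elsewhere in the test class. Below is a minimal sketch of the job-level half of the mapred commit protocol that commitJob completes, using a hypothetical committer, job id, and class name rather than the Hive test's actual fixtures:

import java.io.IOException;

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobContextImpl;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.OutputCommitter;

public class CommitJobSketch {
  // Drives the job-level commit sequence, as the test does for HiveIcebergOutputCommitter.
  static void commitWith(OutputCommitter committer, JobConf conf) throws IOException {
    // Hypothetical job id; the Hive test uses its own JOB_ID constant.
    JobID jobId = new JobID("sketch", 0);
    JobContextImpl jobContext = new JobContextImpl(conf, jobId);
    committer.setupJob(jobContext);   // before any task writes
    // ... tasks write and commit their attempts here ...
    committer.commitJob(jobContext);  // make the task outputs visible as the final job output
  }
}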
Use of org.apache.hadoop.mapred.JobContextImpl in project hive by apache.
Class TestHiveIcebergOutputCommitter, method testRetryTask.
@Test
public void testRetryTask() throws IOException {
  HiveIcebergOutputCommitter committer = new HiveIcebergOutputCommitter();
  Table table = table(temp.getRoot().getPath(), false);
  JobConf conf = jobConf(table, 2);

  // Write records and abort the tasks
  writeRecords(table.name(), 2, 0, false, true, conf);
  HiveIcebergTestUtils.validateFiles(table, conf, JOB_ID, 0);
  HiveIcebergTestUtils.validateData(table, Collections.emptyList(), 0);

  // Write records but do not abort the tasks.
  // The data files remain because we cannot identify them, but they must not be read.
  writeRecords(table.name(), 2, 1, false, false, conf);
  HiveIcebergTestUtils.validateFiles(table, conf, JOB_ID, 2);
  HiveIcebergTestUtils.validateData(table, Collections.emptyList(), 0);

  // Write and commit the records
  List<Record> expected = writeRecords(table.name(), 2, 2, true, false, conf);
  committer.commitJob(new JobContextImpl(conf, JOB_ID));
  HiveIcebergTestUtils.validateFiles(table, conf, JOB_ID, 4);
  HiveIcebergTestUtils.validateData(table, expected, 0);
}
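writeRecords() drives the task-attempt side of the protocol; its boolean arguments appear to select whether each attempt commits or aborts. A hedged sketch of that per-attempt decision using the standard mapred OutputCommitter calls; the method and variable names here are illustrative, not the Hive test's helpers:

import java.io.IOException;

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCommitter;
import org.apache.hadoop.mapred.TaskAttemptContextImpl;
import org.apache.hadoop.mapred.TaskAttemptID;

public class TaskAttemptSketch {
  // Commit or abort a single task attempt; a retry creates a new attempt id and runs this again.
  static void finishAttempt(OutputCommitter committer, JobConf conf,
                            TaskAttemptID attemptId, boolean succeeded) throws IOException {
    TaskAttemptContextImpl taskContext = new TaskAttemptContextImpl(conf, attemptId);
    if (succeeded && committer.needsTaskCommit(taskContext)) {
      committer.commitTask(taskContext);
    } else {
      committer.abortTask(taskContext);
    }
  }
}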
Use of org.apache.hadoop.mapred.JobContextImpl in project flink by apache.
Class HadoopOutputFormatBase, method finalizeGlobal.
@Override
public void finalizeGlobal(int parallelism) throws IOException {
    try {
        JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
        OutputCommitter outputCommitter = this.jobConf.getOutputCommitter();
        // finalize HDFS output format
        outputCommitter.commitJob(jobContext);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
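finalizeGlobal only covers the success path. A counterpart for a failed job would hand the same kind of JobContextImpl to abortJob; a sketch under that assumption (this method is not part of the Flink class shown above):

import java.io.IOException;

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobContext;
import org.apache.hadoop.mapred.JobContextImpl;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.JobStatus;
import org.apache.hadoop.mapred.OutputCommitter;

public class AbortSketch {
    // Mirror of finalizeGlobal() for the failure path: discard task output instead of committing it.
    static void abortGlobal(JobConf jobConf) throws IOException {
        JobContext jobContext = new JobContextImpl(jobConf, new JobID());
        OutputCommitter outputCommitter = jobConf.getOutputCommitter();
        outputCommitter.abortJob(jobContext, JobStatus.FAILED);
    }
}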
Use of org.apache.hadoop.mapred.JobContextImpl in project flink by apache.
Class HadoopOutputFormatBase, method open.
/**
 * Creates the temporary output file for the Hadoop RecordWriter.
 *
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    // enforce sequential open() calls
    synchronized (OPEN_MUTEX) {
        if (Integer.toString(taskNumber + 1).length() > 6) {
            throw new IOException("Task id too large.");
        }

        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
                + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
                + Integer.toString(taskNumber + 1)
                + "_0");

        this.jobConf.set("mapred.task.id", taskAttemptID.toString());
        this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
        // for hadoop 2.2
        this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
        this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

        this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);
        this.outputCommitter = this.jobConf.getOutputCommitter();

        JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
        this.outputCommitter.setupJob(jobContext);

        this.recordWriter = this.mapredOutputFormat.getRecordWriter(
                null, this.jobConf, Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
    }
}
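The hand-rolled padding above produces an attempt id of the form attempt__0000_r_NNNNNN_0, where NNNNNN is taskNumber + 1 zero-padded to six digits. A small equivalent sketch using a single %06d format specifier, valid under the length check that open() already enforces; the class and method names are illustrative only:

import org.apache.hadoop.mapred.TaskAttemptID;

public class AttemptIdSketch {
    static TaskAttemptID attemptIdFor(int taskNumber) {
        // %06d left-pads the 1-based task number with zeros to six digits,
        // matching the manual padding in open().
        return TaskAttemptID.forName(String.format("attempt__0000_r_%06d_0", taskNumber + 1));
    }
}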