Use of org.apache.hadoop.mapred.TaskAttemptContextImpl in project flink by apache.
The class HadoopOutputFormatBase, method open().
/**
 * Creates the temporary output file for the Hadoop RecordWriter.
 *
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException Thrown if the output could not be opened.
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    // enforce sequential open() calls
    synchronized (OPEN_MUTEX) {
        if (Integer.toString(taskNumber + 1).length() > 6) {
            throw new IOException("Task id too large.");
        }

        // build a zero-padded, six-digit attempt id, e.g. "attempt__0000_r_000001_0" for taskNumber 0
        TaskAttemptID taskAttemptID = TaskAttemptID.forName(
                "attempt__0000_r_"
                        + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
                        + Integer.toString(taskNumber + 1)
                        + "_0");

        this.jobConf.set("mapred.task.id", taskAttemptID.toString());
        this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
        // for hadoop 2.2
        this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
        this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

        this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);
        this.outputCommitter = this.jobConf.getOutputCommitter();
        JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
        this.outputCommitter.setupJob(jobContext);
        this.recordWriter = this.mapredOutputFormat.getRecordWriter(
                null, this.jobConf, Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
    }
}
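For context, a minimal sketch of how this open() method ends up being called: Flink invokes it once per parallel sink task when a mapred OutputFormat is wrapped in Flink's HadoopOutputFormat. The job wiring below (output path, sample data, class name) is hypothetical, assuming the DataSet API and Hadoop's mapred TextOutputFormat.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.hadoop.mapred.HadoopOutputFormat;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextOutputFormat;

public class HadoopSinkSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        JobConf jobConf = new JobConf();
        FileOutputFormat.setOutputPath(jobConf, new Path("/tmp/flink-hadoop-out")); // hypothetical path

        // Wrap the mapred OutputFormat; Flink calls open(taskNumber, numTasks) on each
        // parallel sink instance before any records are written.
        HadoopOutputFormat<Text, IntWritable> format =
                new HadoopOutputFormat<>(new TextOutputFormat<Text, IntWritable>(), jobConf);

        DataSet<Tuple2<Text, IntWritable>> data =
                env.fromElements(new Tuple2<>(new Text("flink"), new IntWritable(1)));
        data.output(format);
        env.execute("hadoop-output-sketch");
    }
}

At runtime, each sink subtask builds its own TaskAttemptID and RecordWriter exactly as shown in open() above.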
Use of org.apache.hadoop.mapred.TaskAttemptContextImpl in project hive by apache.
The class TestHiveIcebergOutputCommitter, method writeRecords().
/**
 * Writes random records to the given table, using a separate {@link HiveIcebergRecordWriter} for
 * every task, and commits or aborts each task with the supplied {@link HiveIcebergOutputCommitter}.
 *
 * @param name The name of the table to get the table object from the conf
 * @param taskNum The number of tasks in the job handled by the committer
 * @param attemptNum The id used for attempt number generation
 * @param commitTasks If <code>true</code> the tasks will be committed
 * @param abortTasks If <code>true</code> the tasks will be aborted; leaving both flags
 *        <code>false</code> simulates a no-commit/no-abort situation
 * @param conf The job configuration
 * @param committer The output committer that should be used for committing/aborting the tasks
 * @return The randomly generated records which were appended to the table
 * @throws IOException Propagating {@link HiveIcebergRecordWriter} exceptions
 */
private List<Record> writeRecords(String name, int taskNum, int attemptNum, boolean commitTasks,
        boolean abortTasks, JobConf conf, OutputCommitter committer) throws IOException {
    List<Record> expected = new ArrayList<>(RECORD_NUM * taskNum);
    Table table = HiveIcebergStorageHandler.table(conf, name);
    FileIO io = table.io();
    Schema schema = HiveIcebergStorageHandler.schema(conf);
    PartitionSpec spec = table.spec();

    for (int i = 0; i < taskNum; ++i) {
        List<Record> records = TestHelper.generateRandomRecords(schema, RECORD_NUM, i + attemptNum);
        TaskAttemptID taskId =
                new TaskAttemptID(JOB_ID.getJtIdentifier(), JOB_ID.getId(), TaskType.MAP, i, attemptNum);
        int partitionId = taskId.getTaskID().getId();
        String operationId = QUERY_ID + "-" + JOB_ID;
        FileFormat fileFormat = FileFormat.PARQUET;
        OutputFileFactory outputFileFactory = OutputFileFactory.builderFor(table, partitionId, attemptNum)
                .format(fileFormat)
                .operationId(operationId)
                .build();
        HiveFileWriterFactory hfwf =
                new HiveFileWriterFactory(table, fileFormat, schema, null, fileFormat, null, null, null, null);
        HiveIcebergRecordWriter testWriter = new HiveIcebergRecordWriter(schema, spec, fileFormat, hfwf,
                outputFileFactory, io, TARGET_FILE_SIZE, TezUtil.taskAttemptWrapper(taskId), conf.get(Catalogs.NAME));

        // write the generated records through this task's record writer
        Container<Record> container = new Container<>();
        for (Record record : records) {
            container.set(record);
            testWriter.write(container);
        }
        testWriter.close(false);

        if (commitTasks) {
            committer.commitTask(new TaskAttemptContextImpl(conf, taskId));
            expected.addAll(records);
        } else if (abortTasks) {
            committer.abortTask(new TaskAttemptContextImpl(conf, taskId));
        }
        // if neither flag is set, the task is left neither committed nor aborted
    }
    return expected;
}
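A hedged sketch of how a test could drive writeRecords(): the constants (JOB_ID, RECORD_NUM) come from the surrounding test class, while the conf field, the table name, and the test method itself are hypothetical and assumed to be prepared by the test's setup code.

@Test
public void testCommittedTasksAppearInTable() throws IOException {
    HiveIcebergOutputCommitter committer = new HiveIcebergOutputCommitter();

    // write and commit two tasks' worth of records; conf is an assumed JobConf
    // pointing at an Iceberg table, "default.target" a hypothetical table name
    List<Record> expected = writeRecords("default.target", 2, 0, true, false, conf, committer);

    // committing the job publishes the per-task data files to the Iceberg table
    committer.commitJob(new JobContextImpl(conf, JOB_ID));

    // every committed task contributed RECORD_NUM records
    Assert.assertEquals(2 * RECORD_NUM, expected.size());
}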