Search in sources:

Example 6 with TaskAttemptContextImpl

Use of org.apache.hadoop.mapred.TaskAttemptContextImpl in project flink by apache.

From the class HadoopOutputFormatBase, method open().

/**
 * Create the temporary output file for the Hadoop RecordWriter.
 *
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException Thrown if the output could not be opened.
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    // enforce sequential open() calls
    synchronized (OPEN_MUTEX) {
        if (Integer.toString(taskNumber + 1).length() > 6) {
            throw new IOException("Task id too large.");
        }
        // left-pad the 1-based task number to six digits so the ID has the form attempt__0000_r_000001_0
        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
                + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
                + Integer.toString(taskNumber + 1) + "_0");
        this.jobConf.set("mapred.task.id", taskAttemptID.toString());
        this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
        // for Hadoop 2.2
        this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
        this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);
        this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);
        this.outputCommitter = this.jobConf.getOutputCommitter();
        JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
        this.outputCommitter.setupJob(jobContext);
        this.recordWriter = this.mapredOutputFormat.getRecordWriter(
                null, this.jobConf, Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
    }
}
Also used: JobContextImpl (org.apache.hadoop.mapred.JobContextImpl), TaskAttemptID (org.apache.hadoop.mapred.TaskAttemptID), TaskAttemptContextImpl (org.apache.hadoop.mapred.TaskAttemptContextImpl), IOException (java.io.IOException), JobContext (org.apache.hadoop.mapred.JobContext), HadoopDummyProgressable (org.apache.flink.api.java.hadoop.mapred.wrapper.HadoopDummyProgressable), JobID (org.apache.hadoop.mapred.JobID)
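For orientation, the hand-built ID string above should match what a TaskAttemptID produces when constructed directly. The standalone sketch below assumes the Hadoop 2.x mapred API; the class name TaskAttemptContextSketch and the choice of TaskType.REDUCE for subtask 0 are illustrative, not taken from the Flink source. It wraps such an ID in a TaskAttemptContextImpl the same way the snippet above does.

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TaskAttemptContextImpl;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;

// Illustrative sketch: build the attempt ID for the first parallel subtask directly
// and wrap it in a TaskAttemptContextImpl, mirroring HadoopOutputFormatBase#open.
public class TaskAttemptContextSketch {

    public static void main(String[] args) {
        int taskNumber = 0; // index of the parallel subtask, as passed to open()
        JobConf jobConf = new JobConf();

        // TaskAttemptID(jtIdentifier, jobId, taskType, taskId, attemptId)
        TaskAttemptID taskAttemptID = new TaskAttemptID("", 0, TaskType.REDUCE, taskNumber + 1, 0);

        // the same properties the Flink snippet sets, covering the old and new Hadoop property names
        jobConf.set("mapred.task.id", taskAttemptID.toString());
        jobConf.setInt("mapred.task.partition", taskNumber + 1);
        jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
        jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

        TaskAttemptContextImpl context = new TaskAttemptContextImpl(jobConf, taskAttemptID);
        System.out.println(context.getTaskAttemptID()); // expected: attempt__0000_r_000001_0
    }
}

For taskNumber 0 the printed ID should come out as attempt__0000_r_000001_0, i.e. the same shape the Flink code assembles by string padding.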

Example 7 with TaskAttemptContextImpl

Use of org.apache.hadoop.mapred.TaskAttemptContextImpl in project hive by apache.

From the class TestHiveIcebergOutputCommitter, method writeRecords().

/**
 * Write random records to the given table using a separate {@link HiveIcebergOutputCommitter} and
 * a separate {@link HiveIcebergRecordWriter} for every task.
 * @param name The name of the table to get the table object from the conf
 * @param taskNum The number of tasks in the job handled by the committer
 * @param attemptNum The id used for attempt number generation
 * @param commitTasks If <code>true</code> the tasks will be committed
 * @param abortTasks If <code>true</code> the tasks will be aborted; needed so we can simulate the no commit/no abort
 *                   situation
 * @param conf The job configuration
 * @param committer The output committer that should be used for committing/aborting the tasks
 * @return The random generated records which were appended to the table
 * @throws IOException Propagating {@link HiveIcebergRecordWriter} exceptions
 */
private List<Record> writeRecords(String name, int taskNum, int attemptNum, boolean commitTasks, boolean abortTasks, JobConf conf, OutputCommitter committer) throws IOException {
    List<Record> expected = new ArrayList<>(RECORD_NUM * taskNum);
    Table table = HiveIcebergStorageHandler.table(conf, name);
    FileIO io = table.io();
    Schema schema = HiveIcebergStorageHandler.schema(conf);
    PartitionSpec spec = table.spec();
    for (int i = 0; i < taskNum; ++i) {
        List<Record> records = TestHelper.generateRandomRecords(schema, RECORD_NUM, i + attemptNum);
        TaskAttemptID taskId = new TaskAttemptID(JOB_ID.getJtIdentifier(), JOB_ID.getId(), TaskType.MAP, i, attemptNum);
        int partitionId = taskId.getTaskID().getId();
        String operationId = QUERY_ID + "-" + JOB_ID;
        FileFormat fileFormat = FileFormat.PARQUET;
        OutputFileFactory outputFileFactory = OutputFileFactory.builderFor(table, partitionId, attemptNum)
                .format(fileFormat)
                .operationId(operationId)
                .build();
        HiveFileWriterFactory hfwf = new HiveFileWriterFactory(table, fileFormat, schema, null, fileFormat, null, null, null, null);
        HiveIcebergRecordWriter testWriter = new HiveIcebergRecordWriter(schema, spec, fileFormat, hfwf,
                outputFileFactory, io, TARGET_FILE_SIZE, TezUtil.taskAttemptWrapper(taskId), conf.get(Catalogs.NAME));
        Container<Record> container = new Container<>();
        for (Record record : records) {
            container.set(record);
            testWriter.write(container);
        }
        testWriter.close(false);
        if (commitTasks) {
            committer.commitTask(new TaskAttemptContextImpl(conf, taskId));
            expected.addAll(records);
        } else if (abortTasks) {
            committer.abortTask(new TaskAttemptContextImpl(conf, taskId));
        }
    }
    return expected;
}
Also used: OutputFileFactory (org.apache.iceberg.io.OutputFileFactory), Table (org.apache.iceberg.Table), TaskAttemptID (org.apache.hadoop.mapred.TaskAttemptID), Schema (org.apache.iceberg.Schema), ArrayList (java.util.ArrayList), FileFormat (org.apache.iceberg.FileFormat), PartitionSpec (org.apache.iceberg.PartitionSpec), FileIO (org.apache.iceberg.io.FileIO), Container (org.apache.iceberg.mr.mapred.Container), TaskAttemptContextImpl (org.apache.hadoop.mapred.TaskAttemptContextImpl), Record (org.apache.iceberg.data.Record)
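The per-task pattern above, one TaskAttemptID per task wrapped in a TaskAttemptContextImpl and then passed to commitTask or abortTask, can be sketched in isolation as below. The LoggingCommitter class is a hypothetical stand-in for HiveIcebergOutputCommitter, and the job identifier values are made up for illustration; this is a sketch of the old-API commit calls, not the Hive test itself.

import java.io.IOException;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobContext;
import org.apache.hadoop.mapred.OutputCommitter;
import org.apache.hadoop.mapred.TaskAttemptContext;
import org.apache.hadoop.mapred.TaskAttemptContextImpl;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;

// Illustrative sketch of the per-task commit/abort pattern used in the test above,
// with a hypothetical no-op committer standing in for HiveIcebergOutputCommitter.
public class CommitAbortSketch {

    public static void main(String[] args) throws IOException {
        JobConf conf = new JobConf();
        OutputCommitter committer = new LoggingCommitter();

        int taskNum = 2;      // number of simulated tasks
        int attemptNum = 0;   // attempt number for every task
        boolean commitTasks = true;

        for (int i = 0; i < taskNum; ++i) {
            // one attempt per task, mirroring new TaskAttemptID(..., TaskType.MAP, i, attemptNum) above
            TaskAttemptID taskId = new TaskAttemptID("local", 0, TaskType.MAP, i, attemptNum);
            if (commitTasks) {
                committer.commitTask(new TaskAttemptContextImpl(conf, taskId));
            } else {
                committer.abortTask(new TaskAttemptContextImpl(conf, taskId));
            }
        }
    }

    /** Hypothetical committer that only logs; a real committer finalizes or discards task output here. */
    static class LoggingCommitter extends OutputCommitter {
        @Override
        public void setupJob(JobContext jobContext) {
        }

        @Override
        public void setupTask(TaskAttemptContext taskContext) {
        }

        @Override
        public boolean needsTaskCommit(TaskAttemptContext taskContext) {
            return true;
        }

        @Override
        public void commitTask(TaskAttemptContext taskContext) {
            System.out.println("commit " + taskContext.getTaskAttemptID());
        }

        @Override
        public void abortTask(TaskAttemptContext taskContext) {
            System.out.println("abort " + taskContext.getTaskAttemptID());
        }
    }
}

In a real MapReduce job the framework would typically also call setupJob, setupTask and needsTaskCommit before committing, which is why those methods exist on the committer even though the test drives commitTask and abortTask directly.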

Aggregations

TaskAttemptContextImpl (org.apache.hadoop.mapred.TaskAttemptContextImpl): 7
TaskAttemptID (org.apache.hadoop.mapred.TaskAttemptID): 4
IOException (java.io.IOException): 3
JobConf (org.apache.hadoop.mapred.JobConf): 3
Table (org.apache.iceberg.Table): 2
Test (org.junit.Test): 2
ColumnHandle (com.facebook.presto.spi.ColumnHandle): 1
ImmutableList (com.google.common.collect.ImmutableList): 1
UnknownHostException (java.net.UnknownHostException): 1
ArrayList (java.util.ArrayList): 1
CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable): 1
QueryModel (org.apache.carbondata.core.scan.model.QueryModel): 1
CarbonInputSplit (org.apache.carbondata.hadoop.CarbonInputSplit): 1
CarbonProjection (org.apache.carbondata.hadoop.CarbonProjection): 1
CarbonTableInputFormat (org.apache.carbondata.hadoop.api.CarbonTableInputFormat): 1
CarbonTableCacheModel (org.apache.carbondata.presto.impl.CarbonTableCacheModel): 1
HadoopDummyProgressable (org.apache.flink.api.java.hadoop.mapred.wrapper.HadoopDummyProgressable): 1
Configuration (org.apache.hadoop.conf.Configuration): 1
JobContext (org.apache.hadoop.mapred.JobContext): 1
JobContextImpl (org.apache.hadoop.mapred.JobContextImpl): 1