Example 6 with JobContextImpl

use of org.apache.hadoop.mapred.JobContextImpl in project ignite by apache.

the class HadoopV2MapTask method run0.

/**
 * {@inheritDoc}
 */
@SuppressWarnings({ "ConstantConditions", "unchecked" })
@Override
public void run0(HadoopV2TaskContext taskCtx) throws IgniteCheckedException {
    OutputFormat outputFormat = null;
    Exception err = null;
    JobContextImpl jobCtx = taskCtx.jobContext();
    // Publish this task's mapper index, or clear any stale value left by a previous task.
    if (taskCtx.taskInfo().hasMapperIndex())
        HadoopMapperUtils.mapperIndex(taskCtx.taskInfo().mapperIndex());
    else
        HadoopMapperUtils.clearMapperIndex();
    try {
        HadoopV2Context hadoopCtx = hadoopContext();
        InputSplit nativeSplit = hadoopCtx.getInputSplit();
        if (nativeSplit == null)
            throw new IgniteCheckedException("Input split cannot be null.");
        InputFormat inFormat = ReflectionUtils.newInstance(jobCtx.getInputFormatClass(), hadoopCtx.getConfiguration());
        RecordReader reader = inFormat.createRecordReader(nativeSplit, hadoopCtx);
        reader.initialize(nativeSplit, hadoopCtx);
        hadoopCtx.reader(reader);
        HadoopJobInfo jobInfo = taskCtx.job().info();
        // A direct writer is needed only for map-only jobs; with a combiner or reducer the map output stays internal.
        outputFormat = jobInfo.hasCombiner() || jobInfo.hasReducer() ? null : prepareWriter(jobCtx);
        Mapper mapper = ReflectionUtils.newInstance(jobCtx.getMapperClass(), hadoopCtx.getConfiguration());
        try {
            mapper.run(new WrappedMapper().getMapContext(hadoopCtx));
            taskCtx.onMapperFinished();
        } finally {
            closeWriter();
        }
        commit(outputFormat);
    } catch (InterruptedException e) {
        err = e;
        Thread.currentThread().interrupt();
        throw new IgniteInterruptedCheckedException(e);
    } catch (Exception e) {
        err = e;
        throw new IgniteCheckedException(e);
    } finally {
        HadoopMapperUtils.clearMapperIndex();
        if (err != null)
            abort(outputFormat);
    }
}
Also used : HadoopJobInfo(org.apache.ignite.internal.processors.hadoop.HadoopJobInfo) JobContextImpl(org.apache.hadoop.mapred.JobContextImpl) RecordReader(org.apache.hadoop.mapreduce.RecordReader) OutputFormat(org.apache.hadoop.mapreduce.OutputFormat) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) IgniteInterruptedCheckedException(org.apache.ignite.internal.IgniteInterruptedCheckedException) Mapper(org.apache.hadoop.mapreduce.Mapper) WrappedMapper(org.apache.hadoop.mapreduce.lib.map.WrappedMapper) InputFormat(org.apache.hadoop.mapreduce.InputFormat) InputSplit(org.apache.hadoop.mapreduce.InputSplit)
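
For reference, the same new-API flow can be driven outside Ignite with the stock Hadoop classes. The following is a minimal sketch, not Ignite code: the runMapper helper and its parameters (split, writer, committer, reporter) are assumptions supplied by the caller, while WrappedMapper, MapContextImpl and ReflectionUtils are the real Hadoop classes the example above relies on.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.map.WrappedMapper;
import org.apache.hadoop.mapreduce.task.MapContextImpl;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import org.apache.hadoop.util.ReflectionUtils;

// Sketch: run a new-API Mapper over a single split, mirroring HadoopV2MapTask.run0.
@SuppressWarnings("unchecked")
static <K1, V1, K2, V2> void runMapper(JobContext jobCtx, TaskAttemptID attemptId, InputSplit split,
    RecordWriter<K2, V2> writer, OutputCommitter committer, StatusReporter reporter) throws Exception {
    Configuration conf = jobCtx.getConfiguration();
    // Instantiate the user's InputFormat and Mapper exactly as the Ignite task does.
    InputFormat<K1, V1> inFormat = (InputFormat<K1, V1>)ReflectionUtils.newInstance(jobCtx.getInputFormatClass(), conf);
    Mapper<K1, V1, K2, V2> mapper = (Mapper<K1, V1, K2, V2>)ReflectionUtils.newInstance(jobCtx.getMapperClass(), conf);
    TaskAttemptContext attemptCtx = new TaskAttemptContextImpl(conf, attemptId);
    RecordReader<K1, V1> reader = inFormat.createRecordReader(split, attemptCtx);
    try {
        reader.initialize(split, attemptCtx);
        // WrappedMapper adapts a MapContext into the Mapper.Context that user code expects.
        MapContextImpl<K1, V1, K2, V2> mapCtx = new MapContextImpl<>(conf, attemptId, reader, writer, committer, reporter, split);
        mapper.run(new WrappedMapper<K1, V1, K2, V2>().getMapContext(mapCtx));
    } finally {
        // The caller owns the writer/committer lifecycle (cf. closeWriter()/commit() above).
        reader.close();
    }
}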

Example 7 with JobContextImpl

use of org.apache.hadoop.mapred.JobContextImpl in project ignite by apache.

the class HadoopV2ReduceTask method run0.

/**
 * {@inheritDoc}
 */
@SuppressWarnings({ "ConstantConditions", "unchecked" })
@Override
public void run0(HadoopV2TaskContext taskCtx) throws IgniteCheckedException {
    OutputFormat outputFormat = null;
    Exception err = null;
    JobContextImpl jobCtx = taskCtx.jobContext();
    // Set mapper index for combiner tasks
    if (!reduce && taskCtx.taskInfo().hasMapperIndex())
        HadoopMapperUtils.mapperIndex(taskCtx.taskInfo().mapperIndex());
    else
        HadoopMapperUtils.clearMapperIndex();
    try {
        // Prepare a writer for real reduce tasks, and for combiners only when the job has no reducer
        // (in a map-only job with a combiner, the combiner output is the final output).
        outputFormat = reduce || !taskCtx.job().info().hasReducer() ? prepareWriter(jobCtx) : null;
        Reducer reducer;
        if (reduce)
            reducer = ReflectionUtils.newInstance(jobCtx.getReducerClass(), jobCtx.getConfiguration());
        else
            reducer = ReflectionUtils.newInstance(jobCtx.getCombinerClass(), jobCtx.getConfiguration());
        try {
            reducer.run(new WrappedReducer().getReducerContext(hadoopContext()));
            if (!reduce)
                taskCtx.onMapperFinished();
        } finally {
            closeWriter();
        }
        commit(outputFormat);
    } catch (InterruptedException e) {
        err = e;
        Thread.currentThread().interrupt();
        throw new IgniteInterruptedCheckedException(e);
    } catch (Exception e) {
        err = e;
        throw new IgniteCheckedException(e);
    } finally {
        if (!reduce)
            HadoopMapperUtils.clearMapperIndex();
        if (err != null)
            abort(outputFormat);
    }
}
Also used : IgniteInterruptedCheckedException(org.apache.ignite.internal.IgniteInterruptedCheckedException) JobContextImpl(org.apache.hadoop.mapred.JobContextImpl) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) OutputFormat(org.apache.hadoop.mapreduce.OutputFormat) WrappedReducer(org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer) Reducer(org.apache.hadoop.mapreduce.Reducer)
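
The JobContextImpl-specific detail worth noting here is that the mapred-level context inherits the new-API class getters, which is what lets one task body serve both the reducer and the combiner. A minimal standalone sketch of that selection; jobConf, jobId and runningCombiner are assumed inputs, not Ignite API:

// Sketch: pick the user class from a mapred JobContextImpl.
// getReducerClass()/getCombinerClass() throw ClassNotFoundException if the job never set them.
JobContextImpl jobCtx = new JobContextImpl(jobConf, jobId);
Class<? extends Reducer<?, ?, ?, ?>> cls = runningCombiner ? jobCtx.getCombinerClass() : jobCtx.getReducerClass();
Reducer<?, ?, ?, ?> reducer = ReflectionUtils.newInstance(cls, jobCtx.getConfiguration());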

Example 8 with JobContextImpl

use of org.apache.hadoop.mapred.JobContextImpl in project ignite by apache.

the class HadoopV2CleanupTask method run0.

/**
 * {@inheritDoc}
 */
@SuppressWarnings("ConstantConditions")
@Override
public void run0(HadoopV2TaskContext taskCtx) throws IgniteCheckedException {
    JobContextImpl jobCtx = taskCtx.jobContext();
    try {
        OutputFormat outputFormat = getOutputFormat(jobCtx);
        OutputCommitter committer = outputFormat.getOutputCommitter(hadoopContext());
        if (committer != null) {
            if (abort)
                committer.abortJob(jobCtx, JobStatus.State.FAILED);
            else
                committer.commitJob(jobCtx);
        }
    } catch (ClassNotFoundException | IOException e) {
        throw new IgniteCheckedException(e);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new IgniteInterruptedCheckedException(e);
    }
}
Also used : OutputCommitter(org.apache.hadoop.mapreduce.OutputCommitter) IgniteInterruptedCheckedException(org.apache.ignite.internal.IgniteInterruptedCheckedException) JobContextImpl(org.apache.hadoop.mapred.JobContextImpl) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) OutputFormat(org.apache.hadoop.mapreduce.OutputFormat) IOException(java.io.IOException)
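
Stripped of the Ignite exception translation, the commit-or-abort decision needs nothing more than a JobContextImpl built from the job's JobConf. A hedged sketch of that core; jobConf, jobId, outputFormat, attemptCtx and the failed flag are all assumed to be in scope, and the checked IOException/InterruptedException are left to the caller:

// Sketch: finalize a job through the new-API OutputCommitter, as HadoopV2CleanupTask does.
JobContextImpl jobCtx = new JobContextImpl(jobConf, jobId);
OutputCommitter committer = outputFormat.getOutputCommitter(attemptCtx);
if (failed)
    committer.abortJob(jobCtx, JobStatus.State.FAILED);  // discards uncommitted task output
else
    committer.commitJob(jobCtx);                         // promotes task output to the final location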

Example 9 with JobContextImpl

use of org.apache.hadoop.mapred.JobContextImpl in project hive by apache.

the class HiveIcebergStorageHandler method generateJobContext.

/**
 * Generates a JobContext for the OutputCommitter for the specific table.
 * @param configuration The configuration used as the base of the JobConf
 * @param tableName The name of the table we are planning to commit
 * @param overwrite If we have to overwrite the existing table or just add the new data
 * @return The generated JobContext
 */
private Optional<JobContext> generateJobContext(Configuration configuration, String tableName, boolean overwrite) {
    JobConf jobConf = new JobConf(configuration);
    Optional<SessionStateUtil.CommitInfo> commitInfo = SessionStateUtil.getCommitInfo(jobConf, tableName);
    if (commitInfo.isPresent()) {
        JobID jobID = JobID.forName(commitInfo.get().getJobIdStr());
        commitInfo.get().getProps().forEach(jobConf::set);
        jobConf.setBoolean(InputFormatConfig.IS_OVERWRITE, overwrite);
        // we should only commit this current table because
        // for multi-table inserts, this hook method will be called sequentially for each target table
        jobConf.set(InputFormatConfig.OUTPUT_TABLES, tableName);
        return Optional.of(new JobContextImpl(jobConf, jobID, null));
    } else {
        // most likely empty write scenario
        LOG.debug("Unable to find commit information in query state for table: {}", tableName);
        return Optional.empty();
    }
}
Also used : JobContextImpl(org.apache.hadoop.mapred.JobContextImpl) JobConf(org.apache.hadoop.mapred.JobConf) JobID(org.apache.hadoop.mapred.JobID)
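
Inside the storage handler, the generated context is then handed to the Iceberg committer. A sketch of that hand-off; HiveIcebergOutputCommitter#commitJob is the real commit entry point, while the surrounding try/catch is an assumption rather than the exact Hive code:

Optional<JobContext> jobContext = generateJobContext(configuration, tableName, false);
if (jobContext.isPresent()) {
    try {
        // HiveIcebergOutputCommitter#commitJob finalizes the Iceberg table commit for this job.
        new HiveIcebergOutputCommitter().commitJob(jobContext.get());
    } catch (IOException e) {
        throw new UncheckedIOException("Failed to commit table " + tableName, e);
    }
}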

Example 10 with JobContextImpl

use of org.apache.hadoop.mapred.JobContextImpl in project hive by apache.

the class TestHiveIcebergOutputCommitter method testAbortJob.

@Test
public void testAbortJob() throws IOException {
    HiveIcebergOutputCommitter committer = new HiveIcebergOutputCommitter();
    Table table = table(temp.getRoot().getPath(), false);
    JobConf conf = jobConf(table, 1);
    writeRecords(table.name(), 1, 0, true, false, conf);
    committer.abortJob(new JobContextImpl(conf, JOB_ID), JobStatus.State.FAILED);
    HiveIcebergTestUtils.validateFiles(table, conf, JOB_ID, 0);
    HiveIcebergTestUtils.validateData(table, Collections.emptyList(), 0);
}
Also used : JobContextImpl(org.apache.hadoop.mapred.JobContextImpl) Table(org.apache.iceberg.Table) JobConf(org.apache.hadoop.mapred.JobConf) Test(org.junit.Test)
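
A commit-path counterpart to this test would follow the same shape. The sketch below is hypothetical: it assumes the class's own helpers (jobConf, writeRecords, validateFiles, validateData) behave as in the abort test above, and that writeRecords returns the records it wrote:

@Test
public void testCommitJob() throws IOException {
    HiveIcebergOutputCommitter committer = new HiveIcebergOutputCommitter();
    Table table = table(temp.getRoot().getPath(), false);
    JobConf conf = jobConf(table, 1);
    List<Record> expected = writeRecords(table.name(), 1, 0, true, false, conf);
    committer.commitJob(new JobContextImpl(conf, JOB_ID));
    // After a successful commit the data files should be present and match what was written.
    HiveIcebergTestUtils.validateFiles(table, conf, JOB_ID, 1);
    HiveIcebergTestUtils.validateData(table, expected, 0);
}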

Aggregations

JobContextImpl (org.apache.hadoop.mapred.JobContextImpl) 14
JobConf (org.apache.hadoop.mapred.JobConf) 8
Test (org.junit.Test) 7
Table (org.apache.iceberg.Table) 6
Record (org.apache.iceberg.data.Record) 5
IOException (java.io.IOException) 4
JobID (org.apache.hadoop.mapred.JobID) 4
OutputFormat (org.apache.hadoop.mapreduce.OutputFormat) 4
IgniteCheckedException (org.apache.ignite.IgniteCheckedException) 4
IgniteInterruptedCheckedException (org.apache.ignite.internal.IgniteInterruptedCheckedException) 4
JobContext (org.apache.hadoop.mapred.JobContext) 2
TaskAttemptID (org.apache.hadoop.mapred.TaskAttemptID) 2
InputSplit (org.apache.hadoop.mapreduce.InputSplit) 2
OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter) 2
StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent) 1
IdentityStreamEventDecoder (co.cask.cdap.data.stream.decoder.IdentityStreamEventDecoder) 1
AuthenticationTestContext (co.cask.cdap.security.auth.context.AuthenticationTestContext) 1
NoOpAuthorizer (co.cask.cdap.security.spi.authorization.NoOpAuthorizer) 1
File (java.io.File) 1
HadoopDummyProgressable (org.apache.flink.api.java.hadoop.mapred.wrapper.HadoopDummyProgressable) 1