
Example 1 with HadoopDefaultJobInfo

use of org.apache.ignite.internal.processors.hadoop.HadoopDefaultJobInfo in project ignite by apache.

the class HadoopV2JobSelfTest method testCustomSerializationApplying.

/**
 * Tests that {@link HadoopJobEx} provides wrapped serializer if it's set in configuration.
 *
 * @throws IgniteCheckedException If fails.
 */
public void testCustomSerializationApplying() throws IgniteCheckedException {
    JobConf cfg = new JobConf();
    cfg.setMapOutputKeyClass(IntWritable.class);
    cfg.setMapOutputValueClass(Text.class);
    cfg.set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, CustomSerialization.class.getName());
    HadoopDefaultJobInfo info = createJobInfo(cfg);
    final UUID uuid = UUID.randomUUID();
    HadoopJobId id = new HadoopJobId(uuid, 1);
    HadoopJobEx job = info.createJob(HadoopV2Job.class, id, log, null, new HadoopHelperImpl());
    HadoopTaskContext taskCtx = job.getTaskContext(new HadoopTaskInfo(HadoopTaskType.MAP, null, 0, 0, null));
    HadoopSerialization ser = taskCtx.keySerialization();
    assertEquals(HadoopSerializationWrapper.class.getName(), ser.getClass().getName());
    DataInput in = new DataInputStream(new ByteArrayInputStream(new byte[0]));
    assertEquals(TEST_SERIALIZED_VALUE, ser.read(in, null).toString());
    ser = taskCtx.valueSerialization();
    assertEquals(HadoopSerializationWrapper.class.getName(), ser.getClass().getName());
    assertEquals(TEST_SERIALIZED_VALUE, ser.read(in, null).toString());
}
Also used : HadoopHelperImpl(org.apache.ignite.internal.processors.hadoop.HadoopHelperImpl) DataInputStream(java.io.DataInputStream) HadoopJobId(org.apache.ignite.internal.processors.hadoop.HadoopJobId) DataInput(java.io.DataInput) ByteArrayInputStream(java.io.ByteArrayInputStream) HadoopJobEx(org.apache.ignite.internal.processors.hadoop.HadoopJobEx) HadoopTaskContext(org.apache.ignite.internal.processors.hadoop.HadoopTaskContext) HadoopTaskInfo(org.apache.ignite.internal.processors.hadoop.HadoopTaskInfo) HadoopDefaultJobInfo(org.apache.ignite.internal.processors.hadoop.HadoopDefaultJobInfo) HadoopSerializationWrapper(org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopSerializationWrapper) UUID(java.util.UUID) HadoopSerialization(org.apache.ignite.internal.processors.hadoop.HadoopSerialization) JobConf(org.apache.hadoop.mapred.JobConf)
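
Note that the test reads through an empty byte stream, which only works because the custom deserializer never touches its input. The CustomSerialization class itself is not shown on this page; a hypothetical sketch of such a serialization (class name, constant value and body are assumptions, not the actual test code) could look like this:

import java.io.IOException;
import java.io.InputStream;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.serializer.Deserializer;
import org.apache.hadoop.io.serializer.WritableSerialization;

/** Hypothetical stand-in for the test's CustomSerialization class. */
public class CustomSerializationSketch extends WritableSerialization {
    /** Assumed to match the test's TEST_SERIALIZED_VALUE constant. */
    private static final String TEST_SERIALIZED_VALUE = "TEST";

    /** {@inheritDoc} */
    @Override public Deserializer<Writable> getDeserializer(Class<Writable> c) {
        return new Deserializer<Writable>() {
            @Override public void open(InputStream in) throws IOException {
                // No-op: the input stream is ignored.
            }

            @Override public Writable deserialize(Writable w) throws IOException {
                // Always produce the fixed value, whatever bytes arrive.
                return new Text(TEST_SERIALIZED_VALUE);
            }

            @Override public void close() throws IOException {
                // No-op.
            }
        };
    }
}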

Example 2 with HadoopDefaultJobInfo

use of org.apache.ignite.internal.processors.hadoop.HadoopDefaultJobInfo in project ignite by apache.

the class HadoopUtils method createJobInfo.

/**
 * Creates JobInfo from hadoop configuration.
 *
 * @param cfg Hadoop configuration.
 * @return Job info.
 * @throws IgniteCheckedException If failed.
 */
public static HadoopDefaultJobInfo createJobInfo(Configuration cfg) throws IgniteCheckedException {
    JobConf jobConf = new JobConf(cfg);
    boolean hasCombiner = jobConf.get("mapred.combiner.class") != null || jobConf.get(MRJobConfig.COMBINE_CLASS_ATTR) != null;
    int numReduces = jobConf.getNumReduceTasks();
    jobConf.setBooleanIfUnset("mapred.mapper.new-api", jobConf.get(OLD_MAP_CLASS_ATTR) == null);
    if (jobConf.getUseNewMapper()) {
        String mode = "new map API";
        ensureNotSet(jobConf, "mapred.input.format.class", mode);
        ensureNotSet(jobConf, OLD_MAP_CLASS_ATTR, mode);
        if (numReduces != 0)
            ensureNotSet(jobConf, "mapred.partitioner.class", mode);
        else
            ensureNotSet(jobConf, "mapred.output.format.class", mode);
    } else {
        String mode = "map compatibility";
        ensureNotSet(jobConf, MRJobConfig.INPUT_FORMAT_CLASS_ATTR, mode);
        ensureNotSet(jobConf, MRJobConfig.MAP_CLASS_ATTR, mode);
        if (numReduces != 0)
            ensureNotSet(jobConf, MRJobConfig.PARTITIONER_CLASS_ATTR, mode);
        else
            ensureNotSet(jobConf, MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, mode);
    }
    if (numReduces != 0) {
        jobConf.setBooleanIfUnset("mapred.reducer.new-api", jobConf.get(OLD_REDUCE_CLASS_ATTR) == null);
        if (jobConf.getUseNewReducer()) {
            String mode = "new reduce API";
            ensureNotSet(jobConf, "mapred.output.format.class", mode);
            ensureNotSet(jobConf, OLD_REDUCE_CLASS_ATTR, mode);
        } else {
            String mode = "reduce compatibility";
            ensureNotSet(jobConf, MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, mode);
            ensureNotSet(jobConf, MRJobConfig.REDUCE_CLASS_ATTR, mode);
        }
    }
    Map<String, String> props = new HashMap<>();
    for (Map.Entry<String, String> entry : jobConf) props.put(entry.getKey(), entry.getValue());
    return new HadoopDefaultJobInfo(jobConf.getJobName(), jobConf.getUser(), hasCombiner, numReduces, props);
}
Also used : HashMap(java.util.HashMap) HadoopDefaultJobInfo(org.apache.ignite.internal.processors.hadoop.HadoopDefaultJobInfo) JobConf(org.apache.hadoop.mapred.JobConf) HashMap(java.util.HashMap) Map(java.util.Map)
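
For context, a minimal usage sketch of this method (the configuration values and the HadoopUtils import path below are illustrative assumptions, not taken from this page):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.ignite.IgniteCheckedException;
import org.apache.ignite.internal.processors.hadoop.HadoopDefaultJobInfo;

// Assumed location of the createJobInfo method shown above.
import static org.apache.ignite.internal.processors.hadoop.impl.HadoopUtils.createJobInfo;

public class CreateJobInfoSketch {
    public static void main(String[] args) throws IgniteCheckedException {
        Configuration cfg = new Configuration();

        // Illustrative settings; any valid Hadoop configuration works here.
        cfg.set(MRJobConfig.JOB_NAME, "word-count");
        cfg.setInt(MRJobConfig.NUM_REDUCES, 2);

        HadoopDefaultJobInfo info = createJobInfo(cfg);

        // The returned info snapshots the job name, user, combiner flag,
        // reducer count and all configuration properties.
        System.out.println(info.jobName());
    }
}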

Example 3 with HadoopDefaultJobInfo

use of org.apache.ignite.internal.processors.hadoop.HadoopDefaultJobInfo in project ignite by apache.

the class HadoopTasksV1Test method getHadoopJob.

/**
 * Creates WordCount hadoop job for API v1.
 *
 * @param inFile Input file name for the job.
 * @param outFile Output file name for the job.
 * @return Hadoop job.
 * @throws Exception If fails.
 */
@Override
public HadoopJobEx getHadoopJob(String inFile, String outFile) throws Exception {
    JobConf jobConf = HadoopWordCount1.getJob(inFile, outFile);
    setupFileSystems(jobConf);
    HadoopDefaultJobInfo jobInfo = createJobInfo(jobConf);
    UUID uuid = new UUID(0, 0);
    HadoopJobId jobId = new HadoopJobId(uuid, 0);
    return jobInfo.createJob(HadoopV2Job.class, jobId, log, null, new HadoopHelperImpl());
}
Also used : HadoopHelperImpl(org.apache.ignite.internal.processors.hadoop.HadoopHelperImpl) HadoopDefaultJobInfo(org.apache.ignite.internal.processors.hadoop.HadoopDefaultJobInfo) UUID(java.util.UUID) JobConf(org.apache.hadoop.mapred.JobConf) HadoopJobId(org.apache.ignite.internal.processors.hadoop.HadoopJobId)
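
The test pins the node part of the job id with new UUID(0, 0) instead of UUID.randomUUID(), so every run builds the same HadoopJobId. A quick sketch of what that constructor yields:

import java.util.UUID;

public class DeterministicJobIdSketch {
    public static void main(String[] args) {
        // All-zero UUID: the same "node id" on every test run.
        UUID uuid = new UUID(0, 0);

        // Prints 00000000-0000-0000-0000-000000000000
        System.out.println(uuid);
    }
}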

Example 4 with HadoopDefaultJobInfo

use of org.apache.ignite.internal.processors.hadoop.HadoopDefaultJobInfo in project ignite by apache.

the class HadoopTasksV2Test method getHadoopJob.

/**
 * Creates WordCount hadoop job for API v2.
 *
 * @param inFile Input file name for the job.
 * @param outFile Output file name for the job.
 * @return Hadoop job.
 * @throws Exception If fails.
 */
@Override
public HadoopJobEx getHadoopJob(String inFile, String outFile) throws Exception {
    Job job = Job.getInstance();
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    HadoopWordCount2.setTasksClasses(job, true, true, true, false);
    Configuration conf = job.getConfiguration();
    setupFileSystems(conf);
    FileInputFormat.setInputPaths(job, new Path(inFile));
    FileOutputFormat.setOutputPath(job, new Path(outFile));
    job.setJarByClass(HadoopWordCount2.class);
    // Note: the job info is built from the configuration of hadoopJob
    // (returned by HadoopWordCount2.getJob), not from the job configured above.
    Job hadoopJob = HadoopWordCount2.getJob(inFile, outFile);
    HadoopDefaultJobInfo jobInfo = createJobInfo(hadoopJob.getConfiguration());
    UUID uuid = new UUID(0, 0);
    HadoopJobId jobId = new HadoopJobId(uuid, 0);
    return jobInfo.createJob(HadoopV2Job.class, jobId, log, null, new HadoopHelperImpl());
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) HadoopHelperImpl(org.apache.ignite.internal.processors.hadoop.HadoopHelperImpl) HadoopDefaultJobInfo(org.apache.ignite.internal.processors.hadoop.HadoopDefaultJobInfo) HadoopV2Job(org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2Job) Job(org.apache.hadoop.mapreduce.Job) UUID(java.util.UUID) HadoopJobId(org.apache.ignite.internal.processors.hadoop.HadoopJobId)

Example 5 with HadoopDefaultJobInfo

use of org.apache.ignite.internal.processors.hadoop.HadoopDefaultJobInfo in project ignite by apache.

the class HadoopProtocolSubmitJobTask method run.

/** {@inheritDoc} */
@Override
public HadoopJobStatus run(ComputeJobContext jobCtx, Hadoop hadoop, HadoopProtocolTaskArguments args) throws IgniteCheckedException {
    UUID nodeId = UUID.fromString(args.<String>get(0));
    Integer id = args.get(1);
    HadoopDefaultJobInfo info = args.get(2);
    assert nodeId != null;
    assert id != null;
    assert info != null;
    HadoopJobId jobId = new HadoopJobId(nodeId, id);
    hadoop.submit(jobId, info);
    HadoopJobStatus res = hadoop.status(jobId);
    if (res == null) // Submission failed.
        res = new HadoopJobStatus(jobId, info.jobName(), info.user(), 0, 0, 0, 0, PHASE_CANCELLING, true, 1);
    return res;
}
Also used : HadoopJobStatus(org.apache.ignite.internal.processors.hadoop.HadoopJobStatus) HadoopDefaultJobInfo(org.apache.ignite.internal.processors.hadoop.HadoopDefaultJobInfo) UUID(java.util.UUID) HadoopJobId(org.apache.ignite.internal.processors.hadoop.HadoopJobId)
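
Note the argument layout this task consumes: index 0 carries the submitter node id as a string, index 1 the per-node job counter, and index 2 the HadoopDefaultJobInfo itself; node id and counter together form the cluster-wide HadoopJobId. A null status right after submission is treated as a failed submit and reported as a job already in the cancelling phase.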

Aggregations

HadoopDefaultJobInfo (org.apache.ignite.internal.processors.hadoop.HadoopDefaultJobInfo) 5
UUID (java.util.UUID) 4
HadoopJobId (org.apache.ignite.internal.processors.hadoop.HadoopJobId) 4
JobConf (org.apache.hadoop.mapred.JobConf) 3
HadoopHelperImpl (org.apache.ignite.internal.processors.hadoop.HadoopHelperImpl) 3
ByteArrayInputStream (java.io.ByteArrayInputStream) 1
DataInput (java.io.DataInput) 1
DataInputStream (java.io.DataInputStream) 1
HashMap (java.util.HashMap) 1
Map (java.util.Map) 1
Configuration (org.apache.hadoop.conf.Configuration) 1
Path (org.apache.hadoop.fs.Path) 1
Job (org.apache.hadoop.mapreduce.Job) 1
HadoopJobEx (org.apache.ignite.internal.processors.hadoop.HadoopJobEx) 1
HadoopJobStatus (org.apache.ignite.internal.processors.hadoop.HadoopJobStatus) 1
HadoopSerialization (org.apache.ignite.internal.processors.hadoop.HadoopSerialization) 1
HadoopTaskContext (org.apache.ignite.internal.processors.hadoop.HadoopTaskContext) 1
HadoopTaskInfo (org.apache.ignite.internal.processors.hadoop.HadoopTaskInfo) 1
HadoopSerializationWrapper (org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopSerializationWrapper) 1
HadoopV2Job (org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2Job) 1