Search in sources :

Example 11 with HadoopJobId

use of org.apache.ignite.internal.processors.hadoop.HadoopJobId in project ignite by apache.

From the class HadoopTeraSortTest, the method teraSort.

/**
 * Runs the actual TeraSort job through the Ignite API.
 *
 * @param gzip Whether to use GZIP.
 */
protected final void teraSort(boolean gzip) throws Exception {
    System.out.println("TeraSort ===============================================================");

    // Remove any output left over from a previous run.
    getFileSystem().delete(new Path(sortOutDir), true);

    final JobConf conf = new JobConf();

    conf.setUser(getUser());
    conf.set("fs.defaultFS", getFsBase());

    log().info("Desired number of reduces: " + numReduces());

    conf.set("mapreduce.job.reduces", String.valueOf(numReduces()));

    log().info("Desired number of maps: " + numMaps());

    final long desiredSplitSize = dataSizeBytes() / numMaps();

    log().info("Desired split size: " + desiredSplitSize);

    // Force the split to be of the desired size:
    conf.set("mapred.min.split.size", String.valueOf(desiredSplitSize));
    conf.set("mapred.max.split.size", String.valueOf(desiredSplitSize));

    // Ignite-specific shuffle tuning.
    conf.setBoolean(HadoopJobProperty.SHUFFLE_MAPPER_STRIPED_OUTPUT.propertyName(), true);
    conf.setInt(HadoopJobProperty.SHUFFLE_MSG_SIZE.propertyName(), 4096);

    if (gzip)
        conf.setBoolean(HadoopJobProperty.SHUFFLE_MSG_GZIP.propertyName(), true);

    conf.set(HadoopJobProperty.JOB_PARTIALLY_RAW_COMPARATOR.propertyName(),
        TextPartiallyRawComparator.class.getName());

    Job job = setupConfig(conf);

    HadoopJobId jobId = new HadoopJobId(UUID.randomUUID(), 1);

    IgniteInternalFuture<?> fut = grid(0).hadoop().submit(jobId, createJobInfo(job.getConfiguration(), null));

    // Block until the job finishes (or fails).
    fut.get();
}
Also used : Path(org.apache.hadoop.fs.Path) TextPartiallyRawComparator(org.apache.ignite.hadoop.io.TextPartiallyRawComparator) Job(org.apache.hadoop.mapreduce.Job) JobConf(org.apache.hadoop.mapred.JobConf) HadoopJobId(org.apache.ignite.internal.processors.hadoop.HadoopJobId)

Example 12 with HadoopJobId

use of org.apache.ignite.internal.processors.hadoop.HadoopJobId in project ignite by apache.

From the class HadoopV2JobSelfTest, the method testCustomSerializationApplying.

/**
 * Tests that {@link HadoopJobEx} provides wrapped serializer if it's set in configuration.
 *
 * @throws IgniteCheckedException If fails.
 */
public void testCustomSerializationApplying() throws IgniteCheckedException {
    JobConf jobConf = new JobConf();

    jobConf.setMapOutputKeyClass(IntWritable.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, CustomSerialization.class.getName());

    HadoopDefaultJobInfo jobInfo = createJobInfo(jobConf, null);

    HadoopJobId jobId = new HadoopJobId(UUID.randomUUID(), 1);

    HadoopJobEx job = jobInfo.createJob(HadoopV2Job.class, jobId, log, null, new HadoopHelperImpl());

    HadoopTaskContext ctx = job.getTaskContext(new HadoopTaskInfo(HadoopTaskType.MAP, null, 0, 0, null));

    // Empty stream: CustomSerialization is expected to produce the test value regardless of input.
    DataInput input = new DataInputStream(new ByteArrayInputStream(new byte[0]));

    // Key serialization must come back wrapped and deserialize to the test value.
    HadoopSerialization keySer = ctx.keySerialization();

    assertEquals(HadoopSerializationWrapper.class.getName(), keySer.getClass().getName());
    assertEquals(TEST_SERIALIZED_VALUE, keySer.read(input, null).toString());

    // Same expectations for value serialization.
    HadoopSerialization valSer = ctx.valueSerialization();

    assertEquals(HadoopSerializationWrapper.class.getName(), valSer.getClass().getName());
    assertEquals(TEST_SERIALIZED_VALUE, valSer.read(input, null).toString());
}
Also used : HadoopHelperImpl(org.apache.ignite.internal.processors.hadoop.HadoopHelperImpl) DataInputStream(java.io.DataInputStream) HadoopJobId(org.apache.ignite.internal.processors.hadoop.HadoopJobId) DataInput(java.io.DataInput) ByteArrayInputStream(java.io.ByteArrayInputStream) HadoopJobEx(org.apache.ignite.internal.processors.hadoop.HadoopJobEx) HadoopTaskContext(org.apache.ignite.internal.processors.hadoop.HadoopTaskContext) HadoopTaskInfo(org.apache.ignite.internal.processors.hadoop.HadoopTaskInfo) HadoopDefaultJobInfo(org.apache.ignite.internal.processors.hadoop.HadoopDefaultJobInfo) HadoopSerializationWrapper(org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopSerializationWrapper) UUID(java.util.UUID) HadoopSerialization(org.apache.ignite.internal.processors.hadoop.HadoopSerialization) JobConf(org.apache.hadoop.mapred.JobConf)

Example 13 with HadoopJobId

use of org.apache.ignite.internal.processors.hadoop.HadoopJobId in project ignite by apache.

From the class HadoopClientProtocol, the method getNewJobID.

/**
 * {@inheritDoc}
 */
@Override
public JobID getNewJobID() throws IOException, InterruptedException {
    try {
        // Stamp the request time so the job ID round-trip can be profiled.
        conf.setLong(HadoopCommonUtils.REQ_NEW_JOBID_TS_PROPERTY, U.currentTimeMillis());

        HadoopJobId id = execute(HadoopProtocolNextTaskIdTask.class);

        // Stamp the response time symmetrically.
        conf.setLong(HadoopCommonUtils.RESPONSE_NEW_JOBID_TS_PROPERTY, U.currentTimeMillis());

        return new JobID(id.globalId().toString(), id.localId());
    }
    catch (GridClientException e) {
        // Surface client-level failures as IOException per the Hadoop protocol contract.
        throw new IOException("Failed to get new job ID.", e);
    }
}
Also used : GridClientException(org.apache.ignite.internal.client.GridClientException) IOException(java.io.IOException) HadoopJobId(org.apache.ignite.internal.processors.hadoop.HadoopJobId) JobID(org.apache.hadoop.mapreduce.JobID)

Example 14 with HadoopJobId

use of org.apache.ignite.internal.processors.hadoop.HadoopJobId in project ignite by apache.

From the class HadoopTaskExecutionSelfTest, the method testMapperException.

/**
 * Checks that a mapper failure is propagated to the job future.
 *
 * @throws Exception If failed.
 */
public void testMapperException() throws Exception {
    prepareFile("/testFile", 1000);

    Configuration conf = new Configuration();

    conf.setStrings("fs.igfs.impl", IgniteHadoopFileSystem.class.getName());

    Job job = Job.getInstance(conf);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapperClass(FailMapper.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TextInputFormat.class);

    FileInputFormat.setInputPaths(job, new Path("igfs://" + igfsName + "@/"));
    FileOutputFormat.setOutputPath(job, new Path("igfs://" + igfsName + "@/output/"));

    job.setJarByClass(getClass());

    HadoopJobId jobId = new HadoopJobId(UUID.randomUUID(), 3);

    final IgniteInternalFuture<?> fut = grid(0).hadoop().submit(jobId, createJobInfo(job.getConfiguration(), null));

    // The failing mapper must surface as IgniteCheckedException when waiting on the future.
    GridTestUtils.assertThrows(log, () -> {
        fut.get();

        return null;
    }, IgniteCheckedException.class, null);
}
Also used : IgfsPath(org.apache.ignite.igfs.IgfsPath) Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) HadoopConfiguration(org.apache.ignite.configuration.HadoopConfiguration) FileSystemConfiguration(org.apache.ignite.configuration.FileSystemConfiguration) Job(org.apache.hadoop.mapreduce.Job) HadoopJobId(org.apache.ignite.internal.processors.hadoop.HadoopJobId) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) IOException(java.io.IOException) HadoopTaskCancelledException(org.apache.ignite.internal.processors.hadoop.HadoopTaskCancelledException) IgniteHadoopFileSystem(org.apache.ignite.hadoop.fs.v1.IgniteHadoopFileSystem)

Example 15 with HadoopJobId

use of org.apache.ignite.internal.processors.hadoop.HadoopJobId in project ignite by apache.

From the class HadoopTaskExecutionSelfTest, the method testTaskCancelling.

/**
 * Checks that failing one mapper cancels the remaining tasks of the job.
 *
 * @throws Exception If failed.
 */
public void testTaskCancelling() throws Exception {
    Configuration conf = prepareJobForCancelling();

    HadoopJobId jobId = new HadoopJobId(UUID.randomUUID(), 1);

    final IgniteInternalFuture<?> fut = grid(0).hadoop().submit(jobId, createJobInfo(conf, null));

    // Wait until the input has been split into tasks.
    if (!GridTestUtils.waitForCondition(() -> splitsCount.get() > 0, 20000)) {
        U.dumpThreads(log);

        assertTrue(false);
    }

    // Wait until a task has started for every split.
    if (!GridTestUtils.waitForCondition(() -> executedTasks.get() == splitsCount.get(), 20000)) {
        U.dumpThreads(log);

        assertTrue(false);
    }

    // Fail mapper with id "1", cancels others
    failMapperId.set(1);

    GridTestUtils.assertThrows(log, () -> {
        fut.get();

        return null;
    }, IgniteCheckedException.class, null);

    // Every started task except the deliberately failed one must have been cancelled.
    assertEquals(executedTasks.get(), cancelledTasks.get() + 1);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) HadoopConfiguration(org.apache.ignite.configuration.HadoopConfiguration) FileSystemConfiguration(org.apache.ignite.configuration.FileSystemConfiguration) GridAbsPredicate(org.apache.ignite.internal.util.lang.GridAbsPredicate) HadoopJobId(org.apache.ignite.internal.processors.hadoop.HadoopJobId) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) IOException(java.io.IOException) HadoopTaskCancelledException(org.apache.ignite.internal.processors.hadoop.HadoopTaskCancelledException)

Aggregations

HadoopJobId (org.apache.ignite.internal.processors.hadoop.HadoopJobId)39 UUID (java.util.UUID)15 Path (org.apache.hadoop.fs.Path)13 Job (org.apache.hadoop.mapreduce.Job)13 IgniteCheckedException (org.apache.ignite.IgniteCheckedException)10 Configuration (org.apache.hadoop.conf.Configuration)9 HadoopConfiguration (org.apache.ignite.configuration.HadoopConfiguration)7 IgfsPath (org.apache.ignite.igfs.IgfsPath)7 IOException (java.io.IOException)6 JobConf (org.apache.hadoop.mapred.JobConf)5 FileSystemConfiguration (org.apache.ignite.configuration.FileSystemConfiguration)5 HadoopDefaultJobInfo (org.apache.ignite.internal.processors.hadoop.HadoopDefaultJobInfo)4 IgniteHadoopFileSystem (org.apache.ignite.hadoop.fs.v1.IgniteHadoopFileSystem)3 HadoopHelperImpl (org.apache.ignite.internal.processors.hadoop.HadoopHelperImpl)3 HadoopJobEx (org.apache.ignite.internal.processors.hadoop.HadoopJobEx)3 HadoopTaskCancelledException (org.apache.ignite.internal.processors.hadoop.HadoopTaskCancelledException)3 HadoopTaskInfo (org.apache.ignite.internal.processors.hadoop.HadoopTaskInfo)3 ArrayList (java.util.ArrayList)2 IgniteConfiguration (org.apache.ignite.configuration.IgniteConfiguration)2 HadoopMapReducePlan (org.apache.ignite.hadoop.HadoopMapReducePlan)2