Use of org.apache.ignite.internal.processors.hadoop.HadoopJobId in project ignite by apache.
The class HadoopTeraSortTest, method teraSort.
/**
 * Runs the actual TeraSort job through the Ignite API.
 *
 * @param gzip Whether to GZIP-compress shuffle messages.
 */
protected final void teraSort(boolean gzip) throws Exception {
    System.out.println("TeraSort ===============================================================");

    getFileSystem().delete(new Path(sortOutDir), true);

    final JobConf jobConf = new JobConf();

    jobConf.setUser(getUser());
    jobConf.set("fs.defaultFS", getFsBase());

    log().info("Desired number of reduces: " + numReduces());

    jobConf.set("mapreduce.job.reduces", String.valueOf(numReduces()));

    log().info("Desired number of maps: " + numMaps());

    final long splitSize = dataSizeBytes() / numMaps();

    log().info("Desired split size: " + splitSize);

    // Force the split to be of the desired size:
    jobConf.set("mapred.min.split.size", String.valueOf(splitSize));
    jobConf.set("mapred.max.split.size", String.valueOf(splitSize));

    jobConf.setBoolean(HadoopJobProperty.SHUFFLE_MAPPER_STRIPED_OUTPUT.propertyName(), true);
    jobConf.setInt(HadoopJobProperty.SHUFFLE_MSG_SIZE.propertyName(), 4096);

    if (gzip)
        jobConf.setBoolean(HadoopJobProperty.SHUFFLE_MSG_GZIP.propertyName(), true);

    jobConf.set(HadoopJobProperty.JOB_PARTIALLY_RAW_COMPARATOR.propertyName(),
        TextPartiallyRawComparator.class.getName());

    Job job = setupConfig(jobConf);

    HadoopJobId jobId = new HadoopJobId(UUID.randomUUID(), 1);

    IgniteInternalFuture<?> fut = grid(0).hadoop().submit(jobId, createJobInfo(job.getConfiguration(), null));

    fut.get();
}
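For reference, the HadoopJobId used above pairs a global UUID with a node-local counter. A minimal sketch of that identity pattern, assuming only the constructor and the globalId()/localId() accessors that appear in these examples:

    // A global UUID plus a local counter identify a job; the accessors
    // used below are the same ones HadoopClientProtocol.getNewJobID()
    // (shown further down) relies on.
    UUID globalId = UUID.randomUUID();

    HadoopJobId first = new HadoopJobId(globalId, 1);
    HadoopJobId second = new HadoopJobId(globalId, 2);

    // Same originating node, distinct local job counters.
    assert first.globalId().equals(second.globalId());
    assert first.localId() != second.localId();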
Use of org.apache.ignite.internal.processors.hadoop.HadoopJobId in project ignite by apache.
The class HadoopV2JobSelfTest, method testCustomSerializationApplying.
/**
 * Tests that {@link HadoopJobEx} provides a wrapped serializer if one is set in the configuration.
 *
 * @throws IgniteCheckedException If failed.
 */
public void testCustomSerializationApplying() throws IgniteCheckedException {
    JobConf cfg = new JobConf();

    cfg.setMapOutputKeyClass(IntWritable.class);
    cfg.setMapOutputValueClass(Text.class);
    cfg.set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, CustomSerialization.class.getName());

    HadoopDefaultJobInfo info = createJobInfo(cfg, null);

    final UUID uuid = UUID.randomUUID();

    HadoopJobId id = new HadoopJobId(uuid, 1);

    HadoopJobEx job = info.createJob(HadoopV2Job.class, id, log, null, new HadoopHelperImpl());

    HadoopTaskContext taskCtx = job.getTaskContext(new HadoopTaskInfo(HadoopTaskType.MAP, null, 0, 0, null));

    HadoopSerialization ser = taskCtx.keySerialization();

    assertEquals(HadoopSerializationWrapper.class.getName(), ser.getClass().getName());

    DataInput in = new DataInputStream(new ByteArrayInputStream(new byte[0]));

    assertEquals(TEST_SERIALIZED_VALUE, ser.read(in, null).toString());

    ser = taskCtx.valueSerialization();

    assertEquals(HadoopSerializationWrapper.class.getName(), ser.getClass().getName());

    assertEquals(TEST_SERIALIZED_VALUE, ser.read(in, null).toString());
}
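The CustomSerialization class referenced above is not shown in this excerpt. A hypothetical Hadoop Serialization with the behavior the assertions require (its deserializer ignores the input stream and always yields TEST_SERIALIZED_VALUE) might look roughly like the sketch below; the class name and the fixed value are illustrative, not taken from the Ignite sources.

    import java.io.IOException;
    import java.io.InputStream;
    import java.io.OutputStream;

    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.serializer.Deserializer;
    import org.apache.hadoop.io.serializer.Serialization;
    import org.apache.hadoop.io.serializer.Serializer;

    /** Hypothetical serialization: always deserializes to a fixed value. */
    public class FixedValueSerialization implements Serialization<Text> {
        /** The value every read is expected to produce (illustrative). */
        public static final String TEST_SERIALIZED_VALUE = "test";

        @Override public boolean accept(Class<?> c) {
            return Text.class.equals(c);
        }

        @Override public Serializer<Text> getSerializer(Class<Text> c) {
            return new Serializer<Text>() {
                @Override public void open(OutputStream out) {
                    // No-op: nothing is ever written in this sketch.
                }

                @Override public void serialize(Text t) throws IOException {
                    throw new IOException("Serialization not needed for this sketch.");
                }

                @Override public void close() {
                    // No-op.
                }
            };
        }

        @Override public Deserializer<Text> getDeserializer(Class<Text> c) {
            return new Deserializer<Text>() {
                @Override public void open(InputStream in) {
                    // No-op: the stream is intentionally ignored.
                }

                @Override public Text deserialize(Text t) {
                    // Always return the fixed value, regardless of input.
                    return new Text(TEST_SERIALIZED_VALUE);
                }

                @Override public void close() {
                    // No-op.
                }
            };
        }
    }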
Use of org.apache.ignite.internal.processors.hadoop.HadoopJobId in project ignite by apache.
The class HadoopClientProtocol, method getNewJobID.
/** {@inheritDoc} */
@Override public JobID getNewJobID() throws IOException, InterruptedException {
    try {
        conf.setLong(HadoopCommonUtils.REQ_NEW_JOBID_TS_PROPERTY, U.currentTimeMillis());

        HadoopJobId jobID = execute(HadoopProtocolNextTaskIdTask.class);

        conf.setLong(HadoopCommonUtils.RESPONSE_NEW_JOBID_TS_PROPERTY, U.currentTimeMillis());

        return new JobID(jobID.globalId().toString(), jobID.localId());
    }
    catch (GridClientException e) {
        throw new IOException("Failed to get new job ID.", e);
    }
}
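The mapping from HadoopJobId to the standard MapReduce JobID above is direct: the global UUID becomes the jtIdentifier string and the local counter becomes the numeric id. A sketch of the reverse conversion, assuming the jtIdentifier carries the UUID string exactly as written by getNewJobID():

    // Round-trip sketch: JobID.getJtIdentifier() returns the first
    // constructor argument and getId() the second, so the conversion
    // performed in getNewJobID() can be reversed.
    JobID mrJobId = new JobID(UUID.randomUUID().toString(), 1);

    HadoopJobId igniteJobId = new HadoopJobId(
        UUID.fromString(mrJobId.getJtIdentifier()), mrJobId.getId());

    assert mrJobId.getJtIdentifier().equals(igniteJobId.globalId().toString());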
Use of org.apache.ignite.internal.processors.hadoop.HadoopJobId in project ignite by apache.
The class HadoopTaskExecutionSelfTest, method testMapperException.
/**
 * @throws Exception If failed.
 */
public void testMapperException() throws Exception {
    prepareFile("/testFile", 1000);

    Configuration cfg = new Configuration();

    cfg.setStrings("fs.igfs.impl", IgniteHadoopFileSystem.class.getName());

    Job job = Job.getInstance(cfg);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(FailMapper.class);

    job.setNumReduceTasks(0);

    job.setInputFormatClass(TextInputFormat.class);

    FileInputFormat.setInputPaths(job, new Path("igfs://" + igfsName + "@/"));
    FileOutputFormat.setOutputPath(job, new Path("igfs://" + igfsName + "@/output/"));

    job.setJarByClass(getClass());

    final IgniteInternalFuture<?> fut = grid(0).hadoop().submit(new HadoopJobId(UUID.randomUUID(), 3),
        createJobInfo(job.getConfiguration(), null));

    GridTestUtils.assertThrows(log, new Callable<Object>() {
        @Override public Object call() throws Exception {
            fut.get();

            return null;
        }
    }, IgniteCheckedException.class, null);
}
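FailMapper itself is not shown in this excerpt. A hypothetical mapper with the behavior the test depends on (every map() call throws, so fut.get() surfaces an IgniteCheckedException) could be as simple as the sketch below; the key/value types follow TextInputFormat and the output classes set on the job, and the exception message is illustrative.

    import java.io.IOException;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    /** Hypothetical mapper that fails on the first record it sees. */
    public class FailMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override protected void map(LongWritable key, Text val, Context ctx) throws IOException {
            // Any exception thrown from map() fails the task and, eventually, the job.
            throw new IOException("Expected test failure on key " + key);
        }
    }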
Use of org.apache.ignite.internal.processors.hadoop.HadoopJobId in project ignite by apache.
The class HadoopTaskExecutionSelfTest, method testTaskCancelling.
/**
 * @throws Exception If failed.
 */
public void testTaskCancelling() throws Exception {
    Configuration cfg = prepareJobForCancelling();

    HadoopJobId jobId = new HadoopJobId(UUID.randomUUID(), 1);

    final IgniteInternalFuture<?> fut = grid(0).hadoop().submit(jobId, createJobInfo(cfg, null));

    if (!GridTestUtils.waitForCondition(new GridAbsPredicate() {
        @Override public boolean apply() {
            return splitsCount.get() > 0;
        }
    }, 20000)) {
        U.dumpThreads(log);

        assertTrue(false);
    }

    if (!GridTestUtils.waitForCondition(new GridAbsPredicate() {
        @Override public boolean apply() {
            return executedTasks.get() == splitsCount.get();
        }
    }, 20000)) {
        U.dumpThreads(log);

        assertTrue(false);
    }

    // Fail the mapper with id 1; this should cause the remaining tasks to be cancelled.
    failMapperId.set(1);

    GridTestUtils.assertThrows(log, new Callable<Object>() {
        @Override public Object call() throws Exception {
            fut.get();

            return null;
        }
    }, IgniteCheckedException.class, null);

    // Every executed task except the single failed one must have been cancelled.
    assertEquals(executedTasks.get(), cancelledTasks.get() + 1);
}
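The two anonymous GridAbsPredicate waits above follow the same shape. Assuming GridAbsPredicate declares a single apply() method (as its use here suggests), on Java 8+ each wait can be written as a lambda, e.g.:

    // Equivalent, more compact form of the first wait; the thread dump
    // on failure is omitted for brevity.
    assertTrue("No splits were processed in 20 seconds.",
        GridTestUtils.waitForCondition(() -> splitsCount.get() > 0, 20_000));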