Use of org.apache.hadoop.mapred.JobClient in project hadoop by apache.
The class RandomTextWriter, method run.
/**
* This is the main routine for launching a distributed random write job.
* It runs 10 maps per node, and each map writes 1 gig of data to a DFS file.
* The reduce doesn't do anything.
*
* @throws IOException
*/
public int run(String[] args) throws Exception {
  if (args.length == 0) {
    return printUsage();
  }
  Configuration conf = getConf();
  JobClient client = new JobClient(conf);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
  long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 1 * 1024 * 1024 * 1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have " + BYTES_PER_MAP + " set to 0");
    return -2;
  }
  long totalBytesToWrite = conf.getLong(TOTAL_BYTES, numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
  }
  conf.setInt(MRJobConfig.NUM_MAPS, numMaps);
  Job job = Job.getInstance(conf);
  job.setJarByClass(RandomTextWriter.class);
  job.setJobName("random-text-writer");
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormatClass(RandomWriter.RandomInputFormat.class);
  job.setMapperClass(RandomTextMapper.class);
  Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
  List<String> otherArgs = new ArrayList<String>();
  for (int i = 0; i < args.length; ++i) {
    try {
      if ("-outFormat".equals(args[i])) {
        outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
      } else {
        otherArgs.add(args[i]);
      }
    } catch (ArrayIndexOutOfBoundsException except) {
      System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
      // exits
      return printUsage();
    }
  }
  job.setOutputFormatClass(outputFormatClass);
  FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));
  System.out.println("Running " + numMaps + " maps.");
  // reducer NONE
  job.setNumReduceTasks(0);
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");
  return ret;
}
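RandomTextWriter implements the Tool interface, so this run() method is normally reached through ToolRunner. A minimal, hypothetical driver sketch follows; the property keys are assumptions standing in for the MAPS_PER_HOST and BYTES_PER_MAP constants referenced above, and the output path is made up.

Configuration conf = new Configuration();
// Assumed key names for the constants used in run(); shrink the payload for a quick test run.
conf.setInt("mapreduce.randomtextwriter.mapsperhost", 2);
conf.setLong("mapreduce.randomtextwriter.bytespermap", 64L * 1024 * 1024);
// run() treats the remaining argument as the output directory; "-outFormat" is optional.
int exitCode = ToolRunner.run(conf, new RandomTextWriter(), new String[] { "/tmp/random-text-out" });
System.exit(exitCode);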
Use of org.apache.hadoop.mapred.JobClient in project hadoop by apache.
The class RandomWriter, method run.
/**
* This is the main routine for launching a distributed random write job.
* It runs 10 maps per node, and each map writes 1 gig of data to a DFS file.
* The reduce doesn't do anything.
*
* @throws IOException
*/
public int run(String[] args) throws Exception {
  if (args.length == 0) {
    System.out.println("Usage: writer <out-dir>");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }
  Path outDir = new Path(args[0]);
  Configuration conf = getConf();
  JobClient client = new JobClient(conf);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
  long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 1 * 1024 * 1024 * 1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have " + BYTES_PER_MAP + " set to 0");
    return -2;
  }
  long totalBytesToWrite = conf.getLong(TOTAL_BYTES, numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
  }
  conf.setInt(MRJobConfig.NUM_MAPS, numMaps);
  Job job = Job.getInstance(conf);
  job.setJarByClass(RandomWriter.class);
  job.setJobName("random-writer");
  FileOutputFormat.setOutputPath(job, outDir);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(BytesWritable.class);
  job.setInputFormatClass(RandomInputFormat.class);
  job.setMapperClass(RandomMapper.class);
  job.setReducerClass(Reducer.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  System.out.println("Running " + numMaps + " maps.");
  // reducer NONE
  job.setNumReduceTasks(0);
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");
  return ret;
}
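The job above emits SequenceFiles of BytesWritable key/value pairs with no reduce step. A minimal read-back sketch could verify that output; the part-file path below is an assumption (map-only jobs normally name their outputs part-m-xxxxx).

Configuration conf = new Configuration();
Path part = new Path("/tmp/random-out/part-m-00000"); // assumed output location
SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(part));
try {
  BytesWritable key = new BytesWritable();
  BytesWritable value = new BytesWritable();
  long records = 0;
  while (reader.next(key, value)) {
    records++; // each record is a random key/value pair emitted by RandomMapper
  }
  System.out.println("Read " + records + " random records.");
} finally {
  reader.close();
}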
Use of org.apache.hadoop.mapred.JobClient in project hadoop by apache.
The class TestChainMapReduce, method testChain.
@Test
public void testChain() throws Exception {
  Path inDir = new Path("testing/chain/input");
  Path outDir = new Path("testing/chain/output");
  // Hack for local FS that does not have the concept of a 'mounting point'
  if (isLocalFS()) {
    String localPathRoot = System.getProperty("test.build.data", "/tmp").replace(' ', '+');
    inDir = new Path(localPathRoot, inDir);
    outDir = new Path(localPathRoot, outDir);
  }
  JobConf conf = createJobConf();
  conf.setBoolean("localFS", isLocalFS());
  conf.setInt("mapreduce.job.maps", 1);
  cleanFlags(conf);
  FileSystem fs = FileSystem.get(conf);
  fs.delete(outDir, true);
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir.toString());
  }
  DataOutputStream file = fs.create(new Path(inDir, "part-0"));
  file.writeBytes("1\n2\n");
  file.close();
  conf.setJobName("chain");
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);
  conf.set("a", "X");
  JobConf mapAConf = new JobConf(false);
  mapAConf.set("a", "A");
  ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class, LongWritable.class, Text.class, true, mapAConf);
  ChainMapper.addMapper(conf, BMap.class, LongWritable.class, Text.class, LongWritable.class, Text.class, false, null);
  JobConf reduceConf = new JobConf(false);
  reduceConf.set("a", "C");
  ChainReducer.setReducer(conf, CReduce.class, LongWritable.class, Text.class, LongWritable.class, Text.class, true, reduceConf);
  ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class, LongWritable.class, Text.class, false, null);
  JobConf mapEConf = new JobConf(false);
  mapEConf.set("a", "E");
  ChainReducer.addMapper(conf, EMap.class, LongWritable.class, Text.class, LongWritable.class, Text.class, true, mapEConf);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  JobClient jc = new JobClient(conf);
  RunningJob job = jc.submitJob(conf);
  while (!job.isComplete()) {
    Thread.sleep(100);
  }
  assertTrue(getFlag(conf, "configure.A"));
  assertTrue(getFlag(conf, "configure.B"));
  assertTrue(getFlag(conf, "configure.C"));
  assertTrue(getFlag(conf, "configure.D"));
  assertTrue(getFlag(conf, "configure.E"));
  assertTrue(getFlag(conf, "map.A.value.1"));
  assertTrue(getFlag(conf, "map.A.value.2"));
  assertTrue(getFlag(conf, "map.B.value.1"));
  assertTrue(getFlag(conf, "map.B.value.2"));
  assertTrue(getFlag(conf, "reduce.C.value.2"));
  assertTrue(getFlag(conf, "reduce.C.value.1"));
  assertTrue(getFlag(conf, "map.D.value.1"));
  assertTrue(getFlag(conf, "map.D.value.2"));
  assertTrue(getFlag(conf, "map.E.value.1"));
  assertTrue(getFlag(conf, "map.E.value.2"));
  assertTrue(getFlag(conf, "close.A"));
  assertTrue(getFlag(conf, "close.B"));
  assertTrue(getFlag(conf, "close.C"));
  assertTrue(getFlag(conf, "close.D"));
  assertTrue(getFlag(conf, "close.E"));
}
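AMap, BMap, CReduce, DMap, and EMap are helper classes defined elsewhere in TestChainMapReduce; they record the configure/map/reduce/close flags asserted above. Purely as an illustrative sketch of the shape such a chain stage takes in the old mapred API (not the test's actual implementation), a pass-through stage looks like this:

// Illustrative only -- not the real AMap. ChainMapper pipes each stage's output
// into the next stage's map() call.
public static class PassThroughMap extends MapReduceBase
    implements Mapper<LongWritable, Text, LongWritable, Text> {
  public void map(LongWritable key, Text value,
      OutputCollector<LongWritable, Text> output, Reporter reporter) throws IOException {
    // Forward the record unchanged; a real stage would transform or record it here.
    output.collect(key, value);
  }
}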
Use of org.apache.hadoop.mapred.JobClient in project hadoop by apache.
The class TestKeyFieldBasedComparator, method configure.
public void configure(String keySpec, int expect) throws Exception {
  Path testdir = new Path(TEST_DIR.getAbsolutePath());
  Path inDir = new Path(testdir, "in");
  Path outDir = new Path(testdir, "out");
  FileSystem fs = getFileSystem();
  fs.delete(testdir, true);
  conf.setInputFormat(TextInputFormat.class);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(LongWritable.class);
  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(1);
  conf.setOutputFormat(TextOutputFormat.class);
  conf.setOutputKeyComparatorClass(KeyFieldBasedComparator.class);
  conf.setKeyFieldComparatorOptions(keySpec);
  conf.setKeyFieldPartitionerOptions("-k1.1,1.1");
  conf.set(JobContext.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");
  conf.setMapperClass(InverseMapper.class);
  conf.setReducerClass(IdentityReducer.class);
  if (!fs.mkdirs(testdir)) {
    throw new IOException("Mkdirs failed to create " + testdir.toString());
  }
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir.toString());
  }
  // set up two lines of input data in one file
  Path inFile = new Path(inDir, "part0");
  FileOutputStream fos = new FileOutputStream(inFile.toString());
  fos.write((line1 + "\n").getBytes());
  fos.write((line2 + "\n").getBytes());
  fos.close();
  JobClient jc = new JobClient(conf);
  RunningJob r_job = jc.submitJob(conf);
  while (!r_job.isComplete()) {
    Thread.sleep(1000);
  }
  if (!r_job.isSuccessful()) {
    fail("Oops! The job broke due to an unexpected error");
  }
  Path[] outputFiles = FileUtil.stat2Paths(getFileSystem().listStatus(outDir, new Utils.OutputFileUtils.OutputFilesFilter()));
  if (outputFiles.length > 0) {
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    String line = reader.readLine();
    // make sure we get the expected first line, and also that there are two lines
    if (expect == 1) {
      assertTrue(line.startsWith(line1));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line2));
    }
    line = reader.readLine();
    if (expect == 1) {
      assertTrue(line.startsWith(line2));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line1));
    }
    reader.close();
  }
}
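The keySpec argument uses the same -k field syntax as Unix sort, interpreted by KeyFieldBasedComparator, and expect declares which input line should sort first. Illustrative calls to this helper might look like the following; the specs and expected orderings are made-up examples, not the test's actual cases.

configure("-k2,2", 1);   // sort on the second field as text; expect line1 to come out first
configure("-k2,2nr", 2); // sort on the second field numerically, reversed; expect line2 first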
Use of org.apache.hadoop.mapred.JobClient in project hadoop by apache.
The class TestMultipleOutputs, method _testMOWithJavaSerialization.
protected void _testMOWithJavaSerialization(boolean withCounters) throws Exception {
  Path inDir = getDir(IN_DIR);
  Path outDir = getDir(OUT_DIR);
  JobConf conf = createJobConf();
  FileSystem fs = FileSystem.get(conf);
  DataOutputStream file = fs.create(new Path(inDir, "part-0"));
  file.writeBytes("a\nb\n\nc\nd\ne");
  file.close();
  fs.delete(inDir, true);
  fs.delete(outDir, true);
  file = fs.create(new Path(inDir, "part-1"));
  file.writeBytes("a\nb\n\nc\nd\ne");
  file.close();
  conf.setJobName("mo");
  conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization," + "org.apache.hadoop.io.serializer.WritableSerialization");
  conf.setInputFormat(TextInputFormat.class);
  conf.setMapOutputKeyClass(Long.class);
  conf.setMapOutputValueClass(String.class);
  conf.setOutputKeyComparatorClass(JavaSerializationComparator.class);
  conf.setOutputKeyClass(Long.class);
  conf.setOutputValueClass(String.class);
  conf.setOutputFormat(TextOutputFormat.class);
  MultipleOutputs.addNamedOutput(conf, "text", TextOutputFormat.class, Long.class, String.class);
  MultipleOutputs.setCountersEnabled(conf, withCounters);
  conf.setMapperClass(MOJavaSerDeMap.class);
  conf.setReducerClass(MOJavaSerDeReduce.class);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  JobClient jc = new JobClient(conf);
  RunningJob job = jc.submitJob(conf);
  while (!job.isComplete()) {
    Thread.sleep(100);
  }
  // assert number of named output part files
  int namedOutputCount = 0;
  FileStatus[] statuses = fs.listStatus(outDir);
  for (FileStatus status : statuses) {
    if (status.getPath().getName().equals("text-m-00000") || status.getPath().getName().equals("text-r-00000")) {
      namedOutputCount++;
    }
  }
  assertEquals(2, namedOutputCount);
  // assert TextOutputFormat files correctness
  BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(new Path(FileOutputFormat.getOutputPath(conf), "text-r-00000"))));
  int count = 0;
  String line = reader.readLine();
  while (line != null) {
    assertTrue(line.endsWith("text"));
    line = reader.readLine();
    count++;
  }
  reader.close();
  assertFalse(count == 0);
  Counters.Group counters = job.getCounters().getGroup(MultipleOutputs.class.getName());
  if (!withCounters) {
    assertEquals(0, counters.size());
  } else {
    assertEquals(1, counters.size());
    assertEquals(2, counters.getCounter("text"));
  }
}
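MOJavaSerDeMap and MOJavaSerDeReduce are defined elsewhere in TestMultipleOutputs. As a hedged sketch of how the "text" named output registered above is typically fed with the old mapred MultipleOutputs API (this is not the test's actual reducer), a reducer could look like:

// Illustrative only -- not the real MOJavaSerDeReduce. Values are routed to the
// named output "text" instead of the job's default output.
public static class NamedOutputReduce extends MapReduceBase
    implements Reducer<Long, String, Long, String> {
  private MultipleOutputs mos;

  public void configure(JobConf job) {
    mos = new MultipleOutputs(job);
  }

  public void reduce(Long key, Iterator<String> values,
      OutputCollector<Long, String> output, Reporter reporter) throws IOException {
    while (values.hasNext()) {
      // Appending "text" mirrors the endsWith("text") assertion earlier in the test.
      mos.getCollector("text", reporter).collect(key, values.next() + "text");
    }
  }

  public void close() throws IOException {
    mos.close(); // flush and close the writers behind the named outputs
  }
}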