Use of org.apache.hadoop.mapred.JobClient in project hadoop by apache.
The class TestMultipleOutputs, method _testMultipleOutputs.
protected void _testMultipleOutputs(boolean withCounters) throws Exception {
  Path inDir = getDir(IN_DIR);
  Path outDir = getDir(OUT_DIR);
  JobConf conf = createJobConf();
  FileSystem fs = FileSystem.get(conf);
  DataOutputStream file = fs.create(new Path(inDir, "part-0"));
  file.writeBytes("a\nb\n\nc\nd\ne");
  file.close();
  file = fs.create(new Path(inDir, "part-1"));
  file.writeBytes("a\nb\n\nc\nd\ne");
  file.close();
  conf.setJobName("mo");
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(Text.class);
  conf.setMapOutputKeyClass(LongWritable.class);
  conf.setMapOutputValueClass(Text.class);
  conf.setOutputFormat(TextOutputFormat.class);
  MultipleOutputs.addNamedOutput(conf, "text", TextOutputFormat.class,
      LongWritable.class, Text.class);
  MultipleOutputs.addMultiNamedOutput(conf, "sequence",
      SequenceFileOutputFormat.class, LongWritable.class, Text.class);
  MultipleOutputs.setCountersEnabled(conf, withCounters);
  conf.setMapperClass(MOMap.class);
  conf.setReducerClass(MOReduce.class);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  JobClient jc = new JobClient(conf);
  RunningJob job = jc.submitJob(conf);
  while (!job.isComplete()) {
    Thread.sleep(100);
  }
  // assert number of named output part files
  int namedOutputCount = 0;
  FileStatus[] statuses = fs.listStatus(outDir);
  for (FileStatus status : statuses) {
    String name = status.getPath().getName();
    if (name.equals("text-m-00000") ||
        name.equals("text-m-00001") ||
        name.equals("text-r-00000") ||
        name.equals("sequence_A-m-00000") ||
        name.equals("sequence_A-m-00001") ||
        name.equals("sequence_B-m-00000") ||
        name.equals("sequence_B-m-00001") ||
        name.equals("sequence_B-r-00000") ||
        name.equals("sequence_C-r-00000")) {
      namedOutputCount++;
    }
  }
  assertEquals(9, namedOutputCount);
  // assert TextOutputFormat files correctness
  BufferedReader reader = new BufferedReader(new InputStreamReader(
      fs.open(new Path(FileOutputFormat.getOutputPath(conf), "text-r-00000"))));
  int count = 0;
  String line = reader.readLine();
  while (line != null) {
    assertTrue(line.endsWith("text"));
    line = reader.readLine();
    count++;
  }
  reader.close();
  assertFalse(count == 0);
  // assert SequenceOutputFormat files correctness
  SequenceFile.Reader seqReader = new SequenceFile.Reader(fs,
      new Path(FileOutputFormat.getOutputPath(conf), "sequence_B-r-00000"), conf);
  assertEquals(LongWritable.class, seqReader.getKeyClass());
  assertEquals(Text.class, seqReader.getValueClass());
  count = 0;
  LongWritable key = new LongWritable();
  Text value = new Text();
  while (seqReader.next(key, value)) {
    assertEquals("sequence", value.toString());
    count++;
  }
  seqReader.close();
  assertFalse(count == 0);
  Counters.Group counters = job.getCounters().getGroup(MultipleOutputs.class.getName());
  if (!withCounters) {
    assertEquals(0, counters.size());
  } else {
    assertEquals(4, counters.size());
    assertEquals(4, counters.getCounter("text"));
    assertEquals(2, counters.getCounter("sequence_A"));
    assertEquals(4, counters.getCounter("sequence_B"));
    assertEquals(2, counters.getCounter("sequence_C"));
  }
}
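MOMap and MOReduce are declared elsewhere in TestMultipleOutputs and are not part of this snippet. Below is a minimal sketch of what a mapper driving these named outputs could look like, using the org.apache.hadoop.mapred.lib.MultipleOutputs API; the class name, emitted values, and map logic are assumptions for illustration, not the test's actual code.

// Hypothetical mapper sketch; the real MOMap is not shown in the snippet.
// Uses org.apache.hadoop.mapred.{Mapper, JobConf, OutputCollector, Reporter}
// and org.apache.hadoop.mapred.lib.MultipleOutputs.
public static class SketchMOMap
    implements Mapper<LongWritable, Text, LongWritable, Text> {
  private MultipleOutputs mos;

  public void configure(JobConf conf) {
    mos = new MultipleOutputs(conf);
  }

  public void map(LongWritable key, Text value,
      OutputCollector<LongWritable, Text> output, Reporter reporter)
      throws IOException {
    // pass the record through to the regular job output
    output.collect(key, value);
    // single named output: collector addressed by name only
    mos.getCollector("text", reporter).collect(key, new Text("text"));
    // multi named output: collector addressed by name plus a part name (A, B, ...)
    mos.getCollector("sequence", "A", reporter).collect(key, new Text("sequence"));
  }

  public void close() throws IOException {
    // flushes and closes all named output collectors
    mos.close();
  }
}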
Use of org.apache.hadoop.mapred.JobClient in project hive by apache.
The class Driver, method getClusterStatus.
/**
 * Return the status information about the Map-Reduce cluster.
 */
public ClusterStatus getClusterStatus() throws Exception {
  ClusterStatus cs;
  try {
    JobConf job = new JobConf(conf);
    JobClient jc = new JobClient(job);
    cs = jc.getClusterStatus();
  } catch (Exception e) {
    e.printStackTrace();
    throw e;
  }
  LOG.info("Returning cluster status: " + cs.toString());
  return cs;
}
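The returned ClusterStatus carries tracker and slot counts. A minimal sketch of how a caller might log them; the getters are the standard org.apache.hadoop.mapred.ClusterStatus API, while the method and the `driver` parameter are hypothetical illustration:

// Illustrative consumer of the status returned above.
void logClusterCapacity(Driver driver) throws Exception {
  ClusterStatus cs = driver.getClusterStatus();
  System.out.println("task trackers: " + cs.getTaskTrackers());
  System.out.println("map tasks:     " + cs.getMapTasks() + " of " + cs.getMaxMapTasks());
  System.out.println("reduce tasks:  " + cs.getReduceTasks() + " of " + cs.getMaxReduceTasks());
}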
Use of org.apache.hadoop.mapred.JobClient in project hive by apache.
The class HCatUtil, method getJobTrackerDelegationToken.
public static Token<org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier>
    getJobTrackerDelegationToken(Configuration conf, String userName) throws Exception {
  // LOG.info("getJobTrackerDelegationToken("+conf+","+userName+")");
  JobClient jcl = new JobClient(new JobConf(conf, HCatOutputFormat.class));
  Token<org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier> t =
      jcl.getDelegationToken(new Text(userName));
  // LOG.info("got "+t);
  return t;
}
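A token fetched this way is typically added to the caller's credentials so that later RPCs to the JobTracker can authenticate. A minimal sketch using the standard org.apache.hadoop.security.UserGroupInformation API; the surrounding usage context is an assumption:

// Illustrative only: attach the fetched token to the current user's credentials.
UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
Token<org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier> token =
    HCatUtil.getJobTrackerDelegationToken(conf, ugi.getShortUserName());
ugi.addToken(token);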
Use of org.apache.hadoop.mapred.JobClient in project whirr by apache.
The class HadoopServiceTest, method test.
@Test
public void test() throws Exception {
  Configuration conf = getConfiguration();
  JobConf job = new JobConf(conf, HadoopServiceTest.class);
  JobClient client = new JobClient(job);
  waitForTaskTrackers(client);
  FileSystem fs = FileSystem.get(conf);
  OutputStream os = fs.create(new Path("input"));
  Writer wr = new OutputStreamWriter(os);
  wr.write("b a\n");
  wr.close();
  job.setMapperClass(TokenCountMapper.class);
  job.setReducerClass(LongSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);
  FileInputFormat.setInputPaths(job, new Path("input"));
  FileOutputFormat.setOutputPath(job, new Path("output"));
  JobClient.runJob(job);
  FSDataInputStream in = fs.open(new Path("output/part-00000"));
  BufferedReader reader = new BufferedReader(new InputStreamReader(in));
  assertEquals("a\t1", reader.readLine());
  assertEquals("b\t1", reader.readLine());
  assertNull(reader.readLine());
  reader.close();
}
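waitForTaskTrackers is a helper defined elsewhere in the test class and is not shown here. A plausible sketch, assuming it simply polls the cluster status until at least one TaskTracker has registered:

// Hypothetical reconstruction of the helper (not shown in the snippet):
// block until the cluster reports at least one live TaskTracker.
private static void waitForTaskTrackers(JobClient client) throws Exception {
  while (client.getClusterStatus().getTaskTrackers() == 0) {
    Thread.sleep(1000);
  }
}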
Use of org.apache.hadoop.mapred.JobClient in project h2o-2 by h2oai.
The class gen_flatfile, method run.
@Override
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  String trackerIpPort = conf.get("mapred.job.tracker");
  // System.err.println("mapred.job.tracker: " + trackerIpPort);
  String[] arr = trackerIpPort.split(":");
  String host = arr[0];
  int port = Integer.parseInt(arr[1]);
  // System.err.println("host: " + host);
  // System.err.println("port: " + port);
  InetSocketAddress addr = new InetSocketAddress(host, port);
  JobClient client = new JobClient(addr, conf);
  Collection<String> names = client.getClusterStatus(true).getActiveTrackerNames();
  for (String name : names) {
    String n = name.substring("tracker_".length(), name.indexOf(':'));
    String s = InetAddress.getByName(n).getHostAddress();
    System.out.println(s);
  }
  return 0;
}
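Since run(String[]) comes from the org.apache.hadoop.util.Tool interface (as the getConf() call suggests), the class is normally launched through ToolRunner, which parses generic Hadoop options before delegating. A minimal sketch of such an entry point, assuming gen_flatfile extends Configured and implements Tool:

// Illustrative entry point for the Tool above.
public static void main(String[] args) throws Exception {
  int rc = ToolRunner.run(new Configuration(), new gen_flatfile(), args);
  System.exit(rc);
}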