use of org.apache.hadoop.mapred.Counters in project hadoop by apache.
the class TestMultipleOutputs method _testMOWithJavaSerialization.
protected void _testMOWithJavaSerialization(boolean withCounters) throws Exception {
Path inDir = getDir(IN_DIR);
Path outDir = getDir(OUT_DIR);
JobConf conf = createJobConf();
FileSystem fs = FileSystem.get(conf);
DataOutputStream file = fs.create(new Path(inDir, "part-0"));
file.writeBytes("a\nb\n\nc\nd\ne");
file.close();
fs.delete(inDir, true);
fs.delete(outDir, true);
file = fs.create(new Path(inDir, "part-1"));
file.writeBytes("a\nb\n\nc\nd\ne");
file.close();
conf.setJobName("mo");
conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization," + "org.apache.hadoop.io.serializer.WritableSerialization");
conf.setInputFormat(TextInputFormat.class);
conf.setMapOutputKeyClass(Long.class);
conf.setMapOutputValueClass(String.class);
conf.setOutputKeyComparatorClass(JavaSerializationComparator.class);
conf.setOutputKeyClass(Long.class);
conf.setOutputValueClass(String.class);
conf.setOutputFormat(TextOutputFormat.class);
MultipleOutputs.addNamedOutput(conf, "text", TextOutputFormat.class, Long.class, String.class);
MultipleOutputs.setCountersEnabled(conf, withCounters);
conf.setMapperClass(MOJavaSerDeMap.class);
conf.setReducerClass(MOJavaSerDeReduce.class);
FileInputFormat.setInputPaths(conf, inDir);
FileOutputFormat.setOutputPath(conf, outDir);
JobClient jc = new JobClient(conf);
RunningJob job = jc.submitJob(conf);
while (!job.isComplete()) {
Thread.sleep(100);
}
// assert number of named output part files
int namedOutputCount = 0;
FileStatus[] statuses = fs.listStatus(outDir);
for (FileStatus status : statuses) {
if (status.getPath().getName().equals("text-m-00000") || status.getPath().getName().equals("text-r-00000")) {
namedOutputCount++;
}
}
assertEquals(2, namedOutputCount);
// assert TextOutputFormat files correctness
BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(new Path(FileOutputFormat.getOutputPath(conf), "text-r-00000"))));
int count = 0;
String line = reader.readLine();
while (line != null) {
assertTrue(line.endsWith("text"));
line = reader.readLine();
count++;
}
reader.close();
assertFalse(count == 0);
Counters.Group counters = job.getCounters().getGroup(MultipleOutputs.class.getName());
if (!withCounters) {
assertEquals(0, counters.size());
} else {
assertEquals(1, counters.size());
assertEquals(2, counters.getCounter("text"));
}
}
use of org.apache.hadoop.mapred.Counters in project hadoop by apache.
the class TestMultipleOutputs method _testMultipleOutputs.
protected void _testMultipleOutputs(boolean withCounters) throws Exception {
Path inDir = getDir(IN_DIR);
Path outDir = getDir(OUT_DIR);
JobConf conf = createJobConf();
FileSystem fs = FileSystem.get(conf);
DataOutputStream file = fs.create(new Path(inDir, "part-0"));
file.writeBytes("a\nb\n\nc\nd\ne");
file.close();
file = fs.create(new Path(inDir, "part-1"));
file.writeBytes("a\nb\n\nc\nd\ne");
file.close();
conf.setJobName("mo");
conf.setInputFormat(TextInputFormat.class);
conf.setOutputKeyClass(LongWritable.class);
conf.setOutputValueClass(Text.class);
conf.setMapOutputKeyClass(LongWritable.class);
conf.setMapOutputValueClass(Text.class);
conf.setOutputFormat(TextOutputFormat.class);
MultipleOutputs.addNamedOutput(conf, "text", TextOutputFormat.class, LongWritable.class, Text.class);
MultipleOutputs.addMultiNamedOutput(conf, "sequence", SequenceFileOutputFormat.class, LongWritable.class, Text.class);
MultipleOutputs.setCountersEnabled(conf, withCounters);
conf.setMapperClass(MOMap.class);
conf.setReducerClass(MOReduce.class);
FileInputFormat.setInputPaths(conf, inDir);
FileOutputFormat.setOutputPath(conf, outDir);
JobClient jc = new JobClient(conf);
RunningJob job = jc.submitJob(conf);
while (!job.isComplete()) {
Thread.sleep(100);
}
// assert number of named output part files
int namedOutputCount = 0;
FileStatus[] statuses = fs.listStatus(outDir);
for (FileStatus status : statuses) {
if (status.getPath().getName().equals("text-m-00000") || status.getPath().getName().equals("text-m-00001") || status.getPath().getName().equals("text-r-00000") || status.getPath().getName().equals("sequence_A-m-00000") || status.getPath().getName().equals("sequence_A-m-00001") || status.getPath().getName().equals("sequence_B-m-00000") || status.getPath().getName().equals("sequence_B-m-00001") || status.getPath().getName().equals("sequence_B-r-00000") || status.getPath().getName().equals("sequence_C-r-00000")) {
namedOutputCount++;
}
}
assertEquals(9, namedOutputCount);
// assert TextOutputFormat files correctness
BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(new Path(FileOutputFormat.getOutputPath(conf), "text-r-00000"))));
int count = 0;
String line = reader.readLine();
while (line != null) {
assertTrue(line.endsWith("text"));
line = reader.readLine();
count++;
}
reader.close();
assertFalse(count == 0);
// assert SequenceOutputFormat files correctness
SequenceFile.Reader seqReader = new SequenceFile.Reader(fs, new Path(FileOutputFormat.getOutputPath(conf), "sequence_B-r-00000"), conf);
assertEquals(LongWritable.class, seqReader.getKeyClass());
assertEquals(Text.class, seqReader.getValueClass());
count = 0;
LongWritable key = new LongWritable();
Text value = new Text();
while (seqReader.next(key, value)) {
assertEquals("sequence", value.toString());
count++;
}
seqReader.close();
assertFalse(count == 0);
Counters.Group counters = job.getCounters().getGroup(MultipleOutputs.class.getName());
if (!withCounters) {
assertEquals(0, counters.size());
} else {
assertEquals(4, counters.size());
assertEquals(4, counters.getCounter("text"));
assertEquals(2, counters.getCounter("sequence_A"));
assertEquals(4, counters.getCounter("sequence_B"));
assertEquals(2, counters.getCounter("sequence_C"));
}
}
use of org.apache.hadoop.mapred.Counters in project hadoop by apache.
the class TestPipes method runProgram.
static void runProgram(MiniMRCluster mr, MiniDFSCluster dfs, Path program, Path inputPath, Path outputPath, int numMaps, int numReduces, String[] expectedResults, JobConf conf) throws IOException {
Path wordExec = new Path("testing/bin/application");
JobConf job = null;
if (conf == null) {
job = mr.createJobConf();
} else {
job = new JobConf(conf);
}
job.setNumMapTasks(numMaps);
job.setNumReduceTasks(numReduces);
{
FileSystem fs = dfs.getFileSystem();
fs.delete(wordExec.getParent(), true);
fs.copyFromLocalFile(program, wordExec);
Submitter.setExecutable(job, fs.makeQualified(wordExec).toString());
Submitter.setIsJavaRecordReader(job, true);
Submitter.setIsJavaRecordWriter(job, true);
FileInputFormat.setInputPaths(job, inputPath);
FileOutputFormat.setOutputPath(job, outputPath);
RunningJob rJob = null;
if (numReduces == 0) {
rJob = Submitter.jobSubmit(job);
while (!rJob.isComplete()) {
try {
Thread.sleep(1000);
} catch (InterruptedException ie) {
throw new RuntimeException(ie);
}
}
} else {
rJob = Submitter.runJob(job);
}
assertTrue("pipes job failed", rJob.isSuccessful());
Counters counters = rJob.getCounters();
Counters.Group wordCountCounters = counters.getGroup("WORDCOUNT");
int numCounters = 0;
for (Counter c : wordCountCounters) {
System.out.println(c);
++numCounters;
}
assertTrue("No counters found!", (numCounters > 0));
}
List<String> results = new ArrayList<String>();
for (Path p : FileUtil.stat2Paths(dfs.getFileSystem().listStatus(outputPath, new Utils.OutputFileUtils.OutputFilesFilter()))) {
results.add(MapReduceTestUtil.readOutput(p, job));
}
assertEquals("number of reduces is wrong", expectedResults.length, results.size());
for (int i = 0; i < results.size(); i++) {
assertEquals("pipes program " + program + " output " + i + " wrong", expectedResults[i], results.get(i));
}
}
use of org.apache.hadoop.mapred.Counters in project hadoop by apache.
the class TestStreamingStatus method validateUserCounter.
// Validate if user counter is incremented properly
void validateUserCounter(StreamJob job, int expectedCounterValue) throws IOException {
Counters counters = job.running_.getCounters();
assertEquals(expectedCounterValue, counters.findCounter("myOwnCounterGroup", "myOwnCounter").getValue());
}
use of org.apache.hadoop.mapred.Counters in project hive by apache.
the class HadoopJobExecHelper method checkFatalErrors.
public boolean checkFatalErrors(Counters ctrs, StringBuilder errMsg) {
if (ctrs == null) {
// we may still be able to retrieve the job status - so ignore
return false;
}
// check for number of created files
Counters.Counter cntr = ctrs.findCounter(HiveConf.getVar(job, ConfVars.HIVECOUNTERGROUP), Operator.HIVECOUNTERCREATEDFILES);
long numFiles = cntr != null ? cntr.getValue() : 0;
long upperLimit = HiveConf.getLongVar(job, HiveConf.ConfVars.MAXCREATEDFILES);
if (numFiles > upperLimit) {
errMsg.append("total number of created files now is " + numFiles + ", which exceeds ").append(upperLimit);
return true;
}
return this.callBackObj.checkFatalErrors(ctrs, errMsg);
}
Aggregations