
Example 1 with Counters

use of org.apache.hadoop.mapred.Counters in project hadoop by apache.

the class TestMultipleOutputs method _testMOWithJavaSerialization.

protected void _testMOWithJavaSerialization(boolean withCounters) throws Exception {
    Path inDir = getDir(IN_DIR);
    Path outDir = getDir(OUT_DIR);
    JobConf conf = createJobConf();
    FileSystem fs = FileSystem.get(conf);
    DataOutputStream file = fs.create(new Path(inDir, "part-0"));
    file.writeBytes("a\nb\n\nc\nd\ne");
    file.close();
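    // note: these deletes run after part-0 was created above, so part-0 is wiped
    // out; the job below therefore reads only the part-1 file written next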
    fs.delete(inDir, true);
    fs.delete(outDir, true);
    file = fs.create(new Path(inDir, "part-1"));
    file.writeBytes("a\nb\n\nc\nd\ne");
    file.close();
    conf.setJobName("mo");
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization," + "org.apache.hadoop.io.serializer.WritableSerialization");
    conf.setInputFormat(TextInputFormat.class);
    conf.setMapOutputKeyClass(Long.class);
    conf.setMapOutputValueClass(String.class);
    conf.setOutputKeyComparatorClass(JavaSerializationComparator.class);
    conf.setOutputKeyClass(Long.class);
    conf.setOutputValueClass(String.class);
    conf.setOutputFormat(TextOutputFormat.class);
    MultipleOutputs.addNamedOutput(conf, "text", TextOutputFormat.class, Long.class, String.class);
    MultipleOutputs.setCountersEnabled(conf, withCounters);
    conf.setMapperClass(MOJavaSerDeMap.class);
    conf.setReducerClass(MOJavaSerDeReduce.class);
    FileInputFormat.setInputPaths(conf, inDir);
    FileOutputFormat.setOutputPath(conf, outDir);
    JobClient jc = new JobClient(conf);
    RunningJob job = jc.submitJob(conf);
    while (!job.isComplete()) {
        Thread.sleep(100);
    }
    // assert number of named output part files
    int namedOutputCount = 0;
    FileStatus[] statuses = fs.listStatus(outDir);
    for (FileStatus status : statuses) {
        if (status.getPath().getName().equals("text-m-00000") || status.getPath().getName().equals("text-r-00000")) {
            namedOutputCount++;
        }
    }
    assertEquals(2, namedOutputCount);
    // assert TextOutputFormat files correctness
    BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(new Path(FileOutputFormat.getOutputPath(conf), "text-r-00000"))));
    int count = 0;
    String line = reader.readLine();
    while (line != null) {
        assertTrue(line.endsWith("text"));
        line = reader.readLine();
        count++;
    }
    reader.close();
    assertFalse(count == 0);
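    // MultipleOutputs publishes its counters in a group named after the class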
    Counters.Group counters = job.getCounters().getGroup(MultipleOutputs.class.getName());
    if (!withCounters) {
        assertEquals(0, counters.size());
    } else {
        assertEquals(1, counters.size());
        assertEquals(2, counters.getCounter("text"));
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) InputStreamReader(java.io.InputStreamReader) DataOutputStream(java.io.DataOutputStream) JobClient(org.apache.hadoop.mapred.JobClient) FileSystem(org.apache.hadoop.fs.FileSystem) RunningJob(org.apache.hadoop.mapred.RunningJob) BufferedReader(java.io.BufferedReader) Counters(org.apache.hadoop.mapred.Counters) JobConf(org.apache.hadoop.mapred.JobConf)
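
The MOJavaSerDeMap and MOJavaSerDeReduce classes referenced above are not shown on this page. As a rough sketch of the MultipleOutputs plumbing such a mapper relies on (the class body below is an assumption, not the actual Hadoop source; imports from org.apache.hadoop.mapred and org.apache.hadoop.mapred.lib are assumed, as in the example above):

// Hedged sketch only: open MultipleOutputs in configure(), write through
// getCollector(), and flush in close().
@SuppressWarnings("unchecked")
public static class MOJavaSerDeMap extends MapReduceBase implements Mapper<LongWritable, Text, Long, String> {

    private MultipleOutputs mos;

    public void configure(JobConf conf) {
        mos = new MultipleOutputs(conf);
    }

    public void map(LongWritable key, Text value, OutputCollector<Long, String> output, Reporter reporter) throws IOException {
        output.collect(key.get(), value.toString());
        // writing the value "text" keeps this sketch consistent with the
        // test's line.endsWith("text") assertion on text-r-00000
        mos.getCollector("text", reporter).collect(key.get(), "text");
    }

    public void close() throws IOException {
        // flushes and closes all named-output record writers
        mos.close();
    }
}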

Example 2 with Counters

use of org.apache.hadoop.mapred.Counters in project hadoop by apache.

the class TestMultipleOutputs method _testMultipleOutputs.

protected void _testMultipleOutputs(boolean withCounters) throws Exception {
    Path inDir = getDir(IN_DIR);
    Path outDir = getDir(OUT_DIR);
    JobConf conf = createJobConf();
    FileSystem fs = FileSystem.get(conf);
    DataOutputStream file = fs.create(new Path(inDir, "part-0"));
    file.writeBytes("a\nb\n\nc\nd\ne");
    file.close();
    file = fs.create(new Path(inDir, "part-1"));
    file.writeBytes("a\nb\n\nc\nd\ne");
    file.close();
    conf.setJobName("mo");
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapOutputKeyClass(LongWritable.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputFormat(TextOutputFormat.class);
    MultipleOutputs.addNamedOutput(conf, "text", TextOutputFormat.class, LongWritable.class, Text.class);
    MultipleOutputs.addMultiNamedOutput(conf, "sequence", SequenceFileOutputFormat.class, LongWritable.class, Text.class);
    MultipleOutputs.setCountersEnabled(conf, withCounters);
    conf.setMapperClass(MOMap.class);
    conf.setReducerClass(MOReduce.class);
    FileInputFormat.setInputPaths(conf, inDir);
    FileOutputFormat.setOutputPath(conf, outDir);
    JobClient jc = new JobClient(conf);
    RunningJob job = jc.submitJob(conf);
    while (!job.isComplete()) {
        Thread.sleep(100);
    }
    // assert number of named output part files
    int namedOutputCount = 0;
    FileStatus[] statuses = fs.listStatus(outDir);
    for (FileStatus status : statuses) {
        if (status.getPath().getName().equals("text-m-00000") || status.getPath().getName().equals("text-m-00001") || status.getPath().getName().equals("text-r-00000") || status.getPath().getName().equals("sequence_A-m-00000") || status.getPath().getName().equals("sequence_A-m-00001") || status.getPath().getName().equals("sequence_B-m-00000") || status.getPath().getName().equals("sequence_B-m-00001") || status.getPath().getName().equals("sequence_B-r-00000") || status.getPath().getName().equals("sequence_C-r-00000")) {
            namedOutputCount++;
        }
    }
    assertEquals(9, namedOutputCount);
    // assert TextOutputFormat files correctness
    BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(new Path(FileOutputFormat.getOutputPath(conf), "text-r-00000"))));
    int count = 0;
    String line = reader.readLine();
    while (line != null) {
        assertTrue(line.endsWith("text"));
        line = reader.readLine();
        count++;
    }
    reader.close();
    assertFalse(count == 0);
    // assert SequenceOutputFormat files correctness
    SequenceFile.Reader seqReader = new SequenceFile.Reader(fs, new Path(FileOutputFormat.getOutputPath(conf), "sequence_B-r-00000"), conf);
    assertEquals(LongWritable.class, seqReader.getKeyClass());
    assertEquals(Text.class, seqReader.getValueClass());
    count = 0;
    LongWritable key = new LongWritable();
    Text value = new Text();
    while (seqReader.next(key, value)) {
        assertEquals("sequence", value.toString());
        count++;
    }
    seqReader.close();
    assertFalse(count == 0);
    Counters.Group counters = job.getCounters().getGroup(MultipleOutputs.class.getName());
    if (!withCounters) {
        assertEquals(0, counters.size());
    } else {
        assertEquals(4, counters.size());
        assertEquals(4, counters.getCounter("text"));
        assertEquals(2, counters.getCounter("sequence_A"));
        assertEquals(4, counters.getCounter("sequence_B"));
        assertEquals(2, counters.getCounter("sequence_C"));
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) InputStreamReader(java.io.InputStreamReader) DataOutputStream(java.io.DataOutputStream) BufferedReader(java.io.BufferedReader) Text(org.apache.hadoop.io.Text) JobClient(org.apache.hadoop.mapred.JobClient) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) RunningJob(org.apache.hadoop.mapred.RunningJob) Counters(org.apache.hadoop.mapred.Counters) LongWritable(org.apache.hadoop.io.LongWritable) JobConf(org.apache.hadoop.mapred.JobConf)
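
The MOMap and MOReduce classes are likewise not shown. A hedged sketch of the two collector forms the assertions above imply, one for the plain named output ("text-m-NNNNN") and one for the multi named output ("sequence_A-m-NNNNN" and friends); the class body is an assumption, not the actual Hadoop source:

// Hedged sketch only.
@SuppressWarnings("unchecked")
public static class MOMap extends MapReduceBase implements Mapper<LongWritable, Text, LongWritable, Text> {

    private MultipleOutputs mos;

    public void configure(JobConf conf) {
        mos = new MultipleOutputs(conf);
    }

    public void map(LongWritable key, Text value, OutputCollector<LongWritable, Text> output, Reporter reporter) throws IOException {
        output.collect(key, value);
        // addNamedOutput name: produces part files named "text-m-NNNNN"
        mos.getCollector("text", reporter).collect(key, new Text("text"));
        // addMultiNamedOutput name plus a per-record sub-name: the sub-name is
        // appended to the base, giving "sequence_A-m-NNNNN", "sequence_B-m-NNNNN"
        mos.getCollector("sequence", "A", reporter).collect(key, new Text("sequence"));
        mos.getCollector("sequence", "B", reporter).collect(key, new Text("sequence"));
    }

    public void close() throws IOException {
        mos.close();
    }
}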

Example 3 with Counters

use of org.apache.hadoop.mapred.Counters in project hadoop by apache.

the class TestPipes method runProgram.

static void runProgram(MiniMRCluster mr, MiniDFSCluster dfs, Path program, Path inputPath, Path outputPath, int numMaps, int numReduces, String[] expectedResults, JobConf conf) throws IOException {
    Path wordExec = new Path("testing/bin/application");
    JobConf job = null;
    if (conf == null) {
        job = mr.createJobConf();
    } else {
        job = new JobConf(conf);
    }
    job.setNumMapTasks(numMaps);
    job.setNumReduceTasks(numReduces);
    {
        FileSystem fs = dfs.getFileSystem();
        fs.delete(wordExec.getParent(), true);
        fs.copyFromLocalFile(program, wordExec);
        Submitter.setExecutable(job, fs.makeQualified(wordExec).toString());
        Submitter.setIsJavaRecordReader(job, true);
        Submitter.setIsJavaRecordWriter(job, true);
        FileInputFormat.setInputPaths(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);
        RunningJob rJob = null;
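        // map-only jobs are submitted asynchronously and polled for completion;
        // jobs with reducers go through the blocking Submitter.runJob() instead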
        if (numReduces == 0) {
            rJob = Submitter.jobSubmit(job);
            while (!rJob.isComplete()) {
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException ie) {
                    throw new RuntimeException(ie);
                }
            }
        } else {
            rJob = Submitter.runJob(job);
        }
        assertTrue("pipes job failed", rJob.isSuccessful());
        Counters counters = rJob.getCounters();
        Counters.Group wordCountCounters = counters.getGroup("WORDCOUNT");
        int numCounters = 0;
        for (Counter c : wordCountCounters) {
            System.out.println(c);
            ++numCounters;
        }
        assertTrue("No counters found!", (numCounters > 0));
    }
    List<String> results = new ArrayList<String>();
    for (Path p : FileUtil.stat2Paths(dfs.getFileSystem().listStatus(outputPath, new Utils.OutputFileUtils.OutputFilesFilter()))) {
        results.add(MapReduceTestUtil.readOutput(p, job));
    }
    assertEquals("number of reduces is wrong", expectedResults.length, results.size());
    for (int i = 0; i < results.size(); i++) {
        assertEquals("pipes program " + program + " output " + i + " wrong", expectedResults[i], results.get(i));
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ArrayList(java.util.ArrayList) Counter(org.apache.hadoop.mapred.Counters.Counter) Utils(org.apache.hadoop.mapred.Utils) StringUtils(org.apache.hadoop.util.StringUtils) FileSystem(org.apache.hadoop.fs.FileSystem) RunningJob(org.apache.hadoop.mapred.RunningJob) Counters(org.apache.hadoop.mapred.Counters) JobConf(org.apache.hadoop.mapred.JobConf)
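
If a test needed one specific value rather than just a non-empty group, it could read a counter directly. A minimal sketch; "INPUT_WORDS" is a hypothetical counter name here, since the real names depend on what the pipes binary reports into the "WORDCOUNT" group:

// Hedged sketch: read one counter by name instead of iterating the group.
static long readWordCountCounter(RunningJob rJob) throws IOException {
    Counters counters = rJob.getCounters();
    Counters.Counter c = counters.findCounter("WORDCOUNT", "INPUT_WORDS");
    return (c != null) ? c.getValue() : 0L;
}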

Example 4 with Counters

use of org.apache.hadoop.mapred.Counters in project hadoop by apache.

the class TestStreamingStatus method validateUserCounter.

// Validate that the user counter was incremented as expected
void validateUserCounter(StreamJob job, int expectedCounterValue) throws IOException {
    Counters counters = job.running_.getCounters();
    assertEquals(expectedCounterValue, counters.findCounter("myOwnCounterGroup", "myOwnCounter").getValue());
}
Also used : Counters(org.apache.hadoop.mapred.Counters)
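
For context, a task has to increment that counter before validateUserCounter can observe it. In a streaming job the script does this by writing a line of the form reporter:counter:myOwnCounterGroup,myOwnCounter,1 to stderr; the Java-side equivalent is Reporter.incrCounter, sketched here under the old mapred API (the mapper class itself is an assumption):

// Hedged sketch: incrementing the user counter that the test later reads.
public static class CounterIncrementingMap extends MapReduceBase implements Mapper<LongWritable, Text, LongWritable, Text> {

    public void map(LongWritable key, Text value, OutputCollector<LongWritable, Text> output, Reporter reporter) throws IOException {
        // counter group and name match the ones checked in validateUserCounter()
        reporter.incrCounter("myOwnCounterGroup", "myOwnCounter", 1);
        output.collect(key, value);
    }
}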

Example 5 with Counters

use of org.apache.hadoop.mapred.Counters in project hive by apache.

the class HadoopJobExecHelper method checkFatalErrors.

public boolean checkFatalErrors(Counters ctrs, StringBuilder errMsg) {
    if (ctrs == null) {
        // we may still be able to retrieve the job status - so ignore
        return false;
    }
    // check for number of created files
    Counters.Counter cntr = ctrs.findCounter(HiveConf.getVar(job, ConfVars.HIVECOUNTERGROUP), Operator.HIVECOUNTERCREATEDFILES);
    long numFiles = cntr != null ? cntr.getValue() : 0;
    long upperLimit = HiveConf.getLongVar(job, HiveConf.ConfVars.MAXCREATEDFILES);
    if (numFiles > upperLimit) {
        errMsg.append("total number of created files now is " + numFiles + ", which exceeds ").append(upperLimit);
        return true;
    }
    return this.callBackObj.checkFatalErrors(ctrs, errMsg);
}
Also used : Counters(org.apache.hadoop.mapred.Counters) Counter(org.apache.hadoop.mapred.Counters.Counter)
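
A hedged usage sketch of how a monitoring loop might drive checkFatalErrors while a job runs; the helper and RunningJob variables are illustrative assumptions:

// Hedged sketch: poll counters and abort the job on a fatal condition.
static boolean failIfFatal(HadoopJobExecHelper helper, RunningJob rj) throws IOException {
    StringBuilder errMsg = new StringBuilder();
    if (helper.checkFatalErrors(rj.getCounters(), errMsg)) {
        // stop the job once a fatal error (e.g. too many created files) is seen
        rj.killJob();
        System.err.println("Fatal error: " + errMsg);
        return true;
    }
    return false;
}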

Aggregations

Counters (org.apache.hadoop.mapred.Counters): 23 uses
RunningJob (org.apache.hadoop.mapred.RunningJob): 14 uses
Path (org.apache.hadoop.fs.Path): 13 uses
JobConf (org.apache.hadoop.mapred.JobConf): 12 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 11 uses
Counter (org.apache.hadoop.mapred.Counters.Counter): 6 uses
JobClient (org.apache.hadoop.mapred.JobClient): 6 uses
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 5 uses
Text (org.apache.hadoop.io.Text): 5 uses
FileStatus (org.apache.hadoop.fs.FileStatus): 4 uses
LineReader (org.apache.hadoop.util.LineReader): 4 uses
BufferedReader (java.io.BufferedReader): 3 uses
DataOutputStream (java.io.DataOutputStream): 3 uses
IOException (java.io.IOException): 3 uses
InputStreamReader (java.io.InputStreamReader): 3 uses
ArrayList (java.util.ArrayList): 3 uses
CommandLine (org.apache.commons.cli.CommandLine): 3 uses
CommandLineParser (org.apache.commons.cli.CommandLineParser): 3 uses
GnuParser (org.apache.commons.cli.GnuParser): 3 uses
HelpFormatter (org.apache.commons.cli.HelpFormatter): 3 uses