Example 31 with Counters

use of org.apache.hadoop.mapreduce.Counters in project druid by druid-io.

the class IndexGeneratorJob method getStats.

@Override
public Map<String, Object> getStats() {
    if (job == null) {
        return null;
    }
    try {
        Counters jobCounters = job.getCounters();
        Map<String, Object> metrics = TaskMetricsUtils.makeIngestionRowMetrics(
            jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_PROCESSED_COUNTER).getValue(),
            jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_PROCESSED_WITH_ERRORS_COUNTER).getValue(),
            jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_UNPARSEABLE_COUNTER).getValue(),
            jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_THROWN_AWAY_COUNTER).getValue());
        return metrics;
    } catch (IllegalStateException ise) {
        log.debug("Couldn't get counters due to job state");
        return null;
    } catch (Exception e) {
        log.debug(e, "Encountered exception in getStats().");
        return null;
    }
}
Also used : Counters(org.apache.hadoop.mapreduce.Counters) TimeoutException(java.util.concurrent.TimeoutException) InvalidJobConfException(org.apache.hadoop.mapred.InvalidJobConfException) FileNotFoundException(java.io.FileNotFoundException) ParseException(org.apache.druid.java.util.common.parsers.ParseException) RejectedExecutionException(java.util.concurrent.RejectedExecutionException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException)
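
A minimal standalone sketch of the same read pattern, with an invented RowCounters enum standing in for Druid's IndexJobCounters; getCounters() can fail once the job's state no longer permits counter retrieval, which is what the catch blocks above guard against.

import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;

public class CounterReadSketch {

    // Hypothetical counter enum; any Java enum works as a counter key.
    enum RowCounters { PROCESSED, UNPARSEABLE }

    // Returns the PROCESSED count, or -1 when counters are unavailable.
    static long readProcessed(Job job) {
        try {
            Counters counters = job.getCounters();
            if (counters == null) {
                return -1L;
            }
            // findCounter() creates a zero-valued counter if none exists yet,
            // so the result is never null for a valid enum key.
            return counters.findCounter(RowCounters.PROCESSED).getValue();
        } catch (Exception e) {
            // getCounters() can throw IOException or IllegalStateException
            // depending on job state, as the Druid example above anticipates.
            return -1L;
        }
    }
}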

Example 32 with Counters

use of org.apache.hadoop.mapreduce.Counters in project druid by druid-io.

the class IndexGeneratorJob method run.

@Override
public boolean run() {
    try {
        job = Job.getInstance(new Configuration(), StringUtils.format("%s-index-generator-%s", config.getDataSource(), config.getIntervals()));
        job.getConfiguration().set("io.sort.record.percent", "0.23");
        JobHelper.injectSystemProperties(job.getConfiguration(), config);
        config.addJobProperties(job);
        // inject druid properties like deep storage bindings
        JobHelper.injectDruidProperties(job.getConfiguration(), config);
        job.setMapperClass(IndexGeneratorMapper.class);
        job.setMapOutputValueClass(BytesWritable.class);
        SortableBytes.useSortableBytesAsMapOutputKey(job, IndexGeneratorPartitioner.class);
        int numReducers = Iterables.size(config.getAllBuckets().get());
        if (numReducers == 0) {
            throw new RuntimeException("No buckets?? seems there is no data to index.");
        }
        if (config.getSchema().getTuningConfig().getUseCombiner()) {
            job.setCombinerClass(IndexGeneratorCombiner.class);
            job.setCombinerKeyGroupingComparatorClass(BytesWritable.Comparator.class);
        }
        job.setNumReduceTasks(numReducers);
        setReducerClass(job);
        job.setOutputKeyClass(BytesWritable.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(IndexGeneratorOutputFormat.class);
        FileOutputFormat.setOutputPath(job, config.makeIntermediatePath());
        config.addInputPaths(job);
        config.intoConfiguration(job);
        JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()), JobHelper.distributedClassPath(config.makeIntermediatePath()), job);
        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
        // Store the jobId in the file
        if (job.getJobID() != null) {
            JobHelper.writeJobIdToFile(config.getHadoopJobIdFileName(), job.getJobID().toString());
        }
        try {
            boolean success = job.waitForCompletion(true);
            Counters counters = job.getCounters();
            if (counters == null) {
                log.info("No counters found for job [%s]", job.getJobName());
            } else {
                Counter invalidRowCount = counters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER);
                if (invalidRowCount != null) {
                    jobStats.setInvalidRowCount(invalidRowCount.getValue());
                } else {
                    log.info("No invalid row counter found for job [%s]", job.getJobName());
                }
            }
            return success;
        } catch (IOException ioe) {
            if (!Utils.checkAppSuccessForJobIOException(ioe, job, config.isUseYarnRMJobStatusFallback())) {
                throw ioe;
            } else {
                return true;
            }
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Also used : Counter(org.apache.hadoop.mapreduce.Counter) Configuration(org.apache.hadoop.conf.Configuration) BytesWritable(org.apache.hadoop.io.BytesWritable) Counters(org.apache.hadoop.mapreduce.Counters) IOException(java.io.IOException) TimeoutException(java.util.concurrent.TimeoutException) InvalidJobConfException(org.apache.hadoop.mapred.InvalidJobConfException) FileNotFoundException(java.io.FileNotFoundException) ParseException(org.apache.druid.java.util.common.parsers.ParseException) RejectedExecutionException(java.util.concurrent.RejectedExecutionException) ExecutionException(java.util.concurrent.ExecutionException)
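
The counters read by the driver above are populated on the task side. As a hedged sketch of that half (the enum and the parse rule are invented, not Druid's actual IndexGeneratorMapper logic), a mapper increments a counter through its Context and the framework aggregates the per-task values that job.getCounters() later returns:

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class CountingMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    // Stand-in for HadoopDruidIndexerConfig.IndexJobCounters; invented here.
    enum IndexJobCounters { INVALID_ROW_COUNTER }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        if (line.isEmpty()) {
            // Per-task increments are summed by the framework into the
            // job-level value the driver reads after waitForCompletion().
            context.getCounter(IndexJobCounters.INVALID_ROW_COUNTER).increment(1);
            return;
        }
        context.write(new Text(line), new LongWritable(1L));
    }
}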

Example 33 with Counters

use of org.apache.hadoop.mapreduce.Counters in project Cloud9 by lintool.

the class TrecForwardIndexBuilder method run.

/**
 * Runs this tool.
 */
@SuppressWarnings("static-access")
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("(required) collection path").create(COLLECTION_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("(required) output index path").create(INDEX_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("(required) DocnoMapping data").create(MAPPING_OPTION));
    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }
    if (!cmdline.hasOption(COLLECTION_OPTION) || !cmdline.hasOption(INDEX_OPTION) || !cmdline.hasOption(MAPPING_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }
    String collectionPath = cmdline.getOptionValue(COLLECTION_OPTION);
    String indexFile = cmdline.getOptionValue(INDEX_OPTION);
    String mappingFile = cmdline.getOptionValue(MAPPING_OPTION);
    String tmpDir = "tmp-" + TrecForwardIndexBuilder.class.getSimpleName() + "-" + random.nextInt(10000);
    Job job = new Job(getConf(), TrecForwardIndexBuilder.class.getSimpleName() + ":" + collectionPath);
    job.setJarByClass(TrecForwardIndexBuilder.class);
    FileSystem fs = FileSystem.get(getConf());
    LOG.info("Tool name: " + TrecForwardIndexBuilder.class.getSimpleName());
    LOG.info(" - collection path: " + collectionPath);
    LOG.info(" - index file: " + indexFile);
    LOG.info(" - DocnoMapping file: " + mappingFile);
    LOG.info(" - temp output directory: " + tmpDir);
    job.setNumReduceTasks(1);
    if (job.getConfiguration().get("mapred.job.tracker").equals("local")) {
        job.getConfiguration().set(DOCNO_MAPPING_FILE_PROPERTY, mappingFile);
    } else {
        DistributedCache.addCacheFile(new URI(mappingFile), job.getConfiguration());
    }
    FileInputFormat.setInputPaths(job, new Path(collectionPath));
    FileOutputFormat.setOutputPath(job, new Path(tmpDir));
    FileOutputFormat.setCompressOutput(job, false);
    job.setInputFormatClass(TrecDocumentInputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(MyMapper.class);
    // delete the output directory if it exists already
    FileSystem.get(getConf()).delete(new Path(tmpDir), true);
    job.waitForCompletion(true);
    Counters counters = job.getCounters();
    int numDocs = (int) counters.findCounter(Count.DOCS).getValue();
    String inputFile = tmpDir + "/" + "part-r-00000";
    LOG.info("Writing " + numDocs + " doc offseta to " + indexFile);
    LineReader reader = new LineReader(fs.open(new Path(inputFile)));
    FSDataOutputStream writer = fs.create(new Path(indexFile), true);
    writer.writeUTF(edu.umd.cloud9.collection.trec.TrecForwardIndex.class.getCanonicalName());
    writer.writeUTF(collectionPath);
    writer.writeInt(numDocs);
    int cnt = 0;
    Text line = new Text();
    while (reader.readLine(line) > 0) {
        String[] arr = line.toString().split("\\t");
        long offset = Long.parseLong(arr[1]);
        int len = Integer.parseInt(arr[2]);
        writer.writeLong(offset);
        writer.writeInt(len);
        cnt++;
        if (cnt % 100000 == 0) {
            LOG.info(cnt + " docs");
        }
    }
    reader.close();
    writer.close();
    LOG.info(cnt + " docs total. Done!");
    if (numDocs != cnt) {
        throw new RuntimeException("Unexpected number of documents in building forward index!");
    }
    fs.delete(new Path(tmpDir), true);
    return 0;
}
Also used : Path(org.apache.hadoop.fs.Path) Options(org.apache.commons.cli.Options) GnuParser(org.apache.commons.cli.GnuParser) Text(org.apache.hadoop.io.Text) URI(java.net.URI) HelpFormatter(org.apache.commons.cli.HelpFormatter) CommandLine(org.apache.commons.cli.CommandLine) FileSystem(org.apache.hadoop.fs.FileSystem) LineReader(org.apache.hadoop.util.LineReader) Counters(org.apache.hadoop.mapreduce.Counters) CommandLineParser(org.apache.commons.cli.CommandLineParser) ParseException(org.apache.commons.cli.ParseException) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Job(org.apache.hadoop.mapreduce.Job)
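
For reference, the index file written above can be read back by mirroring the write order exactly: class name (writeUTF), collection path (writeUTF), document count (writeInt), then one (long offset, int length) pair per document. A minimal reader sketch, assuming that layout and a path passed as the first argument:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ForwardIndexReaderSketch {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        try (FSDataInputStream in = fs.open(new Path(args[0]))) {
            // Read back in the exact order the builder wrote.
            String indexClass = in.readUTF();       // writeUTF(class name)
            String collectionPath = in.readUTF();   // writeUTF(collection path)
            int numDocs = in.readInt();             // writeInt(numDocs)
            System.out.println(indexClass + " over " + collectionPath + ": " + numDocs + " docs");
            for (int i = 0; i < numDocs; i++) {
                long offset = in.readLong();        // byte offset of doc i
                int length = in.readInt();          // length of doc i
            }
        }
    }
}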

Example 34 with Counters

use of org.apache.hadoop.mapreduce.Counters in project Cloud9 by lintool.

the class CountMedlineCitations method run.

@SuppressWarnings("static-access")
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("(required) collection path").create(COLLECTION_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("(required) output path").create(OUTPUT_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("(required) DocnoMapping data").create(MAPPING_OPTION));
    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }
    if (!cmdline.hasOption(COLLECTION_OPTION) || !cmdline.hasOption(OUTPUT_OPTION) || !cmdline.hasOption(MAPPING_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }
    String inputPath = cmdline.getOptionValue(COLLECTION_OPTION);
    String outputPath = cmdline.getOptionValue(OUTPUT_OPTION);
    String mappingFile = cmdline.getOptionValue(MAPPING_OPTION);
    LOG.info("Tool: " + CountMedlineCitations.class.getSimpleName());
    LOG.info(" - input: " + inputPath);
    LOG.info(" - output dir: " + outputPath);
    LOG.info(" - docno mapping file: " + mappingFile);
    Job job = new Job(getConf(), CountMedlineCitations.class.getSimpleName() + ":" + inputPath);
    job.setJarByClass(CountMedlineCitations.class);
    job.setNumReduceTasks(0);
    // Pass in the class name as a String; this is makes the mapper general in being able to load
    // any collection of Indexable objects that has docid/docno mapping specified by a DocnoMapping
    // object.
    job.getConfiguration().set("DocnoMappingClass", MedlineDocnoMapping.class.getCanonicalName());
    // Put the mapping file in the distributed cache so each map worker will have it.
    DistributedCache.addCacheFile(new URI(mappingFile), job.getConfiguration());
    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    FileOutputFormat.setCompressOutput(job, false);
    job.setInputFormatClass(MedlineCitationInputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapperClass(MyMapper.class);
    // Delete the output directory if it exists already.
    FileSystem.get(job.getConfiguration()).delete(new Path(outputPath), true);
    job.waitForCompletion(true);
    Counters counters = job.getCounters();
    int numDocs = (int) counters.findCounter(Count.DOCS).getValue();
    LOG.info("Read " + numDocs + " docs.");
    return numDocs;
}
Also used : Path(org.apache.hadoop.fs.Path) Options(org.apache.commons.cli.Options) GnuParser(org.apache.commons.cli.GnuParser) URI(java.net.URI) HelpFormatter(org.apache.commons.cli.HelpFormatter) CommandLine(org.apache.commons.cli.CommandLine) Counters(org.apache.hadoop.mapreduce.Counters) CommandLineParser(org.apache.commons.cli.CommandLineParser) ParseException(org.apache.commons.cli.ParseException) Job(org.apache.hadoop.mapreduce.Job)
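
On the mapper side, the file added with DistributedCache.addCacheFile() and the DocnoMappingClass property set by the driver are retrieved during setup. The sketch below is hypothetical (Cloud9's actual MyMapper differs) and uses the Hadoop 2 Context.getCacheFiles() accessor rather than the older DistributedCache static methods:

import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class CachedMappingMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    private Path mappingFile;

    @Override
    protected void setup(Context context) throws IOException {
        // Class name the driver stored; a real mapper would instantiate it
        // reflectively and load the docno mapping from mappingFile.
        String mappingClass = context.getConfiguration().get("DocnoMappingClass");
        URI[] cached = context.getCacheFiles();  // files from addCacheFile()
        if (cached != null && cached.length > 0) {
            mappingFile = new Path(cached[0]);
        }
    }
}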

Example 35 with Counters

use of org.apache.hadoop.mapreduce.Counters in project gora by apache.

the class Verify method verify.

public boolean verify(long expectedReferenced) throws Exception {
    if (job == null) {
        throw new IllegalStateException("You should call run() first");
    }
    Counters counters = job.getCounters();
    Counter referenced = counters.findCounter(Counts.REFERENCED);
    Counter unreferenced = counters.findCounter(Counts.UNREFERENCED);
    Counter undefined = counters.findCounter(Counts.UNDEFINED);
    boolean success = true;
    // assert
    if (expectedReferenced != referenced.getValue()) {
        LOG.error("Expected referenced count does not match with actual referenced count. " + "expected referenced=" + expectedReferenced + " ,actual=" + referenced.getValue());
        success = false;
    }
    if (unreferenced.getValue() > 0) {
        LOG.error("Unreferenced nodes were not expected. Unreferenced count=" + unreferenced.getValue());
        success = false;
    }
    if (undefined.getValue() > 0) {
        LOG.error("Found an undefined node. Undefined count=" + undefined.getValue());
        success = false;
    }
    return success;
}
Also used : Counter(org.apache.hadoop.mapreduce.Counter) Counters(org.apache.hadoop.mapreduce.Counters)
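
Because findCounter() lazily creates counters, comparison logic like the above can be exercised without running a job by populating an in-memory Counters object. A small hedged sketch, with the Counts enum and all values invented for illustration:

import org.apache.hadoop.mapreduce.Counters;

public class VerifyLogicSketch {

    // Invented enum mirroring the Counts referenced above.
    enum Counts { REFERENCED, UNREFERENCED, UNDEFINED }

    public static void main(String[] args) {
        // Populate an in-memory Counters object; no job needed.
        Counters counters = new Counters();
        counters.findCounter(Counts.REFERENCED).setValue(100L);
        counters.findCounter(Counts.UNREFERENCED).setValue(0L);
        counters.findCounter(Counts.UNDEFINED).setValue(0L);

        long expectedReferenced = 100L;  // invented expectation
        boolean success = counters.findCounter(Counts.REFERENCED).getValue() == expectedReferenced
                && counters.findCounter(Counts.UNREFERENCED).getValue() == 0
                && counters.findCounter(Counts.UNDEFINED).getValue() == 0;
        System.out.println(success ? "verified" : "mismatch");
    }
}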

Aggregations

Counters (org.apache.hadoop.mapreduce.Counters): 72
Test (org.junit.Test): 24
Job (org.apache.hadoop.mapreduce.Job): 21
Path (org.apache.hadoop.fs.Path): 14
Configuration (org.apache.hadoop.conf.Configuration): 13
Counter (org.apache.hadoop.mapreduce.Counter): 11
Task (org.apache.hadoop.mapreduce.v2.app.job.Task): 8
TaskId (org.apache.hadoop.mapreduce.v2.api.records.TaskId): 7
PhoenixScrutinyJobCounters (org.apache.phoenix.mapreduce.index.PhoenixScrutinyJobCounters): 7
BaseTest (org.apache.phoenix.query.BaseTest): 7
IOException (java.io.IOException): 6
TaskAttemptId (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId): 6
TaskAttempt (org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt): 6
YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration): 6
HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration): 5
TableName (org.apache.hadoop.hbase.TableName): 4
JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId): 4
File (java.io.File): 3
URI (java.net.URI): 3
FileSystem (org.apache.hadoop.fs.FileSystem): 3