Search in sources :

Example 21 with Counter

use of org.apache.hadoop.mapreduce.Counter in project nutch by apache.

the class IndexingJob method index.

/**
 * Configures and runs the "Indexer" MapReduce job, then (unless disabled)
 * commits through the index writers and logs the "IndexerStatus" counters.
 * The temporary job output directory is always removed afterwards.
 *
 * @param crawlDb          passed to {@code IndexerMapReduce.initMRJob}
 * @param linkDb           passed to {@code IndexerMapReduce.initMRJob}
 * @param segments         passed to {@code IndexerMapReduce.initMRJob}
 * @param noCommit         if true, the index writers are not opened/committed
 * @param deleteGone       stored under {@code IndexerMapReduce.INDEXER_DELETE}
 * @param params           stored under {@code IndexerMapReduce.INDEXER_PARAMS}
 *                         when non-null
 * @param filter           stored under {@code IndexerMapReduce.URL_FILTERING}
 * @param normalize        stored under {@code IndexerMapReduce.URL_NORMALIZING}
 * @param addBinaryContent passed to {@code IndexerMapReduce.initMRJob}
 * @param base64           stored under
 *                         {@code IndexerMapReduce.INDEXER_BINARY_AS_BASE64}
 * @throws IOException            on I/O errors, or if the job completes
 *                                unsuccessfully
 * @throws InterruptedException   if waiting for the job is interrupted
 * @throws ClassNotFoundException propagated from job submission
 */
public void index(Path crawlDb, Path linkDb, List<Path> segments, boolean noCommit, boolean deleteGone, String params, boolean filter, boolean normalize, boolean addBinaryContent, boolean base64) throws IOException, InterruptedException, ClassNotFoundException {
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    long start = System.currentTimeMillis();
    LOG.info("Indexer: starting at {}", sdf.format(start));
    final Job job = NutchJob.getInstance(getConf());
    job.setJobName("Indexer");
    Configuration conf = job.getConfiguration();
    LOG.info("Indexer: deleting gone documents: {}", deleteGone);
    LOG.info("Indexer: URL filtering: {}", filter);
    LOG.info("Indexer: URL normalizing: {}", normalize);
    if (addBinaryContent) {
        if (base64) {
            LOG.info("Indexer: adding binary content as Base64");
        } else {
            LOG.info("Indexer: adding binary content");
        }
    }
    IndexWriters writers = new IndexWriters(getConf());
    LOG.info(writers.describe());
    IndexerMapReduce.initMRJob(crawlDb, linkDb, segments, job, addBinaryContent);
    // NOW PASSED ON THE COMMAND LINE AS A HADOOP PARAM
    // job.set(SolrConstants.SERVER_URL, solrUrl);
    conf.setBoolean(IndexerMapReduce.INDEXER_DELETE, deleteGone);
    conf.setBoolean(IndexerMapReduce.URL_FILTERING, filter);
    conf.setBoolean(IndexerMapReduce.URL_NORMALIZING, normalize);
    conf.setBoolean(IndexerMapReduce.INDEXER_BINARY_AS_BASE64, base64);
    if (params != null) {
        conf.set(IndexerMapReduce.INDEXER_PARAMS, params);
    }
    job.setReduceSpeculativeExecution(false);
    // Unique scratch output path for the job; removed in the finally block below.
    final Path tmp = new Path("tmp_" + System.currentTimeMillis() + "-" + new Random().nextInt());
    FileOutputFormat.setOutputPath(job, tmp);
    try {
        boolean success;
        try {
            success = job.waitForCompletion(true);
        } catch (InterruptedException | ClassNotFoundException e) {
            LOG.error(StringUtils.stringifyException(e));
            throw e;
        }
        // A job that ran to completion but failed must not be committed or
        // reported as a successful indexing run.
        if (!success) {
            String message = "Indexer: job did not succeed, job ID: " + job.getJobID();
            LOG.error(message);
            throw new IOException(message);
        }
        // do the commits once and for all the reducers in one go
        if (!noCommit) {
            writers.open(conf, "commit");
            writers.commit();
        }
        LOG.info("Indexer: number of documents indexed, deleted, or skipped:");
        for (Counter counter : job.getCounters().getGroup("IndexerStatus")) {
            LOG.info("Indexer: {}  {}", String.format(Locale.ROOT, "%6d", counter.getValue()), counter.getName());
        }
        long end = System.currentTimeMillis();
        LOG.info("Indexer: finished at {}, elapsed: {}", sdf.format(end), TimingUtil.elapsedTime(start, end));
    } finally {
        tmp.getFileSystem(conf).delete(tmp, true);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Counter(org.apache.hadoop.mapreduce.Counter) Configuration(org.apache.hadoop.conf.Configuration) NutchConfiguration(org.apache.nutch.util.NutchConfiguration) Random(java.util.Random) NutchJob(org.apache.nutch.util.NutchJob) Job(org.apache.hadoop.mapreduce.Job) SimpleDateFormat(java.text.SimpleDateFormat)

Example 22 with Counter

use of org.apache.hadoop.mapreduce.Counter in project incubator-gobblin by apache.

the class CompactionSlaEventHelper method getRecordCount.

/**
 * Reads the record count of a job from its counters.
 *
 * The reducer-side counter ({@code AvroKeyDedupReducer.EVENT_COUNTER.RECORD_COUNT})
 * is consulted first; if it is missing or zero, the mapper-side counter
 * ({@code AvroKeyMapper.EVENT_COUNTER.RECORD_COUNT}) is used instead.
 *
 * @param job the job to inspect, possibly absent
 * @return the first non-zero record count found, or {@code -1} if the job is
 *         absent, its counters cannot be obtained, or both counters are zero
 */
private static long getRecordCount(Optional<Job> job) {
    if (!job.isPresent()) {
        return -1L;
    }
    Counters counters;
    try {
        counters = job.get().getCounters();
    } catch (IOException e) {
        LOG.debug("Failed to get job counters. Record count will not be set. ", e);
        return -1L;
    }
    // getCounters() can hand back null; treat that like any other
    // "counters unavailable" case instead of failing with an NPE below.
    if (counters == null) {
        LOG.debug("Job counters are not available. Record count will not be set.");
        return -1L;
    }
    Counter recordCounter = counters.findCounter(AvroKeyDedupReducer.EVENT_COUNTER.RECORD_COUNT);
    if (recordCounter != null && recordCounter.getValue() != 0) {
        return recordCounter.getValue();
    }
    recordCounter = counters.findCounter(AvroKeyMapper.EVENT_COUNTER.RECORD_COUNT);
    if (recordCounter != null && recordCounter.getValue() != 0) {
        return recordCounter.getValue();
    }
    LOG.debug("Non zero record count not found in both mapper and reducer counters");
    return -1L;
}
Also used : Counter(org.apache.hadoop.mapreduce.Counter) Counters(org.apache.hadoop.mapreduce.Counters) IOException(java.io.IOException)

Example 23 with Counter

use of org.apache.hadoop.mapreduce.Counter in project incubator-gobblin by apache.

the class MRJobLauncher method countersToMetrics.

/**
 * Copies the values of this job's Hadoop counters into the given
 * {@link org.apache.gobblin.metrics.GobblinMetrics} instance. The job-level
 * counter group is processed first, then the task-level group. Nothing is
 * written when the job reports no counters.
 */
@VisibleForTesting
void countersToMetrics(GobblinMetrics metrics) throws IOException {
    final Counters hadoopCounters = this.job.getCounters();
    if (hadoopCounters == null) {
        return;
    }
    // Job-level counters first, then task-level counters.
    final String[] groupNames = { MetricGroup.JOB.name(), MetricGroup.TASK.name() };
    for (String groupName : groupNames) {
        CounterGroup group = hadoopCounters.getGroup(groupName);
        for (Counter hadoopCounter : group) {
            String counterName = hadoopCounter.getName();
            long counterValue = hadoopCounter.getValue();
            metrics.getCounter(counterName).inc(counterValue);
        }
    }
}
Also used : Counter(org.apache.hadoop.mapreduce.Counter) CounterGroup(org.apache.hadoop.mapreduce.CounterGroup) Counters(org.apache.hadoop.mapreduce.Counters) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 24 with Counter

use of org.apache.hadoop.mapreduce.Counter in project elephant-bird by twitter.

the class HadoopUtils method getCounter.

/**
 * Looks up a MapReduce counter for the given context.
 * <br>
 * Counters can only be obtained from a {@link TaskInputOutputContext}, yet
 * many APIs hand around a super type even though the runtime object is a
 * subclass (e.g. Mapper.Context). This helper inspects the runtime type and
 * fetches the counter when possible. If ctx is not a TaskInputOutputContext,
 * or no counter is returned for it, a warning is logged and a dummy counter
 * named {@code group:counter} is returned instead.
 */
public static Counter getCounter(JobContext ctx, String group, String counter) {
    Counter resolved = null;
    if (ctx instanceof TaskInputOutputContext<?, ?, ?, ?>) {
        TaskInputOutputContext<?, ?, ?, ?> taskContext = (TaskInputOutputContext<?, ?, ?, ?>) ctx;
        resolved = HadoopCompat.getCounter(taskContext, group, counter);
    }
    if (resolved != null) {
        return resolved;
    }
    // Fall back to a stand-in counter so callers never receive null.
    final String dummyName = group + ":" + counter;
    LOG.warn("Using a dummy counter for " + dummyName + " because it does not already exist.");
    return HadoopCompat.newGenericCounter(dummyName, dummyName, 0);
}
Also used : Counter(org.apache.hadoop.mapreduce.Counter) TaskInputOutputContext(org.apache.hadoop.mapreduce.TaskInputOutputContext)

Example 25 with Counter

use of org.apache.hadoop.mapreduce.Counter in project elephant-bird by twitter.

the class PigCounterHelper method incrCounter.

/**
 * Buffered stand-in for Reporter.incrCounter
 * (see org.apache.hadoop.mapred.Reporter's incrCounter).
 *
 * When the reporter or the requested counter is unavailable, the increment
 * is accumulated in a local map. Once a counter can be obtained, the
 * increment is applied directly and all buffered increments are flushed
 * to the reporter and cleared.
 */
public void incrCounter(String group, String counterName, long incr) {
    final PigStatusReporter statusReporter = PigStatusReporter.getInstance();
    final Counter target = (statusReporter == null) ? null : statusReporter.getCounter(group, counterName);

    if (target == null) {
        // Reporter missing or counter not obtainable: remember the increment locally.
        Pair<String, String> bufferKey = new Pair<String, String>(group, counterName);
        Long buffered = counterStringMap_.get(bufferKey);
        long base = (buffered == null) ? 0 : buffered;
        counterStringMap_.put(bufferKey, base + incr);
        return;
    }

    // common case
    HadoopCompat.incrementCounter(target, incr);
    if (!counterStringMap_.isEmpty()) {
        // Flush everything that was buffered while no counter was available.
        for (Map.Entry<Pair<String, String>, Long> pending : counterStringMap_.entrySet()) {
            Pair<String, String> pendingKey = pending.getKey();
            Counter pendingCounter = statusReporter.getCounter(pendingKey.first, pendingKey.second);
            HadoopCompat.incrementCounter(pendingCounter, pending.getValue());
        }
        counterStringMap_.clear();
    }
}
Also used : Counter(org.apache.hadoop.mapreduce.Counter) PigStatusReporter(org.apache.pig.tools.pigstats.PigStatusReporter) Map(java.util.Map) Pair(org.apache.pig.impl.util.Pair)

Aggregations

Counter (org.apache.hadoop.mapreduce.Counter)51 Configuration (org.apache.hadoop.conf.Configuration)15 CounterGroup (org.apache.hadoop.mapreduce.CounterGroup)13 Job (org.apache.hadoop.mapreduce.Job)12 Counters (org.apache.hadoop.mapreduce.Counters)11 IOException (java.io.IOException)8 Path (org.apache.hadoop.fs.Path)7 Map (java.util.Map)4 FileSystem (org.apache.hadoop.fs.FileSystem)4 Test (org.junit.Test)4 TaskCounter (org.apache.hadoop.mapreduce.TaskCounter)3 FileNotFoundException (java.io.FileNotFoundException)2 SimpleDateFormat (java.text.SimpleDateFormat)2 ArrayList (java.util.ArrayList)2 ExecutionException (java.util.concurrent.ExecutionException)2 RejectedExecutionException (java.util.concurrent.RejectedExecutionException)2 TimeoutException (java.util.concurrent.TimeoutException)2 Schema (org.apache.avro.Schema)2 CustomOutputCommitter (org.apache.hadoop.CustomOutputCommitter)2 BytesWritable (org.apache.hadoop.io.BytesWritable)2