Use of org.apache.hadoop.mapreduce.Counter in project nutch by apache.
The class IndexingJob, method index:
public void index(Path crawlDb, Path linkDb, List<Path> segments, boolean noCommit, boolean deleteGone, String params, boolean filter, boolean normalize, boolean addBinaryContent, boolean base64) throws IOException, InterruptedException, ClassNotFoundException {
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    long start = System.currentTimeMillis();
    LOG.info("Indexer: starting at {}", sdf.format(start));
    final Job job = NutchJob.getInstance(getConf());
    job.setJobName("Indexer");
    Configuration conf = job.getConfiguration();
    LOG.info("Indexer: deleting gone documents: {}", deleteGone);
    LOG.info("Indexer: URL filtering: {}", filter);
    LOG.info("Indexer: URL normalizing: {}", normalize);
    if (addBinaryContent) {
        if (base64) {
            LOG.info("Indexer: adding binary content as Base64");
        } else {
            LOG.info("Indexer: adding binary content");
        }
    }
    IndexWriters writers = new IndexWriters(getConf());
    LOG.info(writers.describe());
    IndexerMapReduce.initMRJob(crawlDb, linkDb, segments, job, addBinaryContent);
    // NOW PASSED ON THE COMMAND LINE AS A HADOOP PARAM
    // job.set(SolrConstants.SERVER_URL, solrUrl);
    conf.setBoolean(IndexerMapReduce.INDEXER_DELETE, deleteGone);
    conf.setBoolean(IndexerMapReduce.URL_FILTERING, filter);
    conf.setBoolean(IndexerMapReduce.URL_NORMALIZING, normalize);
    conf.setBoolean(IndexerMapReduce.INDEXER_BINARY_AS_BASE64, base64);
    if (params != null) {
        conf.set(IndexerMapReduce.INDEXER_PARAMS, params);
    }
    job.setReduceSpeculativeExecution(false);
    final Path tmp = new Path("tmp_" + System.currentTimeMillis() + "-" + new Random().nextInt());
    FileOutputFormat.setOutputPath(job, tmp);
    try {
        try {
            int complete = job.waitForCompletion(true) ? 0 : 1;
        } catch (InterruptedException | ClassNotFoundException e) {
            LOG.error(StringUtils.stringifyException(e));
            throw e;
        }
        // do the commits once and for all the reducers in one go
        if (!noCommit) {
            writers.open(conf, "commit");
            writers.commit();
        }
        LOG.info("Indexer: number of documents indexed, deleted, or skipped:");
        for (Counter counter : job.getCounters().getGroup("IndexerStatus")) {
            LOG.info("Indexer: {} {}", String.format(Locale.ROOT, "%6d", counter.getValue()), counter.getName());
        }
        long end = System.currentTimeMillis();
        LOG.info("Indexer: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
    } finally {
        tmp.getFileSystem(conf).delete(tmp, true);
    }
}
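The counters printed in the "IndexerStatus" loop above are populated on the task side and aggregated by the framework, which is why the job-level getGroup("IndexerStatus") read works after waitForCompletion returns. A minimal, hypothetical sketch of how a Hadoop reducer bumps counters in a named group (the counter name "indexed" and the key/value types are illustrative placeholders, not necessarily what IndexerMapReduce uses):

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Sketch only: counters in a named group are created lazily by
// Context.getCounter(group, name) and summed across all tasks.
public class StatusCountingReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        long sum = 0;
        for (LongWritable value : values) {
            sum += value.get();
        }
        // "indexed" is an illustrative counter name, not necessarily one Nutch uses.
        context.getCounter("IndexerStatus", "indexed").increment(1);
        context.write(key, new LongWritable(sum));
    }
}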
Use of org.apache.hadoop.mapreduce.Counter in project incubator-gobblin by apache.
The class CompactionSlaEventHelper, method getRecordCount:
private static long getRecordCount(Optional<Job> job) {
    if (!job.isPresent()) {
        return -1l;
    }
    Counters counters = null;
    try {
        counters = job.get().getCounters();
    } catch (IOException e) {
        LOG.debug("Failed to get job counters. Record count will not be set. ", e);
        return -1l;
    }
    Counter recordCounter = counters.findCounter(AvroKeyDedupReducer.EVENT_COUNTER.RECORD_COUNT);
    if (recordCounter != null && recordCounter.getValue() != 0) {
        return recordCounter.getValue();
    }
    recordCounter = counters.findCounter(AvroKeyMapper.EVENT_COUNTER.RECORD_COUNT);
    if (recordCounter != null && recordCounter.getValue() != 0) {
        return recordCounter.getValue();
    }
    LOG.debug("Non zero record count not found in both mapper and reducer counters");
    return -1l;
}
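Counters.findCounter(Enum) resolves the group from the enum's fully qualified class name and the counter from the constant name, so the lookups above only return a non-zero value if a mapper or reducer incremented the same enum constant. A hedged sketch of that producer side, with a hypothetical enum and reducer standing in for the real Gobblin EVENT_COUNTER types:

import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Illustrative only: the real AvroKeyDedupReducer / AvroKeyMapper define their own enums.
public class RecordCountingReducer extends Reducer<Text, NullWritable, Text, NullWritable> {

    public enum EVENT_COUNTER { RECORD_COUNT }

    @Override
    protected void reduce(Text key, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        context.write(key, NullWritable.get());
        // Enum counters are grouped under the enum's class name, which is exactly
        // what counters.findCounter(EVENT_COUNTER.RECORD_COUNT) looks up later.
        context.getCounter(EVENT_COUNTER.RECORD_COUNT).increment(1);
    }
}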
Use of org.apache.hadoop.mapreduce.Counter in project incubator-gobblin by apache.
The class MRJobLauncher, method countersToMetrics:
/**
* Create a {@link org.apache.gobblin.metrics.GobblinMetrics} instance for this job run from the Hadoop counters.
*/
@VisibleForTesting
void countersToMetrics(GobblinMetrics metrics) throws IOException {
    Optional<Counters> counters = Optional.fromNullable(this.job.getCounters());
    if (counters.isPresent()) {
        // Write job-level counters
        CounterGroup jobCounterGroup = counters.get().getGroup(MetricGroup.JOB.name());
        for (Counter jobCounter : jobCounterGroup) {
            metrics.getCounter(jobCounter.getName()).inc(jobCounter.getValue());
        }
        // Write task-level counters
        CounterGroup taskCounterGroup = counters.get().getGroup(MetricGroup.TASK.name());
        for (Counter taskCounter : taskCounterGroup) {
            metrics.getCounter(taskCounter.getName()).inc(taskCounter.getValue());
        }
    }
}
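The same iteration pattern works for any counter group, not just MetricGroup.JOB and MetricGroup.TASK. A small sketch, assuming a completed org.apache.hadoop.mapreduce.Job handle and a hypothetical helper name:

import java.io.IOException;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.CounterGroup;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;

// Sketch only: dump every counter group of a finished job.
static void logAllCounters(Job job) throws IOException {
    Counters counters = job.getCounters();
    if (counters == null) {
        return; // counters may be unavailable, e.g. for a retired job
    }
    for (CounterGroup group : counters) {
        for (Counter counter : group) {
            System.out.printf("%s.%s = %d%n", group.getName(), counter.getName(), counter.getValue());
        }
    }
}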
Use of org.apache.hadoop.mapreduce.Counter in project elephant-bird by twitter.
The class HadoopUtils, method getCounter:
/**
* MapReduce counters are available only with {@link TaskInputOutputContext},
* but most interfaces use super classes, though the actual object is a
* subclass (e.g. Mapper.Context).
* <br>
* This utility method checks the type and returns the appropriate counter.
* In the rare (may be unexpected) case where ctx is not a
* TaskInputOutputContext, a dummy counter is returned after printing
* a warning.
*/
public static Counter getCounter(JobContext ctx, String group, String counter) {
    if (ctx instanceof TaskInputOutputContext<?, ?, ?, ?>) {
        Counter c = HadoopCompat.getCounter((TaskInputOutputContext<?, ?, ?, ?>) ctx, group, counter);
        if (c != null) {
            return c;
        }
    }
    String name = group + ":" + counter;
    LOG.warn("Using a dummy counter for " + name + " because it does not already exist.");
    return HadoopCompat.newGenericCounter(name, name, 0);
}
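A hedged usage sketch: because the helper accepts the broad JobContext type, a Mapper can fetch the counter once in setup() without caring whether the runtime object really is a TaskInputOutputContext. The mapper class, the group/counter names, and the elephant-bird package path are illustrative assumptions here:

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Mapper;
import com.twitter.elephantbird.util.HadoopUtils;

// Sketch: "MyApp" / "records.read" are placeholder counter names.
public class CountingMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    private Counter recordsRead;

    @Override
    protected void setup(Context context) {
        // If the framework hands us something other than a TaskInputOutputContext,
        // we simply get the dummy counter and keep running.
        recordsRead = HadoopUtils.getCounter(context, "MyApp", "records.read");
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        recordsRead.increment(1);
        context.write(value, new LongWritable(1));
    }
}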
Use of org.apache.hadoop.mapreduce.Counter in project elephant-bird by twitter.
The class PigCounterHelper, method incrCounter:
/**
* Mocks the Reporter.incrCounter, but adds buffering.
* See org.apache.hadoop.mapred.Reporter's incrCounter.
*/
public void incrCounter(String group, String counterName, long incr) {
    PigStatusReporter reporter = PigStatusReporter.getInstance();
    if (reporter != null) {
        // common case
        Counter counter = reporter.getCounter(group, counterName);
        if (counter != null) {
            HadoopCompat.incrementCounter(counter, incr);
            if (counterStringMap_.size() > 0) {
                for (Map.Entry<Pair<String, String>, Long> entry : counterStringMap_.entrySet()) {
                    HadoopCompat.incrementCounter(reporter.getCounter(entry.getKey().first, entry.getKey().second), entry.getValue());
                }
                counterStringMap_.clear();
            }
            return;
        }
    }
    // In the case when reporter is not available, or we can't get the Counter,
    // store in the local map.
    Pair<String, String> key = new Pair<String, String>(group, counterName);
    Long currentValue = counterStringMap_.get(key);
    counterStringMap_.put(key, (currentValue == null ? 0 : currentValue) + incr);
}
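A typical caller is a Pig UDF that wants to bump counters even before the PigStatusReporter is wired up; the buffering in counterStringMap_ makes early increments safe and flushes them on the next successful call. A hedged sketch, assuming the usual elephant-bird package path and purely illustrative UDF and counter names:

import java.io.IOException;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.Tuple;
import com.twitter.elephantbird.pig.util.PigCounterHelper;

// Illustrative UDF: "MyUDF" / "null-inputs" are placeholder counter names.
public class NullTrackingUpper extends EvalFunc<String> {

    private final PigCounterHelper counterHelper = new PigCounterHelper();

    @Override
    public String exec(Tuple input) throws IOException {
        if (input == null || input.size() == 0 || input.get(0) == null) {
            // Buffered locally if the PigStatusReporter is not yet available.
            counterHelper.incrCounter("MyUDF", "null-inputs", 1L);
            return null;
        }
        return ((String) input.get(0)).toUpperCase();
    }
}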