use of org.apache.hadoop.mapreduce.Counter in project druid by druid-io.
the class IndexGeneratorJob method run.
@Override
public boolean run() {
  try {
    job = Job.getInstance(
        new Configuration(),
        StringUtils.format("%s-index-generator-%s", config.getDataSource(), config.getIntervals())
    );
    job.getConfiguration().set("io.sort.record.percent", "0.23");
    JobHelper.injectSystemProperties(job.getConfiguration(), config);
    config.addJobProperties(job);
    // inject druid properties like deep storage bindings
    JobHelper.injectDruidProperties(job.getConfiguration(), config);
    job.setMapperClass(IndexGeneratorMapper.class);
    job.setMapOutputValueClass(BytesWritable.class);
    SortableBytes.useSortableBytesAsMapOutputKey(job, IndexGeneratorPartitioner.class);
    int numReducers = Iterables.size(config.getAllBuckets().get());
    if (numReducers == 0) {
      throw new RuntimeException("No buckets?? seems there is no data to index.");
    }
    if (config.getSchema().getTuningConfig().getUseCombiner()) {
      job.setCombinerClass(IndexGeneratorCombiner.class);
      job.setCombinerKeyGroupingComparatorClass(BytesWritable.Comparator.class);
    }
    job.setNumReduceTasks(numReducers);
    setReducerClass(job);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(IndexGeneratorOutputFormat.class);
    FileOutputFormat.setOutputPath(job, config.makeIntermediatePath());
    config.addInputPaths(job);
    config.intoConfiguration(job);
    JobHelper.setupClasspath(
        JobHelper.distributedClassPath(config.getWorkingPath()),
        JobHelper.distributedClassPath(config.makeIntermediatePath()),
        job
    );
    job.submit();
    log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
    // Store the jobId in the file
    if (job.getJobID() != null) {
      JobHelper.writeJobIdToFile(config.getHadoopJobIdFileName(), job.getJobID().toString());
    }
    try {
      boolean success = job.waitForCompletion(true);
      Counters counters = job.getCounters();
      if (counters == null) {
        log.info("No counters found for job [%s]", job.getJobName());
      } else {
        Counter invalidRowCount = counters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER);
        if (invalidRowCount != null) {
          jobStats.setInvalidRowCount(invalidRowCount.getValue());
        } else {
          log.info("No invalid row counter found for job [%s]", job.getJobName());
        }
      }
      return success;
    }
    catch (IOException ioe) {
      if (!Utils.checkAppSuccessForJobIOException(ioe, job, config.isUseYarnRMJobStatusFallback())) {
        throw ioe;
      } else {
        return true;
      }
    }
  }
  catch (Exception e) {
    throw new RuntimeException(e);
  }
}
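The Druid snippet above shows only the consumer side of the counter pattern: the driver fetches the job's Counters after waitForCompletion and looks up a single entry with findCounter. For completeness, here is a minimal sketch of the producer side, i.e. a mapper incrementing an enum-backed counter. The class and enum names are illustrative assumptions, not Druid's actual code.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class ValidatingMapper extends Mapper<LongWritable, Text, Text, Text> {

  // Each enum constant becomes a named counter; the framework aggregates increments
  // from all task attempts, and the driver reads the totals via job.getCounters().
  public enum ExampleCounters { INVALID_ROW }

  @Override
  protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    if (value.getLength() == 0) {
      // Count the bad record and skip it; the total shows up in the job's counter list.
      context.getCounter(ExampleCounters.INVALID_ROW).increment(1);
      return;
    }
    context.write(new Text("valid"), value);
  }
}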
use of org.apache.hadoop.mapreduce.Counter in project Gaffer by gchq.
the class SampleDataAndCreateSplitsFileTool method run.
@Override
public int run(final String[] strings) throws OperationException {
    final List<Job> jobs;
    try {
        LOGGER.info("Creating job using SampleDataForSplitPointsJobFactory");
        jobs = jobFactory.createJobs(operation, store);
    } catch (final IOException e) {
        LOGGER.error("Failed to create Hadoop job: {}", e.getMessage());
        throw new OperationException("Failed to create the Hadoop job: " + e.getMessage(), e);
    }
    for (final Job job : jobs) {
        try {
            LOGGER.info("Running SampleDataForSplitPoints job (job name is {})", job.getJobName());
            job.waitForCompletion(true);
        } catch (final IOException | InterruptedException | ClassNotFoundException e) {
            LOGGER.error("Exception running job: {}", e.getMessage());
            throw new OperationException("Error while waiting for job to complete: " + e.getMessage(), e);
        }
        try {
            if (!job.isSuccessful()) {
                LOGGER.error("Job was not successful (job name is {})", job.getJobName());
                throw new OperationException("Error running job");
            }
        } catch (final IOException e) {
            LOGGER.error("Exception running job: {}", e.getMessage());
            throw new OperationException("Error running job: " + e.getMessage(), e);
        }
        // Find the number of records output.
        // NB: In the following line, use mapred.Task.Counter.REDUCE_OUTPUT_RECORDS rather than
        // mapreduce.TaskCounter.REDUCE_OUTPUT_RECORDS, as this is more compatible with earlier
        // versions of Hadoop.
        Counter counter;
        try {
            counter = job.getCounters().findCounter(TaskCounter.REDUCE_OUTPUT_RECORDS);
            LOGGER.info("Number of records output = {}", counter.getValue());
        } catch (final IOException e) {
            LOGGER.error("Failed to get counter org.apache.hadoop.mapred.TaskCounter.REDUCE_OUTPUT_RECORDS from job: {}", e.getMessage());
            throw new OperationException("Failed to get counter: " + TaskCounter.REDUCE_OUTPUT_RECORDS, e);
        }
        long outputEveryNthRecord;
        if (counter.getValue() < 2 || expectedNumberOfSplits < 1) {
            outputEveryNthRecord = 1;
        } else {
            outputEveryNthRecord = counter.getValue() / expectedNumberOfSplits;
        }
        if (outputEveryNthRecord < 1) {
            outputEveryNthRecord = 1;
        }
        final Path resultsFile = new Path(operation.getOutputPath(), "part-r-00000");
        LOGGER.info("Will output every {}-th record from {}", outputEveryNthRecord, resultsFile);
        // Read through the resulting file, pick out the split points and write them to file.
        final Configuration conf = getConf();
        final FileSystem fs;
        try {
            fs = FileSystem.get(conf);
        } catch (final IOException e) {
            LOGGER.error("Exception getting filesystem: {}", e.getMessage());
            throw new OperationException("Failed to get filesystem from configuration: " + e.getMessage(), e);
        }
        writeSplits(fs, resultsFile, outputEveryNthRecord, expectedNumberOfSplits);
        try {
            fs.delete(resultsFile, true);
            LOGGER.info("Deleted the results file {}", resultsFile);
        } catch (final IOException e) {
            LOGGER.error("Failed to delete the results file {}", resultsFile);
            throw new OperationException("Failed to delete the results file: " + e.getMessage(), e);
        }
    }
    return SUCCESS_RESPONSE;
}
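The NB comment in the Gaffer snippet points at a portability wrinkle: the built-in counter enums moved between packages across Hadoop releases (mapred.Task.Counter vs. mapreduce.TaskCounter). One hedged alternative, shown here purely as an illustrative sketch, is to look the counter up by group and counter name strings instead of the enum constant; the group name below is an assumption based on the Hadoop 2.x convention that built-in task counters are grouped under the fully qualified enum class name.

// Sketch only: string-based lookup of a built-in counter, assuming "job" has already completed.
// The group name is an assumption about the Hadoop 2.x counter group naming, not Gaffer's code.
Counter reduceOutputRecords = job.getCounters().findCounter("org.apache.hadoop.mapreduce.TaskCounter", "REDUCE_OUTPUT_RECORDS");
long numOutputRecords = reduceOutputRecords.getValue();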
use of org.apache.hadoop.mapreduce.Counter in project gora by apache.
the class Verify method verify.
public boolean verify(long expectedReferenced) throws Exception {
  if (job == null) {
    throw new IllegalStateException("You should call run() first");
  }
  Counters counters = job.getCounters();
  Counter referenced = counters.findCounter(Counts.REFERENCED);
  Counter unreferenced = counters.findCounter(Counts.UNREFERENCED);
  Counter undefined = counters.findCounter(Counts.UNDEFINED);
  boolean success = true;
  // assert
  if (expectedReferenced != referenced.getValue()) {
    LOG.error("Expected referenced count does not match actual referenced count. expected referenced=" + expectedReferenced + ", actual=" + referenced.getValue());
    success = false;
  }
  if (unreferenced.getValue() > 0) {
    LOG.error("Unreferenced nodes were not expected. Unreferenced count=" + unreferenced.getValue());
    success = false;
  }
  if (undefined.getValue() > 0) {
    LOG.error("Found an undefined node. Undefined count=" + undefined.getValue());
    success = false;
  }
  return success;
}
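When a verification like the one above fails, it can help to dump every counter the job recorded rather than only the three that are checked. Counters is iterable over its groups, and each group is iterable over its counters, so a small generic debugging sketch (the SLF4J-style logger call is an assumption, not gora's code) looks like this:

// Sketch: log every counter group and counter of a finished job for debugging.
// Types are org.apache.hadoop.mapreduce.Counters, CounterGroup and Counter.
Counters allCounters = job.getCounters();
for (CounterGroup group : allCounters) {
  for (Counter c : group) {
    LOG.info("{}::{} = {}", group.getDisplayName(), c.getDisplayName(), c.getValue());
  }
}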
use of org.apache.hadoop.mapreduce.Counter in project h2o-3 by h2oai.
the class h2omapper method run2.
private int run2(Context context) throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  Counter counter = context.getCounter(H2O_MAPPER_COUNTER.HADOOP_COUNTER_HEARTBEAT);
  Thread counterThread = new CounterThread(context, counter);
  counterThread.start();
  String mapredLocalDir = conf.get("mapred.local.dir");
  String ice_root;
  if (mapredLocalDir.contains(",")) {
    ice_root = mapredLocalDir.split(",")[0];
  } else {
    ice_root = mapredLocalDir;
  }
  String driverIp = conf.get(H2O_DRIVER_IP_KEY);
  String driverPortString = conf.get(H2O_DRIVER_PORT_KEY);
  int driverPort = Integer.parseInt(driverPortString);
  // Bind a loopback socket on an ephemeral port; the embedded H2O config reports the exit status to it later.
  ServerSocket ss = new ServerSocket();
  InetSocketAddress sa = new InetSocketAddress("127.0.0.1", 0);
  ss.bind(sa);
  int localPort = ss.getLocalPort();
  List<String> argsList = new ArrayList<String>();
  // Arguments set inside the mapper.
  argsList.add("-ice_root");
  argsList.add(ice_root);
  argsList.add("-hdfs_skip");
  // Arguments passed by the driver.
  int argsLength = Integer.parseInt(conf.get(H2O_MAPPER_ARGS_LENGTH));
  for (int i = 0; i < argsLength; i++) {
    String arg = conf.get(H2O_MAPPER_ARGS_BASE + Integer.toString(i));
    argsList.add(arg);
  }
  // Config files passed by the driver.
  int confLength = Integer.parseInt(conf.get(H2O_MAPPER_CONF_LENGTH));
  for (int i = 0; i < confLength; i++) {
    String arg = conf.get(H2O_MAPPER_CONF_ARG_BASE + Integer.toString(i));
    // For files which are not passed as args (i.e. SSL certs).
    if (null != arg && !arg.isEmpty()) {
      argsList.add(arg);
    }
    String basename = conf.get(H2O_MAPPER_CONF_BASENAME_BASE + Integer.toString(i));
    File f = new File(ice_root);
    boolean b = f.exists();
    if (!b) {
      boolean success = f.mkdirs();
      if (!success) {
        Log.POST(103, "mkdirs(" + f.toString() + ") failed");
        return -1;
      }
      Log.POST(104, "after mkdirs()");
    }
    String fileName = ice_root + File.separator + basename;
    String payload = conf.get(H2O_MAPPER_CONF_PAYLOAD_BASE + Integer.toString(i));
    byte[] byteArr = h2odriver.convertStringToByteArr(payload);
    h2odriver.writeBinaryFile(fileName, byteArr);
    if (null != arg && !arg.isEmpty()) {
      argsList.add(fileName);
    }
    // Need to modify this config here as we don't know the destination dir for keys when generating it.
    if ("default-security.config".equals(basename)) {
      modifyKeyPath(fileName, ice_root);
    }
  }
  String[] args = argsList.toArray(new String[argsList.size()]);
  try {
    _embeddedH2OConfig = new EmbeddedH2OConfig();
    _embeddedH2OConfig.setDriverCallbackIp(driverIp);
    _embeddedH2OConfig.setDriverCallbackPort(driverPort);
    _embeddedH2OConfig.setMapperCallbackPort(localPort);
    H2O.setEmbeddedH2OConfig(_embeddedH2OConfig);
    Log.POST(11, "After setEmbeddedH2OConfig");
    //-------------------------------------------------------------
    water.H2OApp.main(args);
    //-------------------------------------------------------------
    Log.POST(12, "After main");
  } catch (Exception e) {
    Log.POST(13, "Exception in main");
    Log.POST(13, e.toString());
  }
  Log.POST(14, "Waiting for exit");
  // EmbeddedH2OConfig will send a one-byte exit status to this socket.
  Socket sock = ss.accept();
  System.out.println("Wait for exit woke up from accept");
  byte[] b = new byte[1];
  InputStream is = sock.getInputStream();
  int expectedBytes = 1;
  int receivedBytes = 0;
  while (receivedBytes < expectedBytes) {
    int n = is.read(b, receivedBytes, expectedBytes - receivedBytes);
    System.out.println("is.read returned " + n);
    if (n < 0) {
      System.exit(112);
    }
    receivedBytes += n;
  }
  int exitStatus = (int) b[0];
  System.out.println("Received exitStatus " + exitStatus);
  return exitStatus;
}
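The counter in the h2o snippet is used as a liveness signal: CounterThread (not shown above) periodically bumps the heartbeat counter so the framework sees the long-running mapper making progress and does not kill it. A minimal sketch of what such a thread might look like follows; the class name, interval, and structure are assumptions for illustration, not the actual h2o CounterThread implementation.

import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.TaskInputOutputContext;

class HeartbeatThread extends Thread {
  private final TaskInputOutputContext<?, ?, ?, ?> context;
  private final Counter counter;

  HeartbeatThread(TaskInputOutputContext<?, ?, ?, ?> context, Counter counter) {
    this.context = context;
    this.counter = counter;
    setDaemon(true);
  }

  @Override
  public void run() {
    while (true) {
      counter.increment(1);      // visible in the job's counter list
      context.progress();        // tells the framework the task is still alive
      try {
        Thread.sleep(10_000);    // assumed heartbeat interval
      } catch (InterruptedException e) {
        return;
      }
    }
  }
}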
use of org.apache.hadoop.mapreduce.Counter in project hbase by apache.
the class RowCounter method run.
@Override
public int run(String[] args) throws Exception {
  if (args.length < 1) {
    printUsage("Wrong number of parameters: " + args.length);
    return -1;
  }
  Job job = createSubmittableJob(getConf(), args);
  if (job == null) {
    return -1;
  }
  boolean success = job.waitForCompletion(true);
  final long expectedCount = getConf().getLong(EXPECTED_COUNT_KEY, -1);
  if (success && expectedCount != -1) {
    final Counter counter = job.getCounters().findCounter(RowCounterMapper.Counters.ROWS);
    success = expectedCount == counter.getValue();
    if (!success) {
      LOG.error("Failing job because count of '" + counter.getValue() + "' does not match expected count of '" + expectedCount + "'");
    }
  }
  return (success ? 0 : 1);
}