
Example 31 with Counter

use of org.apache.hadoop.mapreduce.Counter in project druid by druid-io.

the class IndexGeneratorJob method run.

@Override
public boolean run() {
    try {
        job = Job.getInstance(new Configuration(), StringUtils.format("%s-index-generator-%s", config.getDataSource(), config.getIntervals()));
        job.getConfiguration().set("io.sort.record.percent", "0.23");
        JobHelper.injectSystemProperties(job.getConfiguration(), config);
        config.addJobProperties(job);
        // inject druid properties like deep storage bindings
        JobHelper.injectDruidProperties(job.getConfiguration(), config);
        job.setMapperClass(IndexGeneratorMapper.class);
        job.setMapOutputValueClass(BytesWritable.class);
        SortableBytes.useSortableBytesAsMapOutputKey(job, IndexGeneratorPartitioner.class);
        int numReducers = Iterables.size(config.getAllBuckets().get());
        if (numReducers == 0) {
            throw new RuntimeException("No buckets?? seems there is no data to index.");
        }
        if (config.getSchema().getTuningConfig().getUseCombiner()) {
            job.setCombinerClass(IndexGeneratorCombiner.class);
            job.setCombinerKeyGroupingComparatorClass(BytesWritable.Comparator.class);
        }
        job.setNumReduceTasks(numReducers);
        setReducerClass(job);
        job.setOutputKeyClass(BytesWritable.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(IndexGeneratorOutputFormat.class);
        FileOutputFormat.setOutputPath(job, config.makeIntermediatePath());
        config.addInputPaths(job);
        config.intoConfiguration(job);
        JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()), JobHelper.distributedClassPath(config.makeIntermediatePath()), job);
        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
        // Store the jobId in the file
        if (job.getJobID() != null) {
            JobHelper.writeJobIdToFile(config.getHadoopJobIdFileName(), job.getJobID().toString());
        }
        try {
            boolean success = job.waitForCompletion(true);
            Counters counters = job.getCounters();
            if (counters == null) {
                log.info("No counters found for job [%s]", job.getJobName());
            } else {
                Counter invalidRowCount = counters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER);
                if (invalidRowCount != null) {
                    jobStats.setInvalidRowCount(invalidRowCount.getValue());
                } else {
                    log.info("No invalid row counter found for job [%s]", job.getJobName());
                }
            }
            return success;
        } catch (IOException ioe) {
            if (!Utils.checkAppSuccessForJobIOException(ioe, job, config.isUseYarnRMJobStatusFallback())) {
                throw ioe;
            } else {
                return true;
            }
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Also used : Counter(org.apache.hadoop.mapreduce.Counter) Configuration(org.apache.hadoop.conf.Configuration) BytesWritable(org.apache.hadoop.io.BytesWritable) Counters(org.apache.hadoop.mapreduce.Counters) IOException(java.io.IOException) TimeoutException(java.util.concurrent.TimeoutException) InvalidJobConfException(org.apache.hadoop.mapred.InvalidJobConfException) FileNotFoundException(java.io.FileNotFoundException) ParseException(org.apache.druid.java.util.common.parsers.ParseException) RejectedExecutionException(java.util.concurrent.RejectedExecutionException) ExecutionException(java.util.concurrent.ExecutionException)
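
The null checks in this example matter in practice: job.getCounters() can return null once the job history is no longer available, and findCounter() should only be called on a non-null Counters object. A minimal standalone sketch of the same read-after-completion pattern follows; the MyCounters enum and the method name are illustrative, not part of the Druid code.

import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;

public class CounterReadSketch {

    // Hypothetical counter enum; any Java enum can serve as a counter key in Hadoop.
    public enum MyCounters {
        INVALID_ROWS
    }

    // Returns the invalid-row count recorded by the job, or 0 if counters are unavailable.
    public static long readInvalidRows(Job job) throws Exception {
        Counters counters = job.getCounters();
        if (counters == null) {
            // Counters may be gone if the job history has been purged.
            return 0L;
        }
        Counter invalidRows = counters.findCounter(MyCounters.INVALID_ROWS);
        return invalidRows == null ? 0L : invalidRows.getValue();
    }
}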

Example 32 with Counter

use of org.apache.hadoop.mapreduce.Counter in project Gaffer by gchq.

the class SampleDataAndCreateSplitsFileTool method run.

@Override
public int run(final String[] strings) throws OperationException {
    final List<Job> jobs;
    try {
        LOGGER.info("Creating job using SampleDataForSplitPointsJobFactory");
        jobs = jobFactory.createJobs(operation, store);
    } catch (final IOException e) {
        LOGGER.error("Failed to create Hadoop job: {}", e.getMessage());
        throw new OperationException("Failed to create the Hadoop job: " + e.getMessage(), e);
    }
    for (final Job job : jobs) {
        try {
            LOGGER.info("Running SampleDataForSplitPoints job (job name is {})", job.getJobName());
            job.waitForCompletion(true);
        } catch (final IOException | InterruptedException | ClassNotFoundException e) {
            LOGGER.error("Exception running job: {}", e.getMessage());
            throw new OperationException("Error while waiting for job to complete: " + e.getMessage(), e);
        }
        try {
            if (!job.isSuccessful()) {
                LOGGER.error("Job was not successful (job name is {})", job.getJobName());
                throw new OperationException("Error running job");
            }
        } catch (final IOException e) {
            LOGGER.error("Exception running job: {}", e.getMessage());
            throw new OperationException("Error running job" + e.getMessage(), e);
        }
        // Find the number of records output by the reducers. The counter is looked up via
        // org.apache.hadoop.mapreduce.TaskCounter.REDUCE_OUTPUT_RECORDS, which is the key that
        // the code below (and the imports) actually uses.
        Counter counter;
        try {
            counter = job.getCounters().findCounter(TaskCounter.REDUCE_OUTPUT_RECORDS);
            LOGGER.info("Number of records output = {}", counter.getValue());
        } catch (final IOException e) {
            LOGGER.error("Failed to get counter org.apache.hadoop.mapred.TaskCounter.REDUCE_OUTPUT_RECORDS from job: {}", e.getMessage());
            throw new OperationException("Failed to get counter: " + TaskCounter.REDUCE_OUTPUT_RECORDS, e);
        }
        long outputEveryNthRecord;
        if (counter.getValue() < 2 || expectedNumberOfSplits < 1) {
            outputEveryNthRecord = 1;
        } else {
            outputEveryNthRecord = counter.getValue() / expectedNumberOfSplits;
        }
        if (outputEveryNthRecord < 1) {
            outputEveryNthRecord = 1;
        }
        final Path resultsFile = new Path(operation.getOutputPath(), "part-r-00000");
        LOGGER.info("Will output every {}-th record from {}", outputEveryNthRecord, resultsFile);
        // Read through resulting file, pick out the split points and write to file.
        final Configuration conf = getConf();
        final FileSystem fs;
        try {
            fs = FileSystem.get(conf);
        } catch (final IOException e) {
            LOGGER.error("Exception getting filesystem: {}", e.getMessage());
            throw new OperationException("Failed to get filesystem from configuration: " + e.getMessage(), e);
        }
        writeSplits(fs, resultsFile, outputEveryNthRecord, expectedNumberOfSplits);
        try {
            fs.delete(resultsFile, true);
            LOGGER.info("Deleted the results file {}", resultsFile);
        } catch (final IOException e) {
            LOGGER.error("Failed to delete the results file {}", resultsFile);
            throw new OperationException("Failed to delete the results file: " + e.getMessage(), e);
        }
    }
    return SUCCESS_RESPONSE;
}
Also used : Path(org.apache.hadoop.fs.Path) Counter(org.apache.hadoop.mapreduce.Counter) TaskCounter(org.apache.hadoop.mapreduce.TaskCounter) Configuration(org.apache.hadoop.conf.Configuration) FileSystem(org.apache.hadoop.fs.FileSystem) IOException(java.io.IOException) Job(org.apache.hadoop.mapreduce.Job) OperationException(uk.gov.gchq.gaffer.operation.OperationException)
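
The sampling arithmetic in this example (divide REDUCE_OUTPUT_RECORDS by the desired number of splits and clamp the result at 1) can be pulled out on its own. A short sketch, assuming a completed Job; the class and method names are illustrative.

import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;

public class SplitSamplingSketch {

    // Computes how often a record should be kept so that roughly
    // expectedSplits records survive out of the reducer's output.
    public static long outputEveryNthRecord(Job job, int expectedSplits) throws Exception {
        Counter outputRecords = job.getCounters().findCounter(TaskCounter.REDUCE_OUTPUT_RECORDS);
        long total = outputRecords.getValue();
        if (total < 2 || expectedSplits < 1) {
            // Too little data (or no target) to sample meaningfully.
            return 1L;
        }
        return Math.max(1L, total / expectedSplits);
    }
}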

Example 33 with Counter

use of org.apache.hadoop.mapreduce.Counter in project gora by apache.

the class Verify method verify.

public boolean verify(long expectedReferenced) throws Exception {
    if (job == null) {
        throw new IllegalStateException("You should call run() first");
    }
    Counters counters = job.getCounters();
    Counter referenced = counters.findCounter(Counts.REFERENCED);
    Counter unreferenced = counters.findCounter(Counts.UNREFERENCED);
    Counter undefined = counters.findCounter(Counts.UNDEFINED);
    boolean success = true;
    // assert
    if (expectedReferenced != referenced.getValue()) {
        LOG.error("Expected referenced count does not match with actual referenced count. " + "expected referenced=" + expectedReferenced + " ,actual=" + referenced.getValue());
        success = false;
    }
    if (unreferenced.getValue() > 0) {
        LOG.error("Unreferenced nodes were not expected. Unreferenced count=" + unreferenced.getValue());
        success = false;
    }
    if (undefined.getValue() > 0) {
        LOG.error("Found an undefined node. Undefined count=" + undefined.getValue());
        success = false;
    }
    return success;
}
Also used : Counter(org.apache.hadoop.mapreduce.Counter) Counters(org.apache.hadoop.mapreduce.Counters)
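
The Counts enum read back above (REFERENCED, UNREFERENCED, UNDEFINED) is a user-defined counter group incremented inside the job's tasks. A rough sketch of how such counters are typically incremented from a reducer; the reducer class and its aggregation logic are illustrative, not the actual Gora implementation.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class VerifyReducerSketch extends Reducer<Text, LongWritable, Text, LongWritable> {

    // Hypothetical enum mirroring the Counts counters read back by verify().
    public enum Counts {
        REFERENCED, UNREFERENCED, UNDEFINED
    }

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        long references = 0;
        for (LongWritable value : values) {
            references += value.get();
        }
        // Increments are aggregated across all tasks and become visible
        // to the driver through job.getCounters().findCounter(...).
        if (references > 0) {
            context.getCounter(Counts.REFERENCED).increment(1);
        } else {
            context.getCounter(Counts.UNREFERENCED).increment(1);
        }
    }
}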

Example 34 with Counter

use of org.apache.hadoop.mapreduce.Counter in project h2o-3 by h2oai.

the class h2omapper method run2.

private int run2(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    Counter counter = context.getCounter(H2O_MAPPER_COUNTER.HADOOP_COUNTER_HEARTBEAT);
    Thread counterThread = new CounterThread(context, counter);
    counterThread.start();
    String mapredLocalDir = conf.get("mapred.local.dir");
    String ice_root;
    if (mapredLocalDir.contains(",")) {
        ice_root = mapredLocalDir.split(",")[0];
    } else {
        ice_root = mapredLocalDir;
    }
    String driverIp = conf.get(H2O_DRIVER_IP_KEY);
    String driverPortString = conf.get(H2O_DRIVER_PORT_KEY);
    int driverPort = Integer.parseInt(driverPortString);
    ServerSocket ss = new ServerSocket();
    InetSocketAddress sa = new InetSocketAddress("127.0.0.1", 0);
    ss.bind(sa);
    int localPort = ss.getLocalPort();
    List<String> argsList = new ArrayList<String>();
    // Arguments set inside the mapper.
    argsList.add("-ice_root");
    argsList.add(ice_root);
    argsList.add("-hdfs_skip");
    // Arguments passed by the driver.
    int argsLength = Integer.parseInt(conf.get(H2O_MAPPER_ARGS_LENGTH));
    for (int i = 0; i < argsLength; i++) {
        String arg = conf.get(H2O_MAPPER_ARGS_BASE + Integer.toString(i));
        argsList.add(arg);
    }
    // Config files passed by the driver.
    int confLength = Integer.parseInt(conf.get(H2O_MAPPER_CONF_LENGTH));
    for (int i = 0; i < confLength; i++) {
        String arg = conf.get(H2O_MAPPER_CONF_ARG_BASE + Integer.toString(i));
        // For files which are not passed as args (i.e. SSL certs)
        if (null != arg && !arg.isEmpty()) {
            argsList.add(arg);
        }
        String basename = conf.get(H2O_MAPPER_CONF_BASENAME_BASE + Integer.toString(i));
        File f = new File(ice_root);
        boolean b = f.exists();
        if (!b) {
            boolean success = f.mkdirs();
            if (!success) {
                Log.POST(103, "mkdirs(" + f.toString() + ") failed");
                return -1;
            }
            Log.POST(104, "after mkdirs()");
        }
        String fileName = ice_root + File.separator + basename;
        String payload = conf.get(H2O_MAPPER_CONF_PAYLOAD_BASE + Integer.toString(i));
        byte[] byteArr = h2odriver.convertStringToByteArr(payload);
        h2odriver.writeBinaryFile(fileName, byteArr);
        if (null != arg && !arg.isEmpty()) {
            argsList.add(fileName);
        }
        // Need to modify this config here as we don't know the destination dir for keys when generating it
        if ("default-security.config".equals(basename)) {
            modifyKeyPath(fileName, ice_root);
        }
    }
    String[] args = argsList.toArray(new String[argsList.size()]);
    try {
        _embeddedH2OConfig = new EmbeddedH2OConfig();
        _embeddedH2OConfig.setDriverCallbackIp(driverIp);
        _embeddedH2OConfig.setDriverCallbackPort(driverPort);
        _embeddedH2OConfig.setMapperCallbackPort(localPort);
        H2O.setEmbeddedH2OConfig(_embeddedH2OConfig);
        Log.POST(11, "After setEmbeddedH2OConfig");
        //-------------------------------------------------------------
        water.H2OApp.main(args);
        //-------------------------------------------------------------
        Log.POST(12, "After main");
    } catch (Exception e) {
        Log.POST(13, "Exception in main");
        Log.POST(13, e.toString());
    }
    Log.POST(14, "Waiting for exit");
    // EmbeddedH2OConfig will send a one-byte exit status to this socket.
    Socket sock = ss.accept();
    System.out.println("Wait for exit woke up from accept");
    byte[] b = new byte[1];
    InputStream is = sock.getInputStream();
    int expectedBytes = 1;
    int receivedBytes = 0;
    while (receivedBytes < expectedBytes) {
        int n = is.read(b, receivedBytes, expectedBytes - receivedBytes);
        System.out.println("is.read returned " + n);
        if (n < 0) {
            System.exit(112);
        }
        receivedBytes += n;
    }
    int exitStatus = (int) b[0];
    System.out.println("Received exitStatus " + exitStatus);
    return exitStatus;
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) Counter(org.apache.hadoop.mapreduce.Counter)
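
The CounterThread started at the top of run2() is not shown in this example; its role is to increment the heartbeat counter on a fixed interval so the Hadoop framework keeps seeing progress while H2O runs inside the mapper. A minimal sketch of that pattern, assuming a 10-second interval and the class name below (both assumptions, not the h2o-3 implementation):

import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class HeartbeatCounterThreadSketch extends Thread {

    private final TaskAttemptContext context;
    private final Counter counter;

    public HeartbeatCounterThreadSketch(TaskAttemptContext context, Counter counter) {
        this.context = context;
        this.counter = counter;
        // Daemon thread so it never blocks mapper JVM shutdown.
        setDaemon(true);
    }

    @Override
    public void run() {
        try {
            while (!Thread.currentThread().isInterrupted()) {
                counter.increment(1);  // counter updates register as task progress
                context.progress();    // explicit progress report as well
                Thread.sleep(10_000);  // assumed 10-second heartbeat interval
            }
        } catch (InterruptedException ignored) {
            // The mapper is shutting down; simply exit.
        }
    }
}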

Example 35 with Counter

use of org.apache.hadoop.mapreduce.Counter in project hbase by apache.

the class RowCounter method run.

@Override
public int run(String[] args) throws Exception {
    if (args.length < 1) {
        printUsage("Wrong number of parameters: " + args.length);
        return -1;
    }
    Job job = createSubmittableJob(getConf(), args);
    if (job == null) {
        return -1;
    }
    boolean success = job.waitForCompletion(true);
    final long expectedCount = getConf().getLong(EXPECTED_COUNT_KEY, -1);
    if (success && expectedCount != -1) {
        final Counter counter = job.getCounters().findCounter(RowCounterMapper.Counters.ROWS);
        success = expectedCount == counter.getValue();
        if (!success) {
            LOG.error("Failing job because count of '" + counter.getValue() + "' does not match expected count of '" + expectedCount + "'");
        }
    }
    return (success ? 0 : 1);
}
Also used : Counter(org.apache.hadoop.mapreduce.Counter) Job(org.apache.hadoop.mapreduce.Job)
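
Looking up one well-known counter, as RowCounter does, covers most cases; when a job's full set of counters is of interest (for example for logging), the Counters object can also be iterated group by group. A small illustrative sketch, not part of the RowCounter code itself:

import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.CounterGroup;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;

public class DumpCountersSketch {

    // Prints every counter the job recorded, grouped by counter group.
    public static void dump(Job job) throws Exception {
        Counters counters = job.getCounters();
        if (counters == null) {
            return; // job history may already be unavailable
        }
        for (CounterGroup group : counters) {
            System.out.println("Group: " + group.getDisplayName());
            for (Counter counter : group) {
                System.out.println("  " + counter.getDisplayName() + " = " + counter.getValue());
            }
        }
    }
}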

Aggregations

Counter (org.apache.hadoop.mapreduce.Counter): 51
Configuration (org.apache.hadoop.conf.Configuration): 15
CounterGroup (org.apache.hadoop.mapreduce.CounterGroup): 13
Job (org.apache.hadoop.mapreduce.Job): 12
Counters (org.apache.hadoop.mapreduce.Counters): 11
IOException (java.io.IOException): 8
Path (org.apache.hadoop.fs.Path): 7
Map (java.util.Map): 4
FileSystem (org.apache.hadoop.fs.FileSystem): 4
Test (org.junit.Test): 4
TaskCounter (org.apache.hadoop.mapreduce.TaskCounter): 3
FileNotFoundException (java.io.FileNotFoundException): 2
SimpleDateFormat (java.text.SimpleDateFormat): 2
ArrayList (java.util.ArrayList): 2
ExecutionException (java.util.concurrent.ExecutionException): 2
RejectedExecutionException (java.util.concurrent.RejectedExecutionException): 2
TimeoutException (java.util.concurrent.TimeoutException): 2
Schema (org.apache.avro.Schema): 2
CustomOutputCommitter (org.apache.hadoop.CustomOutputCommitter): 2
BytesWritable (org.apache.hadoop.io.BytesWritable): 2