
Example 61 with Counters

Use of org.apache.hadoop.mapreduce.Counters in project hbase by apache.

From class TestSyncTable, method testSyncTableIgnoreTimestampsTrue:

@Test
public void testSyncTableIgnoreTimestampsTrue() throws Exception {
    final TableName sourceTableName = TableName.valueOf(name.getMethodName() + "_source");
    final TableName targetTableName = TableName.valueOf(name.getMethodName() + "_target");
    Path testDir = TEST_UTIL.getDataTestDirOnTestFS("testSyncTableIgnoreTimestampsTrue");
    long current = EnvironmentEdgeManager.currentTime();
    writeTestData(sourceTableName, targetTableName, current - 1000, current);
    hashSourceTable(sourceTableName, testDir, "--ignoreTimestamps=true");
    Counters syncCounters = syncTables(sourceTableName, targetTableName, testDir, "--ignoreTimestamps=true");
    assertEqualTables(90, sourceTableName, targetTableName, true);
    assertEquals(50, syncCounters.findCounter(Counter.ROWSWITHDIFFS).getValue());
    assertEquals(10, syncCounters.findCounter(Counter.SOURCEMISSINGROWS).getValue());
    assertEquals(10, syncCounters.findCounter(Counter.TARGETMISSINGROWS).getValue());
    assertEquals(30, syncCounters.findCounter(Counter.SOURCEMISSINGCELLS).getValue());
    assertEquals(30, syncCounters.findCounter(Counter.TARGETMISSINGCELLS).getValue());
    assertEquals(20, syncCounters.findCounter(Counter.DIFFERENTCELLVALUES).getValue());
    TEST_UTIL.deleteTable(sourceTableName);
    TEST_UTIL.deleteTable(targetTableName);
}
Also used: Path (org.apache.hadoop.fs.Path), TableName (org.apache.hadoop.hbase.TableName), Counters (org.apache.hadoop.mapreduce.Counters), Test (org.junit.Test)
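
The read path is the same in all of these examples: once the job has finished, fetch its Counters object and look up individual values by enum key. Below is a minimal sketch of that pattern, assuming a completed Job; the MyCounter enum is hypothetical, standing in for SyncTable's real Counter enum (ROWSWITHDIFFS, SOURCEMISSINGROWS, and so on). Job.getCounters() can return null once a job has been retired, so the guard is worth keeping.

import java.io.IOException;

import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;

public class CounterReadSketch {

    // Hypothetical enum for illustration; SyncTable defines its own Counter enum.
    enum MyCounter { ROWS_WITH_DIFFS, ROWS_MATCHED }

    // Reads a single enum-keyed counter from a finished job, defaulting to 0.
    static long readCounter(Job job, Enum<?> key) throws IOException {
        Counters counters = job.getCounters();
        if (counters == null) {
            return 0L; // counters may be unavailable once the job is retired
        }
        Counter counter = counters.findCounter(key);
        return counter == null ? 0L : counter.getValue();
    }
}

Applied to the test above, a call like readCounter(job, MyCounter.ROWS_WITH_DIFFS) would stand in for each findCounter(...).getValue() chain.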

Example 62 with Counters

Use of org.apache.hadoop.mapreduce.Counters in project druid by druid-io.

From class DetermineHashedPartitionsJob, method getStats:

@Override
public Map<String, Object> getStats() {
    if (groupByJob == null) {
        return null;
    }
    try {
        Counters jobCounters = groupByJob.getCounters();
        Map<String, Object> metrics = TaskMetricsUtils.makeIngestionRowMetrics(
                jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_PROCESSED_COUNTER).getValue(),
                jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_PROCESSED_WITH_ERRORS_COUNTER).getValue(),
                jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_UNPARSEABLE_COUNTER).getValue(),
                jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_THROWN_AWAY_COUNTER).getValue());
        return metrics;
    } catch (IllegalStateException ise) {
        log.debug("Couldn't get counters due to job state");
        return null;
    } catch (Exception e) {
        log.debug(e, "Encountered exception in getStats().");
        return null;
    }
}
Also used: Counters (org.apache.hadoop.mapreduce.Counters), IOException (java.io.IOException)
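
The counters read in getStats() are populated on the task side: each mapper or reducer increments them through its Context, and the framework aggregates the per-task values into the job-level Counters. A minimal sketch of that write path follows; the IngestionCounters enum is hypothetical, standing in for Druid's HadoopDruidIndexerConfig.IndexJobCounters.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class RowCountingMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    // Hypothetical counter enum for illustration only.
    enum IngestionCounters { ROWS_PROCESSED, ROWS_UNPARSEABLE }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        try {
            // ... parse the input line and emit output records here ...
            context.getCounter(IngestionCounters.ROWS_PROCESSED).increment(1);
        } catch (RuntimeException unparseable) {
            context.getCounter(IngestionCounters.ROWS_UNPARSEABLE).increment(1);
        }
    }
}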

Example 63 with Counters

Use of org.apache.hadoop.mapreduce.Counters in project Cloud9 by lintool.

From class CountTrecDocuments, method run:

/**
 * Runs this tool.
 */
@SuppressWarnings("static-access")
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("(required) collection path").create(COLLECTION_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("(required) output path").create(OUTPUT_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("(required) DocnoMapping data").create(MAPPING_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("(optional) output file to write the number of records").create(COUNT_OPTION));
    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }
    if (!cmdline.hasOption(COLLECTION_OPTION) || !cmdline.hasOption(OUTPUT_OPTION) || !cmdline.hasOption(MAPPING_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }
    String inputPath = cmdline.getOptionValue(COLLECTION_OPTION);
    String outputPath = cmdline.getOptionValue(OUTPUT_OPTION);
    String mappingFile = cmdline.getOptionValue(MAPPING_OPTION);
    LOG.info("Tool: " + CountTrecDocuments.class.getSimpleName());
    LOG.info(" - input: " + inputPath);
    LOG.info(" - output dir: " + outputPath);
    LOG.info(" - docno mapping file: " + mappingFile);
    Job job = new Job(getConf(), CountTrecDocuments.class.getSimpleName());
    job.setJarByClass(CountTrecDocuments.class);
    job.setNumReduceTasks(0);
    // Pass in the class name as a String; this makes the mapper general enough to load
    // any collection of Indexable objects whose docid/docno mapping is specified by a
    // DocnoMapping object.
    job.getConfiguration().set("DocnoMappingClass", TrecDocnoMapping.class.getCanonicalName());
    // Put the mapping file in the distributed cache so each map worker will have it.
    DistributedCache.addCacheFile(new URI(mappingFile), job.getConfiguration());
    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    FileOutputFormat.setCompressOutput(job, false);
    job.setInputFormatClass(TrecDocumentInputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapperClass(MyMapper.class);
    // Delete the output directory if it exists already.
    FileSystem.get(job.getConfiguration()).delete(new Path(outputPath), true);
    job.waitForCompletion(true);
    Counters counters = job.getCounters();
    int numDocs = (int) counters.findCounter(Count.DOCS).getValue();
    LOG.info("Read " + numDocs + " docs.");
    if (cmdline.hasOption(COUNT_OPTION)) {
        String f = cmdline.getOptionValue(COUNT_OPTION);
        FileSystem fs = FileSystem.get(getConf());
        FSDataOutputStream out = fs.create(new Path(f));
        out.write(Integer.toString(numDocs).getBytes());
        out.close();
    }
    return numDocs;
}
Also used: Path (org.apache.hadoop.fs.Path), Options (org.apache.commons.cli.Options), GnuParser (org.apache.commons.cli.GnuParser), URI (java.net.URI), HelpFormatter (org.apache.commons.cli.HelpFormatter), CommandLine (org.apache.commons.cli.CommandLine), FileSystem (org.apache.hadoop.fs.FileSystem), Counters (org.apache.hadoop.mapreduce.Counters), CommandLineParser (org.apache.commons.cli.CommandLineParser), ParseException (org.apache.commons.cli.ParseException), FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream), Job (org.apache.hadoop.mapreduce.Job)
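
The driver above reads Count.DOCS, but MyMapper and the Count enum themselves are not shown. The sketch below is a plausible reconstruction, for illustration only; the real classes live inside Cloud9's CountTrecDocuments, and the actual key/value types come from TrecDocumentInputFormat rather than the generic ones used here.

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical reconstruction of the mapper used by the driver above.
public class MyMapperSketch extends Mapper<LongWritable, Text, Text, IntWritable> {

    enum Count { DOCS }

    @Override
    protected void map(LongWritable key, Text doc, Context context)
            throws IOException, InterruptedException {
        // One tick per input document; the framework aggregates these into
        // the job-level counters the driver reads after completion.
        context.getCounter(Count.DOCS).increment(1);
    }
}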

Example 64 with Counters

Use of org.apache.hadoop.mapreduce.Counters in project cdap by caskdata.

From class MapReduceMetricsWriter, method reportStats:

public void reportStats() throws IOException, InterruptedException {
    Counters jobCounters = jobConf.getCounters();
    reportMapredStats(jobCounters);
}
Also used: Counters (org.apache.hadoop.mapreduce.Counters)
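
reportMapredStats(jobCounters) is not shown, but a typical implementation walks every group and counter: Counters is Iterable<CounterGroup>, and each CounterGroup is Iterable<Counter>. A minimal sketch of that traversal (not CDAP's actual code):

import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.CounterGroup;
import org.apache.hadoop.mapreduce.Counters;

public class CounterDumpSketch {

    // Prints every counter in every group, e.g. as a basis for metrics export.
    static void dump(Counters counters) {
        for (CounterGroup group : counters) {
            System.out.println("Group: " + group.getDisplayName());
            for (Counter counter : group) {
                System.out.printf("  %s = %d%n", counter.getDisplayName(), counter.getValue());
            }
        }
    }
}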

Example 65 with Counters

Use of org.apache.hadoop.mapreduce.Counters in project phoenix by apache.

From class IndexScrutinyToolForTenantIT, method testTenantViewAndIndexEqual:

/**
 * Sunny case: the tenant view and its index hold the same data, so scrutiny
 * reports a single valid row and no invalid rows.
 */
@Test
public void testTenantViewAndIndexEqual() throws Exception {
    connTenant.createStatement().execute(String.format(upsertQueryStr, tenantViewName, tenantId, 1, "x"));
    connTenant.commit();
    String[] argValues = getArgValues("", tenantViewName, indexNameTenant, 10L, SourceTable.INDEX_TABLE_SOURCE, false, null, null, tenantId, EnvironmentEdgeManager.currentTimeMillis());
    List<Job> completedJobs = runScrutiny(IndexScrutinyMapperForTest.class, argValues);
    // Sunny case: both index and view are equal, with 1 row.
    assertEquals(1, completedJobs.size());
    for (Job job : completedJobs) {
        assertTrue(job.isSuccessful());
        Counters counters = job.getCounters();
        assertEquals(1, getCounterValue(counters, VALID_ROW_COUNT));
        assertEquals(0, getCounterValue(counters, INVALID_ROW_COUNT));
    }
}
Also used: Counters (org.apache.hadoop.mapreduce.Counters), Job (org.apache.hadoop.mapreduce.Job), IndexScrutinyMapperForTest (org.apache.phoenix.mapreduce.index.IndexScrutinyMapperForTest), Test (org.junit.Test)
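
The getCounterValue helper called in the assertions is defined in Phoenix's test base class and is not shown here. A plausible shape for it, assuming VALID_ROW_COUNT and INVALID_ROW_COUNT are enum counter keys; the real implementation may differ.

import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Counters;

// Hypothetical stand-in for the helper used above.
final class CounterHelper {

    private CounterHelper() {
    }

    static long getCounterValue(Counters counters, Enum<?> key) {
        Counter counter = counters.findCounter(key);
        return counter == null ? 0L : counter.getValue();
    }
}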

Aggregations

Counters (org.apache.hadoop.mapreduce.Counters): 111
Job (org.apache.hadoop.mapreduce.Job): 37
Test (org.junit.Test): 37
Configuration (org.apache.hadoop.conf.Configuration): 23
IOException (java.io.IOException): 21
Path (org.apache.hadoop.fs.Path): 21
Counter (org.apache.hadoop.mapreduce.Counter): 18
IndexScrutinyMapperForTest (org.apache.phoenix.mapreduce.index.IndexScrutinyMapperForTest): 14
Task (org.apache.hadoop.mapreduce.v2.app.job.Task): 9
Connection (java.sql.Connection): 8
CounterGroup (org.apache.hadoop.mapreduce.CounterGroup): 8
TaskAttemptId (org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId): 7
TaskAttempt (org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt): 7
PreparedStatement (java.sql.PreparedStatement): 6
TaskId (org.apache.hadoop.mapreduce.v2.api.records.TaskId): 6
ArrayList (java.util.ArrayList): 5
Value (org.apache.accumulo.core.data.Value): 5
Text (org.apache.hadoop.io.Text): 5
YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration): 5
FileNotFoundException (java.io.FileNotFoundException): 4