Example 46 with Counter

Use of org.apache.hadoop.mapreduce.Counter in project ignite by apache.

The class HadoopMapReduceCounterGroup, method addCounter.

/**
 * {@inheritDoc}
 */
@Override
public Counter addCounter(String name, String displayName, long value) {
    final Counter counter = cntrs.findCounter(this.name, name);
    counter.setValue(value);
    return counter;
}
Also used: Counter (org.apache.hadoop.mapreduce.Counter)
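
The addCounter override above only copies a value into an existing counter. For orientation, here is a minimal, hypothetical mapper (the class and enum names are invented for illustration and are not part of the ignite project) showing the more common write path: a task asks its context for a Counter and increments it once per record.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Mapper;

public class LineCountingMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    // Hypothetical counter identifier; any enum constant can name a counter.
    public enum LineCounters { LINES_SEEN }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Counters are created lazily on first lookup and aggregated across tasks by the framework.
        Counter lines = context.getCounter(LineCounters.LINES_SEEN);
        lines.increment(1);
        context.write(value, new LongWritable(1));
    }
}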

Example 47 with Counter

Use of org.apache.hadoop.mapreduce.Counter in project incubator-rya by apache.

The class IndexWritingTool, method run.

@Override
public int run(final String[] args) throws Exception {
    Preconditions.checkArgument(args.length == 7, "java " + IndexWritingTool.class.getCanonicalName() + " hdfsSaveLocation sparqlFile cbinstance cbzk cbuser cbpassword rdfTablePrefix.");
    final String inputDir = PathUtils.clean(args[0]);
    final String sparqlFile = PathUtils.clean(args[1]);
    final String instStr = args[2];
    final String zooStr = args[3];
    final String userStr = args[4];
    final String passStr = args[5];
    final String tablePrefix = args[6];
    final String sparql = FileUtils.readFileToString(new File(sparqlFile));
    final Job job = new Job(getConf(), "Write HDFS Index to Accumulo");
    job.setJarByClass(this.getClass());
    final Configuration jobConf = job.getConfiguration();
    jobConf.setBoolean("mapred.map.tasks.speculative.execution", false);
    setVarOrders(sparql, jobConf);
    TextInputFormat.setInputPaths(job, inputDir);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Mutation.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Mutation.class);
    job.setNumReduceTasks(0);
    String tableName;
    if (zooStr.equals("mock")) {
        tableName = tablePrefix;
    } else {
        tableName = tablePrefix + "INDEX_" + UUID.randomUUID().toString().replace("-", "").toUpperCase();
    }
    setAccumuloOutput(instStr, zooStr, userStr, passStr, job, tableName);
    jobConf.set(sparql_key, sparql);
    final int complete = job.waitForCompletion(true) ? 0 : -1;
    if (complete == 0) {
        final String[] varOrders = jobConf.getStrings("varOrders");
        final String orders = Joiner.on("\u0000").join(varOrders);
        Instance inst;
        if (zooStr.equals("mock")) {
            inst = new MockInstance(instStr);
        } else {
            inst = new ZooKeeperInstance(instStr, zooStr);
        }
        final Connector conn = inst.getConnector(userStr, passStr.getBytes(StandardCharsets.UTF_8));
        final BatchWriter bw = conn.createBatchWriter(tableName, 10, 5000, 1);
        final Counters counters = job.getCounters();
        final Counter c1 = counters.findCounter(cardCounter, cardCounter);
        final Mutation m = new Mutation("~SPARQL");
        final Value v = new Value(sparql.getBytes(StandardCharsets.UTF_8));
        m.put(new Text("" + c1.getValue()), new Text(orders), v);
        bw.addMutation(m);
        bw.close();
        return complete;
    } else {
        return complete;
    }
}
Also used: Connector (org.apache.accumulo.core.client.Connector), Configuration (org.apache.hadoop.conf.Configuration), MockInstance (org.apache.accumulo.core.client.mock.MockInstance), Instance (org.apache.accumulo.core.client.Instance), ZooKeeperInstance (org.apache.accumulo.core.client.ZooKeeperInstance), Text (org.apache.hadoop.io.Text), Counter (org.apache.hadoop.mapreduce.Counter), Value (org.apache.accumulo.core.data.Value), Counters (org.apache.hadoop.mapreduce.Counters), BatchWriter (org.apache.accumulo.core.client.BatchWriter), Mutation (org.apache.accumulo.core.data.Mutation), Job (org.apache.hadoop.mapreduce.Job), File (java.io.File)

Example 48 with Counter

Use of org.apache.hadoop.mapreduce.Counter in project incubator-gobblin by apache.

The class CompactionCompleteFileOperationAction, method onCompactionJobComplete.

/**
 * Replace or append the destination folder with new Avro files from the map-reduce job.
 * Create a record count file containing the number of records that have been processed.
 */
public void onCompactionJobComplete(FileSystemDataset dataset) throws IOException {
    if (configurator != null && configurator.isJobCreated()) {
        CompactionPathParser.CompactionParserResult result = new CompactionPathParser(state).parse(dataset);
        Path tmpPath = configurator.getMrOutputPath();
        Path dstPath = new Path(result.getDstAbsoluteDir());
        // This is append-delta mode, used when the compaction rename-source-dir mode is enabled.
        boolean appendDeltaOutput = this.state.getPropAsBoolean(MRCompactor.COMPACTION_RENAME_SOURCE_DIR_ENABLED, MRCompactor.DEFAULT_COMPACTION_RENAME_SOURCE_DIR_ENABLED);
        Job job = this.configurator.getConfiguredJob();
        long newTotalRecords = 0;
        long oldTotalRecords = helper.readRecordCount(new Path(result.getDstAbsoluteDir()));
        long executeCount = helper.readExecutionCount(new Path(result.getDstAbsoluteDir()));
        List<Path> goodPaths = CompactionAvroJobConfigurator.getGoodFiles(job, tmpPath, this.fs);
        if (appendDeltaOutput) {
            FsPermission permission = HadoopUtils.deserializeFsPermission(this.state, MRCompactorJobRunner.COMPACTION_JOB_OUTPUT_DIR_PERMISSION, FsPermission.getDefault());
            WriterUtils.mkdirsWithRecursivePermission(this.fs, dstPath, permission);
            // Append files under the MR output directory to the destination.
            for (Path filePath : goodPaths) {
                String fileName = filePath.getName();
                log.info(String.format("Adding %s to %s", filePath.toString(), dstPath));
                Path outPath = new Path(dstPath, fileName);
                if (!this.fs.rename(filePath, outPath)) {
                    throw new IOException(String.format("Unable to move %s to %s", filePath.toString(), outPath.toString()));
                }
            }
            // Obtain the record count from the input file names.
            // We don't take the record count from a map-reduce counter because, in the next run, the threshold
            // (delta record) calculation is based on the input file names. By pre-defining which input folders are
            // involved in the MR execution, it is easy to track how many files have been processed so far, which
            // makes it possible to calculate the total number of records (all previous runs + the current run).
            newTotalRecords = this.configurator.getFileNameRecordCount();
        } else {
            this.fs.delete(dstPath, true);
            FsPermission permission = HadoopUtils.deserializeFsPermission(this.state, MRCompactorJobRunner.COMPACTION_JOB_OUTPUT_DIR_PERMISSION, FsPermission.getDefault());
            WriterUtils.mkdirsWithRecursivePermission(this.fs, dstPath.getParent(), permission);
            if (!this.fs.rename(tmpPath, dstPath)) {
                throw new IOException(String.format("Unable to move %s to %s", tmpPath, dstPath));
            }
            // Obtain the record count from the map-reduce job counter.
            // We don't take the record count from the file names because tracking which files are actually involved
            // in the MR execution can be hard: newly arrived minutely data is rolled up into hourly folders, and from
            // the daily compaction perspective we cannot tell which files are newly added (we simply pass whole
            // hourly folders to the MR job instead of individual files).
            Counter counter = job.getCounters().findCounter(AvroKeyMapper.EVENT_COUNTER.RECORD_COUNT);
            newTotalRecords = counter.getValue();
        }
        State compactState = helper.loadState(new Path(result.getDstAbsoluteDir()));
        compactState.setProp(CompactionSlaEventHelper.RECORD_COUNT_TOTAL, Long.toString(newTotalRecords));
        compactState.setProp(CompactionSlaEventHelper.EXEC_COUNT_TOTAL, Long.toString(executeCount + 1));
        compactState.setProp(CompactionSlaEventHelper.MR_JOB_ID, this.configurator.getConfiguredJob().getJobID().toString());
        helper.saveState(new Path(result.getDstAbsoluteDir()), compactState);
        log.info("Updating record count from {} to {} in {} [{}]", oldTotalRecords, newTotalRecords, dstPath, executeCount + 1);
        // submit events for record count
        if (eventSubmitter != null) {
            Map<String, String> eventMetadataMap = ImmutableMap.of(CompactionSlaEventHelper.DATASET_URN, dataset.datasetURN(), CompactionSlaEventHelper.RECORD_COUNT_TOTAL, Long.toString(newTotalRecords), CompactionSlaEventHelper.PREV_RECORD_COUNT_TOTAL, Long.toString(oldTotalRecords), CompactionSlaEventHelper.EXEC_COUNT_TOTAL, Long.toString(executeCount + 1), CompactionSlaEventHelper.MR_JOB_ID, this.configurator.getConfiguredJob().getJobID().toString());
            this.eventSubmitter.submit(CompactionSlaEventHelper.COMPACTION_RECORD_COUNT_EVENT, eventMetadataMap);
        }
    }
}
Also used: Path (org.apache.hadoop.fs.Path), IOException (java.io.IOException), Counter (org.apache.hadoop.mapreduce.Counter), WorkUnitState (org.apache.gobblin.configuration.WorkUnitState), State (org.apache.gobblin.configuration.State), CompactionPathParser (org.apache.gobblin.compaction.parser.CompactionPathParser), FsPermission (org.apache.hadoop.fs.permission.FsPermission), Job (org.apache.hadoop.mapreduce.Job)

Example 49 with Counter

Use of org.apache.hadoop.mapreduce.Counter in project incubator-gobblin by apache.

The class AvroKeyDedupReducerTest, method testReduce.

@Test
public void testReduce() throws IOException, InterruptedException {
    Schema keySchema = new Schema.Parser().parse(KEY_SCHEMA);
    GenericRecordBuilder keyRecordBuilder = new GenericRecordBuilder(keySchema.getField("key").schema());
    keyRecordBuilder.set("partitionKey", 1);
    keyRecordBuilder.set("environment", "test");
    keyRecordBuilder.set("subKey", "2");
    GenericRecord record = keyRecordBuilder.build();
    keyRecordBuilder = new GenericRecordBuilder(keySchema);
    keyRecordBuilder.set("key", record);
    GenericRecord keyRecord = keyRecordBuilder.build();
    // Test reducer with delta field "scn"
    Schema fullSchema = new Schema.Parser().parse(FULL_SCHEMA);
    AvroValue<GenericRecord> fullRecord1 = new AvroValue<>();
    AvroValue<GenericRecord> fullRecord2 = new AvroValue<>();
    AvroValue<GenericRecord> fullRecord3 = new AvroValue<>();
    AvroValue<GenericRecord> fullRecord4 = new AvroValue<>();
    GenericRecordBuilder fullRecordBuilder1 = new GenericRecordBuilder(fullSchema);
    fullRecordBuilder1.set("key", record);
    fullRecordBuilder1.set("scn", 123);
    fullRecordBuilder1.set("scn2", 100);
    fullRecord1.datum(fullRecordBuilder1.build());
    fullRecordBuilder1.set("scn", 125);
    fullRecordBuilder1.set("scn2", 1);
    fullRecord2.datum(fullRecordBuilder1.build());
    fullRecordBuilder1.set("scn", 124);
    fullRecordBuilder1.set("scn2", 10);
    fullRecord3.datum(fullRecordBuilder1.build());
    fullRecordBuilder1.set("scn", 122);
    fullRecordBuilder1.set("scn2", 1000);
    fullRecord4.datum(fullRecordBuilder1.build());
    Configuration conf = mock(Configuration.class);
    when(conf.get(AvroKeyDedupReducer.DELTA_SCHEMA_PROVIDER)).thenReturn(FieldAttributeBasedDeltaFieldsProvider.class.getName());
    when(conf.get(FieldAttributeBasedDeltaFieldsProvider.ATTRIBUTE_FIELD)).thenReturn("attributes_json");
    when(conf.get(FieldAttributeBasedDeltaFieldsProvider.DELTA_PROP_NAME, FieldAttributeBasedDeltaFieldsProvider.DEFAULT_DELTA_PROP_NAME)).thenReturn(FieldAttributeBasedDeltaFieldsProvider.DEFAULT_DELTA_PROP_NAME);
    AvroKeyDedupReducer reducer = new AvroKeyDedupReducer();
    WrappedReducer.Context reducerContext = mock(WrappedReducer.Context.class);
    when(reducerContext.getConfiguration()).thenReturn(conf);
    Counter moreThan1Counter = new GenericCounter();
    when(reducerContext.getCounter(AvroKeyDedupReducer.EVENT_COUNTER.MORE_THAN_1)).thenReturn(moreThan1Counter);
    Counter dedupedCounter = new GenericCounter();
    when(reducerContext.getCounter(AvroKeyDedupReducer.EVENT_COUNTER.DEDUPED)).thenReturn(dedupedCounter);
    Counter recordCounter = new GenericCounter();
    when(reducerContext.getCounter(AvroKeyDedupReducer.EVENT_COUNTER.RECORD_COUNT)).thenReturn(recordCounter);
    reducer.setup(reducerContext);
    doNothing().when(reducerContext).write(any(AvroKey.class), any(NullWritable.class));
    List<AvroValue<GenericRecord>> valueIterable = Lists.newArrayList(fullRecord1, fullRecord2, fullRecord3, fullRecord4);
    AvroKey<GenericRecord> key = new AvroKey<>();
    key.datum(keyRecord);
    reducer.reduce(key, valueIterable, reducerContext);
    Assert.assertEquals(reducer.getOutKey().datum(), fullRecord2.datum());
    // Test reducer without delta field
    Configuration conf2 = mock(Configuration.class);
    when(conf2.get(AvroKeyDedupReducer.DELTA_SCHEMA_PROVIDER)).thenReturn(null);
    when(reducerContext.getConfiguration()).thenReturn(conf2);
    AvroKeyDedupReducer reducer2 = new AvroKeyDedupReducer();
    reducer2.setup(reducerContext);
    reducer2.reduce(key, valueIterable, reducerContext);
    Assert.assertEquals(reducer2.getOutKey().datum(), fullRecord1.datum());
    // Test reducer with compound delta key.
    Schema fullSchema2 = new Schema.Parser().parse(FULL_SCHEMA_WITH_TWO_DELTA_FIELDS);
    GenericRecordBuilder fullRecordBuilder2 = new GenericRecordBuilder(fullSchema2);
    fullRecordBuilder2.set("key", record);
    fullRecordBuilder2.set("scn", 123);
    fullRecordBuilder2.set("scn2", 100);
    fullRecord1.datum(fullRecordBuilder2.build());
    fullRecordBuilder2.set("scn", 125);
    fullRecordBuilder2.set("scn2", 1000);
    fullRecord2.datum(fullRecordBuilder2.build());
    fullRecordBuilder2.set("scn", 126);
    fullRecordBuilder2.set("scn2", 1000);
    fullRecord3.datum(fullRecordBuilder2.build());
    fullRecordBuilder2.set("scn", 130);
    fullRecordBuilder2.set("scn2", 100);
    fullRecord4.datum(fullRecordBuilder2.build());
    List<AvroValue<GenericRecord>> valueIterable2 = Lists.newArrayList(fullRecord1, fullRecord2, fullRecord3, fullRecord4);
    reducer.reduce(key, valueIterable2, reducerContext);
    Assert.assertEquals(reducer.getOutKey().datum(), fullRecord3.datum());
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), Schema (org.apache.avro.Schema), AvroKey (org.apache.avro.mapred.AvroKey), GenericCounter (org.apache.hadoop.mapreduce.counters.GenericCounter), NullWritable (org.apache.hadoop.io.NullWritable), Counter (org.apache.hadoop.mapreduce.Counter), GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder), WrappedReducer (org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer), AvroValue (org.apache.avro.mapred.AvroValue), GenericRecord (org.apache.avro.generic.GenericRecord), Test (org.testng.annotations.Test)
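
The test above injects GenericCounter instances through a mocked reducer context. This works because GenericCounter is a plain in-memory implementation of Counter, so a test can increment it and read the value back without a running MapReduce job. A small standalone sketch (the class name below is made up, not Gobblin code):

import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.counters.GenericCounter;

public final class GenericCounterSketch {
    public static void main(String[] args) {
        // Name and display name are arbitrary here.
        Counter deduped = new GenericCounter("DEDUPED", "Records removed as duplicates");
        // The code under test would call increment(); the test then inspects getValue().
        deduped.increment(3);
        System.out.println(deduped.getDisplayName() + " = " + deduped.getValue());  // prints 3
    }
}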

Example 50 with Counter

Use of org.apache.hadoop.mapreduce.Counter in project hbase by apache.

The class RowCounter, method doWork.

@Override
protected int doWork() throws Exception {
    Job job = createSubmittableJob(getConf());
    if (job == null) {
        return -1;
    }
    boolean success = job.waitForCompletion(true);
    final long expectedCount = getConf().getLong(EXPECTED_COUNT_KEY, -1);
    if (success && expectedCount != -1) {
        final Counter counter = job.getCounters().findCounter(RowCounterMapper.Counters.ROWS);
        success = expectedCount == counter.getValue();
        if (!success) {
            LOG.error("Failing job because count of '" + counter.getValue() + "' does not match expected count of '" + expectedCount + "'");
        }
    }
    return (success ? 0 : 1);
}
Also used: Counter (org.apache.hadoop.mapreduce.Counter), Job (org.apache.hadoop.mapreduce.Job)
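
Most of the examples above read one specific counter with findCounter. When the counter of interest is not known in advance, Counters and CounterGroup (which the Aggregations list below shows appearing alongside Counter in many of these snippets) can be iterated directly. A hedged sketch, with an invented utility class name:

import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.CounterGroup;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;

public final class CounterDump {
    private CounterDump() { }

    // Print every counter of a finished job, grouped the way the framework reports them.
    public static void dump(Job job) throws Exception {
        Counters counters = job.getCounters();   // snapshot of all counters for the job
        for (CounterGroup group : counters) {    // Counters is Iterable<CounterGroup>
            System.out.println(group.getDisplayName());
            for (Counter counter : group) {      // CounterGroup is Iterable<Counter>
                System.out.printf("  %s = %d%n", counter.getDisplayName(), counter.getValue());
            }
        }
    }
}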

Aggregations

Counter (org.apache.hadoop.mapreduce.Counter): 52
Configuration (org.apache.hadoop.conf.Configuration): 16
CounterGroup (org.apache.hadoop.mapreduce.CounterGroup): 13
Job (org.apache.hadoop.mapreduce.Job): 13
Counters (org.apache.hadoop.mapreduce.Counters): 11
IOException (java.io.IOException): 10
Path (org.apache.hadoop.fs.Path): 8
FileSystem (org.apache.hadoop.fs.FileSystem): 5
Map (java.util.Map): 4
Test (org.junit.Test): 4
SimpleDateFormat (java.text.SimpleDateFormat): 3
ArrayList (java.util.ArrayList): 3
TaskCounter (org.apache.hadoop.mapreduce.TaskCounter): 3
FileNotFoundException (java.io.FileNotFoundException): 2
ExecutionException (java.util.concurrent.ExecutionException): 2
RejectedExecutionException (java.util.concurrent.RejectedExecutionException): 2
TimeoutException (java.util.concurrent.TimeoutException): 2
Schema (org.apache.avro.Schema): 2
CustomOutputCommitter (org.apache.hadoop.CustomOutputCommitter): 2
BytesWritable (org.apache.hadoop.io.BytesWritable): 2