Use of org.apache.hadoop.mapreduce.Counter in project ignite by apache.
The class HadoopMapReduceCounterGroup, method addCounter.
/** {@inheritDoc} */
@Override
public Counter addCounter(String name, String displayName, long value) {
    final Counter counter = cntrs.findCounter(this.name, name);
    counter.setValue(value);
    return counter;
}
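For context, addCounter above follows the generic CounterGroup contract: find or create a counter in the group and set its value. The standalone sketch below (group and counter names are made up, not taken from the Ignite code) shows roughly the same behavior against the plain Hadoop Counters API, without any running job:

import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.CounterGroup;
import org.apache.hadoop.mapreduce.Counters;

public class CounterGroupSketch {
    public static void main(String[] args) {
        // In-memory counters; no job required.
        Counters counters = new Counters();
        CounterGroup group = counters.getGroup("example.group");

        // addCounter(name, displayName, value) creates the counter with the given value,
        // which is what the Ignite adapter above achieves via findCounter + setValue.
        Counter bytesRead = group.addCounter("bytesRead", "Bytes Read", 1024L);
        System.out.println(bytesRead.getDisplayName() + " = " + bytesRead.getValue());

        // The same counter can later be looked up again by group and name.
        counters.findCounter("example.group", "bytesRead").increment(1);
    }
}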
Use of org.apache.hadoop.mapreduce.Counter in project incubator-rya by apache.
The class IndexWritingTool, method run.
@Override
public int run(final String[] args) throws Exception {
    Preconditions.checkArgument(args.length == 7, "java " + IndexWritingTool.class.getCanonicalName() + " hdfsSaveLocation sparqlFile cbinstance cbzk cbuser cbpassword rdfTablePrefix.");
    final String inputDir = PathUtils.clean(args[0]);
    final String sparqlFile = PathUtils.clean(args[1]);
    final String instStr = args[2];
    final String zooStr = args[3];
    final String userStr = args[4];
    final String passStr = args[5];
    final String tablePrefix = args[6];
    final String sparql = FileUtils.readFileToString(new File(sparqlFile));
    final Job job = new Job(getConf(), "Write HDFS Index to Accumulo");
    job.setJarByClass(this.getClass());
    final Configuration jobConf = job.getConfiguration();
    jobConf.setBoolean("mapred.map.tasks.speculative.execution", false);
    setVarOrders(sparql, jobConf);
    TextInputFormat.setInputPaths(job, inputDir);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Mutation.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Mutation.class);
    job.setNumReduceTasks(0);
    String tableName;
    if (zooStr.equals("mock")) {
        tableName = tablePrefix;
    } else {
        tableName = tablePrefix + "INDEX_" + UUID.randomUUID().toString().replace("-", "").toUpperCase();
    }
    setAccumuloOutput(instStr, zooStr, userStr, passStr, job, tableName);
    jobConf.set(sparql_key, sparql);
    final int complete = job.waitForCompletion(true) ? 0 : -1;
    if (complete == 0) {
        final String[] varOrders = jobConf.getStrings("varOrders");
        final String orders = Joiner.on("\u0000").join(varOrders);
        Instance inst;
        if (zooStr.equals("mock")) {
            inst = new MockInstance(instStr);
        } else {
            inst = new ZooKeeperInstance(instStr, zooStr);
        }
        final Connector conn = inst.getConnector(userStr, passStr.getBytes(StandardCharsets.UTF_8));
        final BatchWriter bw = conn.createBatchWriter(tableName, 10, 5000, 1);
        final Counters counters = job.getCounters();
        final Counter c1 = counters.findCounter(cardCounter, cardCounter);
        final Mutation m = new Mutation("~SPARQL");
        final Value v = new Value(sparql.getBytes(StandardCharsets.UTF_8));
        m.put(new Text("" + c1.getValue()), new Text(orders), v);
        bw.addMutation(m);
        bw.close();
        return complete;
    } else {
        return complete;
    }
}
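The MyMapper referenced above is not shown here; the driver only reads back a string-named counter via counters.findCounter(cardCounter, cardCounter). Below is a minimal, hypothetical sketch (class and constant names assumed, not taken from the Rya sources) of how a mapper could populate such a string-named counter:

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Illustrative only; not the MyMapper used by IndexWritingTool.
public class CardinalityMapperSketch extends Mapper<LongWritable, Text, Text, Text> {

    // Assumed group/name constant, mirroring findCounter(cardCounter, cardCounter) above.
    private static final String CARD_COUNTER = "cardCounter";

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Count one unit per input record; the driver reads the total after the job finishes.
        context.getCounter(CARD_COUNTER, CARD_COUNTER).increment(1);
        context.write(new Text("out"), value);
    }
}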
Use of org.apache.hadoop.mapreduce.Counter in project incubator-gobblin by apache.
The class CompactionCompleteFileOperationAction, method onCompactionJobComplete.
/**
 * Replace or append the destination folder with new Avro files from the map-reduce job,
 * and create a record count file containing the number of records that have been processed.
 */
public void onCompactionJobComplete(FileSystemDataset dataset) throws IOException {
    if (configurator != null && configurator.isJobCreated()) {
        CompactionPathParser.CompactionParserResult result = new CompactionPathParser(state).parse(dataset);
        Path tmpPath = configurator.getMrOutputPath();
        Path dstPath = new Path(result.getDstAbsoluteDir());
        // Append-delta mode applies when the compaction rename-source-dir mode is enabled.
        boolean appendDeltaOutput = this.state.getPropAsBoolean(MRCompactor.COMPACTION_RENAME_SOURCE_DIR_ENABLED, MRCompactor.DEFAULT_COMPACTION_RENAME_SOURCE_DIR_ENABLED);
        Job job = this.configurator.getConfiguredJob();
        long newTotalRecords = 0;
        long oldTotalRecords = helper.readRecordCount(new Path(result.getDstAbsoluteDir()));
        long executeCount = helper.readExecutionCount(new Path(result.getDstAbsoluteDir()));
        List<Path> goodPaths = CompactionAvroJobConfigurator.getGoodFiles(job, tmpPath, this.fs);
        if (appendDeltaOutput) {
            FsPermission permission = HadoopUtils.deserializeFsPermission(this.state, MRCompactorJobRunner.COMPACTION_JOB_OUTPUT_DIR_PERMISSION, FsPermission.getDefault());
            WriterUtils.mkdirsWithRecursivePermission(this.fs, dstPath, permission);
            // Append the files under the MR output directory to the destination.
            for (Path filePath : goodPaths) {
                String fileName = filePath.getName();
                log.info(String.format("Adding %s to %s", filePath.toString(), dstPath));
                Path outPath = new Path(dstPath, fileName);
                if (!this.fs.rename(filePath, outPath)) {
                    throw new IOException(String.format("Unable to move %s to %s", filePath.toString(), outPath.toString()));
                }
            }
            // Obtain the record count from the input file names.
            // We don't take the record count from a map-reduce counter because, in the next run, the
            // threshold (delta record) calculation is based on input file names. By pre-defining which
            // input folders are involved in the MR execution, it is easy to track how many files have
            // been processed so far, so the total record count (all previous runs plus the current run)
            // can be computed.
            newTotalRecords = this.configurator.getFileNameRecordCount();
        } else {
            this.fs.delete(dstPath, true);
            FsPermission permission = HadoopUtils.deserializeFsPermission(this.state, MRCompactorJobRunner.COMPACTION_JOB_OUTPUT_DIR_PERMISSION, FsPermission.getDefault());
            WriterUtils.mkdirsWithRecursivePermission(this.fs, dstPath.getParent(), permission);
            if (!this.fs.rename(tmpPath, dstPath)) {
                throw new IOException(String.format("Unable to move %s to %s", tmpPath, dstPath));
            }
            // Obtain the record count from the map-reduce job counter.
            // We don't take the record count from file names because tracking which files are actually
            // involved in the MR execution can be hard: new minutely data is rolled up into hourly
            // folders, and from the daily-compaction perspective we cannot tell which files were newly
            // added (we simply pass whole hourly folders to the MR job instead of individual files).
            Counter counter = job.getCounters().findCounter(AvroKeyMapper.EVENT_COUNTER.RECORD_COUNT);
            newTotalRecords = counter.getValue();
        }
        State compactState = helper.loadState(new Path(result.getDstAbsoluteDir()));
        compactState.setProp(CompactionSlaEventHelper.RECORD_COUNT_TOTAL, Long.toString(newTotalRecords));
        compactState.setProp(CompactionSlaEventHelper.EXEC_COUNT_TOTAL, Long.toString(executeCount + 1));
        compactState.setProp(CompactionSlaEventHelper.MR_JOB_ID, this.configurator.getConfiguredJob().getJobID().toString());
        helper.saveState(new Path(result.getDstAbsoluteDir()), compactState);
        log.info("Updating record count from {} to {} in {} [{}]", oldTotalRecords, newTotalRecords, dstPath, executeCount + 1);
        // Submit an event for the record count.
        if (eventSubmitter != null) {
            Map<String, String> eventMetadataMap = ImmutableMap.of(CompactionSlaEventHelper.DATASET_URN, dataset.datasetURN(), CompactionSlaEventHelper.RECORD_COUNT_TOTAL, Long.toString(newTotalRecords), CompactionSlaEventHelper.PREV_RECORD_COUNT_TOTAL, Long.toString(oldTotalRecords), CompactionSlaEventHelper.EXEC_COUNT_TOTAL, Long.toString(executeCount + 1), CompactionSlaEventHelper.MR_JOB_ID, this.configurator.getConfiguredJob().getJobID().toString());
            this.eventSubmitter.submit(CompactionSlaEventHelper.COMPACTION_RECORD_COUNT_EVENT, eventMetadataMap);
        }
    }
}
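The non-append branch above reads an enum-named counter (AvroKeyMapper.EVENT_COUNTER.RECORD_COUNT), but the producing side is not reproduced here. The sketch below is a hypothetical illustration of that enum-counter pattern (class and enum names assumed, not the real Gobblin mapper): each map call increments the counter, and the driver reads the total after the job completes.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Illustrative only; not the actual Gobblin AvroKeyMapper.
public class RecordCountingMapperSketch extends Mapper<LongWritable, Text, Text, NullWritable> {

    // Each enum constant becomes a counter; Hadoop groups them under the enum's class name.
    public enum EVENT_COUNTER { RECORD_COUNT }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        context.getCounter(EVENT_COUNTER.RECORD_COUNT).increment(1);
        context.write(value, NullWritable.get());
    }
}

// Driver side, after job.waitForCompletion(true):
//   long total = job.getCounters().findCounter(RecordCountingMapperSketch.EVENT_COUNTER.RECORD_COUNT).getValue();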
Use of org.apache.hadoop.mapreduce.Counter in project incubator-gobblin by apache.
The class AvroKeyDedupReducerTest, method testReduce.
@Test
public void testReduce() throws IOException, InterruptedException {
    Schema keySchema = new Schema.Parser().parse(KEY_SCHEMA);
    GenericRecordBuilder keyRecordBuilder = new GenericRecordBuilder(keySchema.getField("key").schema());
    keyRecordBuilder.set("partitionKey", 1);
    keyRecordBuilder.set("environment", "test");
    keyRecordBuilder.set("subKey", "2");
    GenericRecord record = keyRecordBuilder.build();
    keyRecordBuilder = new GenericRecordBuilder(keySchema);
    keyRecordBuilder.set("key", record);
    GenericRecord keyRecord = keyRecordBuilder.build();
    // Test reducer with delta field "scn"
    Schema fullSchema = new Schema.Parser().parse(FULL_SCHEMA);
    AvroValue<GenericRecord> fullRecord1 = new AvroValue<>();
    AvroValue<GenericRecord> fullRecord2 = new AvroValue<>();
    AvroValue<GenericRecord> fullRecord3 = new AvroValue<>();
    AvroValue<GenericRecord> fullRecord4 = new AvroValue<>();
    GenericRecordBuilder fullRecordBuilder1 = new GenericRecordBuilder(fullSchema);
    fullRecordBuilder1.set("key", record);
    fullRecordBuilder1.set("scn", 123);
    fullRecordBuilder1.set("scn2", 100);
    fullRecord1.datum(fullRecordBuilder1.build());
    fullRecordBuilder1.set("scn", 125);
    fullRecordBuilder1.set("scn2", 1);
    fullRecord2.datum(fullRecordBuilder1.build());
    fullRecordBuilder1.set("scn", 124);
    fullRecordBuilder1.set("scn2", 10);
    fullRecord3.datum(fullRecordBuilder1.build());
    fullRecordBuilder1.set("scn", 122);
    fullRecordBuilder1.set("scn2", 1000);
    fullRecord4.datum(fullRecordBuilder1.build());
    Configuration conf = mock(Configuration.class);
    when(conf.get(AvroKeyDedupReducer.DELTA_SCHEMA_PROVIDER)).thenReturn(FieldAttributeBasedDeltaFieldsProvider.class.getName());
    when(conf.get(FieldAttributeBasedDeltaFieldsProvider.ATTRIBUTE_FIELD)).thenReturn("attributes_json");
    when(conf.get(FieldAttributeBasedDeltaFieldsProvider.DELTA_PROP_NAME, FieldAttributeBasedDeltaFieldsProvider.DEFAULT_DELTA_PROP_NAME)).thenReturn(FieldAttributeBasedDeltaFieldsProvider.DEFAULT_DELTA_PROP_NAME);
    AvroKeyDedupReducer reducer = new AvroKeyDedupReducer();
    WrappedReducer.Context reducerContext = mock(WrappedReducer.Context.class);
    when(reducerContext.getConfiguration()).thenReturn(conf);
    Counter moreThan1Counter = new GenericCounter();
    when(reducerContext.getCounter(AvroKeyDedupReducer.EVENT_COUNTER.MORE_THAN_1)).thenReturn(moreThan1Counter);
    Counter dedupedCounter = new GenericCounter();
    when(reducerContext.getCounter(AvroKeyDedupReducer.EVENT_COUNTER.DEDUPED)).thenReturn(dedupedCounter);
    Counter recordCounter = new GenericCounter();
    when(reducerContext.getCounter(AvroKeyDedupReducer.EVENT_COUNTER.RECORD_COUNT)).thenReturn(recordCounter);
    reducer.setup(reducerContext);
    doNothing().when(reducerContext).write(any(AvroKey.class), any(NullWritable.class));
    List<AvroValue<GenericRecord>> valueIterable = Lists.newArrayList(fullRecord1, fullRecord2, fullRecord3, fullRecord4);
    AvroKey<GenericRecord> key = new AvroKey<>();
    key.datum(keyRecord);
    reducer.reduce(key, valueIterable, reducerContext);
    Assert.assertEquals(reducer.getOutKey().datum(), fullRecord2.datum());
    // Test reducer without delta field
    Configuration conf2 = mock(Configuration.class);
    when(conf2.get(AvroKeyDedupReducer.DELTA_SCHEMA_PROVIDER)).thenReturn(null);
    when(reducerContext.getConfiguration()).thenReturn(conf2);
    AvroKeyDedupReducer reducer2 = new AvroKeyDedupReducer();
    reducer2.setup(reducerContext);
    reducer2.reduce(key, valueIterable, reducerContext);
    Assert.assertEquals(reducer2.getOutKey().datum(), fullRecord1.datum());
    // Test reducer with compound delta key.
    Schema fullSchema2 = new Schema.Parser().parse(FULL_SCHEMA_WITH_TWO_DELTA_FIELDS);
    GenericRecordBuilder fullRecordBuilder2 = new GenericRecordBuilder(fullSchema2);
    fullRecordBuilder2.set("key", record);
    fullRecordBuilder2.set("scn", 123);
    fullRecordBuilder2.set("scn2", 100);
    fullRecord1.datum(fullRecordBuilder2.build());
    fullRecordBuilder2.set("scn", 125);
    fullRecordBuilder2.set("scn2", 1000);
    fullRecord2.datum(fullRecordBuilder2.build());
    fullRecordBuilder2.set("scn", 126);
    fullRecordBuilder2.set("scn2", 1000);
    fullRecord3.datum(fullRecordBuilder2.build());
    fullRecordBuilder2.set("scn", 130);
    fullRecordBuilder2.set("scn2", 100);
    fullRecord4.datum(fullRecordBuilder2.build());
    List<AvroValue<GenericRecord>> valueIterable2 = Lists.newArrayList(fullRecord1, fullRecord2, fullRecord3, fullRecord4);
    reducer.reduce(key, valueIterable2, reducerContext);
    Assert.assertEquals(reducer.getOutKey().datum(), fullRecord3.datum());
}
Use of org.apache.hadoop.mapreduce.Counter in project hbase by apache.
The class RowCounter, method doWork.
@Override
protected int doWork() throws Exception {
    Job job = createSubmittableJob(getConf());
    if (job == null) {
        return -1;
    }
    boolean success = job.waitForCompletion(true);
    final long expectedCount = getConf().getLong(EXPECTED_COUNT_KEY, -1);
    if (success && expectedCount != -1) {
        final Counter counter = job.getCounters().findCounter(RowCounterMapper.Counters.ROWS);
        success = expectedCount == counter.getValue();
        if (!success) {
            LOG.error("Failing job because count of '" + counter.getValue() + "' does not match expected count of '" + expectedCount + "'");
        }
    }
    return (success ? 0 : 1);
}