use of org.apache.hadoop.mapred.JobClient in project voldemort by voldemort.
the class HadoopStoreBuilder method build.
/**
* Run the job
*/
public void build() {
try {
JobConf conf = prepareJobConf(baseJobConf);
FileSystem fs = outputDir.getFileSystem(conf);
if (fs.exists(outputDir)) {
info("Deleting previous output in " + outputDir + " for building store " + this.storeDef.getName());
fs.delete(outputDir, true);
}
conf.setInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE);
conf.set("cluster.xml", new ClusterMapper().writeCluster(cluster));
conf.set("stores.xml", new StoreDefinitionsMapper().writeStoreList(Collections.singletonList(storeDef)));
conf.setBoolean(VoldemortBuildAndPushJob.SAVE_KEYS, saveKeys);
conf.setBoolean(VoldemortBuildAndPushJob.REDUCER_PER_BUCKET, reducerPerBucket);
conf.setBoolean(VoldemortBuildAndPushJob.BUILD_PRIMARY_REPLICAS_ONLY, buildPrimaryReplicasOnly);
if (!isAvro) {
conf.setPartitionerClass(HadoopStoreBuilderPartitioner.class);
conf.setMapperClass(mapperClass);
conf.setMapOutputKeyClass(BytesWritable.class);
conf.setMapOutputValueClass(BytesWritable.class);
conf.setReducerClass(HadoopStoreBuilderReducer.class);
}
conf.setInputFormat(inputFormatClass);
conf.setOutputFormat(SequenceFileOutputFormat.class);
conf.setOutputKeyClass(BytesWritable.class);
conf.setOutputValueClass(BytesWritable.class);
conf.setJarByClass(getClass());
conf.setReduceSpeculativeExecution(false);
FileInputFormat.setInputPaths(conf, inputPath);
conf.set("final.output.dir", outputDir.toString());
conf.set(VoldemortBuildAndPushJob.CHECKSUM_TYPE, CheckSum.toString(checkSumType));
conf.set("dfs.umaskmode", "002");
FileOutputFormat.setOutputPath(conf, tempDir);
FileSystem outputFs = outputDir.getFileSystem(conf);
if (outputFs.exists(outputDir)) {
throw new IOException("Final output directory already exists.");
}
// delete output dir if it already exists
FileSystem tempFs = tempDir.getFileSystem(conf);
tempFs.delete(tempDir, true);
long size = sizeOfPath(tempFs, inputPath);
logger.info("Data size = " + size + ", replication factor = " + storeDef.getReplicationFactor() + ", numNodes = " + cluster.getNumberOfNodes() + ", numPartitions = " + cluster.getNumberOfPartitions() + ", chunk size = " + chunkSizeBytes);
// Base numbers of chunks and reducers, will get modified according to various settings
int numChunks = (int) (size / cluster.getNumberOfPartitions() / chunkSizeBytes) + 1;
/* +1 so we round up */
int numReducers = cluster.getNumberOfPartitions();
// question, but in order to avoid breaking anything we'll just maintain the original behavior.
if (saveKeys) {
if (buildPrimaryReplicasOnly) {
// The buildPrimaryReplicasOnly mode is supported exclusively in combination with
// saveKeys. If enabled, then we don't want to shuffle extra keys redundantly,
// hence we don't change the number of reducers.
} else {
// Old behavior, where all keys are redundantly shuffled to redundant reducers.
numReducers = numReducers * storeDef.getReplicationFactor();
}
} else {
numChunks = numChunks * storeDef.getReplicationFactor();
}
// Ensure at least one chunk
numChunks = Math.max(numChunks, 1);
if (reducerPerBucket) {
// Then all chunks for a given partition/replica combination are shuffled to the same
// reducer, hence, the number of reducers remains the same as previously defined.
} else {
// Otherwise, we want one reducer per chunk, hence we multiply the number of reducers.
numReducers = numReducers * numChunks;
}
conf.setInt(AbstractStoreBuilderConfigurable.NUM_CHUNKS, numChunks);
conf.setNumReduceTasks(numReducers);
logger.info("Number of chunks: " + numChunks + ", number of reducers: " + numReducers + ", save keys: " + saveKeys + ", reducerPerBucket: " + reducerPerBucket + ", buildPrimaryReplicasOnly: " + buildPrimaryReplicasOnly);
if (isAvro) {
conf.setPartitionerClass(AvroStoreBuilderPartitioner.class);
// conf.setMapperClass(mapperClass);
conf.setMapOutputKeyClass(ByteBuffer.class);
conf.setMapOutputValueClass(ByteBuffer.class);
conf.setInputFormat(inputFormatClass);
conf.setOutputFormat((Class<? extends OutputFormat>) AvroOutputFormat.class);
conf.setOutputKeyClass(ByteBuffer.class);
conf.setOutputValueClass(ByteBuffer.class);
// AvroJob confs for the avro mapper
AvroJob.setInputSchema(conf, Schema.parse(baseJobConf.get(AVRO_REC_SCHEMA)));
AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Schema.Type.BYTES), Schema.create(Schema.Type.BYTES)));
AvroJob.setMapperClass(conf, mapperClass);
conf.setReducerClass(AvroStoreBuilderReducer.class);
}
logger.info("Building store...");
// The snipped below copied and adapted from: JobClient.runJob(conf);
// We have more control in the error handling this way.
JobClient jc = new JobClient(conf);
RunningJob runningJob = jc.submitJob(conf);
Counters counters;
try {
if (!jc.monitorAndPrintJob(conf, runningJob)) {
counters = runningJob.getCounters();
// For some datasets, the number of chunks that we calculated is inadequate.
// Here, we try to identify if this is the case.
long mapOutputBytes = counters.getCounter(Task.Counter.MAP_OUTPUT_BYTES);
long averageNumberOfBytesPerChunk = mapOutputBytes / numChunks / cluster.getNumberOfPartitions();
if (averageNumberOfBytesPerChunk > (HadoopStoreWriter.DEFAULT_CHUNK_SIZE)) {
float chunkSizeBloat = averageNumberOfBytesPerChunk / (float) HadoopStoreWriter.DEFAULT_CHUNK_SIZE;
long suggestedTargetChunkSize = (long) (HadoopStoreWriter.DEFAULT_CHUNK_SIZE / chunkSizeBloat);
logger.error("The number of bytes per chunk may be too high." + " averageNumberOfBytesPerChunk = " + averageNumberOfBytesPerChunk + ". Consider setting " + VoldemortBuildAndPushJob.BUILD_CHUNK_SIZE + "=" + suggestedTargetChunkSize);
} else {
logger.error("Job Failed: " + runningJob.getFailureInfo());
}
throw new VoldemortException("BnP's MapReduce job failed.");
}
} catch (InterruptedException ie) {
Thread.currentThread().interrupt();
}
counters = runningJob.getCounters();
long numberOfRecords = counters.getCounter(Task.Counter.REDUCE_INPUT_GROUPS);
if (numberOfRecords < minNumberOfRecords) {
throw new VoldemortException("The number of records in the data set (" + numberOfRecords + ") is lower than the minimum required (" + minNumberOfRecords + "). Aborting.");
}
if (saveKeys) {
logger.info("Number of collisions in the job - " + counters.getCounter(KeyValueWriter.CollisionCounter.NUM_COLLISIONS));
logger.info("Maximum number of collisions for one entry - " + counters.getCounter(KeyValueWriter.CollisionCounter.MAX_COLLISIONS));
}
// Do a CheckSumOfCheckSum - Similar to HDFS
CheckSum checkSumGenerator = CheckSum.getInstance(this.checkSumType);
if (!this.checkSumType.equals(CheckSumType.NONE) && checkSumGenerator == null) {
throw new VoldemortException("Could not generate checksum digest for type " + this.checkSumType);
}
List<Integer> directorySuffixes = Lists.newArrayList();
if (buildPrimaryReplicasOnly) {
// Files are grouped by partitions
for (int partitionId = 0; partitionId < cluster.getNumberOfPartitions(); partitionId++) {
directorySuffixes.add(partitionId);
}
} else {
// Files are grouped by node
for (Node node : cluster.getNodes()) {
directorySuffixes.add(node.getId());
}
}
ReadOnlyStorageMetadata fullStoreMetadata = new ReadOnlyStorageMetadata();
List<Integer> emptyDirectories = Lists.newArrayList();
final String directoryPrefix = buildPrimaryReplicasOnly ? ReadOnlyUtils.PARTITION_DIRECTORY_PREFIX : ReadOnlyUtils.NODE_DIRECTORY_PREFIX;
// Generate a log message every 30 seconds or after processing every 100 directories.
final long LOG_INTERVAL_TIME = TimeUnit.MILLISECONDS.convert(30, TimeUnit.SECONDS);
final int LOG_INTERVAL_COUNT = buildPrimaryReplicasOnly ? 100 : 5;
int lastLogCount = 0;
long lastLogTime = 0;
long startTimeMS = System.currentTimeMillis();
// Check if all folder exists and with format file
for (int index = 0; index < directorySuffixes.size(); index++) {
int directorySuffix = directorySuffixes.get(index);
long elapsedTime = System.currentTimeMillis() - lastLogTime;
long elapsedCount = index - lastLogCount;
if (elapsedTime >= LOG_INTERVAL_TIME || elapsedCount >= LOG_INTERVAL_COUNT) {
lastLogTime = System.currentTimeMillis();
lastLogCount = index;
logger.info("Processed " + directorySuffix + " out of " + directorySuffixes.size() + " directories.");
}
String directoryName = directoryPrefix + directorySuffix;
ReadOnlyStorageMetadata metadata = new ReadOnlyStorageMetadata();
if (saveKeys) {
metadata.add(ReadOnlyStorageMetadata.FORMAT, ReadOnlyStorageFormat.READONLY_V2.getCode());
} else {
metadata.add(ReadOnlyStorageMetadata.FORMAT, ReadOnlyStorageFormat.READONLY_V1.getCode());
}
Path directoryPath = new Path(outputDir.toString(), directoryName);
if (!outputFs.exists(directoryPath)) {
logger.debug("No data generated for " + directoryName + ". Generating empty folder");
emptyDirectories.add(directorySuffix);
// Create empty folder
outputFs.mkdirs(directoryPath);
outputFs.setPermission(directoryPath, new FsPermission(HADOOP_FILE_PERMISSION));
logger.debug("Setting permission to 755 for " + directoryPath);
}
processCheckSumMetadataFile(directoryName, outputFs, checkSumGenerator, directoryPath, metadata);
if (buildPrimaryReplicasOnly) {
// In buildPrimaryReplicasOnly mode, writing a metadata file for each partitions
// takes too long, so we skip it. We will rely on the full-store.metadata file instead.
} else {
// Maintaining the old behavior: we write the node-specific metadata file
writeMetadataFile(directoryPath, outputFs, ReadOnlyUtils.METADATA_FILE_EXTENSION, metadata);
}
fullStoreMetadata.addNestedMetadata(directoryName, metadata);
}
// Write the aggregate metadata file
writeMetadataFile(outputDir, outputFs, ReadOnlyUtils.FULL_STORE_METADATA_FILE, fullStoreMetadata);
long elapsedTimeMs = System.currentTimeMillis() - startTimeMS;
long elapsedTimeSeconds = TimeUnit.SECONDS.convert(elapsedTimeMs, TimeUnit.MILLISECONDS);
logger.info("Total Processed directories: " + directorySuffixes.size() + ". Elapsed Time (Seconds):" + elapsedTimeSeconds);
if (emptyDirectories.size() > 0) {
logger.info("Empty directories: " + Arrays.toString(emptyDirectories.toArray()));
}
} catch (Exception e) {
logger.error("Error in Store builder", e);
throw new VoldemortException(e);
}
}
use of org.apache.hadoop.mapred.JobClient in project incubator-systemml by apache.
the class InfrastructureAnalyzer method analyzeHadoopCluster.
/**
* Analyzes properties of hadoop cluster and configuration.
*/
private static void analyzeHadoopCluster() {
try {
JobConf job = ConfigurationManager.getCachedJobConf();
JobClient client = new JobClient(job);
ClusterStatus stat = client.getClusterStatus();
if (//if in cluster mode
stat != null) {
//analyze cluster status
_remotePar = stat.getTaskTrackers();
_remoteParMap = stat.getMaxMapTasks();
_remoteParReduce = stat.getMaxReduceTasks();
//analyze pure configuration properties
analyzeHadoopConfiguration();
}
} catch (IOException e) {
throw new RuntimeException("Unable to analyze infrastructure.", e);
}
}
Aggregations