Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.
From the class MergeManagerImpl, the method combineAndSpill:
private void combineAndSpill(RawKeyValueIterator kvIter, Counters.Counter inCounter) throws IOException {
  JobConf job = jobConf;
  Reducer combiner = ReflectionUtils.newInstance(combinerClass, job);
  Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
  Class<V> valClass = (Class<V>) job.getMapOutputValueClass();
  RawComparator<K> comparator = (RawComparator<K>) job.getCombinerKeyGroupingComparator();
  try {
    CombineValuesIterator values = new CombineValuesIterator(
        kvIter, comparator, keyClass, valClass, job, Reporter.NULL, inCounter);
    while (values.more()) {
      combiner.reduce(values.getKey(), values, combineCollector, Reporter.NULL);
      values.nextKey();
    }
  } finally {
    combiner.close();
  }
}
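The combinerClass consumed above comes from the JobConf that the job driver builds. As a rough, hypothetical sketch of that driver side (CombinerDriverSketch, WordCountMapper, and WordCountReducer are illustrative placeholders, not classes from the snippet), the combiner would be registered like this:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

// Hypothetical driver sketch; WordCountMapper/WordCountReducer are placeholders.
public class CombinerDriverSketch {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(CombinerDriverSketch.class);
    job.setJobName("combiner-example");
    job.setMapperClass(WordCountMapper.class);
    // MergeManagerImpl later reads this class from the JobConf and
    // instantiates it with ReflectionUtils.newInstance, as shown above.
    job.setCombinerClass(WordCountReducer.class);
    job.setReducerClass(WordCountReducer.class);
    // These are the classes returned by getMapOutputKeyClass()/getMapOutputValueClass().
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    // Optionally a grouping comparator for the combiner could be set here,
    // which is what getCombinerKeyGroupingComparator() returns above.
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    JobClient.runJob(job);
  }
}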
Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.
From the class NNBench, the method runTests:
/**
* Run the test
*
* @throws IOException on error
*/
private void runTests() throws IOException {
  getConf().setLong("io.bytes.per.checksum", bytesPerChecksum);
  JobConf job = new JobConf(getConf(), NNBench.class);
  job.setJobName("NNBench-" + operation);
  FileInputFormat.setInputPaths(job, new Path(baseDir, CONTROL_DIR_NAME));
  job.setInputFormat(SequenceFileInputFormat.class);
  // Explicitly set number of max map attempts to 1.
  job.setMaxMapAttempts(1);
  // Explicitly turn off speculative execution
  job.setSpeculativeExecution(false);
  job.setMapperClass(NNBenchMapper.class);
  job.setReducerClass(NNBenchReducer.class);
  FileOutputFormat.setOutputPath(job, new Path(baseDir, OUTPUT_DIR_NAME));
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks((int) numberOfReduces);
  JobClient.runJob(job);
}
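JobClient.runJob blocks until the submitted job completes and returns a RunningJob handle, so a driver like the one above can also inspect counters or the success flag after submission. A minimal, hypothetical helper illustrating that pattern (not part of NNBench, which simply calls JobClient.runJob(job); BlockingSubmit and the printed counter are illustrative):

import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

// Hypothetical helper: submits an already-configured JobConf and reports a counter.
public final class BlockingSubmit {
  private BlockingSubmit() {
  }

  public static boolean submitAndReport(JobConf job) throws Exception {
    // Blocks until the job finishes (or throws if the job fails).
    RunningJob running = JobClient.runJob(job);
    Counters counters = running.getCounters();
    long mapInputRecords = counters.findCounter(
        "org.apache.hadoop.mapreduce.TaskCounter", "MAP_INPUT_RECORDS").getValue();
    System.out.println("Map input records: " + mapInputRecords);
    return running.isSuccessful();
  }
}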
Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.
From the class NNBenchWithoutMR, the method main:
/**
* This launches a given namenode operation (<code>-operation</code>),
* starting at a given time (<code>-startTime</code>). The files used
* by the openRead, rename, and delete operations are the same files
* created by the createWrite operation. Typically, the program
* would be run four times, once for each operation in this order:
* createWrite, openRead, rename, delete.
*
* <pre>
* Usage: nnbench
* -operation <one of createWrite, openRead, rename, or delete>
* -baseDir <base output/input DFS path>
* -startTime <time to start, given in seconds from the epoch>
* -numFiles <number of files to create, read, rename, or delete>
* -blocksPerFile <number of blocks to create per file>
* [-bytesPerBlock <number of bytes to write to each block, default is 1>]
* [-bytesPerChecksum <value for io.bytes.per.checksum>]
* </pre>
*
* @param args is an array of the program command line arguments
* @throws IOException indicates a problem with test startup
*/
public static void main(String[] args) throws IOException {
  String version = "NameNodeBenchmark.0.3";
  System.out.println(version);
  int bytesPerChecksum = -1;
  String usage =
      "Usage: nnbench "
      + " -operation <one of createWrite, openRead, rename, or delete>\n "
      + " -baseDir <base output/input DFS path>\n "
      + " -startTime <time to start, given in seconds from the epoch>\n"
      + " -numFiles <number of files to create>\n "
      + " -replicationFactorPerFile <Replication factor for the files, default is 1>\n"
      + " -blocksPerFile <number of blocks to create per file>\n"
      + " [-bytesPerBlock <number of bytes to write to each block, default is 1>]\n"
      + " [-bytesPerChecksum <value for io.bytes.per.checksum>]\n"
      + "Note: bytesPerBlock MUST be a multiple of bytesPerChecksum\n";
  String operation = null;
  // parse command line
  for (int i = 0; i < args.length; i++) {
    if (args[i].equals("-baseDir")) {
      baseDir = new Path(args[++i]);
    } else if (args[i].equals("-numFiles")) {
      numFiles = Integer.parseInt(args[++i]);
    } else if (args[i].equals("-blocksPerFile")) {
      blocksPerFile = Integer.parseInt(args[++i]);
    } else if (args[i].equals("-bytesPerBlock")) {
      bytesPerBlock = Long.parseLong(args[++i]);
    } else if (args[i].equals("-bytesPerChecksum")) {
      bytesPerChecksum = Integer.parseInt(args[++i]);
    } else if (args[i].equals("-replicationFactorPerFile")) {
      replicationFactorPerFile = Short.parseShort(args[++i]);
    } else if (args[i].equals("-startTime")) {
      startTime = Long.parseLong(args[++i]) * 1000;
    } else if (args[i].equals("-operation")) {
      operation = args[++i];
    } else {
      System.out.println(usage);
      System.exit(-1);
    }
  }
  bytesPerFile = bytesPerBlock * blocksPerFile;
  JobConf jobConf = new JobConf(new Configuration(), NNBench.class);
  if (bytesPerChecksum < 0) {
    // if it is not set in cmdline
    bytesPerChecksum = jobConf.getInt("io.bytes.per.checksum", 512);
  }
  jobConf.set("io.bytes.per.checksum", Integer.toString(bytesPerChecksum));
  System.out.println("Inputs: ");
  System.out.println(" operation: " + operation);
  System.out.println(" baseDir: " + baseDir);
  System.out.println(" startTime: " + startTime);
  System.out.println(" numFiles: " + numFiles);
  System.out.println(" replicationFactorPerFile: " + replicationFactorPerFile);
  System.out.println(" blocksPerFile: " + blocksPerFile);
  System.out.println(" bytesPerBlock: " + bytesPerBlock);
  System.out.println(" bytesPerChecksum: " + bytesPerChecksum);
  // verify args
  if (operation == null || baseDir == null || numFiles < 1 || blocksPerFile < 1
      || bytesPerBlock < 0 || bytesPerBlock % bytesPerChecksum != 0) {
    System.err.println(usage);
    System.exit(-1);
  }
  fileSys = FileSystem.get(jobConf);
  String uniqueId = java.net.InetAddress.getLocalHost().getHostName();
  taskDir = new Path(baseDir, uniqueId);
  // initialize buffer used for writing/reading file
  buffer = new byte[(int) Math.min(bytesPerFile, 32768L)];
  Date execTime;
  Date endTime;
  long duration;
  int exceptions = 0;
  // wait for coordinated start time
  barrier();
  execTime = new Date();
  System.out.println("Job started: " + startTime);
  if (operation.equals("createWrite")) {
    if (!fileSys.mkdirs(taskDir)) {
      throw new IOException("Mkdirs failed to create " + taskDir.toString());
    }
    exceptions = createWrite();
  } else if (operation.equals("openRead")) {
    exceptions = openRead();
  } else if (operation.equals("rename")) {
    exceptions = rename();
  } else if (operation.equals("delete")) {
    exceptions = delete();
  } else {
    System.err.println(usage);
    System.exit(-1);
  }
  endTime = new Date();
  System.out.println("Job ended: " + endTime);
  duration = (endTime.getTime() - execTime.getTime()) / 1000;
  System.out.println("The " + operation + " job took " + duration + " seconds.");
  System.out.println("The job recorded " + exceptions + " exceptions.");
}
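The barrier() call above waits for the coordinated start time (-startTime is given in seconds from the epoch and multiplied by 1000 in the parse loop). Its implementation is not shown in this snippet; a plausible sketch of such a wait, offered only as an assumption, might look like this:

// Hypothetical sketch of a start-time barrier: sleep until the agreed wall-clock
// start time so that every benchmark instance begins at roughly the same moment.
// NNBenchWithoutMR's actual barrier() may differ.
static void waitUntil(long startTimeMillis) {
  long now = System.currentTimeMillis();
  while (now < startTimeMillis) {
    try {
      Thread.sleep(Math.min(1000L, startTimeMillis - now));
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      return;
    }
    now = System.currentTimeMillis();
  }
}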
Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.
From the class LoadGeneratorMR, the method submitAsMapReduce:
// The following methods are only used when LoadGenerator is run as an MR job
/**
 * Based on the parsed args, submits the LoadGenerator as an MR job.
 * The number of map tasks is numMapTasks.
 * @return exit code of the job submission
 */
private int submitAsMapReduce() {
  System.out.println("Running as a MapReduce job with " + numMapTasks
      + " mapTasks; Output to file " + mrOutDir);
  Configuration conf = new Configuration(getConf());
  // First set all the args of LoadGenerator as Conf vars to pass to MR tasks
  conf.set(LG_ROOT, root.toString());
  conf.setInt(LG_MAXDELAYBETWEENOPS, maxDelayBetweenOps);
  conf.setInt(LG_NUMOFTHREADS, numOfThreads);
  // Pass double as string
  conf.set(LG_READPR, readProbs[0] + "");
  // Pass double as string
  conf.set(LG_WRITEPR, writeProbs[0] + "");
  // Seed for the load generator's random number generator
  conf.setLong(LG_SEED, seed);
  conf.setInt(LG_NUMMAPTASKS, numMapTasks);
  if (scriptFile == null && durations[0] <= 0) {
    System.err.println("When run as a MapReduce job, elapsed Time or ScriptFile must be specified");
    System.exit(-1);
  }
  conf.setLong(LG_ELAPSEDTIME, durations[0]);
  conf.setLong(LG_STARTTIME, startTime);
  if (scriptFile != null) {
    conf.set(LG_SCRIPTFILE, scriptFile);
  }
  conf.set(LG_FLAGFILE, flagFile.toString());
  // Now set the necessary conf variables that apply to run MR itself.
  JobConf jobConf = new JobConf(conf, LoadGenerator.class);
  jobConf.setJobName("NNLoadGeneratorViaMR");
  jobConf.setNumMapTasks(numMapTasks);
  // 1 reducer to collect the results
  jobConf.setNumReduceTasks(1);
  jobConf.setOutputKeyClass(Text.class);
  jobConf.setOutputValueClass(IntWritable.class);
  jobConf.setMapperClass(MapperThatRunsNNLoadGenerator.class);
  jobConf.setReducerClass(ReducerThatCollectsLGdata.class);
  jobConf.setInputFormat(DummyInputFormat.class);
  jobConf.setOutputFormat(TextOutputFormat.class);
  // Explicitly set number of max map attempts to 1.
  jobConf.setMaxMapAttempts(1);
  // Explicitly turn off speculative execution
  jobConf.setSpeculativeExecution(false);
  // This mapReduce job has no input but has output
  FileOutputFormat.setOutputPath(jobConf, new Path(mrOutDir));
  try {
    JobClient.runJob(jobConf);
  } catch (IOException e) {
    System.err.println("Failed to run job: " + e.getMessage());
    return -1;
  }
  return 0;
}
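The LG_* values placed on the Configuration above are read back on the task side. The real MapperThatRunsNNLoadGenerator is not shown here; a hypothetical skeleton of how an old-API mapper retrieves such settings in configure() could look like the sketch below (the class name and key-name constants are illustrative stand-ins, not Hadoop's actual LG_* values):

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

// Hypothetical mapper skeleton showing how job-level settings are read back
// in configure(); the key names below are assumed, not Hadoop's real constants.
public class LoadGeneratorMapperSketch extends MapReduceBase
    implements Mapper<LongWritable, Text, Text, IntWritable> {

  private static final String LG_ROOT = "loadgen.root";            // assumed key
  private static final String LG_NUMOFTHREADS = "loadgen.threads"; // assumed key
  private static final String LG_ELAPSEDTIME = "loadgen.elapsed";  // assumed key

  private String root;
  private int numThreads;
  private long elapsedTime;

  @Override
  public void configure(JobConf job) {
    root = job.get(LG_ROOT);
    numThreads = job.getInt(LG_NUMOFTHREADS, 1);
    elapsedTime = job.getLong(LG_ELAPSEDTIME, 0);
  }

  @Override
  public void map(LongWritable key, Text value,
      OutputCollector<Text, IntWritable> output, Reporter reporter)
      throws IOException {
    // Generate load against the NameNode here and emit per-operation counts,
    // which the single reducer configured above would collect.
  }
}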
Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.
From the class TestHadoopArchiveLogsRunner, the method testHadoopArchiveLogs:
@Test(timeout = 50000)
public void testHadoopArchiveLogs() throws Exception {
  MiniDFSCluster dfsCluster = null;
  FileSystem fs = null;
  try (MiniYARNCluster yarnCluster = new MiniYARNCluster(
      TestHadoopArchiveLogsRunner.class.getSimpleName(), 1, 2, 1, 1)) {
    Configuration conf = new YarnConfiguration();
    conf.setBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED, true);
    conf.setBoolean(YarnConfiguration.YARN_MINICLUSTER_FIXED_PORTS, true);
    yarnCluster.init(conf);
    yarnCluster.start();
    conf = yarnCluster.getConfig();
    dfsCluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    conf = new JobConf(conf);
    ApplicationId app1 = ApplicationId.newInstance(System.currentTimeMillis(), 1);
    fs = FileSystem.get(conf);
    Path remoteRootLogDir = new Path(conf.get(
        YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
        YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR));
    Path workingDir = new Path(remoteRootLogDir, "archive-logs-work");
    String suffix = "logs";
    Path logDir = new Path(remoteRootLogDir,
        new Path(System.getProperty("user.name"), suffix));
    fs.mkdirs(logDir);
    Path app1Path = new Path(logDir, app1.toString());
    fs.mkdirs(app1Path);
    createFile(fs, new Path(app1Path, "log1"), 3);
    createFile(fs, new Path(app1Path, "log2"), 4);
    createFile(fs, new Path(app1Path, "log3"), 2);
    FileStatus[] app1Files = fs.listStatus(app1Path);
    Assert.assertEquals(3, app1Files.length);
    String[] args = new String[] {
        "-appId", app1.toString(),
        "-user", System.getProperty("user.name"),
        "-workingDir", workingDir.toString(),
        "-remoteRootLogDir", remoteRootLogDir.toString(),
        "-suffix", suffix };
    final HadoopArchiveLogsRunner halr = new HadoopArchiveLogsRunner(conf);
    assertEquals(0, ToolRunner.run(halr, args));
    fs = FileSystem.get(conf);
    app1Files = fs.listStatus(app1Path);
    Assert.assertEquals(1, app1Files.length);
    FileStatus harFile = app1Files[0];
    Assert.assertEquals(app1.toString() + ".har", harFile.getPath().getName());
    Path harPath = new Path("har:///" + harFile.getPath().toUri().getRawPath());
    FileStatus[] harLogs = HarFs.get(harPath.toUri(), conf).listStatus(harPath);
    Assert.assertEquals(3, harLogs.length);
    Arrays.sort(harLogs, new Comparator<FileStatus>() {
      @Override
      public int compare(FileStatus o1, FileStatus o2) {
        return o1.getPath().getName().compareTo(o2.getPath().getName());
      }
    });
    Assert.assertEquals("log1", harLogs[0].getPath().getName());
    Assert.assertEquals(3 * FILE_SIZE_INCREMENT, harLogs[0].getLen());
    Assert.assertEquals(new FsPermission(FsAction.READ_WRITE, FsAction.READ, FsAction.NONE),
        harLogs[0].getPermission());
    Assert.assertEquals(System.getProperty("user.name"), harLogs[0].getOwner());
    Assert.assertEquals("log2", harLogs[1].getPath().getName());
    Assert.assertEquals(4 * FILE_SIZE_INCREMENT, harLogs[1].getLen());
    Assert.assertEquals(new FsPermission(FsAction.READ_WRITE, FsAction.READ, FsAction.NONE),
        harLogs[1].getPermission());
    Assert.assertEquals(System.getProperty("user.name"), harLogs[1].getOwner());
    Assert.assertEquals("log3", harLogs[2].getPath().getName());
    Assert.assertEquals(2 * FILE_SIZE_INCREMENT, harLogs[2].getLen());
    Assert.assertEquals(new FsPermission(FsAction.READ_WRITE, FsAction.READ, FsAction.NONE),
        harLogs[2].getPermission());
    Assert.assertEquals(System.getProperty("user.name"), harLogs[2].getOwner());
    Assert.assertEquals(0, fs.listStatus(workingDir).length);
  } finally {
    if (fs != null) {
      fs.close();
    }
    if (dfsCluster != null) {
      dfsCluster.shutdown();
    }
  }
}
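Note that the test wraps the mini-cluster Configuration in a JobConf (conf = new JobConf(conf)) before handing it to HadoopArchiveLogsRunner, layering MapReduce job settings on top of the YARN/HDFS configuration. The createFile helper is not shown in the snippet; the assertions imply that createFile(fs, path, n) writes n * FILE_SIZE_INCREMENT bytes, so a purely hypothetical reconstruction (TestFileHelper and the increment value are assumptions) could be:

import java.io.IOException;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical reconstruction of the helper used above; the real createFile and
// FILE_SIZE_INCREMENT in TestHadoopArchiveLogsRunner may differ.
final class TestFileHelper {
  static final int FILE_SIZE_INCREMENT = 4096; // assumed increment size

  static void createFile(FileSystem fs, Path p, long sizeMultiple) throws IOException {
    try (FSDataOutputStream out = fs.create(p)) {
      for (long i = 0; i < sizeMultiple; i++) {
        // Write one increment per iteration so the file length is
        // sizeMultiple * FILE_SIZE_INCREMENT, matching the assertions above.
        out.write(new byte[FILE_SIZE_INCREMENT]);
      }
    }
  }
}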