Use of java.util.StringTokenizer in project hadoop by Apache.
The class KeyFieldHelper, method parseKey:
private KeyDescription parseKey(String arg, StringTokenizer args) {
  //we allow for -k<arg> and -k <arg>
  String keyArgs = null;
  if (arg.length() == 2) {
    if (args.hasMoreTokens()) {
      keyArgs = args.nextToken();
    }
  } else {
    keyArgs = arg.substring(2);
  }
  if (keyArgs == null || keyArgs.length() == 0) {
    return null;
  }
  StringTokenizer st = new StringTokenizer(keyArgs, "nr.,", true);
  KeyDescription key = new KeyDescription();
  String token;
  //the key is of the form 1[.3][nr][,1.5][nr]
  if (st.hasMoreTokens()) {
    token = st.nextToken();
    //the first token must be a number
    key.beginFieldIdx = Integer.parseInt(token);
  }
  if (st.hasMoreTokens()) {
    token = st.nextToken();
    if (token.equals(".")) {
      token = st.nextToken();
      key.beginChar = Integer.parseInt(token);
      if (st.hasMoreTokens()) {
        token = st.nextToken();
      } else {
        return key;
      }
    }
    do {
      if (token.equals("n")) {
        key.numeric = true;
      } else if (token.equals("r")) {
        key.reverse = true;
      } else
        break;
      if (st.hasMoreTokens()) {
        token = st.nextToken();
      } else {
        return key;
      }
    } while (true);
    if (token.equals(",")) {
      token = st.nextToken();
      //the first token must be a number
      key.endFieldIdx = Integer.parseInt(token);
      if (st.hasMoreTokens()) {
        token = st.nextToken();
        if (token.equals(".")) {
          token = st.nextToken();
          key.endChar = Integer.parseInt(token);
          if (st.hasMoreTokens()) {
            token = st.nextToken();
          } else {
            return key;
          }
        }
        do {
          if (token.equals("n")) {
            key.numeric = true;
          } else if (token.equals("r")) {
            key.reverse = true;
          } else {
            throw new IllegalArgumentException("Invalid -k argument. " +
                "Must be of the form -k pos1,[pos2], where pos is of the form " +
                "f[.c]nr");
          }
          if (st.hasMoreTokens()) {
            token = st.nextToken();
          } else {
            break;
          }
        } while (true);
      }
      return key;
    }
    throw new IllegalArgumentException("Invalid -k argument. " +
        "Must be of the form -k pos1,[pos2], where pos is of the form " +
        "f[.c]nr");
  }
  return key;
}
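The tokenizer is created with returnDelims set to true, so the delimiter characters ".", ",", "n", and "r" come back as tokens of their own, which is what lets parseKey walk the key specification positionally. A minimal, hypothetical sketch (the KeySpecDemo class is not part of Hadoop) showing how a key specification such as 2.1,3.5nr tokenizes:

import java.util.StringTokenizer;

// Hypothetical demo class, not part of Hadoop. It only illustrates how the
// delimiters are returned as separate tokens when returnDelims is true.
public class KeySpecDemo {
  public static void main(String[] args) {
    StringTokenizer st = new StringTokenizer("2.1,3.5nr", "nr.,", true);
    while (st.hasMoreTokens()) {
      System.out.println(st.nextToken());
    }
    // Prints one token per line: 2 . 1 , 3 . 5 n r
  }
}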
Use of java.util.StringTokenizer in project hadoop by Apache.
The class NNBench, method analyzeResults:
/**
 * Analyze the results
 * @throws IOException on error
 */
private int analyzeResults() throws IOException {
  final FileSystem fs = FileSystem.get(getConf());
  Path reduceDir = new Path(baseDir, OUTPUT_DIR_NAME);
  long totalTimeAL1 = 0l;
  long totalTimeAL2 = 0l;
  long totalTimeTPmS = 0l;
  long lateMaps = 0l;
  long numOfExceptions = 0l;
  long successfulFileOps = 0l;
  long mapStartTimeTPmS = 0l;
  long mapEndTimeTPmS = 0l;
  FileStatus[] fss = fs.listStatus(reduceDir);
  for (FileStatus status : fss) {
    Path reduceFile = status.getPath();
    try (DataInputStream in = new DataInputStream(fs.open(reduceFile));
        BufferedReader lines = new BufferedReader(new InputStreamReader(in))) {
      String line;
      while ((line = lines.readLine()) != null) {
        StringTokenizer tokens = new StringTokenizer(line, " \t\n\r\f%;");
        String attr = tokens.nextToken();
        if (attr.endsWith(":totalTimeAL1")) {
          totalTimeAL1 = Long.parseLong(tokens.nextToken());
        } else if (attr.endsWith(":totalTimeAL2")) {
          totalTimeAL2 = Long.parseLong(tokens.nextToken());
        } else if (attr.endsWith(":totalTimeTPmS")) {
          totalTimeTPmS = Long.parseLong(tokens.nextToken());
        } else if (attr.endsWith(":latemaps")) {
          lateMaps = Long.parseLong(tokens.nextToken());
        } else if (attr.endsWith(":numOfExceptions")) {
          numOfExceptions = Long.parseLong(tokens.nextToken());
        } else if (attr.endsWith(":successfulFileOps")) {
          successfulFileOps = Long.parseLong(tokens.nextToken());
        } else if (attr.endsWith(":mapStartTimeTPmS")) {
          mapStartTimeTPmS = Long.parseLong(tokens.nextToken());
        } else if (attr.endsWith(":mapEndTimeTPmS")) {
          mapEndTimeTPmS = Long.parseLong(tokens.nextToken());
        }
      }
    }
  }
  // Average latency is the average time to perform 'n' number of
  // operations, n being the number of files
  double avgLatency1 = (double) totalTimeAL1 / successfulFileOps;
  double avgLatency2 = (double) totalTimeAL2 / successfulFileOps;
  // The time it takes for the longest running map is measured. Using that,
  // cluster transactions per second is calculated. It includes time to
  // retry any of the failed operations
  double longestMapTimeTPmS = (double) (mapEndTimeTPmS - mapStartTimeTPmS);
  double totalTimeTPS = (longestMapTimeTPmS == 0) ?
      (1000 * successfulFileOps) :
      (double) (1000 * successfulFileOps) / longestMapTimeTPmS;
  // The time it takes to perform 'n' operations is calculated (in ms),
  // n being the number of files. Using that time, the average execution
  // time is calculated. It includes time to retry any of the
  // failed operations
  double AverageExecutionTime = (totalTimeTPmS == 0) ?
      (double) successfulFileOps :
      (double) totalTimeTPmS / successfulFileOps;
  String resultTPSLine1 = null;
  String resultTPSLine2 = null;
  String resultALLine1 = null;
  String resultALLine2 = null;
  if (operation.equals(OP_CREATE_WRITE)) {
    // For create/write/close, it is treated as two transactions,
    // since a file create from a client perspective involves create and close
    resultTPSLine1 = " TPS: Create/Write/Close: " + (int) (totalTimeTPS * 2);
    resultTPSLine2 = "Avg exec time (ms): Create/Write/Close: " + AverageExecutionTime;
    resultALLine1 = " Avg Lat (ms): Create/Write: " + avgLatency1;
    resultALLine2 = " Avg Lat (ms): Close: " + avgLatency2;
  } else if (operation.equals(OP_OPEN_READ)) {
    resultTPSLine1 = " TPS: Open/Read: " + (int) totalTimeTPS;
    resultTPSLine2 = " Avg Exec time (ms): Open/Read: " + AverageExecutionTime;
    resultALLine1 = " Avg Lat (ms): Open: " + avgLatency1;
    if (readFileAfterOpen) {
      resultALLine2 = " Avg Lat (ms): Read: " + avgLatency2;
    }
  } else if (operation.equals(OP_RENAME)) {
    resultTPSLine1 = " TPS: Rename: " + (int) totalTimeTPS;
    resultTPSLine2 = " Avg Exec time (ms): Rename: " + AverageExecutionTime;
    resultALLine1 = " Avg Lat (ms): Rename: " + avgLatency1;
  } else if (operation.equals(OP_DELETE)) {
    resultTPSLine1 = " TPS: Delete: " + (int) totalTimeTPS;
    resultTPSLine2 = " Avg Exec time (ms): Delete: " + AverageExecutionTime;
    resultALLine1 = " Avg Lat (ms): Delete: " + avgLatency1;
  }
  String[] resultLines = {
      "-------------- NNBench -------------- : ",
      " Version: " + NNBENCH_VERSION,
      " Date & time: " + sdf.format(new Date(System.currentTimeMillis())),
      "",
      " Test Operation: " + operation,
      " Start time: " + sdf.format(new Date(startTime)),
      " Maps to run: " + numberOfMaps,
      " Reduces to run: " + numberOfReduces,
      " Block Size (bytes): " + blockSize,
      " Bytes to write: " + bytesToWrite,
      " Bytes per checksum: " + bytesPerChecksum,
      " Number of files: " + numberOfFiles,
      " Replication factor: " + replicationFactorPerFile,
      " Successful file operations: " + successfulFileOps,
      "",
      " # maps that missed the barrier: " + lateMaps,
      " # exceptions: " + numOfExceptions,
      "",
      resultTPSLine1,
      resultTPSLine2,
      resultALLine1,
      resultALLine2,
      "",
      " RAW DATA: AL Total #1: " + totalTimeAL1,
      " RAW DATA: AL Total #2: " + totalTimeAL2,
      " RAW DATA: TPS Total (ms): " + totalTimeTPmS,
      " RAW DATA: Longest Map Time (ms): " + longestMapTimeTPmS,
      " RAW DATA: Late maps: " + lateMaps,
      " RAW DATA: # of exceptions: " + numOfExceptions,
      "" };
  try (PrintStream res = new PrintStream(new FileOutputStream(new File(DEFAULT_RES_FILE_NAME), true))) {
    // Write to a file and also dump to log
    for (String resultLine : resultLines) {
      LOG.info(resultLine);
      res.println(resultLine);
    }
  }
  if (numOfExceptions >= MAX_OPERATION_EXCEPTIONS) {
    return -1;
  }
  return 0;
}
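Each reducer output line is tokenized with the delimiter set " \t\n\r\f%;" and the first token names the metric; the following token is parsed as its value. A minimal, self-contained sketch of that attribute/value extraction (the example line is an assumption, the real reducer output format may differ, but any of the listed delimiters separates attribute from value):

import java.util.StringTokenizer;

// Hypothetical demo, not part of Hadoop; the example line is assumed.
public class NNBenchLineDemo {
  public static void main(String[] args) {
    String line = "l:successfulFileOps\t1000";
    StringTokenizer tokens = new StringTokenizer(line, " \t\n\r\f%;");
    String attr = tokens.nextToken();                 // "l:successfulFileOps"
    if (attr.endsWith(":successfulFileOps")) {
      long successfulFileOps = Long.parseLong(tokens.nextToken());
      System.out.println(successfulFileOps);          // 1000
    }
  }
}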
Use of java.util.StringTokenizer in project hadoop by Apache.
The class JobBuilder, method preferredLocationForSplits:
private ArrayList<LoggedLocation> preferredLocationForSplits(String splits) {
  if (splits != null) {
    ArrayList<LoggedLocation> locations = null;
    StringTokenizer tok = new StringTokenizer(splits, ",", false);
    if (tok.countTokens() <= MAXIMUM_PREFERRED_LOCATIONS) {
      locations = new ArrayList<LoggedLocation>();
      while (tok.hasMoreTokens()) {
        String nextSplit = tok.nextToken();
        ParsedHost node = getAndRecordParsedHost(nextSplit);
        if (locations != null && node != null) {
          locations.add(node.makeLoggedLocation());
        }
      }
      return locations;
    }
  }
  return null;
}
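The distinctive part here is that countTokens() is consulted before iterating, so an overly long location list is rejected as a whole rather than truncated. A small, hypothetical sketch of the same pattern (the class, helper, and limit value are illustrative, not Hadoop code):

import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;

// Hypothetical stand-in for the MAXIMUM_PREFERRED_LOCATIONS check above.
public class SplitLocationsDemo {
  static final int MAX_LOCATIONS = 25;   // assumed limit, for illustration only

  static List<String> hostsFor(String splits) {
    if (splits == null) {
      return null;
    }
    StringTokenizer tok = new StringTokenizer(splits, ",", false);
    if (tok.countTokens() > MAX_LOCATIONS) {
      return null;                       // too many locations: record nothing
    }
    List<String> hosts = new ArrayList<>();
    while (tok.hasMoreTokens()) {
      hosts.add(tok.nextToken());
    }
    return hosts;
  }

  public static void main(String[] args) {
    System.out.println(hostsFor("host1,host2,host3"));   // [host1, host2, host3]
  }
}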
Use of java.util.StringTokenizer in project hadoop by Apache.
The class TopologyBuilder, method preferredLocationForSplits:
private void preferredLocationForSplits(String splits) {
  if (splits != null) {
    StringTokenizer tok = new StringTokenizer(splits, ",", false);
    while (tok.hasMoreTokens()) {
      String nextSplit = tok.nextToken();
      recordParsedHost(nextSplit);
    }
  }
}
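For comparison, a short standalone sketch (not Hadoop code) of one property that makes StringTokenizer convenient for this kind of comma-separated host list: unlike String.split, it skips empty entries, so repeated or trailing commas do not yield empty host names.

import java.util.Arrays;
import java.util.StringTokenizer;

// Illustrative only: contrasts StringTokenizer with String.split for a
// comma-separated list that contains empty entries.
public class CommaSplitDemo {
  public static void main(String[] args) {
    String splits = "host1,,host2,";
    StringTokenizer tok = new StringTokenizer(splits, ",", false);
    while (tok.hasMoreTokens()) {
      System.out.println(tok.nextToken());          // prints host1 then host2
    }
    System.out.println(Arrays.toString(splits.split(",")));
    // prints [host1, , host2]: the embedded empty entry is kept by split
  }
}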
Use of java.util.StringTokenizer in project hadoop by Apache.
The class Submitter, method run:
@Override
public int run(String[] args) throws Exception {
  CommandLineParser cli = new CommandLineParser();
  if (args.length == 0) {
    cli.printUsage();
    return 1;
  }
  cli.addOption("input", false, "input path to the maps", "path");
  cli.addOption("output", false, "output path from the reduces", "path");
  cli.addOption("jar", false, "job jar file", "path");
  cli.addOption("inputformat", false, "java classname of InputFormat", "class");
  //cli.addArgument("javareader", false, "is the RecordReader in Java");
  cli.addOption("map", false, "java classname of Mapper", "class");
  cli.addOption("partitioner", false, "java classname of Partitioner", "class");
  cli.addOption("reduce", false, "java classname of Reducer", "class");
  cli.addOption("writer", false, "java classname of OutputFormat", "class");
  cli.addOption("program", false, "URI to application executable", "class");
  cli.addOption("reduces", false, "number of reduces", "num");
  cli.addOption("jobconf", false,
      "\"n1=v1,n2=v2,..\" (Deprecated) Optional. Add or override a JobConf property.", "key=val");
  cli.addOption("lazyOutput", false, "Optional. Create output lazily", "boolean");
  Parser parser = cli.createParser();
  try {
    GenericOptionsParser genericParser = new GenericOptionsParser(getConf(), args);
    CommandLine results = parser.parse(cli.options, genericParser.getRemainingArgs());
    JobConf job = new JobConf(getConf());
    if (results.hasOption("input")) {
      FileInputFormat.setInputPaths(job, results.getOptionValue("input"));
    }
    if (results.hasOption("output")) {
      FileOutputFormat.setOutputPath(job, new Path(results.getOptionValue("output")));
    }
    if (results.hasOption("jar")) {
      job.setJar(results.getOptionValue("jar"));
    }
    if (results.hasOption("inputformat")) {
      setIsJavaRecordReader(job, true);
      job.setInputFormat(getClass(results, "inputformat", job, InputFormat.class));
    }
    if (results.hasOption("javareader")) {
      setIsJavaRecordReader(job, true);
    }
    if (results.hasOption("map")) {
      setIsJavaMapper(job, true);
      job.setMapperClass(getClass(results, "map", job, Mapper.class));
    }
    if (results.hasOption("partitioner")) {
      job.setPartitionerClass(getClass(results, "partitioner", job, Partitioner.class));
    }
    if (results.hasOption("reduce")) {
      setIsJavaReducer(job, true);
      job.setReducerClass(getClass(results, "reduce", job, Reducer.class));
    }
    if (results.hasOption("reduces")) {
      job.setNumReduceTasks(Integer.parseInt(results.getOptionValue("reduces")));
    }
    if (results.hasOption("writer")) {
      setIsJavaRecordWriter(job, true);
      job.setOutputFormat(getClass(results, "writer", job, OutputFormat.class));
    }
    if (results.hasOption("lazyOutput")) {
      if (Boolean.parseBoolean(results.getOptionValue("lazyOutput"))) {
        LazyOutputFormat.setOutputFormatClass(job, job.getOutputFormat().getClass());
      }
    }
    if (results.hasOption("program")) {
      setExecutable(job, results.getOptionValue("program"));
    }
    if (results.hasOption("jobconf")) {
      LOG.warn("-jobconf option is deprecated, please use -D instead.");
      String options = results.getOptionValue("jobconf");
      StringTokenizer tokenizer = new StringTokenizer(options, ",");
      while (tokenizer.hasMoreTokens()) {
        String keyVal = tokenizer.nextToken().trim();
        String[] keyValSplit = keyVal.split("=");
        job.set(keyValSplit[0], keyValSplit[1]);
      }
    }
    // if they gave us a jar file, include it into the class path
    String jarFile = job.getJar();
    if (jarFile != null) {
      final URL[] urls = new URL[] { FileSystem.getLocal(job).pathToFile(new Path(jarFile)).toURL() };
      //FindBugs complains that creating a URLClassLoader should be
      //in a doPrivileged() block.
      ClassLoader loader = AccessController.doPrivileged(new PrivilegedAction<ClassLoader>() {
        public ClassLoader run() {
          return new URLClassLoader(urls);
        }
      });
      job.setClassLoader(loader);
    }
    runJob(job);
    return 0;
  } catch (ParseException pe) {
    LOG.info("Error : " + pe);
    cli.printUsage();
    return 1;
  }
}
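Of the options handled above, only -jobconf uses StringTokenizer: the comma-separated list of key=value pairs is tokenized and each pair is then split on "=". A minimal standalone sketch of just that step, with the JobConf replaced by a plain Map so the snippet runs on its own (the property names are chosen only for illustration):

import java.util.HashMap;
import java.util.Map;
import java.util.StringTokenizer;

// Hypothetical sketch of the -jobconf parsing above; not Hadoop code.
public class JobConfOptionDemo {
  public static void main(String[] args) {
    String options = "mapreduce.job.reduces=2,mapreduce.map.memory.mb=1024";
    Map<String, String> conf = new HashMap<>();
    StringTokenizer tokenizer = new StringTokenizer(options, ",");
    while (tokenizer.hasMoreTokens()) {
      String keyVal = tokenizer.nextToken().trim();
      String[] keyValSplit = keyVal.split("=");
      conf.put(keyValSplit[0], keyValSplit[1]);
    }
    System.out.println(conf);
  }
}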