Example 46 with StringTokenizer

Use of java.util.StringTokenizer in project hadoop by apache.

The class KeyFieldHelper, method parseKey.

private KeyDescription parseKey(String arg, StringTokenizer args) {
    //we allow for -k<arg> and -k <arg>
    String keyArgs = null;
    if (arg.length() == 2) {
        if (args.hasMoreTokens()) {
            keyArgs = args.nextToken();
        }
    } else {
        keyArgs = arg.substring(2);
    }
    if (keyArgs == null || keyArgs.length() == 0) {
        return null;
    }
    StringTokenizer st = new StringTokenizer(keyArgs, "nr.,", true);
    KeyDescription key = new KeyDescription();
    String token;
    //the key is of the form 1[.3][nr][,1.5][nr]
    if (st.hasMoreTokens()) {
        token = st.nextToken();
        //the first token must be a number
        key.beginFieldIdx = Integer.parseInt(token);
    }
    if (st.hasMoreTokens()) {
        token = st.nextToken();
        if (token.equals(".")) {
            token = st.nextToken();
            key.beginChar = Integer.parseInt(token);
            if (st.hasMoreTokens()) {
                token = st.nextToken();
            } else {
                return key;
            }
        }
        do {
            if (token.equals("n")) {
                key.numeric = true;
            } else if (token.equals("r")) {
                key.reverse = true;
            } else {
                break;
            }
            if (st.hasMoreTokens()) {
                token = st.nextToken();
            } else {
                return key;
            }
        } while (true);
        if (token.equals(",")) {
            token = st.nextToken();
            //the first token must be a number
            key.endFieldIdx = Integer.parseInt(token);
            if (st.hasMoreTokens()) {
                token = st.nextToken();
                if (token.equals(".")) {
                    token = st.nextToken();
                    key.endChar = Integer.parseInt(token);
                    if (st.hasMoreTokens()) {
                        token = st.nextToken();
                    } else {
                        return key;
                    }
                }
                do {
                    if (token.equals("n")) {
                        key.numeric = true;
                    } else if (token.equals("r")) {
                        key.reverse = true;
                    } else {
                        throw new IllegalArgumentException("Invalid -k argument. " + "Must be of the form -k pos1,[pos2], where pos is of the form " + "f[.c]nr");
                    }
                    if (st.hasMoreTokens()) {
                        token = st.nextToken();
                    } else {
                        break;
                    }
                } while (true);
            }
            return key;
        }
        throw new IllegalArgumentException("Invalid -k argument. " + "Must be of the form -k pos1,[pos2], where pos is of the form " + "f[.c]nr");
    }
    return key;
}
Also used: StringTokenizer (java.util.StringTokenizer)
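
The returnDelims flag is what makes the state machine above workable: with it set to true, StringTokenizer emits each '.', ',', 'n' and 'r' as its own token instead of swallowing it. Here is a minimal, self-contained sketch (not Hadoop code; all names are illustrative) showing the token stream for a typical key spec:

import java.util.StringTokenizer;

public class KeySpecTokensDemo {
    public static void main(String[] args) {
        // Same delimiter set as parseKey above, with returnDelims = true.
        StringTokenizer st = new StringTokenizer("2.4nr,5", "nr.,", true);
        while (st.hasMoreTokens()) {
            System.out.println(st.nextToken());
        }
        // Prints (one per line): 2 . 4 n r , 5
        // i.e. start at field 2, char 4, numeric and reversed ordering,
        // end at field 5.
    }
}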

Example 47 with StringTokenizer

Use of java.util.StringTokenizer in project hadoop by apache.

The class NNBench, method analyzeResults.

/**
   * Analyze the results
   * @throws IOException on error
   */
private int analyzeResults() throws IOException {
    final FileSystem fs = FileSystem.get(getConf());
    Path reduceDir = new Path(baseDir, OUTPUT_DIR_NAME);
    long totalTimeAL1 = 0L;
    long totalTimeAL2 = 0L;
    long totalTimeTPmS = 0L;
    long lateMaps = 0L;
    long numOfExceptions = 0L;
    long successfulFileOps = 0L;
    long mapStartTimeTPmS = 0L;
    long mapEndTimeTPmS = 0L;
    FileStatus[] fss = fs.listStatus(reduceDir);
    for (FileStatus status : fss) {
        Path reduceFile = status.getPath();
        try (DataInputStream in = new DataInputStream(fs.open(reduceFile));
            BufferedReader lines = new BufferedReader(new InputStreamReader(in))) {
            String line;
            while ((line = lines.readLine()) != null) {
                StringTokenizer tokens = new StringTokenizer(line, " \t\n\r\f%;");
                String attr = tokens.nextToken();
                if (attr.endsWith(":totalTimeAL1")) {
                    totalTimeAL1 = Long.parseLong(tokens.nextToken());
                } else if (attr.endsWith(":totalTimeAL2")) {
                    totalTimeAL2 = Long.parseLong(tokens.nextToken());
                } else if (attr.endsWith(":totalTimeTPmS")) {
                    totalTimeTPmS = Long.parseLong(tokens.nextToken());
                } else if (attr.endsWith(":latemaps")) {
                    lateMaps = Long.parseLong(tokens.nextToken());
                } else if (attr.endsWith(":numOfExceptions")) {
                    numOfExceptions = Long.parseLong(tokens.nextToken());
                } else if (attr.endsWith(":successfulFileOps")) {
                    successfulFileOps = Long.parseLong(tokens.nextToken());
                } else if (attr.endsWith(":mapStartTimeTPmS")) {
                    mapStartTimeTPmS = Long.parseLong(tokens.nextToken());
                } else if (attr.endsWith(":mapEndTimeTPmS")) {
                    mapEndTimeTPmS = Long.parseLong(tokens.nextToken());
                }
            }
        }
    }
    // Average latency is the average time to perform 'n' number of
    // operations, n being the number of files
    double avgLatency1 = (double) totalTimeAL1 / successfulFileOps;
    double avgLatency2 = (double) totalTimeAL2 / successfulFileOps;
    // The time it takes for the longest running map is measured. Using that,
    // cluster transactions per second is calculated. It includes time to 
    // retry any of the failed operations
    double longestMapTimeTPmS = (double) (mapEndTimeTPmS - mapStartTimeTPmS);
    double totalTimeTPS = (longestMapTimeTPmS == 0) ? (1000 * successfulFileOps) : (double) (1000 * successfulFileOps) / longestMapTimeTPmS;
    // The time it takes to perform 'n' operations is calculated (in ms),
    // n being the number of files. Using that time, the average execution 
    // time is calculated. It includes time to retry any of the
    // failed operations
    double AverageExecutionTime = (totalTimeTPmS == 0) ? (double) successfulFileOps : (double) totalTimeTPmS / successfulFileOps;
    String resultTPSLine1 = null;
    String resultTPSLine2 = null;
    String resultALLine1 = null;
    String resultALLine2 = null;
    if (operation.equals(OP_CREATE_WRITE)) {
        // For create/write/close, it is treated as two transactions,
        // since a file create from a client perspective involves create and close
        resultTPSLine1 = "               TPS: Create/Write/Close: " + (int) (totalTimeTPS * 2);
        resultTPSLine2 = "Avg exec time (ms): Create/Write/Close: " + AverageExecutionTime;
        resultALLine1 = "            Avg Lat (ms): Create/Write: " + avgLatency1;
        resultALLine2 = "                   Avg Lat (ms): Close: " + avgLatency2;
    } else if (operation.equals(OP_OPEN_READ)) {
        resultTPSLine1 = "                        TPS: Open/Read: " + (int) totalTimeTPS;
        resultTPSLine2 = "         Avg Exec time (ms): Open/Read: " + AverageExecutionTime;
        resultALLine1 = "                    Avg Lat (ms): Open: " + avgLatency1;
        if (readFileAfterOpen) {
            resultALLine2 = "                  Avg Lat (ms): Read: " + avgLatency2;
        }
    } else if (operation.equals(OP_RENAME)) {
        resultTPSLine1 = "                           TPS: Rename: " + (int) totalTimeTPS;
        resultTPSLine2 = "            Avg Exec time (ms): Rename: " + AverageExecutionTime;
        resultALLine1 = "                  Avg Lat (ms): Rename: " + avgLatency1;
    } else if (operation.equals(OP_DELETE)) {
        resultTPSLine1 = "                           TPS: Delete: " + (int) totalTimeTPS;
        resultTPSLine2 = "            Avg Exec time (ms): Delete: " + AverageExecutionTime;
        resultALLine1 = "                  Avg Lat (ms): Delete: " + avgLatency1;
    }
    String[] resultLines = { "-------------- NNBench -------------- : ", "                               Version: " + NNBENCH_VERSION, "                           Date & time: " + sdf.format(new Date(System.currentTimeMillis())), "", "                        Test Operation: " + operation, "                            Start time: " + sdf.format(new Date(startTime)), "                           Maps to run: " + numberOfMaps, "                        Reduces to run: " + numberOfReduces, "                    Block Size (bytes): " + blockSize, "                        Bytes to write: " + bytesToWrite, "                    Bytes per checksum: " + bytesPerChecksum, "                       Number of files: " + numberOfFiles, "                    Replication factor: " + replicationFactorPerFile, "            Successful file operations: " + successfulFileOps, "", "        # maps that missed the barrier: " + lateMaps, "                          # exceptions: " + numOfExceptions, "", resultTPSLine1, resultTPSLine2, resultALLine1, resultALLine2, "", "                 RAW DATA: AL Total #1: " + totalTimeAL1, "                 RAW DATA: AL Total #2: " + totalTimeAL2, "              RAW DATA: TPS Total (ms): " + totalTimeTPmS, "       RAW DATA: Longest Map Time (ms): " + longestMapTimeTPmS, "                   RAW DATA: Late maps: " + lateMaps, "             RAW DATA: # of exceptions: " + numOfExceptions, "" };
    try (PrintStream res = new PrintStream(new FileOutputStream(new File(DEFAULT_RES_FILE_NAME), true))) {
        // Write to a file and also dump to log
        for (String resultLine : resultLines) {
            LOG.info(resultLine);
            res.println(resultLine);
        }
    }
    if (numOfExceptions >= MAX_OPERATION_EXCEPTIONS) {
        return -1;
    }
    return 0;
}
Also used: Path (org.apache.hadoop.fs.Path), PrintStream (java.io.PrintStream), FileStatus (org.apache.hadoop.fs.FileStatus), InputStreamReader (java.io.InputStreamReader), DataInputStream (java.io.DataInputStream), FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream), Date (java.util.Date), StringTokenizer (java.util.StringTokenizer), FileSystem (org.apache.hadoop.fs.FileSystem), FileOutputStream (java.io.FileOutputStream), BufferedReader (java.io.BufferedReader), SequenceFile (org.apache.hadoop.io.SequenceFile), File (java.io.File)
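
The delimiter set " \t\n\r\f%;" splits each reducer output line into an attribute name and its value. The exact line format is produced elsewhere in NNBench and is not shown in this excerpt; assuming a hypothetical line such as "l:totalTimeAL1;8042", the loop body reduces to:

import java.util.StringTokenizer;

public class StatLineDemo {
    public static void main(String[] args) {
        // Hypothetical reducer output line; the real format comes from
        // NNBench's reducer and is not shown in this excerpt.
        String line = "l:totalTimeAL1;8042";
        StringTokenizer tokens = new StringTokenizer(line, " \t\n\r\f%;");
        String attr = tokens.nextToken();                 // "l:totalTimeAL1"
        long value = Long.parseLong(tokens.nextToken());  // 8042
        if (attr.endsWith(":totalTimeAL1")) {
            System.out.println("totalTimeAL1 = " + value);
        }
    }
}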

Example 48 with StringTokenizer

Use of java.util.StringTokenizer in project hadoop by apache.

The class JobBuilder, method preferredLocationForSplits.

private ArrayList<LoggedLocation> preferredLocationForSplits(String splits) {
    if (splits != null) {
        ArrayList<LoggedLocation> locations = null;
        StringTokenizer tok = new StringTokenizer(splits, ",", false);
        if (tok.countTokens() <= MAXIMUM_PREFERRED_LOCATIONS) {
            locations = new ArrayList<LoggedLocation>();
            while (tok.hasMoreTokens()) {
                String nextSplit = tok.nextToken();
                ParsedHost node = getAndRecordParsedHost(nextSplit);
                if (locations != null && node != null) {
                    locations.add(node.makeLoggedLocation());
                }
            }
            return locations;
        }
    }
    return null;
}
Also used: StringTokenizer (java.util.StringTokenizer)
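
countTokens() counts the remaining tokens without consuming them, so the guard above costs one extra scan of the string but leaves the tokenizer positioned at the start for the while loop. (Note that the locations != null test inside the loop is always true at that point, since locations was just assigned; the check that actually limits the list is countTokens().) A self-contained sketch of the same guard, with a made-up cap standing in for MAXIMUM_PREFERRED_LOCATIONS:

import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;

public class SplitLocationsDemo {
    // Illustrative cap; Hadoop defines MAXIMUM_PREFERRED_LOCATIONS elsewhere.
    private static final int MAX_LOCATIONS = 4;

    static List<String> parse(String splits) {
        StringTokenizer tok = new StringTokenizer(splits, ",", false);
        if (tok.countTokens() > MAX_LOCATIONS) {
            // Too many preferred locations: record none of them.
            return null;
        }
        List<String> locations = new ArrayList<>();
        while (tok.hasMoreTokens()) {
            locations.add(tok.nextToken());
        }
        return locations;
    }

    public static void main(String[] args) {
        System.out.println(parse("host1,host2,host3")); // [host1, host2, host3]
        System.out.println(parse("h1,h2,h3,h4,h5"));    // null
    }
}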

Example 49 with StringTokenizer

Use of java.util.StringTokenizer in project hadoop by apache.

The class TopologyBuilder, method preferredLocationForSplits.

private void preferredLocationForSplits(String splits) {
    if (splits != null) {
        StringTokenizer tok = new StringTokenizer(splits, ",", false);
        while (tok.hasMoreTokens()) {
            String nextSplit = tok.nextToken();
            recordParsedHost(nextSplit);
        }
    }
}
Also used: StringTokenizer (java.util.StringTokenizer)
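
A behavioral detail worth knowing when choosing StringTokenizer for comma-separated lists like this one: it skips empty tokens, so doubled or trailing commas never produce an empty host name, whereas String.split(",") keeps interior empty strings. A quick comparison:

import java.util.Arrays;
import java.util.StringTokenizer;

public class EmptyTokenDemo {
    public static void main(String[] args) {
        String splits = "host1,,host2,"; // doubled and trailing commas

        // StringTokenizer silently drops the empty tokens.
        StringTokenizer tok = new StringTokenizer(splits, ",", false);
        while (tok.hasMoreTokens()) {
            System.out.println("tokenizer: " + tok.nextToken()); // host1, host2
        }

        // String.split keeps the interior empty string
        // (trailing empty strings are removed).
        System.out.println(Arrays.toString(splits.split(","))); // [host1, , host2]
    }
}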

Example 50 with StringTokenizer

Use of java.util.StringTokenizer in project hadoop by apache.

The class Submitter, method run.

@Override
public int run(String[] args) throws Exception {
    CommandLineParser cli = new CommandLineParser();
    if (args.length == 0) {
        cli.printUsage();
        return 1;
    }
    cli.addOption("input", false, "input path to the maps", "path");
    cli.addOption("output", false, "output path from the reduces", "path");
    cli.addOption("jar", false, "job jar file", "path");
    cli.addOption("inputformat", false, "java classname of InputFormat", "class");
    //cli.addArgument("javareader", false, "is the RecordReader in Java");
    cli.addOption("map", false, "java classname of Mapper", "class");
    cli.addOption("partitioner", false, "java classname of Partitioner", "class");
    cli.addOption("reduce", false, "java classname of Reducer", "class");
    cli.addOption("writer", false, "java classname of OutputFormat", "class");
    cli.addOption("program", false, "URI to application executable", "class");
    cli.addOption("reduces", false, "number of reduces", "num");
    cli.addOption("jobconf", false, "\"n1=v1,n2=v2,..\" (Deprecated) Optional. Add or override a JobConf property.", "key=val");
    cli.addOption("lazyOutput", false, "Optional. Create output lazily", "boolean");
    Parser parser = cli.createParser();
    try {
        GenericOptionsParser genericParser = new GenericOptionsParser(getConf(), args);
        CommandLine results = parser.parse(cli.options, genericParser.getRemainingArgs());
        JobConf job = new JobConf(getConf());
        if (results.hasOption("input")) {
            FileInputFormat.setInputPaths(job, results.getOptionValue("input"));
        }
        if (results.hasOption("output")) {
            FileOutputFormat.setOutputPath(job, new Path(results.getOptionValue("output")));
        }
        if (results.hasOption("jar")) {
            job.setJar(results.getOptionValue("jar"));
        }
        if (results.hasOption("inputformat")) {
            setIsJavaRecordReader(job, true);
            job.setInputFormat(getClass(results, "inputformat", job, InputFormat.class));
        }
        if (results.hasOption("javareader")) {
            setIsJavaRecordReader(job, true);
        }
        if (results.hasOption("map")) {
            setIsJavaMapper(job, true);
            job.setMapperClass(getClass(results, "map", job, Mapper.class));
        }
        if (results.hasOption("partitioner")) {
            job.setPartitionerClass(getClass(results, "partitioner", job, Partitioner.class));
        }
        if (results.hasOption("reduce")) {
            setIsJavaReducer(job, true);
            job.setReducerClass(getClass(results, "reduce", job, Reducer.class));
        }
        if (results.hasOption("reduces")) {
            job.setNumReduceTasks(Integer.parseInt(results.getOptionValue("reduces")));
        }
        if (results.hasOption("writer")) {
            setIsJavaRecordWriter(job, true);
            job.setOutputFormat(getClass(results, "writer", job, OutputFormat.class));
        }
        if (results.hasOption("lazyOutput")) {
            if (Boolean.parseBoolean(results.getOptionValue("lazyOutput"))) {
                LazyOutputFormat.setOutputFormatClass(job, job.getOutputFormat().getClass());
            }
        }
        if (results.hasOption("program")) {
            setExecutable(job, results.getOptionValue("program"));
        }
        if (results.hasOption("jobconf")) {
            LOG.warn("-jobconf option is deprecated, please use -D instead.");
            String options = results.getOptionValue("jobconf");
            StringTokenizer tokenizer = new StringTokenizer(options, ",");
            while (tokenizer.hasMoreTokens()) {
                String keyVal = tokenizer.nextToken().trim();
                String[] keyValSplit = keyVal.split("=");
                job.set(keyValSplit[0], keyValSplit[1]);
            }
        }
        // if they gave us a jar file, include it into the class path
        String jarFile = job.getJar();
        if (jarFile != null) {
            final URL[] urls = new URL[] { FileSystem.getLocal(job).pathToFile(new Path(jarFile)).toURL() };
            //FindBugs complains that creating a URLClassLoader should be
            //in a doPrivileged() block. 
            ClassLoader loader = AccessController.doPrivileged(new PrivilegedAction<ClassLoader>() {

                public ClassLoader run() {
                    return new URLClassLoader(urls);
                }
            });
            job.setClassLoader(loader);
        }
        runJob(job);
        return 0;
    } catch (ParseException pe) {
        LOG.info("Error : " + pe);
        cli.printUsage();
        return 1;
    }
}
Also used: Path (org.apache.hadoop.fs.Path), NullOutputFormat (org.apache.hadoop.mapred.lib.NullOutputFormat), OutputFormat (org.apache.hadoop.mapred.OutputFormat), LazyOutputFormat (org.apache.hadoop.mapred.lib.LazyOutputFormat), FileOutputFormat (org.apache.hadoop.mapred.FileOutputFormat), URL (java.net.URL), GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser), BasicParser (org.apache.commons.cli.BasicParser), Parser (org.apache.commons.cli.Parser), Mapper (org.apache.hadoop.mapred.Mapper), CommandLine (org.apache.commons.cli.CommandLine), StringTokenizer (java.util.StringTokenizer), InputFormat (org.apache.hadoop.mapred.InputFormat), FileInputFormat (org.apache.hadoop.mapred.FileInputFormat), URLClassLoader (java.net.URLClassLoader), ParseException (org.apache.commons.cli.ParseException), Reducer (org.apache.hadoop.mapred.Reducer), JobConf (org.apache.hadoop.mapred.JobConf), HashPartitioner (org.apache.hadoop.mapred.lib.HashPartitioner), Partitioner (org.apache.hadoop.mapred.Partitioner)
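
One sharp edge in the -jobconf handling above: keyVal.split("=") splits on every '=', so a value that itself contains '=' is truncated to its first segment, and a token with no '=' at all throws ArrayIndexOutOfBoundsException. Well-formed "n1=v1,n2=v2" input never hits either case, but a defensive variant (a sketch, not the Hadoop code) would pass a split limit:

import java.util.HashMap;
import java.util.Map;
import java.util.StringTokenizer;

public class JobconfParseDemo {
    /** Parses "n1=v1,n2=v2,.." into a map; a defensive sketch only. */
    static Map<String, String> parse(String options) {
        Map<String, String> conf = new HashMap<>();
        StringTokenizer tokenizer = new StringTokenizer(options, ",");
        while (tokenizer.hasMoreTokens()) {
            String keyVal = tokenizer.nextToken().trim();
            // A limit of 2 keeps any '=' inside the value intact.
            String[] kv = keyVal.split("=", 2);
            if (kv.length != 2) {
                throw new IllegalArgumentException("Malformed pair: " + keyVal);
            }
            conf.put(kv[0], kv[1]);
        }
        return conf;
    }

    public static void main(String[] args) {
        // The second '=' in "b=x=y" survives thanks to the limit.
        System.out.println(parse("a=1, b=x=y"));
    }
}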

Aggregations

StringTokenizer (java.util.StringTokenizer): 4881
ArrayList (java.util.ArrayList): 1083
IOException (java.io.IOException): 506
File (java.io.File): 392
BufferedReader (java.io.BufferedReader): 380
HashMap (java.util.HashMap): 375
HashSet (java.util.HashSet): 263
FileReader (java.io.FileReader): 224
List (java.util.List): 200
InputStreamReader (java.io.InputStreamReader): 191
Map (java.util.Map): 152
FileInputStream (java.io.FileInputStream): 135
Iterator (java.util.Iterator): 114
Set (java.util.Set): 114
URL (java.net.URL): 108
NoSuchElementException (java.util.NoSuchElementException): 90
Properties (java.util.Properties): 84
InputStream (java.io.InputStream): 83
BufferedWriter (java.io.BufferedWriter): 80
FileNotFoundException (java.io.FileNotFoundException): 78