Example 21 with StringTokenizer

use of java.util.StringTokenizer in project hadoop by apache.

the class WordMean method readAndCalcMean.

/**
   * Reads the output file and parses the summation of lengths and the word
   * count to perform a quick calculation of the mean.
   * 
   * @param path
   *          The path to find the output file in. Set in main to the output
   *          directory.
   * @throws IOException
   *           If the output directory cannot be accessed.
   */
private double readAndCalcMean(Path path, Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    Path file = new Path(path, "part-r-00000");
    if (!fs.exists(file))
        throw new IOException("Output not found!");
    BufferedReader br = null;
    try {
        br = new BufferedReader(new InputStreamReader(fs.open(file), Charsets.UTF_8));
        long count = 0;
        long length = 0;
        String line;
        while ((line = br.readLine()) != null) {
            StringTokenizer st = new StringTokenizer(line);
            // grab type
            String type = st.nextToken();
            // differentiate
            if (type.equals(COUNT.toString())) {
                String countLit = st.nextToken();
                count = Long.parseLong(countLit);
            } else if (type.equals(LENGTH.toString())) {
                String lengthLit = st.nextToken();
                length = Long.parseLong(lengthLit);
            }
        }
        // average = total sum / number of elements;
        double theMean = (((double) length) / ((double) count));
        System.out.println("The mean is: " + theMean);
        return theMean;
    } finally {
        if (br != null) {
            br.close();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) StringTokenizer(java.util.StringTokenizer) InputStreamReader(java.io.InputStreamReader) FileSystem(org.apache.hadoop.fs.FileSystem) BufferedReader(java.io.BufferedReader) IOException(java.io.IOException)
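
The tokenize-and-dispatch pattern above (one StringTokenizer per line, first token as a type tag, second as a numeric payload) is easy to exercise in isolation. A minimal, self-contained sketch; the input lines and key names are illustrative, not the exact strings WordMean's reducer emits:

import java.util.StringTokenizer;

public class MeanParseSketch {
    public static void main(String[] args) {
        // Illustrative reducer-style output: "<type> <value>" per line.
        String[] lines = { "length 1234", "count 321" };
        long count = 0;
        long length = 0;
        for (String line : lines) {
            StringTokenizer st = new StringTokenizer(line);
            // grab the type tag, then the numeric payload
            String type = st.nextToken();
            long value = Long.parseLong(st.nextToken());
            if (type.equals("count")) {
                count = value;
            } else if (type.equals("length")) {
                length = value;
            }
        }
        // mean = total sum of lengths / number of words
        System.out.println("The mean is: " + ((double) length / count));
    }
}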

Example 22 with StringTokenizer

use of java.util.StringTokenizer in project hadoop by apache.

the class WordMedian method readAndFindMedian.

/**
   * This is a standard program to read and find a median value based on a file
   * of word counts such as: 1 456, 2 132, 3 56..., where the first value on
   * each line is a word length and the second is the number of times words of
   * that length appear.
   * 
   * @param path
   *          The path to read the HDFS file from (part-r-00000...00001...etc).
   * @param medianIndex1
   *          The first length value to look for.
   * @param medianIndex2
   *          The second length value to look for (will be the same as the first
   *          if there is an odd number of words in total).
   * @throws IOException
   *           If the file cannot be found.
   * */
private double readAndFindMedian(String path, int medianIndex1, int medianIndex2, Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    Path file = new Path(path, "part-r-00000");
    if (!fs.exists(file))
        throw new IOException("Output not found!");
    BufferedReader br = null;
    try {
        br = new BufferedReader(new InputStreamReader(fs.open(file), Charsets.UTF_8));
        int num = 0;
        String line;
        while ((line = br.readLine()) != null) {
            StringTokenizer st = new StringTokenizer(line);
            // grab length
            String currLen = st.nextToken();
            // grab count
            String lengthFreq = st.nextToken();
            int prevNum = num;
            num += Integer.parseInt(lengthFreq);
            if (medianIndex2 >= prevNum && medianIndex1 <= num) {
                System.out.println("The median is: " + currLen);
                br.close();
                return Double.parseDouble(currLen);
            } else if (medianIndex2 >= prevNum && medianIndex1 < num) {
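                // Note: as written, this branch can never run. Its condition is
                // subsumed by the one above (medianIndex1 < num implies
                // medianIndex1 <= num), and each line holds only two tokens, so
                // the nextToken() below would throw if the branch were reached.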
                String nextCurrLen = st.nextToken();
                double theMedian = (Integer.parseInt(currLen) + Integer.parseInt(nextCurrLen)) / 2.0;
                System.out.println("The median is: " + theMedian);
                br.close();
                return theMedian;
            }
        }
    } finally {
        if (br != null) {
            br.close();
        }
    }
    // error, no median found
    return -1;
}
Also used : Path(org.apache.hadoop.fs.Path) StringTokenizer(java.util.StringTokenizer) InputStreamReader(java.io.InputStreamReader) FileSystem(org.apache.hadoop.fs.FileSystem) BufferedReader(java.io.BufferedReader) IOException(java.io.IOException)
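
Since that else-if can never fire (see the note inside the branch), a corrected scan has to detect the straddling case separately and take the next length from the following line rather than from the exhausted tokenizer. A minimal sketch over an in-memory frequency table; the helper name and layout are mine, not Hadoop's:

import java.util.StringTokenizer;

public class MedianScanSketch {

    // Lines are "<length> <frequency>", sorted by length, as in the reducer output.
    static double findMedian(String[] lines, int medianIndex1, int medianIndex2) {
        int num = 0;
        for (int i = 0; i < lines.length; i++) {
            StringTokenizer st = new StringTokenizer(lines[i]);
            int currLen = Integer.parseInt(st.nextToken());
            int freq = Integer.parseInt(st.nextToken());
            int prevNum = num;
            num += freq;
            // positions prevNum+1 .. num (1-based) all have length currLen
            if (medianIndex1 > prevNum && medianIndex2 <= num) {
                // both median positions fall inside this bucket
                return currLen;
            } else if (medianIndex1 > prevNum && medianIndex1 <= num) {
                // the two positions straddle this bucket and the next one;
                // assumes a following line exists, which it must in this case
                int nextLen = Integer.parseInt(new StringTokenizer(lines[i + 1]).nextToken());
                return (currLen + nextLen) / 2.0;
            }
        }
        // error, no median found
        return -1;
    }

    public static void main(String[] args) {
        // 2 words of length 1 and 2 of length 3: the median of 4 words is (1 + 3) / 2
        String[] lines = { "1 2", "3 2" };
        System.out.println(findMedian(lines, 2, 3)); // prints 2.0
    }
}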

Example 23 with StringTokenizer

use of java.util.StringTokenizer in project hadoop by apache.

the class WordStandardDeviation method readAndCalcStdDev.

/**
   * Reads the output file and parses the summation of lengths, the word count,
   * and the lengths squared, to perform a quick calculation of the standard
   * deviation.
   * 
   * @param path
   *          The path to find the output file in. Set in main to the output
   *          directory.
   * @throws IOException
   *           If the output directory cannot be accessed.
   */
private double readAndCalcStdDev(Path path, Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    Path file = new Path(path, "part-r-00000");
    if (!fs.exists(file))
        throw new IOException("Output not found!");
    double stddev = 0;
    BufferedReader br = null;
    try {
        br = new BufferedReader(new InputStreamReader(fs.open(file), Charsets.UTF_8));
        long count = 0;
        long length = 0;
        long square = 0;
        String line;
        while ((line = br.readLine()) != null) {
            StringTokenizer st = new StringTokenizer(line);
            // grab type
            String type = st.nextToken();
            // differentiate
            if (type.equals(COUNT.toString())) {
                String countLit = st.nextToken();
                count = Long.parseLong(countLit);
            } else if (type.equals(LENGTH.toString())) {
                String lengthLit = st.nextToken();
                length = Long.parseLong(lengthLit);
            } else if (type.equals(SQUARE.toString())) {
                String squareLit = st.nextToken();
                square = Long.parseLong(squareLit);
            }
        }
        // average = total sum / number of elements;
        double mean = (((double) length) / ((double) count));
        // standard deviation = sqrt((sum(lengths ^ 2)/count) - (mean ^ 2))
        mean = Math.pow(mean, 2.0);
        double term = (((double) square / ((double) count)));
        stddev = Math.sqrt((term - mean));
        System.out.println("The standard deviation is: " + stddev);
    } finally {
        if (br != null) {
            br.close();
        }
    }
    return stddev;
}
Also used : Path(org.apache.hadoop.fs.Path) StringTokenizer(java.util.StringTokenizer) InputStreamReader(java.io.InputStreamReader) FileSystem(org.apache.hadoop.fs.FileSystem) BufferedReader(java.io.BufferedReader) IOException(java.io.IOException)
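
The one-pass identity used above, stddev = sqrt(sum(x^2)/n - (sum(x)/n)^2), can be sanity-checked against the textbook two-pass form on a small sample. A standalone sketch with made-up word lengths, independent of the HDFS plumbing:

public class StdDevSketch {
    public static void main(String[] args) {
        long[] lengths = { 3, 5, 4, 4, 6 }; // illustrative word lengths
        long count = lengths.length;
        long sum = 0;
        long sumSq = 0;
        for (long x : lengths) {
            sum += x;
            sumSq += x * x;
        }
        // one-pass form, as in readAndCalcStdDev
        double mean = (double) sum / count;
        double onePass = Math.sqrt((double) sumSq / count - mean * mean);
        // two-pass form: square root of the mean squared deviation from the mean
        double acc = 0;
        for (long x : lengths) {
            acc += (x - mean) * (x - mean);
        }
        double twoPass = Math.sqrt(acc / count);
        System.out.println(onePass + " == " + twoPass); // both ~1.0198039
    }
}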

Example 24 with StringTokenizer

use of java.util.StringTokenizer in project hadoop by apache.

the class TestNativeAzureFileSystemClientLogging method verifyStorageClientLogs.

/*
   * Helper method to verify that the client logging is working. It checks
   * that the captured logs contain a line corresponding to the entity
   * created during the test run.
   */
private boolean verifyStorageClientLogs(String capturedLogs, String entity) throws Exception {
    URI uri = testAccount.getRealAccount().getBlobEndpoint();
    String container = testAccount.getRealContainer().getName();
    String validateString = uri + Path.SEPARATOR + container + Path.SEPARATOR + entity;
    boolean entityFound = false;
    StringTokenizer tokenizer = new StringTokenizer(capturedLogs, "\n");
    while (tokenizer.hasMoreTokens()) {
        String token = tokenizer.nextToken();
        if (token.contains(validateString)) {
            entityFound = true;
            break;
        }
    }
    return entityFound;
}
Also used : StringTokenizer(java.util.StringTokenizer) URI(java.net.URI)
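
Splitting a captured log buffer on newlines with a delimiter-based StringTokenizer, as above, reduces to a few lines. A self-contained sketch with made-up log text; note that StringTokenizer, unlike String.split, silently skips empty lines between consecutive delimiters, which is usually what you want when scanning logs:

import java.util.StringTokenizer;

public class LogScanSketch {

    static boolean containsLineWith(String text, String needle) {
        // "\n" as the delimiter set: each token is one non-empty line
        StringTokenizer tokenizer = new StringTokenizer(text, "\n");
        while (tokenizer.hasMoreTokens()) {
            if (tokenizer.nextToken().contains(needle)) {
                return true;
            }
        }
        return false;
    }

    public static void main(String[] args) {
        String logs = "GET container/a\n\nPUT container/entity-1\nDELETE container/b";
        System.out.println(containsLineWith(logs, "entity-1")); // true
    }
}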

Example 25 with StringTokenizer

use of java.util.StringTokenizer in project hadoop by apache.

the class TestJobHistoryParsing method checkHistoryParsing.

private void checkHistoryParsing(final int numMaps, final int numReduces, final int numSuccessfulMaps) throws Exception {
    Configuration conf = new Configuration();
    conf.set(MRJobConfig.USER_NAME, System.getProperty("user.name"));
    long amStartTimeEst = System.currentTimeMillis();
    conf.setClass(NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, MyResolver.class, DNSToSwitchMapping.class);
    RackResolver.init(conf);
    MRApp app = new MRAppWithHistory(numMaps, numReduces, true, this.getClass().getName(), true);
    app.submit(conf);
    Job job = app.getContext().getAllJobs().values().iterator().next();
    JobId jobId = job.getID();
    LOG.info("JOBID is " + TypeConverter.fromYarn(jobId).toString());
    app.waitForState(job, JobState.SUCCEEDED);
    // make sure all events are flushed
    app.waitForState(Service.STATE.STOPPED);
    String jobhistoryDir = JobHistoryUtils.getHistoryIntermediateDoneDirForUser(conf);
    FileContext fc = null;
    try {
        fc = FileContext.getFileContext(conf);
    } catch (IOException ioe) {
        LOG.info("Can not get FileContext", ioe);
        throw (new Exception("Can not get File Context"));
    }
    if (numMaps == numSuccessfulMaps) {
        String summaryFileName = JobHistoryUtils.getIntermediateSummaryFileName(jobId);
        Path summaryFile = new Path(jobhistoryDir, summaryFileName);
        String jobSummaryString = getJobSummary(fc, summaryFile);
        Assert.assertNotNull(jobSummaryString);
        Assert.assertTrue(jobSummaryString.contains("resourcesPerMap=100"));
        Assert.assertTrue(jobSummaryString.contains("resourcesPerReduce=100"));
        Map<String, String> jobSummaryElements = new HashMap<String, String>();
        StringTokenizer strToken = new StringTokenizer(jobSummaryString, ",");
        while (strToken.hasMoreTokens()) {
            String keypair = strToken.nextToken();
            jobSummaryElements.put(keypair.split("=")[0], keypair.split("=")[1]);
        }
        Assert.assertEquals("JobId does not match", jobId.toString(), jobSummaryElements.get("jobId"));
        Assert.assertEquals("JobName does not match", "test", jobSummaryElements.get("jobName"));
        Assert.assertTrue("submitTime should not be 0", Long.parseLong(jobSummaryElements.get("submitTime")) != 0);
        Assert.assertTrue("launchTime should not be 0", Long.parseLong(jobSummaryElements.get("launchTime")) != 0);
        Assert.assertTrue("firstMapTaskLaunchTime should not be 0", Long.parseLong(jobSummaryElements.get("firstMapTaskLaunchTime")) != 0);
        Assert.assertTrue("firstReduceTaskLaunchTime should not be 0", Long.parseLong(jobSummaryElements.get("firstReduceTaskLaunchTime")) != 0);
        Assert.assertTrue("finishTime should not be 0", Long.parseLong(jobSummaryElements.get("finishTime")) != 0);
        Assert.assertEquals("Mismatch in num map slots", numSuccessfulMaps, Integer.parseInt(jobSummaryElements.get("numMaps")));
        Assert.assertEquals("Mismatch in num reduce slots", numReduces, Integer.parseInt(jobSummaryElements.get("numReduces")));
        Assert.assertEquals("User does not match", System.getProperty("user.name"), jobSummaryElements.get("user"));
        Assert.assertEquals("Queue does not match", "default", jobSummaryElements.get("queue"));
        Assert.assertEquals("Status does not match", "SUCCEEDED", jobSummaryElements.get("status"));
    }
    JobHistory jobHistory = new JobHistory();
    jobHistory.init(conf);
    HistoryFileInfo fileInfo = jobHistory.getJobFileInfo(jobId);
    JobInfo jobInfo;
    long numFinishedMaps;
    synchronized (fileInfo) {
        Path historyFilePath = fileInfo.getHistoryFile();
        FSDataInputStream in = null;
        LOG.info("JobHistoryFile is: " + historyFilePath);
        try {
            in = fc.open(fc.makeQualified(historyFilePath));
        } catch (IOException ioe) {
            LOG.info("Can not open history file: " + historyFilePath, ioe);
            throw (new Exception("Can not open History File"));
        }
        JobHistoryParser parser = new JobHistoryParser(in);
        final EventReader realReader = new EventReader(in);
        EventReader reader = Mockito.mock(EventReader.class);
        if (numMaps == numSuccessfulMaps) {
            reader = realReader;
        } else {
            // Hack!
            final AtomicInteger numFinishedEvents = new AtomicInteger(0);
            Mockito.when(reader.getNextEvent()).thenAnswer(new Answer<HistoryEvent>() {

                public HistoryEvent answer(InvocationOnMock invocation) throws IOException {
                    HistoryEvent event = realReader.getNextEvent();
                    if (event instanceof TaskFinishedEvent) {
                        numFinishedEvents.incrementAndGet();
                    }
                    if (numFinishedEvents.get() <= numSuccessfulMaps) {
                        return event;
                    } else {
                        throw new IOException("test");
                    }
                }
            });
        }
        jobInfo = parser.parse(reader);
        numFinishedMaps = computeFinishedMaps(jobInfo, numMaps, numSuccessfulMaps);
        if (numFinishedMaps != numMaps) {
            Exception parseException = parser.getParseException();
            Assert.assertNotNull("Didn't get expected parse exception", parseException);
        }
    }
    Assert.assertEquals("Incorrect username ", System.getProperty("user.name"), jobInfo.getUsername());
    Assert.assertEquals("Incorrect jobName ", "test", jobInfo.getJobname());
    Assert.assertEquals("Incorrect queuename ", "default", jobInfo.getJobQueueName());
    Assert.assertEquals("incorrect conf path", "test", jobInfo.getJobConfPath());
    Assert.assertEquals("incorrect finishedMap ", numSuccessfulMaps, numFinishedMaps);
    Assert.assertEquals("incorrect finishedReduces ", numReduces, jobInfo.getFinishedReduces());
    Assert.assertEquals("incorrect uberized ", job.isUber(), jobInfo.getUberized());
    Map<TaskID, TaskInfo> allTasks = jobInfo.getAllTasks();
    int totalTasks = allTasks.size();
    Assert.assertEquals("total number of tasks is incorrect  ", (numMaps + numReduces), totalTasks);
    // Verify aminfo
    Assert.assertEquals(1, jobInfo.getAMInfos().size());
    Assert.assertEquals(MRApp.NM_HOST, jobInfo.getAMInfos().get(0).getNodeManagerHost());
    AMInfo amInfo = jobInfo.getAMInfos().get(0);
    Assert.assertEquals(MRApp.NM_PORT, amInfo.getNodeManagerPort());
    Assert.assertEquals(MRApp.NM_HTTP_PORT, amInfo.getNodeManagerHttpPort());
    Assert.assertEquals(1, amInfo.getAppAttemptId().getAttemptId());
    Assert.assertEquals(amInfo.getAppAttemptId(), amInfo.getContainerId().getApplicationAttemptId());
    Assert.assertTrue(amInfo.getStartTime() <= System.currentTimeMillis() && amInfo.getStartTime() >= amStartTimeEst);
    ContainerId fakeCid = MRApp.newContainerId(-1, -1, -1, -1);
    // Assert at taskAttempt level
    for (TaskInfo taskInfo : allTasks.values()) {
        int taskAttemptCount = taskInfo.getAllTaskAttempts().size();
        Assert.assertEquals("total number of task attempts ", 1, taskAttemptCount);
        TaskAttemptInfo taInfo = taskInfo.getAllTaskAttempts().values().iterator().next();
        Assert.assertNotNull(taInfo.getContainerId());
        // Verify the wrong ctor is not being used. Remove after mrv1 is removed.
        Assert.assertFalse(taInfo.getContainerId().equals(fakeCid));
    }
    // Deep compare Job and JobInfo
    for (Task task : job.getTasks().values()) {
        TaskInfo taskInfo = allTasks.get(TypeConverter.fromYarn(task.getID()));
        Assert.assertNotNull("TaskInfo not found", taskInfo);
        for (TaskAttempt taskAttempt : task.getAttempts().values()) {
            TaskAttemptInfo taskAttemptInfo = taskInfo.getAllTaskAttempts().get(TypeConverter.fromYarn((taskAttempt.getID())));
            Assert.assertNotNull("TaskAttemptInfo not found", taskAttemptInfo);
            Assert.assertEquals("Incorrect shuffle port for task attempt", taskAttempt.getShufflePort(), taskAttemptInfo.getShufflePort());
            if (numMaps == numSuccessfulMaps) {
                Assert.assertEquals(MRApp.NM_HOST, taskAttemptInfo.getHostname());
                Assert.assertEquals(MRApp.NM_PORT, taskAttemptInfo.getPort());
                // Verify rack-name
                Assert.assertEquals("rack-name is incorrect", taskAttemptInfo.getRackname(), RACK_NAME);
            }
        }
    }
    // test output for HistoryViewer
    PrintStream stdps = System.out;
    try {
        System.setOut(new PrintStream(outContent));
        HistoryViewer viewer;
        synchronized (fileInfo) {
            viewer = new HistoryViewer(fc.makeQualified(fileInfo.getHistoryFile()).toString(), conf, true);
        }
        viewer.print();
        for (TaskInfo taskInfo : allTasks.values()) {
            String test = (taskInfo.getTaskStatus() == null ? "" : taskInfo.getTaskStatus()) + " " + taskInfo.getTaskType() + " task list for " + taskInfo.getTaskId().getJobID();
            Assert.assertTrue(outContent.toString().indexOf(test) > 0);
            Assert.assertTrue(outContent.toString().indexOf(taskInfo.getTaskId().toString()) > 0);
        }
    } finally {
        System.setOut(stdps);
    }
}
Also used : MRAppWithHistory(org.apache.hadoop.mapreduce.v2.hs.TestJobHistoryEvents.MRAppWithHistory) Task(org.apache.hadoop.mapreduce.v2.app.job.Task) Configuration(org.apache.hadoop.conf.Configuration) HashMap(java.util.HashMap) TaskInfo(org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo) HistoryViewer(org.apache.hadoop.mapreduce.jobhistory.HistoryViewer) JobInfo(org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) TaskAttempt(org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt) Job(org.apache.hadoop.mapreduce.v2.app.job.Job) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId) MRApp(org.apache.hadoop.mapreduce.v2.app.MRApp) Path(org.apache.hadoop.fs.Path) EventReader(org.apache.hadoop.mapreduce.jobhistory.EventReader) PrintStream(java.io.PrintStream) HistoryFileInfo(org.apache.hadoop.mapreduce.v2.hs.HistoryFileManager.HistoryFileInfo) TaskID(org.apache.hadoop.mapreduce.TaskID) IOException(java.io.IOException) HistoryEvent(org.apache.hadoop.mapreduce.jobhistory.HistoryEvent) IOException(java.io.IOException) AMInfo(org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.AMInfo) StringTokenizer(java.util.StringTokenizer) TaskFinishedEvent(org.apache.hadoop.mapreduce.jobhistory.TaskFinishedEvent) JobHistoryParser(org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) InvocationOnMock(org.mockito.invocation.InvocationOnMock) TaskAttemptInfo(org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) FileContext(org.apache.hadoop.fs.FileContext)
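
The summary-parsing step in the middle of this test (tokenize on ',', then split each token on '=') generalizes to a small helper. A sketch with made-up input; the helper is mine, not part of the test, and using split("=", 2) keeps any '=' inside a value intact, which the test's unlimited split would not:

import java.util.HashMap;
import java.util.Map;
import java.util.StringTokenizer;

public class SummaryParseSketch {

    static Map<String, String> parse(String summary) {
        Map<String, String> elements = new HashMap<String, String>();
        StringTokenizer strToken = new StringTokenizer(summary, ",");
        while (strToken.hasMoreTokens()) {
            // limit 2: split only on the first '=' of each key-value pair
            String[] pair = strToken.nextToken().split("=", 2);
            elements.put(pair[0], pair[1]);
        }
        return elements;
    }

    public static void main(String[] args) {
        String summary = "jobId=job_1,jobName=test,status=SUCCEEDED";
        System.out.println(parse(summary).get("status")); // prints SUCCEEDED
    }
}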

Aggregations

StringTokenizer (java.util.StringTokenizer): 2182
ArrayList (java.util.ArrayList): 430
IOException (java.io.IOException): 219
HashSet (java.util.HashSet): 137
HashMap (java.util.HashMap): 135
File (java.io.File): 108
Map (java.util.Map): 92
Set (java.util.Set): 88
BufferedReader (java.io.BufferedReader): 87
Iterator (java.util.Iterator): 80
ParseResult (pcgen.rules.persistence.token.ParseResult): 68
List (java.util.List): 65
InputStreamReader (java.io.InputStreamReader): 50
URL (java.net.URL): 48
NoSuchElementException (java.util.NoSuchElementException): 43
FileReader (java.io.FileReader): 40
CDOMReference (pcgen.cdom.base.CDOMReference): 40
Vector (java.util.Vector): 39
Properties (java.util.Properties): 36
Prerequisite (pcgen.core.prereq.Prerequisite): 36