Example 51 with OutputStreamWriter

Use of java.io.OutputStreamWriter in project hadoop by apache.

The class TestMRCJCJobClient, method runJob:

private String runJob() throws Exception {
    OutputStream os = getFileSystem().create(new Path(getInputDir(), "text.txt"));
    Writer wr = new OutputStreamWriter(os);
    wr.write("hello1\n");
    wr.write("hello2\n");
    wr.write("hello3\n");
    wr.close();
    JobConf conf = createJobConf();
    conf.setJobName("mr");
    conf.setJobPriority(JobPriority.HIGH);
    conf.setInputFormat(TextInputFormat.class);
    conf.setMapOutputKeyClass(LongWritable.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(org.apache.hadoop.mapred.lib.IdentityMapper.class);
    conf.setReducerClass(org.apache.hadoop.mapred.lib.IdentityReducer.class);
    FileInputFormat.setInputPaths(conf, getInputDir());
    FileOutputFormat.setOutputPath(conf, getOutputDir());
    return JobClient.runJob(conf).getID().toString();
}
Also used: Path (org.apache.hadoop.fs.Path), OutputStream (java.io.OutputStream), OutputStreamWriter (java.io.OutputStreamWriter), Writer (java.io.Writer)
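
The writer above falls back to the platform default charset. A minimal variant of that input-writing step, not taken from the Hadoop sources, pins the encoding to UTF-8 and uses try-with-resources so the stream is closed even if a write fails (getFileSystem() and getInputDir() are the same test-harness methods used in the example):

private void writeInputUtf8() throws java.io.IOException {
    // Sketch only: the explicit charset and try-with-resources are additions, not part of the original test.
    try (java.io.Writer wr = new java.io.OutputStreamWriter(
            getFileSystem().create(new org.apache.hadoop.fs.Path(getInputDir(), "text.txt")),
            java.nio.charset.StandardCharsets.UTF_8)) {
        wr.write("hello1\n");
        wr.write("hello2\n");
        wr.write("hello3\n");
    } // close() is implicit and flushes the writer
}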

Example 52 with OutputStreamWriter

Use of java.io.OutputStreamWriter in project hadoop by apache.

The class TestMRIntermediateDataEncryption, method createInput:

private void createInput(FileSystem fs, int numMappers, int numLines) throws Exception {
    fs.delete(INPUT_DIR, true);
    for (int i = 0; i < numMappers; i++) {
        OutputStream os = fs.create(new Path(INPUT_DIR, "input_" + i + ".txt"));
        Writer writer = new OutputStreamWriter(os);
        for (int j = 0; j < numLines; j++) {
            // Create sorted key, value pairs.
            int k = j + 1;
            String formattedNumber = String.format("%09d", k);
            writer.write(formattedNumber + " " + formattedNumber + "\n");
        }
        writer.close();
    }
}
Also used: Path (org.apache.hadoop.fs.Path), OutputStream (java.io.OutputStream), OutputStreamWriter (java.io.OutputStreamWriter), Writer (java.io.Writer)
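
The "%09d" format above zero-pads each key to nine digits, so the string order of the generated lines matches their numeric order, which is what the "Create sorted key, value pairs" comment relies on. A small standalone illustration of that property (not part of the Hadoop test):

public class PaddedKeyDemo {
    public static void main(String[] args) {
        // Zero-padding keeps String ordering consistent with numeric ordering.
        String a = String.format("%09d", 2);    // "000000002"
        String b = String.format("%09d", 10);   // "000000010"
        System.out.println(a.compareTo(b) < 0); // true; unpadded "2" vs "10" would compare the other way
    }
}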

Example 53 with OutputStreamWriter

Use of java.io.OutputStreamWriter in project hadoop by apache.

The class TestYarnCLI, method testGetContainers:

@Test
public void testGetContainers() throws Exception {
    ApplicationCLI cli = createAndGetAppCLI();
    ApplicationId applicationId = ApplicationId.newInstance(1234, 5);
    ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(applicationId, 1);
    ContainerId containerId = ContainerId.newContainerId(attemptId, 1);
    ContainerId containerId1 = ContainerId.newContainerId(attemptId, 2);
    ContainerId containerId2 = ContainerId.newContainerId(attemptId, 3);
    long time1 = 1234, time2 = 5678;
    ContainerReport container = ContainerReport.newInstance(containerId, null, NodeId.newInstance("host", 1234), Priority.UNDEFINED, time1, time2, "diagnosticInfo", "logURL", 0, ContainerState.COMPLETE, "http://" + NodeId.newInstance("host", 2345).toString());
    ContainerReport container1 = ContainerReport.newInstance(containerId1, null, NodeId.newInstance("host", 1234), Priority.UNDEFINED, time1, time2, "diagnosticInfo", "logURL", 0, ContainerState.COMPLETE, "http://" + NodeId.newInstance("host", 2345).toString());
    ContainerReport container2 = ContainerReport.newInstance(containerId2, null, NodeId.newInstance("host", 1234), Priority.UNDEFINED, time1, 0, "diagnosticInfo", "", 0, ContainerState.RUNNING, "http://" + NodeId.newInstance("host", 2345).toString());
    List<ContainerReport> reports = new ArrayList<ContainerReport>();
    reports.add(container);
    reports.add(container1);
    reports.add(container2);
    when(client.getContainers(any(ApplicationAttemptId.class))).thenReturn(reports);
    sysOutStream.reset();
    int result = cli.run(new String[] { "container", "-list", attemptId.toString() });
    assertEquals(0, result);
    verify(client).getContainers(attemptId);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    OutputStreamWriter stream = new OutputStreamWriter(baos, "UTF-8");
    PrintWriter pw = new PrintWriter(stream);
    pw.println("Total number of containers :3");
    pw.printf(ApplicationCLI.CONTAINER_PATTERN, "Container-Id", "Start Time", "Finish Time", "State", "Host", "Node Http Address", "LOG-URL");
    pw.printf(ApplicationCLI.CONTAINER_PATTERN, "container_1234_0005_01_000001", Times.format(time1), Times.format(time2), "COMPLETE", "host:1234", "http://host:2345", "logURL");
    pw.printf(ApplicationCLI.CONTAINER_PATTERN, "container_1234_0005_01_000002", Times.format(time1), Times.format(time2), "COMPLETE", "host:1234", "http://host:2345", "logURL");
    pw.printf(ApplicationCLI.CONTAINER_PATTERN, "container_1234_0005_01_000003", Times.format(time1), "N/A", "RUNNING", "host:1234", "http://host:2345", "");
    pw.close();
    String appReportStr = baos.toString("UTF-8");
    Log.getLog().info("ExpectedOutput");
    Log.getLog().info("[" + appReportStr + "]");
    Log.getLog().info("OutputFrom command");
    String actualOutput = sysOutStream.toString("UTF-8");
    Log.getLog().info("[" + actualOutput + "]");
    Assert.assertEquals(appReportStr, actualOutput);
}
Also used: ArrayList (java.util.ArrayList), ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId), ByteArrayOutputStream (java.io.ByteArrayOutputStream), ContainerId (org.apache.hadoop.yarn.api.records.ContainerId), ContainerReport (org.apache.hadoop.yarn.api.records.ContainerReport), OutputStreamWriter (java.io.OutputStreamWriter), ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId), PrintWriter (java.io.PrintWriter), Test (org.junit.Test)
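
The expected CLI output in this test is assembled with the same OutputStreamWriter idiom: a PrintWriter over an OutputStreamWriter over a ByteArrayOutputStream, with the charset named explicitly so that baos.toString("UTF-8") decodes exactly the bytes that were written. Stripped of the YARN specifics, the pattern looks roughly like this (the column format string here is illustrative, not ApplicationCLI.CONTAINER_PATTERN):

import java.io.ByteArrayOutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;

public class ExpectedOutputDemo {
    public static void main(String[] args) throws Exception {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        // Name the charset on both the writer and toString() so they agree.
        PrintWriter pw = new PrintWriter(new OutputStreamWriter(baos, "UTF-8"));
        pw.println("Total number of containers :3");
        pw.printf("%-36s %-10s%n", "Container-Id", "State"); // illustrative layout only
        pw.close(); // flushes the writer chain into baos
        String expected = baos.toString("UTF-8");
        System.out.print(expected);
    }
}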

Example 54 with OutputStreamWriter

Use of java.io.OutputStreamWriter in project hadoop by apache.

The class TestMapReduce, method launch:

private static void launch() throws Exception {
    //
    // Generate distribution of ints.  This is the answer key.
    //
    Configuration conf = new Configuration();
    int countsToGo = counts;
    int[] dist = new int[range];
    for (int i = 0; i < range; i++) {
        double avgInts = (1.0 * countsToGo) / (range - i);
        dist[i] = (int) Math.max(0, Math.round(avgInts + (Math.sqrt(avgInts) * r.nextGaussian())));
        countsToGo -= dist[i];
    }
    if (countsToGo > 0) {
        dist[dist.length - 1] += countsToGo;
    }
    //
    // Write the answer key to a file.  
    //
    Path testdir = new Path(TEST_DIR.getAbsolutePath());
    if (!fs.mkdirs(testdir)) {
        throw new IOException("Mkdirs failed to create " + testdir.toString());
    }
    Path randomIns = new Path(testdir, "genins");
    if (!fs.mkdirs(randomIns)) {
        throw new IOException("Mkdirs failed to create " + randomIns.toString());
    }
    Path answerkey = new Path(randomIns, "answer.key");
    SequenceFile.Writer out = SequenceFile.createWriter(fs, conf, answerkey, IntWritable.class, IntWritable.class, SequenceFile.CompressionType.NONE);
    try {
        for (int i = 0; i < range; i++) {
            out.append(new IntWritable(i), new IntWritable(dist[i]));
        }
    } finally {
        out.close();
    }
    printFiles(randomIns, conf);
    //
    // Now we need to generate the random numbers according to
    // the above distribution.
    //
    // We create a lot of map tasks, each of which takes at least
    // one "line" of the distribution.  (That is, a certain number
    // X is to be generated Y number of times.)
    //
    // A map task emits Y key/val pairs.  The val is X.  The key
    // is a randomly-generated number.
    //
    // The reduce task gets its input sorted by key.  That is, sorted
    // in random order.  It then emits a single line of text for the
    // given values.  It does not emit the key.
    //
    // Because there's just one reduce task, we emit a single big
    // file of random numbers.
    //
    Path randomOuts = new Path(testdir, "genouts");
    fs.delete(randomOuts, true);
    Job genJob = Job.getInstance(conf);
    FileInputFormat.setInputPaths(genJob, randomIns);
    genJob.setInputFormatClass(SequenceFileInputFormat.class);
    genJob.setMapperClass(RandomGenMapper.class);
    FileOutputFormat.setOutputPath(genJob, randomOuts);
    genJob.setOutputKeyClass(IntWritable.class);
    genJob.setOutputValueClass(IntWritable.class);
    genJob.setReducerClass(RandomGenReducer.class);
    genJob.setNumReduceTasks(1);
    genJob.waitForCompletion(true);
    printFiles(randomOuts, conf);
    //
    // Next, we read the big file in and regenerate the 
    // original map.  It's split into a number of parts.
    // (That number is 'intermediateReduces'.)
    //
    // We have many map tasks, each of which read at least one
    // of the output numbers.  For each number read in, the
    // map task emits a key/value pair where the key is the
    // number and the value is "1".
    //
    // We have a single reduce task, which receives its input
    // sorted by the key emitted above.  For each key, there will
    // be a certain number of "1" values.  The reduce task sums
    // these values to compute how many times the given key was
    // emitted.
    //
    // The reduce task then emits a key/val pair where the key
    // is the number in question, and the value is the number of
    // times the key was emitted.  This is the same format as the
    // original answer key (except that numbers emitted zero times
    // will not appear in the regenerated key.)  The answer set
    // is split into a number of pieces.  A final MapReduce job
    // will merge them.
    //
    // There's not really a need to go to 10 reduces here 
    // instead of 1.  But we want to test what happens when
    // you have multiple reduces at once.
    //
    int intermediateReduces = 10;
    Path intermediateOuts = new Path(testdir, "intermediateouts");
    fs.delete(intermediateOuts, true);
    Job checkJob = Job.getInstance(conf);
    FileInputFormat.setInputPaths(checkJob, randomOuts);
    checkJob.setMapperClass(RandomCheckMapper.class);
    FileOutputFormat.setOutputPath(checkJob, intermediateOuts);
    checkJob.setOutputKeyClass(IntWritable.class);
    checkJob.setOutputValueClass(IntWritable.class);
    checkJob.setOutputFormatClass(MapFileOutputFormat.class);
    checkJob.setReducerClass(RandomCheckReducer.class);
    checkJob.setNumReduceTasks(intermediateReduces);
    checkJob.waitForCompletion(true);
    printFiles(intermediateOuts, conf);
    //
    // OK, now we take the output from the last job and
    // merge it down to a single file.  The map() and reduce()
    // functions don't really do anything except reemit tuples.
    // But by having a single reduce task here, we end up merging
    // all the files.
    //
    Path finalOuts = new Path(testdir, "finalouts");
    fs.delete(finalOuts, true);
    Job mergeJob = Job.getInstance(conf);
    FileInputFormat.setInputPaths(mergeJob, intermediateOuts);
    mergeJob.setInputFormatClass(SequenceFileInputFormat.class);
    mergeJob.setMapperClass(MergeMapper.class);
    FileOutputFormat.setOutputPath(mergeJob, finalOuts);
    mergeJob.setOutputKeyClass(IntWritable.class);
    mergeJob.setOutputValueClass(IntWritable.class);
    mergeJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    mergeJob.setReducerClass(MergeReducer.class);
    mergeJob.setNumReduceTasks(1);
    mergeJob.waitForCompletion(true);
    printFiles(finalOuts, conf);
    //
    // Finally, we compare the reconstructed answer key with the
    // original one.  Remember, we need to ignore zero-count items
    // in the original key.
    //
    boolean success = true;
    Path recomputedkey = new Path(finalOuts, "part-r-00000");
    SequenceFile.Reader in = new SequenceFile.Reader(fs, recomputedkey, conf);
    int totalseen = 0;
    try {
        IntWritable key = new IntWritable();
        IntWritable val = new IntWritable();
        for (int i = 0; i < range; i++) {
            if (dist[i] == 0) {
                continue;
            }
            if (!in.next(key, val)) {
                System.err.println("Cannot read entry " + i);
                success = false;
                break;
            } else {
                if (!((key.get() == i) && (val.get() == dist[i]))) {
                    System.err.println("Mismatch!  Pos=" + key.get() + ", i=" + i + ", val=" + val.get() + ", dist[i]=" + dist[i]);
                    success = false;
                }
                totalseen += val.get();
            }
        }
        if (success) {
            if (in.next(key, val)) {
                System.err.println("Unnecessary lines in recomputed key!");
                success = false;
            }
        }
    } finally {
        in.close();
    }
    int originalTotal = 0;
    for (int i = 0; i < dist.length; i++) {
        originalTotal += dist[i];
    }
    System.out.println("Original sum: " + originalTotal);
    System.out.println("Recomputed sum: " + totalseen);
    //
    // Write to "results" whether the test succeeded or not.
    //
    Path resultFile = new Path(testdir, "results");
    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fs.create(resultFile)));
    try {
        bw.write("Success=" + success + "\n");
        System.out.println("Success=" + success);
    } finally {
        bw.close();
    }
    assertTrue("testMapRed failed", success);
    fs.delete(testdir, true);
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), InputStreamReader (java.io.InputStreamReader), BufferedReader (java.io.BufferedReader), IOException (java.io.IOException), BufferedWriter (java.io.BufferedWriter), SequenceFile (org.apache.hadoop.io.SequenceFile), OutputStreamWriter (java.io.OutputStreamWriter), IntWritable (org.apache.hadoop.io.IntWritable)
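
launch() calls a printFiles helper whose body is not shown here; the BufferedReader and InputStreamReader entries in the "Also used" line above suggest it reads files back as text. A hypothetical text-dumping variant, assuming it only lists a directory and prints each file's contents (the real helper presumably also handles the SequenceFile outputs), might look like:

// Hypothetical helper, not the actual printFiles from TestMapReduce.
private static void printTextFiles(org.apache.hadoop.fs.FileSystem fs,
                                   org.apache.hadoop.fs.Path dir) throws java.io.IOException {
    for (org.apache.hadoop.fs.FileStatus status : fs.listStatus(dir)) {
        if (status.isFile()) {
            System.out.println("=== " + status.getPath() + " ===");
            try (java.io.BufferedReader reader = new java.io.BufferedReader(
                    new java.io.InputStreamReader(fs.open(status.getPath())))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    System.out.println(line);
                }
            }
        }
    }
}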

Example 55 with OutputStreamWriter

Use of java.io.OutputStreamWriter in project hadoop by apache.

The class TestMapReduceLazyOutput, method createInput:

public void createInput(FileSystem fs, int numMappers) throws Exception {
    for (int i = 0; i < numMappers; i++) {
        OutputStream os = fs.create(new Path(INPUTPATH, "text" + i + ".txt"));
        Writer wr = new OutputStreamWriter(os);
        for (String inp : INPUTLIST) {
            wr.write(inp + "\n");
        }
        wr.close();
    }
}
Also used: Path (org.apache.hadoop.fs.Path), OutputStream (java.io.OutputStream), OutputStreamWriter (java.io.OutputStreamWriter), Writer (java.io.Writer)

Aggregations

OutputStreamWriter (java.io.OutputStreamWriter): 1644
IOException (java.io.IOException): 625
BufferedWriter (java.io.BufferedWriter): 596
FileOutputStream (java.io.FileOutputStream): 594
Writer (java.io.Writer): 443
File (java.io.File): 365
PrintWriter (java.io.PrintWriter): 272
InputStreamReader (java.io.InputStreamReader): 222
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 202
OutputStream (java.io.OutputStream): 194
BufferedReader (java.io.BufferedReader): 189
Test (org.junit.Test): 123
InputStream (java.io.InputStream): 92
ArrayList (java.util.ArrayList): 90
FileNotFoundException (java.io.FileNotFoundException): 88
Path (org.apache.hadoop.fs.Path): 86
UnsupportedEncodingException (java.io.UnsupportedEncodingException): 78
URL (java.net.URL): 72
Socket (java.net.Socket): 70
HttpURLConnection (java.net.HttpURLConnection): 65