
Example 61 with BufferedWriter

Use of java.io.BufferedWriter in project hadoop by apache.

The class TestMapReduce, method launch.

private static void launch() throws Exception {
    //
    // Generate distribution of ints.  This is the answer key.
    //
    Configuration conf = new Configuration();
    int countsToGo = counts;
    int[] dist = new int[range];
    for (int i = 0; i < range; i++) {
        double avgInts = (1.0 * countsToGo) / (range - i);
        dist[i] = (int) Math.max(0, Math.round(avgInts + (Math.sqrt(avgInts) * r.nextGaussian())));
        countsToGo -= dist[i];
    }
    if (countsToGo > 0) {
        dist[dist.length - 1] += countsToGo;
    }
    //
    // Write the answer key to a file.  
    //
    Path testdir = new Path(TEST_DIR.getAbsolutePath());
    if (!fs.mkdirs(testdir)) {
        throw new IOException("Mkdirs failed to create " + testdir.toString());
    }
    Path randomIns = new Path(testdir, "genins");
    if (!fs.mkdirs(randomIns)) {
        throw new IOException("Mkdirs failed to create " + randomIns.toString());
    }
    Path answerkey = new Path(randomIns, "answer.key");
    SequenceFile.Writer out = SequenceFile.createWriter(fs, conf, answerkey, IntWritable.class, IntWritable.class, SequenceFile.CompressionType.NONE);
    try {
        for (int i = 0; i < range; i++) {
            out.append(new IntWritable(i), new IntWritable(dist[i]));
        }
    } finally {
        out.close();
    }
    printFiles(randomIns, conf);
    //
    // Now we need to generate the random numbers according to
    // the above distribution.
    //
    // We create a lot of map tasks, each of which takes at least
    // one "line" of the distribution.  (That is, a certain number
    // X is to be generated Y number of times.)
    //
    // A map task emits Y key/val pairs.  The val is X.  The key
    // is a randomly-generated number.
    //
    // The reduce task gets its input sorted by key.  That is, sorted
    // in random order.  It then emits a single line of text for each
    // of the given values.  It does not emit the key.
    //
    // Because there's just one reduce task, we emit a single big
    // file of random numbers.
    //
    Path randomOuts = new Path(testdir, "genouts");
    fs.delete(randomOuts, true);
    Job genJob = Job.getInstance(conf);
    FileInputFormat.setInputPaths(genJob, randomIns);
    genJob.setInputFormatClass(SequenceFileInputFormat.class);
    genJob.setMapperClass(RandomGenMapper.class);
    FileOutputFormat.setOutputPath(genJob, randomOuts);
    genJob.setOutputKeyClass(IntWritable.class);
    genJob.setOutputValueClass(IntWritable.class);
    genJob.setReducerClass(RandomGenReducer.class);
    genJob.setNumReduceTasks(1);
    genJob.waitForCompletion(true);
    printFiles(randomOuts, conf);
    //
    // Next, we read the big file in and regenerate the 
    // original map.  It's split into a number of parts.
    // (That number is 'intermediateReduces'.)
    //
    // We have many map tasks, each of which read at least one
    // of the output numbers.  For each number read in, the
    // map task emits a key/value pair where the key is the
    // number and the value is "1".
    //
    // We have a single reduce task, which receives its input
    // sorted by the key emitted above.  For each key, there will
    // be a certain number of "1" values.  The reduce task sums
    // these values to compute how many times the given key was
    // emitted.
    //
    // The reduce task then emits a key/val pair where the key
    // is the number in question, and the value is the number of
    // times the key was emitted.  This is the same format as the
    // original answer key (except that numbers emitted zero times
    // will not appear in the regenerated key.)  The answer set
    // is split into a number of pieces.  A final MapReduce job
    // will merge them.
    //
    // There's not really a need to go to 10 reduces here 
    // instead of 1.  But we want to test what happens when
    // you have multiple reduces at once.
    //
    int intermediateReduces = 10;
    Path intermediateOuts = new Path(testdir, "intermediateouts");
    fs.delete(intermediateOuts, true);
    Job checkJob = Job.getInstance(conf);
    FileInputFormat.setInputPaths(checkJob, randomOuts);
    checkJob.setMapperClass(RandomCheckMapper.class);
    FileOutputFormat.setOutputPath(checkJob, intermediateOuts);
    checkJob.setOutputKeyClass(IntWritable.class);
    checkJob.setOutputValueClass(IntWritable.class);
    checkJob.setOutputFormatClass(MapFileOutputFormat.class);
    checkJob.setReducerClass(RandomCheckReducer.class);
    checkJob.setNumReduceTasks(intermediateReduces);
    checkJob.waitForCompletion(true);
    printFiles(intermediateOuts, conf);
    //
    // OK, now we take the output from the last job and
    // merge it down to a single file.  The map() and reduce()
    // functions don't really do anything except reemit tuples.
    // But by having a single reduce task here, we end up merging
    // all the files.
    //
    Path finalOuts = new Path(testdir, "finalouts");
    fs.delete(finalOuts, true);
    Job mergeJob = Job.getInstance(conf);
    FileInputFormat.setInputPaths(mergeJob, intermediateOuts);
    mergeJob.setInputFormatClass(SequenceFileInputFormat.class);
    mergeJob.setMapperClass(MergeMapper.class);
    FileOutputFormat.setOutputPath(mergeJob, finalOuts);
    mergeJob.setOutputKeyClass(IntWritable.class);
    mergeJob.setOutputValueClass(IntWritable.class);
    mergeJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    mergeJob.setReducerClass(MergeReducer.class);
    mergeJob.setNumReduceTasks(1);
    mergeJob.waitForCompletion(true);
    printFiles(finalOuts, conf);
    //
    // Finally, we compare the reconstructed answer key with the
    // original one.  Remember, we need to ignore zero-count items
    // in the original key.
    //
    boolean success = true;
    Path recomputedkey = new Path(finalOuts, "part-r-00000");
    SequenceFile.Reader in = new SequenceFile.Reader(fs, recomputedkey, conf);
    int totalseen = 0;
    try {
        IntWritable key = new IntWritable();
        IntWritable val = new IntWritable();
        for (int i = 0; i < range; i++) {
            if (dist[i] == 0) {
                continue;
            }
            if (!in.next(key, val)) {
                System.err.println("Cannot read entry " + i);
                success = false;
                break;
            } else {
                if (!((key.get() == i) && (val.get() == dist[i]))) {
                    System.err.println("Mismatch!  Pos=" + key.get() + ", i=" + i + ", val=" + val.get() + ", dist[i]=" + dist[i]);
                    success = false;
                }
                totalseen += val.get();
            }
        }
        if (success) {
            if (in.next(key, val)) {
                System.err.println("Unnecessary lines in recomputed key!");
                success = false;
            }
        }
    } finally {
        in.close();
    }
    int originalTotal = 0;
    for (int i = 0; i < dist.length; i++) {
        originalTotal += dist[i];
    }
    System.out.println("Original sum: " + originalTotal);
    System.out.println("Recomputed sum: " + totalseen);
    //
    // Write to "results" whether the test succeeded or not.
    //
    Path resultFile = new Path(testdir, "results");
    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fs.create(resultFile)));
    try {
        bw.write("Success=" + success + "\n");
        System.out.println("Success=" + success);
    } finally {
        bw.close();
    }
    assertTrue("testMapRed failed", success);
    fs.delete(testdir, true);
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), InputStreamReader (java.io.InputStreamReader), BufferedReader (java.io.BufferedReader), IOException (java.io.IOException), BufferedWriter (java.io.BufferedWriter), SequenceFile (org.apache.hadoop.io.SequenceFile), OutputStreamWriter (java.io.OutputStreamWriter), IntWritable (org.apache.hadoop.io.IntWritable)
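
Following up on Example 61: a minimal sketch (not part of the Hadoop test) of the results-file write at the end of launch(), rewritten with try-with-resources so the stream is closed even on failure. The class name ResultsWriterSketch, the /tmp directory, the explicit UTF-8 charset, and the main() driver are illustrative assumptions; the FileSystem and Path usage mirrors the test above.

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ResultsWriterSketch {

    // Write a one-line "Success=<flag>" record to <dir>/results on the given FileSystem.
    static void writeResult(FileSystem fs, Path dir, boolean success) throws IOException {
        Path resultFile = new Path(dir, "results");
        // fs.create() returns an FSDataOutputStream; wrapping it in a BufferedWriter
        // lets us write small text pieces without issuing tiny writes to the stream.
        try (BufferedWriter bw = new BufferedWriter(
                new OutputStreamWriter(fs.create(resultFile), StandardCharsets.UTF_8))) {
            bw.write("Success=" + success + "\n");
        }
    }

    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.getLocal(new Configuration());
        writeResult(fs, new Path("/tmp/testmapred"), true);
    }
}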

Example 62 with BufferedWriter

Use of java.io.BufferedWriter in project hadoop by apache.

The class TestLocalRunner, method makeNumberFile.

/**
   * Write out an input file containing an integer.
   *
   * @param fileNum the file number to write to.
   * @param value the value to write to the file
   * @return the path of the written file.
   */
private Path makeNumberFile(int fileNum, int value) throws IOException {
    Path workDir = getNumberDirPath();
    Path filePath = new Path(workDir, "file" + fileNum);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    OutputStream os = fs.create(filePath);
    BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os));
    w.write("" + value);
    w.close();
    return filePath;
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), FileSystem (org.apache.hadoop.fs.FileSystem), OutputStream (java.io.OutputStream), OutputStreamWriter (java.io.OutputStreamWriter), BufferedWriter (java.io.BufferedWriter)
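
A hedged sketch of the same single-integer file pattern as makeNumberFile above, with try-with-resources and a matching read helper added. The class and method names and the /tmp/numbers directory are assumptions for illustration, not part of TestLocalRunner.

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class NumberFileSketch {

    // Write the integer as text to <dir>/file<fileNum> and return the path.
    static Path writeNumber(FileSystem fs, Path dir, int fileNum, int value) throws IOException {
        Path filePath = new Path(dir, "file" + fileNum);
        try (BufferedWriter w = new BufferedWriter(new OutputStreamWriter(fs.create(filePath)))) {
            w.write(Integer.toString(value));
        }
        return filePath;
    }

    // Read the integer back from the first line of the file.
    static int readNumber(FileSystem fs, Path filePath) throws IOException {
        try (BufferedReader r = new BufferedReader(new InputStreamReader(fs.open(filePath)))) {
            return Integer.parseInt(r.readLine().trim());
        }
    }

    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.getLocal(new Configuration());
        Path p = writeNumber(fs, new Path("/tmp/numbers"), 0, 42);
        System.out.println(readNumber(fs, p)); // prints 42
    }
}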

Example 63 with BufferedWriter

Use of java.io.BufferedWriter in project hadoop by apache.

The class TestCodec, method testGzipCodecWrite.

private void testGzipCodecWrite(boolean useNative) throws IOException {
    // Create a gzipped file using a compressor from the CodecPool,
    // and try to read it back via the regular GZIPInputStream.
    // Use native libs per the parameter
    Configuration conf = new Configuration();
    if (useNative) {
        assumeTrue(ZlibFactory.isNativeZlibLoaded(conf));
    } else {
        assertFalse("ZlibFactory is using native libs against request", ZlibFactory.isNativeZlibLoaded(conf));
    }
    // Ensure that the CodecPool has a BuiltInZlibDeflater in it.
    Compressor zlibCompressor = ZlibFactory.getZlibCompressor(conf);
    assertNotNull("zlibCompressor is null!", zlibCompressor);
    assertTrue("ZlibFactory returned unexpected deflator", useNative ? zlibCompressor instanceof ZlibCompressor : zlibCompressor instanceof BuiltInZlibDeflater);
    CodecPool.returnCompressor(zlibCompressor);
    // Create a GZIP text file via the Compressor interface.
    CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
    CompressionCodec codec = ccf.getCodec(new Path("foo.gz"));
    assertTrue("Codec for .gz file is not GzipCodec", codec instanceof GzipCodec);
    final String msg = "This is the message we are going to compress.";
    final String fileName = new Path(GenericTestUtils.getTempPath("testGzipCodecWrite.txt.gz")).toString();
    BufferedWriter w = null;
    Compressor gzipCompressor = CodecPool.getCompressor(codec);
    if (null != gzipCompressor) {
        // If it gives us back a Compressor, we should be able to use this
        // to write files we can then read back with Java's gzip tools.
        OutputStream os = new CompressorStream(new FileOutputStream(fileName), gzipCompressor);
        w = new BufferedWriter(new OutputStreamWriter(os));
        w.write(msg);
        w.close();
        CodecPool.returnCompressor(gzipCompressor);
        verifyGzipFile(fileName, msg);
    }
    // Create a gzip text file via codec.getOutputStream().
    w = new BufferedWriter(new OutputStreamWriter(codec.createOutputStream(new FileOutputStream(fileName))));
    w.write(msg);
    w.close();
    verifyGzipFile(fileName, msg);
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), ZlibCompressor (org.apache.hadoop.io.compress.zlib.ZlibCompressor), DataOutputStream (java.io.DataOutputStream), GZIPOutputStream (java.util.zip.GZIPOutputStream), ByteArrayOutputStream (java.io.ByteArrayOutputStream), BufferedOutputStream (java.io.BufferedOutputStream), OutputStream (java.io.OutputStream), FileOutputStream (java.io.FileOutputStream), BuiltInZlibDeflater (org.apache.hadoop.io.compress.zlib.BuiltInZlibDeflater), BufferedWriter (java.io.BufferedWriter), OutputStreamWriter (java.io.OutputStreamWriter)
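
A minimal sketch of the codec.createOutputStream() path exercised at the end of testGzipCodecWrite: write a gzip file through the codec picked by file suffix, then read it back with the JDK's own GZIPInputStream to confirm the output is plain gzip. The file name under /tmp and the class name are assumptions; the Hadoop calls (CompressionCodecFactory.getCodec, CompressionCodec.createOutputStream) are the same ones used in the test.

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.zip.GZIPInputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class GzipWriteSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // The factory maps the ".gz" suffix to GzipCodec.
        CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(new Path("sketch.txt.gz"));
        String fileName = "/tmp/sketch.txt.gz";
        String msg = "compressed through the Hadoop codec";

        // Write through the codec; the BufferedWriter sits on top of the compressed stream.
        try (BufferedWriter w = new BufferedWriter(new OutputStreamWriter(
                codec.createOutputStream(new FileOutputStream(fileName))))) {
            w.write(msg);
        }

        // Read back with java.util.zip only; no Hadoop classes involved.
        try (BufferedReader r = new BufferedReader(new InputStreamReader(
                new GZIPInputStream(new FileInputStream(fileName))))) {
            System.out.println(msg.equals(r.readLine())); // expect true
        }
    }
}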

Example 64 with BufferedWriter

Use of java.io.BufferedWriter in project hadoop by apache.

The class TestCodec, method testGzipCodecRead.

@Test
public void testGzipCodecRead() throws IOException {
    // Create a gzipped file and try to read it back, using a decompressor
    // from the CodecPool.
    // Don't use native libs for this test.
    Configuration conf = new Configuration();
    ZlibFactory.setNativeZlibLoaded(false);
    // Ensure that the CodecPool has a BuiltInZlibInflater in it.
    Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);
    assertNotNull("zlibDecompressor is null!", zlibDecompressor);
    assertTrue("ZlibFactory returned unexpected inflator", zlibDecompressor instanceof BuiltInZlibInflater);
    CodecPool.returnDecompressor(zlibDecompressor);
    // Now create a GZip text file.
    Path f = new Path(GenericTestUtils.getTempPath("testGzipCodecRead.txt.gz"));
    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(f.toString()))));
    final String msg = "This is the message in the file!";
    bw.write(msg);
    bw.close();
    // Now read it back, using the CodecPool to establish the
    // decompressor to use.
    CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
    CompressionCodec codec = ccf.getCodec(f);
    Decompressor decompressor = CodecPool.getDecompressor(codec);
    FileSystem fs = FileSystem.getLocal(conf);
    InputStream is = fs.open(f);
    is = codec.createInputStream(is, decompressor);
    BufferedReader br = new BufferedReader(new InputStreamReader(is));
    String line = br.readLine();
    assertEquals("Didn't get the same message back!", msg, line);
    br.close();
}
Also used: BuiltInZlibInflater (org.apache.hadoop.io.compress.zlib.BuiltInZlibInflater), Path (org.apache.hadoop.fs.Path), BuiltInGzipDecompressor (org.apache.hadoop.io.compress.zlib.BuiltInGzipDecompressor), Configuration (org.apache.hadoop.conf.Configuration), InputStreamReader (java.io.InputStreamReader), GZIPInputStream (java.util.zip.GZIPInputStream), BufferedInputStream (java.io.BufferedInputStream), ByteArrayInputStream (java.io.ByteArrayInputStream), DataInputStream (java.io.DataInputStream), FileInputStream (java.io.FileInputStream), InputStream (java.io.InputStream), BufferedWriter (java.io.BufferedWriter), GZIPOutputStream (java.util.zip.GZIPOutputStream), FileOutputStream (java.io.FileOutputStream), FileSystem (org.apache.hadoop.fs.FileSystem), BufferedReader (java.io.BufferedReader), OutputStreamWriter (java.io.OutputStreamWriter), Test (org.junit.Test)
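
A hedged sketch of the read path in testGzipCodecRead, with the pooled decompressor explicitly returned to the CodecPool and the streams closed via try-with-resources (the test leaves both implicit). The class and method names are assumptions; the codec and pool calls are the ones used above.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.Decompressor;

public class GzipReadSketch {

    // Read the first line of a compressed file, choosing the codec by file suffix.
    static String readFirstLine(Path f, Configuration conf) throws IOException {
        CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(f);
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        FileSystem fs = FileSystem.getLocal(conf);
        try (InputStream is = codec.createInputStream(fs.open(f), decompressor);
             BufferedReader br = new BufferedReader(new InputStreamReader(is))) {
            return br.readLine();
        } finally {
            // Hand the pooled decompressor back so later callers can reuse it.
            CodecPool.returnDecompressor(decompressor);
        }
    }
}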

Example 65 with BufferedWriter

Use of java.io.BufferedWriter in project hadoop by apache.

The class TestCodec, method testGzipLongOverflow.

@Test
public void testGzipLongOverflow() throws IOException {
    LOG.info("testGzipLongOverflow");
    // Don't use native libs for this test.
    Configuration conf = new Configuration();
    ZlibFactory.setNativeZlibLoaded(false);
    assertFalse("ZlibFactory is using native libs against request", ZlibFactory.isNativeZlibLoaded(conf));
    // Ensure that the CodecPool has a BuiltInZlibInflater in it.
    Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);
    assertNotNull("zlibDecompressor is null!", zlibDecompressor);
    assertTrue("ZlibFactory returned unexpected inflator", zlibDecompressor instanceof BuiltInZlibInflater);
    CodecPool.returnDecompressor(zlibDecompressor);
    // Now create a GZip text file.
    Path f = new Path(GenericTestUtils.getTempPath("testGzipLongOverflow.bin.gz"));
    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(f.toString()))));
    final int NBUF = 1024 * 4 + 1;
    final char[] buf = new char[1024 * 1024];
    for (int i = 0; i < buf.length; i++) buf[i] = '\0';
    for (int i = 0; i < NBUF; i++) {
        bw.write(buf);
    }
    bw.close();
    // Now read it back, using the CodecPool to establish the
    // decompressor to use.
    CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
    CompressionCodec codec = ccf.getCodec(f);
    Decompressor decompressor = CodecPool.getDecompressor(codec);
    FileSystem fs = FileSystem.getLocal(conf);
    InputStream is = fs.open(f);
    is = codec.createInputStream(is, decompressor);
    BufferedReader br = new BufferedReader(new InputStreamReader(is));
    for (int j = 0; j < NBUF; j++) {
        int n = br.read(buf);
        assertEquals("got wrong read length!", n, buf.length);
        for (int i = 0; i < buf.length; i++) assertEquals("got wrong byte!", buf[i], '\0');
    }
    br.close();
}
Also used: BuiltInZlibInflater (org.apache.hadoop.io.compress.zlib.BuiltInZlibInflater), Path (org.apache.hadoop.fs.Path), BuiltInGzipDecompressor (org.apache.hadoop.io.compress.zlib.BuiltInGzipDecompressor), Configuration (org.apache.hadoop.conf.Configuration), InputStreamReader (java.io.InputStreamReader), GZIPInputStream (java.util.zip.GZIPInputStream), BufferedInputStream (java.io.BufferedInputStream), ByteArrayInputStream (java.io.ByteArrayInputStream), DataInputStream (java.io.DataInputStream), FileInputStream (java.io.FileInputStream), InputStream (java.io.InputStream), BufferedWriter (java.io.BufferedWriter), GZIPOutputStream (java.util.zip.GZIPOutputStream), FileOutputStream (java.io.FileOutputStream), FileSystem (org.apache.hadoop.fs.FileSystem), BufferedReader (java.io.BufferedReader), OutputStreamWriter (java.io.OutputStreamWriter), Test (org.junit.Test)
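
One caveat worth noting about the read loop in testGzipLongOverflow: in general Reader.read(char[]) may return fewer characters than the buffer holds, so code that needs a completely filled buffer can loop until the buffer is full or the stream ends. A small helper sketch, not taken from Hadoop:

import java.io.IOException;
import java.io.Reader;

public final class ReadFully {

    // Reads until buf is full or EOF; returns the number of chars actually read.
    static int readFully(Reader r, char[] buf) throws IOException {
        int off = 0;
        while (off < buf.length) {
            int n = r.read(buf, off, buf.length - off);
            if (n < 0) {
                break; // end of stream before the buffer was filled
            }
            off += n;
        }
        return off;
    }
}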

Aggregations

Classes most often used together with BufferedWriter across the indexed sources, with usage counts:

BufferedWriter (java.io.BufferedWriter): 4214
FileWriter (java.io.FileWriter): 2181
File (java.io.File): 1879
IOException (java.io.IOException): 1847
OutputStreamWriter (java.io.OutputStreamWriter): 1344
BufferedReader (java.io.BufferedReader): 747
FileOutputStream (java.io.FileOutputStream): 656
ArrayList (java.util.ArrayList): 386
FileReader (java.io.FileReader): 376
InputStreamReader (java.io.InputStreamReader): 349
PrintWriter (java.io.PrintWriter): 324
Writer (java.io.Writer): 324
Test (org.junit.Test): 286
FileNotFoundException (java.io.FileNotFoundException): 217
OutputStream (java.io.OutputStream): 213
HashMap (java.util.HashMap): 200
Path (java.nio.file.Path): 177
InputStream (java.io.InputStream): 171
FileInputStream (java.io.FileInputStream): 158
StringWriter (java.io.StringWriter): 143