use of java.io.BufferedWriter in project hadoop by apache.
the class TestMapReduce method launch.
/**
 * Runs the full MapReduce round trip and verifies its result.
 *
 * <p>The method builds a random distribution over {@code range} integers,
 * stores it as the answer key, and then runs three chained jobs (generate,
 * check, merge). The merged output is compared with the answer key, the
 * outcome is written to a "results" file in the test directory, and the
 * test directory is deleted once the final assertion has passed.
 *
 * @throws Exception if a directory cannot be created, a job cannot be
 *         submitted, or any file-system operation fails
 */
private static void launch() throws Exception {
  //
  // Generate distribution of ints. This is the answer key.
  //
  Configuration conf = new Configuration();
  int countsToGo = counts;
  int[] dist = new int[range];
  for (int i = 0; i < range; i++) {
    // Spread the remaining count evenly over the remaining slots, then
    // perturb it with Gaussian noise; never go below zero.
    double avgInts = (1.0 * countsToGo) / (range - i);
    dist[i] = (int) Math.max(0, Math.round(avgInts + (Math.sqrt(avgInts) * r.nextGaussian())));
    countsToGo -= dist[i];
  }
  if (countsToGo > 0) {
    // Whatever is left over is assigned to the last slot.
    dist[dist.length - 1] += countsToGo;
  }
  //
  // Write the answer key to a file.
  //
  Path testdir = new Path(TEST_DIR.getAbsolutePath());
  if (!fs.mkdirs(testdir)) {
    throw new IOException("Mkdirs failed to create " + testdir.toString());
  }
  Path randomIns = new Path(testdir, "genins");
  if (!fs.mkdirs(randomIns)) {
    throw new IOException("Mkdirs failed to create " + randomIns.toString());
  }
  Path answerkey = new Path(randomIns, "answer.key");
  SequenceFile.Writer out = SequenceFile.createWriter(fs, conf, answerkey, IntWritable.class, IntWritable.class, SequenceFile.CompressionType.NONE);
  try {
    for (int i = 0; i < range; i++) {
      out.append(new IntWritable(i), new IntWritable(dist[i]));
    }
  } finally {
    out.close();
  }
  printFiles(randomIns, conf);
  //
  // Now we need to generate the random numbers according to
  // the above distribution.
  //
  // We create a lot of map tasks, each of which takes at least
  // one "line" of the distribution. (That is, a certain number
  // X is to be generated Y number of times.)
  //
  // A map task emits Y key/val pairs. The val is X. The key
  // is a randomly-generated number.
  //
  // The reduce task gets its input sorted by key. That is, sorted
  // in random order. It then emits a single line of text
  // for the given values. It does not emit the key.
  //
  // Because there's just one reduce task, we emit a single big
  // file of random numbers.
  //
  Path randomOuts = new Path(testdir, "genouts");
  fs.delete(randomOuts, true);
  Job genJob = Job.getInstance(conf);
  FileInputFormat.setInputPaths(genJob, randomIns);
  genJob.setInputFormatClass(SequenceFileInputFormat.class);
  genJob.setMapperClass(RandomGenMapper.class);
  FileOutputFormat.setOutputPath(genJob, randomOuts);
  genJob.setOutputKeyClass(IntWritable.class);
  genJob.setOutputValueClass(IntWritable.class);
  genJob.setReducerClass(RandomGenReducer.class);
  genJob.setNumReduceTasks(1);
  // Fail right here if the job did not succeed, instead of tripping over
  // its missing output in the next stage.
  assertTrue("Random number generation job failed", genJob.waitForCompletion(true));
  printFiles(randomOuts, conf);
  //
  // Next, we read the big file in and regenerate the
  // original map. It's split into a number of parts.
  // (That number is 'intermediateReduces'.)
  //
  // We have many map tasks, each of which read at least one
  // of the output numbers. For each number read in, the
  // map task emits a key/value pair where the key is the
  // number and the value is "1".
  //
  // Each of the 'intermediateReduces' reduce tasks receives its
  // input sorted by the key emitted above. For each key, there will
  // be a certain number of "1" values. The reduce task sums
  // these values to compute how many times the given key was
  // emitted.
  //
  // The reduce task then emits a key/val pair where the key
  // is the number in question, and the value is the number of
  // times the key was emitted. This is the same format as the
  // original answer key (except that numbers emitted zero times
  // will not appear in the regenerated key.) The answer set
  // is split into a number of pieces. A final MapReduce job
  // will merge them.
  //
  // There's not really a need to go to 10 reduces here
  // instead of 1. But we want to test what happens when
  // you have multiple reduces at once.
  //
  int intermediateReduces = 10;
  Path intermediateOuts = new Path(testdir, "intermediateouts");
  fs.delete(intermediateOuts, true);
  Job checkJob = Job.getInstance(conf);
  FileInputFormat.setInputPaths(checkJob, randomOuts);
  checkJob.setMapperClass(RandomCheckMapper.class);
  FileOutputFormat.setOutputPath(checkJob, intermediateOuts);
  checkJob.setOutputKeyClass(IntWritable.class);
  checkJob.setOutputValueClass(IntWritable.class);
  checkJob.setOutputFormatClass(MapFileOutputFormat.class);
  checkJob.setReducerClass(RandomCheckReducer.class);
  checkJob.setNumReduceTasks(intermediateReduces);
  assertTrue("Random number check job failed", checkJob.waitForCompletion(true));
  printFiles(intermediateOuts, conf);
  //
  // OK, now we take the output from the last job and
  // merge it down to a single file. The map() and reduce()
  // functions don't really do anything except reemit tuples.
  // But by having a single reduce task here, we end up merging
  // all the files.
  //
  Path finalOuts = new Path(testdir, "finalouts");
  fs.delete(finalOuts, true);
  Job mergeJob = Job.getInstance(conf);
  FileInputFormat.setInputPaths(mergeJob, intermediateOuts);
  mergeJob.setInputFormatClass(SequenceFileInputFormat.class);
  mergeJob.setMapperClass(MergeMapper.class);
  FileOutputFormat.setOutputPath(mergeJob, finalOuts);
  mergeJob.setOutputKeyClass(IntWritable.class);
  mergeJob.setOutputValueClass(IntWritable.class);
  mergeJob.setOutputFormatClass(SequenceFileOutputFormat.class);
  mergeJob.setReducerClass(MergeReducer.class);
  mergeJob.setNumReduceTasks(1);
  assertTrue("Merge job failed", mergeJob.waitForCompletion(true));
  printFiles(finalOuts, conf);
  //
  // Finally, we compare the reconstructed answer key with the
  // original one. Remember, we need to ignore zero-count items
  // in the original key.
  //
  boolean success = true;
  Path recomputedkey = new Path(finalOuts, "part-r-00000");
  SequenceFile.Reader in = new SequenceFile.Reader(fs, recomputedkey, conf);
  int totalseen = 0;
  try {
    IntWritable key = new IntWritable();
    IntWritable val = new IntWritable();
    for (int i = 0; i < range; i++) {
      if (dist[i] == 0) {
        continue;
      }
      if (!in.next(key, val)) {
        System.err.println("Cannot read entry " + i);
        success = false;
        break;
      } else {
        if (!((key.get() == i) && (val.get() == dist[i]))) {
          System.err.println("Mismatch! Pos=" + key.get() + ", i=" + i + ", val=" + val.get() + ", dist[i]=" + dist[i]);
          success = false;
        }
        totalseen += val.get();
      }
    }
    if (success) {
      // Every expected entry matched; any further record is surplus.
      if (in.next(key, val)) {
        System.err.println("Unnecessary lines in recomputed key!");
        success = false;
      }
    }
  } finally {
    in.close();
  }
  int originalTotal = 0;
  for (int i = 0; i < dist.length; i++) {
    originalTotal += dist[i];
  }
  System.out.println("Original sum: " + originalTotal);
  System.out.println("Recomputed sum: " + totalseen);
  //
  // Write to "results" whether the test succeeded or not.
  //
  Path resultFile = new Path(testdir, "results");
  BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fs.create(resultFile)));
  try {
    bw.write("Success=" + success + "\n");
    System.out.println("Success=" + success);
  } finally {
    bw.close();
  }
  assertTrue("testMapRed failed", success);
  // Only reached when the assertion above passed; on failure the test
  // directory is left in place.
  fs.delete(testdir, true);
}
use of java.io.BufferedWriter in project hadoop by apache.
the class TestLocalRunner method makeNumberFile.
/**
 * Write out an input file containing an integer.
 *
 * <p>The file is named {@code "file" + fileNum} inside the directory
 * returned by {@code getNumberDirPath()} and is created on the local file
 * system. The value is written as its decimal string with no trailing
 * newline.
 *
 * @param fileNum the file number to write to.
 * @param value the value to write to the file
 * @return the path of the written file.
 * @throws IOException if the file cannot be created or written
 */
private Path makeNumberFile(int fileNum, int value) throws IOException {
  Path workDir = getNumberDirPath();
  Path filePath = new Path(workDir, "file" + fileNum);
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.getLocal(conf);
  OutputStream os = fs.create(filePath);
  BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os));
  try {
    w.write(String.valueOf(value));
  } finally {
    // Close even when the write fails so the underlying stream is not leaked.
    w.close();
  }
  return filePath;
}
use of java.io.BufferedWriter in project hadoop by apache.
the class TestCodec method testGzipCodecWrite.
/**
 * Writes a gzipped text file twice, once through a {@code Compressor}
 * borrowed from the {@code CodecPool} and once through
 * {@code codec.createOutputStream()}, and checks each file with
 * {@code verifyGzipFile}.
 *
 * @param useNative whether the native zlib implementation is expected to be
 *        in use; when true the test is skipped unless native zlib is loaded
 * @throws IOException if the test file cannot be written
 */
private void testGzipCodecWrite(boolean useNative) throws IOException {
  // Create a gzipped file using a compressor from the CodecPool,
  // and try to read it back via the regular GZIPInputStream.
  // Use native libs per the parameter
  Configuration conf = new Configuration();
  if (useNative) {
    assumeTrue(ZlibFactory.isNativeZlibLoaded(conf));
  } else {
    assertFalse("ZlibFactory is using native libs against request", ZlibFactory.isNativeZlibLoaded(conf));
  }
  // Ensure that the CodecPool has a BuiltInZlibDeflater in it.
  Compressor zlibCompressor = ZlibFactory.getZlibCompressor(conf);
  assertNotNull("zlibCompressor is null!", zlibCompressor);
  assertTrue("ZlibFactory returned unexpected deflator", useNative ? zlibCompressor instanceof ZlibCompressor : zlibCompressor instanceof BuiltInZlibDeflater);
  CodecPool.returnCompressor(zlibCompressor);
  // Create a GZIP text file via the Compressor interface.
  CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
  CompressionCodec codec = ccf.getCodec(new Path("foo.gz"));
  assertTrue("Codec for .gz file is not GzipCodec", codec instanceof GzipCodec);
  final String msg = "This is the message we are going to compress.";
  final String fileName = new Path(GenericTestUtils.getTempPath("testGzipCodecWrite.txt.gz")).toString();
  Compressor gzipCompressor = CodecPool.getCompressor(codec);
  if (null != gzipCompressor) {
    // If it gives us back a Compressor, we should be able to use this
    // to write files we can then read back with Java's gzip tools.
    try {
      OutputStream os = new CompressorStream(new FileOutputStream(fileName), gzipCompressor);
      BufferedWriter pooledWriter = new BufferedWriter(new OutputStreamWriter(os));
      try {
        pooledWriter.write(msg);
      } finally {
        pooledWriter.close();
      }
    } finally {
      // Hand the compressor back to the pool even if writing failed.
      CodecPool.returnCompressor(gzipCompressor);
    }
    verifyGzipFile(fileName, msg);
  }
  // Create a gzip text file via codec.getOutputStream().
  BufferedWriter codecWriter = new BufferedWriter(new OutputStreamWriter(codec.createOutputStream(new FileOutputStream(fileName))));
  try {
    codecWriter.write(msg);
  } finally {
    codecWriter.close();
  }
  verifyGzipFile(fileName, msg);
}
use of java.io.BufferedWriter in project hadoop by apache.
the class TestCodec method testGzipCodecRead.
/**
 * Writes a gzip file with {@code GZIPOutputStream} and reads it back
 * through the codec selected for the file, using a decompressor taken from
 * the {@code CodecPool}.
 *
 * @throws IOException if the test file cannot be written or read
 */
@Test
public void testGzipCodecRead() throws IOException {
  // Create a gzipped file and try to read it back, using a decompressor
  // from the CodecPool.
  // Don't use native libs for this test.
  Configuration conf = new Configuration();
  ZlibFactory.setNativeZlibLoaded(false);
  // Ensure that the CodecPool has a BuiltInZlibInflater in it.
  Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);
  assertNotNull("zlibDecompressor is null!", zlibDecompressor);
  assertTrue("ZlibFactory returned unexpected inflator", zlibDecompressor instanceof BuiltInZlibInflater);
  CodecPool.returnDecompressor(zlibDecompressor);
  // Now create a GZip text file.
  Path f = new Path(GenericTestUtils.getTempPath("testGzipCodecRead.txt.gz"));
  final String msg = "This is the message in the file!";
  BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(f.toString()))));
  try {
    bw.write(msg);
  } finally {
    bw.close();
  }
  // Now read it back, using the CodecPool to establish the
  // decompressor to use.
  CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
  CompressionCodec codec = ccf.getCodec(f);
  Decompressor decompressor = CodecPool.getDecompressor(codec);
  try {
    FileSystem fs = FileSystem.getLocal(conf);
    InputStream is = fs.open(f);
    is = codec.createInputStream(is, decompressor);
    BufferedReader br = new BufferedReader(new InputStreamReader(is));
    try {
      String line = br.readLine();
      assertEquals("Didn't get the same message back!", msg, line);
    } finally {
      // Close the reader even when the assertion fails.
      br.close();
    }
  } finally {
    // The decompressor was borrowed from the pool above; give it back.
    CodecPool.returnDecompressor(decompressor);
  }
}
use of java.io.BufferedWriter in project hadoop by apache.
the class TestCodec method testGzipLongOverflow.
/**
 * Writes a gzip file holding {@code NBUF} buffers of 1024 * 1024 NUL
 * characters each (4097 buffers in total) and reads it back through the
 * codec selected for the file, using a decompressor taken from the
 * {@code CodecPool}.
 *
 * @throws IOException if the test file cannot be written or read
 */
@Test
public void testGzipLongOverflow() throws IOException {
  LOG.info("testGzipLongOverflow");
  // Don't use native libs for this test.
  Configuration conf = new Configuration();
  ZlibFactory.setNativeZlibLoaded(false);
  assertFalse("ZlibFactory is using native libs against request", ZlibFactory.isNativeZlibLoaded(conf));
  // Ensure that the CodecPool has a BuiltInZlibInflater in it.
  Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);
  assertNotNull("zlibDecompressor is null!", zlibDecompressor);
  assertTrue("ZlibFactory returned unexpected inflator", zlibDecompressor instanceof BuiltInZlibInflater);
  CodecPool.returnDecompressor(zlibDecompressor);
  // Now create a GZip text file.
  Path f = new Path(GenericTestUtils.getTempPath("testGzipLongOverflow.bin.gz"));
  final int NBUF = 1024 * 4 + 1;
  // A freshly allocated char[] is already filled with '\0', so no explicit
  // initialisation loop is needed.
  final char[] buf = new char[1024 * 1024];
  BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(f.toString()))));
  try {
    for (int i = 0; i < NBUF; i++) {
      bw.write(buf);
    }
  } finally {
    bw.close();
  }
  // Now read it back, using the CodecPool to establish the
  // decompressor to use.
  CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
  CompressionCodec codec = ccf.getCodec(f);
  Decompressor decompressor = CodecPool.getDecompressor(codec);
  try {
    FileSystem fs = FileSystem.getLocal(conf);
    InputStream is = fs.open(f);
    is = codec.createInputStream(is, decompressor);
    BufferedReader br = new BufferedReader(new InputStreamReader(is));
    try {
      for (int j = 0; j < NBUF; j++) {
        int n = br.read(buf);
        // JUnit expects (message, expected, actual).
        assertEquals("got wrong read length!", buf.length, n);
        for (int i = 0; i < buf.length; i++) {
          assertEquals("got wrong byte!", '\0', buf[i]);
        }
      }
    } finally {
      // Close the reader even when an assertion fails.
      br.close();
    }
  } finally {
    // The decompressor was borrowed from the pool above; give it back.
    CodecPool.returnDecompressor(decompressor);
  }
}
Aggregations