Use of org.apache.hadoop.io.compress.CompressionCodec in project hadoop by apache.
The class SequenceFileAsBinaryOutputFormat, method getRecordWriter:
@Override
public RecordWriter<BytesWritable, BytesWritable> getRecordWriter(FileSystem ignored,
    JobConf job, String name, Progressable progress) throws IOException {
  // get the path of the temporary output file
  Path file = FileOutputFormat.getTaskOutputPath(job, name);
  FileSystem fs = file.getFileSystem(job);
  CompressionCodec codec = null;
  CompressionType compressionType = CompressionType.NONE;
  if (getCompressOutput(job)) {
    // find the kind of compression to do
    compressionType = getOutputCompressionType(job);
    // find the right codec
    Class<? extends CompressionCodec> codecClass =
        getOutputCompressorClass(job, DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, job);
  }
  final SequenceFile.Writer out = SequenceFile.createWriter(fs, job, file,
      getSequenceFileOutputKeyClass(job), getSequenceFileOutputValueClass(job),
      compressionType, codec, progress);
  return new RecordWriter<BytesWritable, BytesWritable>() {

    private WritableValueBytes wvaluebytes = new WritableValueBytes();

    public void write(BytesWritable bkey, BytesWritable bvalue) throws IOException {
      wvaluebytes.reset(bvalue);
      out.appendRaw(bkey.getBytes(), 0, bkey.getLength(), wvaluebytes);
      wvaluebytes.reset(null);
    }

    public void close(Reporter reporter) throws IOException {
      out.close();
    }
  };
}
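The writer above is driven entirely by job configuration: getCompressOutput, getOutputCompressionType, and getOutputCompressorClass all read settings placed on the JobConf by the driver. A minimal driver-side sketch of that wiring follows; the output path, key/value classes, and the choice of GzipCodec with BLOCK compression are illustrative assumptions, not taken from the Hadoop source.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileAsBinaryOutputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;

public class BinarySequenceFileJobSketch {
  public static void main(String[] args) {
    JobConf job = new JobConf();
    job.setOutputFormat(SequenceFileAsBinaryOutputFormat.class);
    // Key/value classes recorded in the SequenceFile header; the records
    // themselves are appended as raw bytes by the RecordWriter above.
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, Text.class);
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, Text.class);
    // These settings are what getCompressOutput(), getOutputCompressionType()
    // and getOutputCompressorClass() read back in getRecordWriter().
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
    FileOutputFormat.setOutputPath(job, new Path("/tmp/binary-seqfile-out")); // illustrative path
  }
}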
Use of org.apache.hadoop.io.compress.CompressionCodec in project hadoop by apache.
The class TextOutputFormat, method getRecordWriter:
public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, String name,
    Progressable progress) throws IOException {
  boolean isCompressed = getCompressOutput(job);
  String keyValueSeparator = job.get("mapreduce.output.textoutputformat.separator", "\t");
  if (!isCompressed) {
    Path file = FileOutputFormat.getTaskOutputPath(job, name);
    FileSystem fs = file.getFileSystem(job);
    FSDataOutputStream fileOut = fs.create(file, progress);
    return new LineRecordWriter<K, V>(fileOut, keyValueSeparator);
  } else {
    Class<? extends CompressionCodec> codecClass =
        getOutputCompressorClass(job, GzipCodec.class);
    // create the named codec
    CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job);
    // build the filename including the extension
    Path file = FileOutputFormat.getTaskOutputPath(job, name + codec.getDefaultExtension());
    FileSystem fs = file.getFileSystem(job);
    FSDataOutputStream fileOut = fs.create(file, progress);
    return new LineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
        keyValueSeparator);
  }
}
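A corresponding driver-side sketch for this path: setting the key/value separator read above and enabling gzip compression so the compressed branch is taken. The output path and key/value classes are illustrative assumptions.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextOutputFormat;

public class TextOutputJobSketch {
  public static void main(String[] args) {
    JobConf job = new JobConf();
    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    // Read back by getRecordWriter() above; defaults to "\t".
    job.set("mapreduce.output.textoutputformat.separator", ",");
    // With compression enabled, each part file gets the codec's default extension (".gz").
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    FileOutputFormat.setOutputPath(job, new Path("/tmp/text-out")); // illustrative path
  }
}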
Use of org.apache.hadoop.io.compress.CompressionCodec in project hadoop by apache.
The class ImageLoaderCurrent, method loadImage:
/* (non-Javadoc)
 * @see ImageLoader#processImage(java.io.DataInputStream, ImageVisitor, boolean)
 */
@Override
public void loadImage(DataInputStream in, ImageVisitor v, boolean skipBlocks) throws IOException {
  boolean done = false;
  try {
    v.start();
    v.visitEnclosingElement(ImageElement.FS_IMAGE);
    imageVersion = in.readInt();
    if (!canLoadVersion(imageVersion))
      throw new IOException("Cannot process fslayout version " + imageVersion);
    if (NameNodeLayoutVersion.supports(Feature.ADD_LAYOUT_FLAGS, imageVersion)) {
      LayoutFlags.read(in);
    }
    v.visit(ImageElement.IMAGE_VERSION, imageVersion);
    v.visit(ImageElement.NAMESPACE_ID, in.readInt());
    long numInodes = in.readLong();
    v.visit(ImageElement.GENERATION_STAMP, in.readLong());
    if (NameNodeLayoutVersion.supports(Feature.SEQUENTIAL_BLOCK_ID, imageVersion)) {
      v.visit(ImageElement.GENERATION_STAMP_V2, in.readLong());
      v.visit(ImageElement.GENERATION_STAMP_V1_LIMIT, in.readLong());
      v.visit(ImageElement.LAST_ALLOCATED_BLOCK_ID, in.readLong());
    }
    if (NameNodeLayoutVersion.supports(Feature.STORED_TXIDS, imageVersion)) {
      v.visit(ImageElement.TRANSACTION_ID, in.readLong());
    }
    if (NameNodeLayoutVersion.supports(Feature.ADD_INODE_ID, imageVersion)) {
      v.visit(ImageElement.LAST_INODE_ID, in.readLong());
    }
    boolean supportSnapshot = NameNodeLayoutVersion.supports(Feature.SNAPSHOT, imageVersion);
    if (supportSnapshot) {
      v.visit(ImageElement.SNAPSHOT_COUNTER, in.readInt());
      int numSnapshots = in.readInt();
      v.visit(ImageElement.NUM_SNAPSHOTS_TOTAL, numSnapshots);
      for (int i = 0; i < numSnapshots; i++) {
        processSnapshot(in, v);
      }
    }
    if (NameNodeLayoutVersion.supports(Feature.FSIMAGE_COMPRESSION, imageVersion)) {
      boolean isCompressed = in.readBoolean();
      v.visit(ImageElement.IS_COMPRESSED, String.valueOf(isCompressed));
      if (isCompressed) {
        String codecClassName = Text.readString(in);
        v.visit(ImageElement.COMPRESS_CODEC, codecClassName);
        CompressionCodecFactory codecFac = new CompressionCodecFactory(new Configuration());
        CompressionCodec codec = codecFac.getCodecByClassName(codecClassName);
        if (codec == null) {
          throw new IOException("Image compression codec not supported: " + codecClassName);
        }
        in = new DataInputStream(codec.createInputStream(in));
      }
    }
    processINodes(in, v, numInodes, skipBlocks, supportSnapshot);
    subtreeMap.clear();
    dirNodeMap.clear();
    processINodesUC(in, v, skipBlocks);
    if (NameNodeLayoutVersion.supports(Feature.DELEGATION_TOKEN, imageVersion)) {
      processDelegationTokens(in, v);
    }
    if (NameNodeLayoutVersion.supports(Feature.CACHING, imageVersion)) {
      processCacheManagerState(in, v);
    }
    // FSImage
    v.leaveEnclosingElement();
    done = true;
  } finally {
    if (done) {
      v.finish();
    } else {
      v.finishAbnormally();
    }
  }
}
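The compressed-image branch above stores the codec class name as a Text string and then wraps the remaining input stream with that codec. A self-contained sketch of the same name-then-wrap pattern follows; the in-memory buffer, DefaultCodec, and payload are assumptions for illustration, and this is not the real fsimage layout beyond that pattern.

import java.io.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class CodecNameRoundTrip {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    String codecName = "org.apache.hadoop.io.compress.DefaultCodec"; // illustrative choice
    CompressionCodecFactory factory = new CompressionCodecFactory(conf);
    CompressionCodec codec = factory.getCodecByClassName(codecName);
    if (codec == null) {
      throw new IOException("Codec not supported: " + codecName);
    }
    // Write the codec class name, then the compressed payload.
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(bytes);
    Text.writeString(out, codecName);
    DataOutputStream compressed = new DataOutputStream(codec.createOutputStream(out));
    compressed.writeUTF("hello fsimage");
    compressed.close();
    // Read it back the way loadImage does: codec name first, then wrap the stream.
    DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes.toByteArray()));
    String readName = Text.readString(in);
    CompressionCodec readCodec = factory.getCodecByClassName(readName);
    DataInputStream body = new DataInputStream(readCodec.createInputStream(in));
    System.out.println(body.readUTF()); // prints "hello fsimage"
  }
}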
Use of org.apache.hadoop.io.compress.CompressionCodec in project hadoop by apache.
The class TestDFSShell, method textTest:
private void textTest(Path root, Configuration conf) throws Exception {
  PrintStream bak = null;
  try {
    final FileSystem fs = root.getFileSystem(conf);
    fs.mkdirs(root);
    // Test the gzip type of files. Magic detection.
    OutputStream zout = new GZIPOutputStream(fs.create(new Path(root, "file.gz")));
    Random r = new Random();
    bak = System.out;
    ByteArrayOutputStream file = new ByteArrayOutputStream();
    for (int i = 0; i < 1024; ++i) {
      char c = Character.forDigit(r.nextInt(26) + 10, 36);
      file.write(c);
      zout.write(c);
    }
    zout.close();
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    System.setOut(new PrintStream(out));
    String[] argv = new String[2];
    argv[0] = "-text";
    argv[1] = new Path(root, "file.gz").toString();
    int ret = ToolRunner.run(new FsShell(conf), argv);
    assertEquals("'-text " + argv[1] + " returned " + ret, 0, ret);
    assertTrue("Output doesn't match input", Arrays.equals(file.toByteArray(), out.toByteArray()));
    // Create a sequence file with a gz extension, to test proper
    // container detection. Magic detection.
    SequenceFile.Writer writer = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(new Path(root, "file.gz")),
        SequenceFile.Writer.keyClass(Text.class),
        SequenceFile.Writer.valueClass(Text.class));
    writer.append(new Text("Foo"), new Text("Bar"));
    writer.close();
    out = new ByteArrayOutputStream();
    System.setOut(new PrintStream(out));
    argv = new String[2];
    argv[0] = "-text";
    argv[1] = new Path(root, "file.gz").toString();
    ret = ToolRunner.run(new FsShell(conf), argv);
    assertEquals("'-text " + argv[1] + " returned " + ret, 0, ret);
    assertTrue("Output doesn't match input", Arrays.equals("Foo\tBar\n".getBytes(), out.toByteArray()));
    out.reset();
    // Test deflate. Extension-based detection.
    OutputStream dout = new DeflaterOutputStream(fs.create(new Path(root, "file.deflate")));
    byte[] outbytes = "foo".getBytes();
    dout.write(outbytes);
    dout.close();
    out = new ByteArrayOutputStream();
    System.setOut(new PrintStream(out));
    argv = new String[2];
    argv[0] = "-text";
    argv[1] = new Path(root, "file.deflate").toString();
    ret = ToolRunner.run(new FsShell(conf), argv);
    assertEquals("'-text " + argv[1] + " returned " + ret, 0, ret);
    assertTrue("Output doesn't match input", Arrays.equals(outbytes, out.toByteArray()));
    out.reset();
    // Test a simple codec. Extension based detection. We use
    // Bzip2 cause its non-native.
    CompressionCodec codec = ReflectionUtils.newInstance(BZip2Codec.class, conf);
    String extension = codec.getDefaultExtension();
    Path p = new Path(root, "file." + extension);
    OutputStream fout = new DataOutputStream(codec.createOutputStream(fs.create(p, true)));
    byte[] writebytes = "foo".getBytes();
    fout.write(writebytes);
    fout.close();
    out = new ByteArrayOutputStream();
    System.setOut(new PrintStream(out));
    argv = new String[2];
    argv[0] = "-text";
    argv[1] = new Path(root, p).toString();
    ret = ToolRunner.run(new FsShell(conf), argv);
    assertEquals("'-text " + argv[1] + " returned " + ret, 0, ret);
    assertTrue("Output doesn't match input", Arrays.equals(writebytes, out.toByteArray()));
    out.reset();
    // Test a plain text.
    OutputStream pout = fs.create(new Path(root, "file.txt"));
    writebytes = "bar".getBytes();
    pout.write(writebytes);
    pout.close();
    out = new ByteArrayOutputStream();
    System.setOut(new PrintStream(out));
    argv = new String[2];
    argv[0] = "-text";
    argv[1] = new Path(root, "file.txt").toString();
    ret = ToolRunner.run(new FsShell(conf), argv);
    assertEquals("'-text " + argv[1] + " returned " + ret, 0, ret);
    assertTrue("Output doesn't match input", Arrays.equals(writebytes, out.toByteArray()));
    out.reset();
  } finally {
    if (null != bak) {
      System.setOut(bak);
    }
  }
}
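The "simple codec" case above relies on extension-based detection: the shell's -text maps the file suffix back to a codec through CompressionCodecFactory. A standalone sketch of that mechanism against the local filesystem follows; the /tmp path and the "foo" payload are illustrative assumptions.

import java.io.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.BZip2Codec;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.util.ReflectionUtils;

public class ExtensionDetectionSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // Write a small bzip2 file through the codec, as the test does.
    CompressionCodec codec = ReflectionUtils.newInstance(BZip2Codec.class, conf);
    File f = new File("/tmp/file" + codec.getDefaultExtension()); // e.g. /tmp/file.bz2
    try (OutputStream out = codec.createOutputStream(new FileOutputStream(f))) {
      out.write("foo".getBytes());
    }
    // Extension-based detection: the factory maps the ".bz2" suffix back to BZip2Codec.
    CompressionCodec detected = new CompressionCodecFactory(conf).getCodec(new Path(f.getPath()));
    try (InputStream in = detected.createInputStream(new FileInputStream(f));
         ByteArrayOutputStream buf = new ByteArrayOutputStream()) {
      IOUtils.copyBytes(in, buf, 4096, false);
      System.out.println(buf); // prints "foo"
    }
  }
}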
Use of org.apache.hadoop.io.compress.CompressionCodec in project hadoop by apache.
The class MapFileOutputFormat, method getRecordWriter:
public RecordWriter<WritableComparable<?>, Writable> getRecordWriter(TaskAttemptContext context)
    throws IOException {
  Configuration conf = context.getConfiguration();
  CompressionCodec codec = null;
  CompressionType compressionType = CompressionType.NONE;
  if (getCompressOutput(context)) {
    // find the kind of compression to do
    compressionType = SequenceFileOutputFormat.getOutputCompressionType(context);
    // find the right codec
    Class<?> codecClass = getOutputCompressorClass(context, DefaultCodec.class);
    codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
  }
  Path file = getDefaultWorkFile(context, "");
  FileSystem fs = file.getFileSystem(conf);
  // ignore the progress parameter, since MapFile is local
  final MapFile.Writer out = new MapFile.Writer(conf, fs, file.toString(),
      context.getOutputKeyClass().asSubclass(WritableComparable.class),
      context.getOutputValueClass().asSubclass(Writable.class),
      compressionType, codec, context);
  return new RecordWriter<WritableComparable<?>, Writable>() {

    public void write(WritableComparable<?> key, Writable value) throws IOException {
      out.append(key, value);
    }

    public void close(TaskAttemptContext context) throws IOException {
      out.close();
    }
  };
}
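This is the new-API (mapreduce) variant, so the compression settings come from the Job rather than a JobConf. A minimal sketch of the output-format wiring a driver might use follows; the path, key/value classes, codec, and job name are illustrative assumptions, and mapper/reducer setup is omitted.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

public class MapFileJobSketch {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "mapfile-output");
    job.setOutputFormatClass(MapFileOutputFormat.class);
    // Keys must be WritableComparable and values Writable, as getRecordWriter() expects.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // These settings feed getCompressOutput(), getOutputCompressorClass(), and
    // SequenceFileOutputFormat.getOutputCompressionType() in the method above.
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
    FileOutputFormat.setOutputPath(job, new Path("/tmp/mapfile-out")); // illustrative path
  }
}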