
Example 16 with CompressionCodec

Use of org.apache.hadoop.io.compress.CompressionCodec in project hadoop by apache.

From the class SequenceFileAsBinaryOutputFormat, method getRecordWriter.

@Override
public RecordWriter<BytesWritable, BytesWritable> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress) throws IOException {
    // get the path of the temporary output file 
    Path file = FileOutputFormat.getTaskOutputPath(job, name);
    FileSystem fs = file.getFileSystem(job);
    CompressionCodec codec = null;
    CompressionType compressionType = CompressionType.NONE;
    if (getCompressOutput(job)) {
        // find the kind of compression to do
        compressionType = getOutputCompressionType(job);
        // find the right codec
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, DefaultCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, job);
    }
    final SequenceFile.Writer out = SequenceFile.createWriter(fs, job, file, getSequenceFileOutputKeyClass(job), getSequenceFileOutputValueClass(job), compressionType, codec, progress);
    return new RecordWriter<BytesWritable, BytesWritable>() {

        private WritableValueBytes wvaluebytes = new WritableValueBytes();

        public void write(BytesWritable bkey, BytesWritable bvalue) throws IOException {
            wvaluebytes.reset(bvalue);
            out.appendRaw(bkey.getBytes(), 0, bkey.getLength(), wvaluebytes);
            wvaluebytes.reset(null);
        }

        public void close(Reporter reporter) throws IOException {
            out.close();
        }
    };
}
Also used : Path(org.apache.hadoop.fs.Path) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) BytesWritable(org.apache.hadoop.io.BytesWritable) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) CompressionType(org.apache.hadoop.io.SequenceFile.CompressionType)
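
As a companion to this example, a minimal driver sketch (not part of the Hadoop source; the driver class name, output path argument and BZip2 codec choice are illustrative assumptions) of the JobConf settings that this getRecordWriter reads back:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.compress.BZip2Codec;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileAsBinaryOutputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;

public class BinaryOutputDriver {

    public static void main(String[] args) throws Exception {
        JobConf job = new JobConf(BinaryOutputDriver.class);
        job.setOutputFormat(SequenceFileAsBinaryOutputFormat.class);
        // Makes getCompressOutput(job) in the example return true.
        FileOutputFormat.setCompressOutput(job, true);
        // Read back by getOutputCompressionType(job).
        SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
        // Read back by getOutputCompressorClass(job, DefaultCodec.class).
        FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
        // Key/value classes recorded in the SequenceFile header; the record writer
        // appends the raw key and value bytes without serializing them again.
        SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, BytesWritable.class);
        SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, BytesWritable.class);
        FileOutputFormat.setOutputPath(job, new Path(args[0]));
        // ... set the mapper/reducer and input path, then submit with JobClient.runJob(job).
    }
}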

Example 17 with CompressionCodec

Use of org.apache.hadoop.io.compress.CompressionCodec in project hadoop by apache.

From the class TextOutputFormat, method getRecordWriter.

public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress) throws IOException {
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = job.get("mapreduce.output.textoutputformat.separator", "\t");
    if (!isCompressed) {
        Path file = FileOutputFormat.getTaskOutputPath(job, name);
        FileSystem fs = file.getFileSystem(job);
        FSDataOutputStream fileOut = fs.create(file, progress);
        return new LineRecordWriter<K, V>(fileOut, keyValueSeparator);
    } else {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        // create the named codec
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job);
        // build the filename including the extension
        Path file = FileOutputFormat.getTaskOutputPath(job, name + codec.getDefaultExtension());
        FileSystem fs = file.getFileSystem(job);
        FSDataOutputStream fileOut = fs.create(file, progress);
        return new LineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)), keyValueSeparator);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) DataOutputStream(java.io.DataOutputStream) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec)
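
A brief companion sketch (illustrative, not from the Hadoop source; the driver class, separator choice and output path are assumptions) of how a job would take the compressed branch of this method:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextOutputFormat;

public class TextOutputDriver {

    public static void main(String[] args) throws Exception {
        JobConf job = new JobConf(TextOutputDriver.class);
        job.setOutputFormat(TextOutputFormat.class);
        // Separator between key and value on each output line; defaults to "\t".
        job.set("mapreduce.output.textoutputformat.separator", ",");
        // With compression on, getRecordWriter appends the codec's default
        // extension (".gz" here) to the file name and wraps the output stream.
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
        FileOutputFormat.setOutputPath(job, new Path(args[0]));
        // ... set the mapper/reducer and input path, then submit with JobClient.runJob(job).
    }
}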

Example 18 with CompressionCodec

Use of org.apache.hadoop.io.compress.CompressionCodec in project hadoop by apache.

From the class ImageLoaderCurrent, method loadImage.

/* (non-Javadoc)
   * @see ImageLoader#processImage(java.io.DataInputStream, ImageVisitor, boolean)
   */
@Override
public void loadImage(DataInputStream in, ImageVisitor v, boolean skipBlocks) throws IOException {
    boolean done = false;
    try {
        v.start();
        v.visitEnclosingElement(ImageElement.FS_IMAGE);
        imageVersion = in.readInt();
        if (!canLoadVersion(imageVersion))
            throw new IOException("Cannot process fslayout version " + imageVersion);
        if (NameNodeLayoutVersion.supports(Feature.ADD_LAYOUT_FLAGS, imageVersion)) {
            LayoutFlags.read(in);
        }
        v.visit(ImageElement.IMAGE_VERSION, imageVersion);
        v.visit(ImageElement.NAMESPACE_ID, in.readInt());
        long numInodes = in.readLong();
        v.visit(ImageElement.GENERATION_STAMP, in.readLong());
        if (NameNodeLayoutVersion.supports(Feature.SEQUENTIAL_BLOCK_ID, imageVersion)) {
            v.visit(ImageElement.GENERATION_STAMP_V2, in.readLong());
            v.visit(ImageElement.GENERATION_STAMP_V1_LIMIT, in.readLong());
            v.visit(ImageElement.LAST_ALLOCATED_BLOCK_ID, in.readLong());
        }
        if (NameNodeLayoutVersion.supports(Feature.STORED_TXIDS, imageVersion)) {
            v.visit(ImageElement.TRANSACTION_ID, in.readLong());
        }
        if (NameNodeLayoutVersion.supports(Feature.ADD_INODE_ID, imageVersion)) {
            v.visit(ImageElement.LAST_INODE_ID, in.readLong());
        }
        boolean supportSnapshot = NameNodeLayoutVersion.supports(Feature.SNAPSHOT, imageVersion);
        if (supportSnapshot) {
            v.visit(ImageElement.SNAPSHOT_COUNTER, in.readInt());
            int numSnapshots = in.readInt();
            v.visit(ImageElement.NUM_SNAPSHOTS_TOTAL, numSnapshots);
            for (int i = 0; i < numSnapshots; i++) {
                processSnapshot(in, v);
            }
        }
        if (NameNodeLayoutVersion.supports(Feature.FSIMAGE_COMPRESSION, imageVersion)) {
            boolean isCompressed = in.readBoolean();
            v.visit(ImageElement.IS_COMPRESSED, String.valueOf(isCompressed));
            if (isCompressed) {
                String codecClassName = Text.readString(in);
                v.visit(ImageElement.COMPRESS_CODEC, codecClassName);
                CompressionCodecFactory codecFac = new CompressionCodecFactory(new Configuration());
                CompressionCodec codec = codecFac.getCodecByClassName(codecClassName);
                if (codec == null) {
                    throw new IOException("Image compression codec not supported: " + codecClassName);
                }
                in = new DataInputStream(codec.createInputStream(in));
            }
        }
        processINodes(in, v, numInodes, skipBlocks, supportSnapshot);
        subtreeMap.clear();
        dirNodeMap.clear();
        processINodesUC(in, v, skipBlocks);
        if (NameNodeLayoutVersion.supports(Feature.DELEGATION_TOKEN, imageVersion)) {
            processDelegationTokens(in, v);
        }
        if (NameNodeLayoutVersion.supports(Feature.CACHING, imageVersion)) {
            processCacheManagerState(in, v);
        }
        // FSImage
        v.leaveEnclosingElement();
        done = true;
    } finally {
        if (done) {
            v.finish();
        } else {
            v.finishAbnormally();
        }
    }
}
Also used : CompressionCodecFactory(org.apache.hadoop.io.compress.CompressionCodecFactory) Configuration(org.apache.hadoop.conf.Configuration) IOException(java.io.IOException) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) DataInputStream(java.io.DataInputStream)
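
The codec lookup by class name in the middle of loadImage can be exercised on its own; a small standalone sketch of the same pattern (the input file argument and the GzipCodec class name are illustrative assumptions, unrelated to the fsimage layout):

import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class CodecByClassName {

    // Rewraps a raw stream with the codec named by its fully qualified class name,
    // mirroring how loadImage rewraps "in" once IS_COMPRESSED is true.
    static DataInputStream wrap(InputStream raw, String codecClassName, Configuration conf) throws IOException {
        CompressionCodecFactory factory = new CompressionCodecFactory(conf);
        CompressionCodec codec = factory.getCodecByClassName(codecClassName);
        if (codec == null) {
            throw new IOException("Codec not supported: " + codecClassName);
        }
        return new DataInputStream(codec.createInputStream(raw));
    }

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        try (DataInputStream in = wrap(new FileInputStream(args[0]),
                "org.apache.hadoop.io.compress.GzipCodec", conf)) {
            System.out.println("first byte: " + in.read());
        }
    }
}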

Example 19 with CompressionCodec

Use of org.apache.hadoop.io.compress.CompressionCodec in project hadoop by apache.

From the class TestDFSShell, method textTest.

private void textTest(Path root, Configuration conf) throws Exception {
    PrintStream bak = null;
    try {
        final FileSystem fs = root.getFileSystem(conf);
        fs.mkdirs(root);
        // Test the gzip type of files. Magic detection.
        OutputStream zout = new GZIPOutputStream(fs.create(new Path(root, "file.gz")));
        Random r = new Random();
        bak = System.out;
        ByteArrayOutputStream file = new ByteArrayOutputStream();
        for (int i = 0; i < 1024; ++i) {
            char c = Character.forDigit(r.nextInt(26) + 10, 36);
            file.write(c);
            zout.write(c);
        }
        zout.close();
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        System.setOut(new PrintStream(out));
        String[] argv = new String[2];
        argv[0] = "-text";
        argv[1] = new Path(root, "file.gz").toString();
        int ret = ToolRunner.run(new FsShell(conf), argv);
        assertEquals("'-text " + argv[1] + " returned " + ret, 0, ret);
        assertTrue("Output doesn't match input", Arrays.equals(file.toByteArray(), out.toByteArray()));
        // Create a sequence file with a gz extension, to test proper
        // container detection. Magic detection.
        SequenceFile.Writer writer = SequenceFile.createWriter(conf, SequenceFile.Writer.file(new Path(root, "file.gz")), SequenceFile.Writer.keyClass(Text.class), SequenceFile.Writer.valueClass(Text.class));
        writer.append(new Text("Foo"), new Text("Bar"));
        writer.close();
        out = new ByteArrayOutputStream();
        System.setOut(new PrintStream(out));
        argv = new String[2];
        argv[0] = "-text";
        argv[1] = new Path(root, "file.gz").toString();
        ret = ToolRunner.run(new FsShell(conf), argv);
        assertEquals("'-text " + argv[1] + " returned " + ret, 0, ret);
        assertTrue("Output doesn't match input", Arrays.equals("Foo\tBar\n".getBytes(), out.toByteArray()));
        out.reset();
        // Test deflate. Extension-based detection.
        OutputStream dout = new DeflaterOutputStream(fs.create(new Path(root, "file.deflate")));
        byte[] outbytes = "foo".getBytes();
        dout.write(outbytes);
        dout.close();
        out = new ByteArrayOutputStream();
        System.setOut(new PrintStream(out));
        argv = new String[2];
        argv[0] = "-text";
        argv[1] = new Path(root, "file.deflate").toString();
        ret = ToolRunner.run(new FsShell(conf), argv);
        assertEquals("'-text " + argv[1] + " returned " + ret, 0, ret);
        assertTrue("Output doesn't match input", Arrays.equals(outbytes, out.toByteArray()));
        out.reset();
        // Test a simple codec. Extension based detection. We use
        // Bzip2 cause its non-native.
        CompressionCodec codec = ReflectionUtils.newInstance(BZip2Codec.class, conf);
        String extension = codec.getDefaultExtension();
        Path p = new Path(root, "file." + extension);
        OutputStream fout = new DataOutputStream(codec.createOutputStream(fs.create(p, true)));
        byte[] writebytes = "foo".getBytes();
        fout.write(writebytes);
        fout.close();
        out = new ByteArrayOutputStream();
        System.setOut(new PrintStream(out));
        argv = new String[2];
        argv[0] = "-text";
        argv[1] = new Path(root, p).toString();
        ret = ToolRunner.run(new FsShell(conf), argv);
        assertEquals("'-text " + argv[1] + " returned " + ret, 0, ret);
        assertTrue("Output doesn't match input", Arrays.equals(writebytes, out.toByteArray()));
        out.reset();
        // Test a plain text.
        OutputStream pout = fs.create(new Path(root, "file.txt"));
        writebytes = "bar".getBytes();
        pout.write(writebytes);
        pout.close();
        out = new ByteArrayOutputStream();
        System.setOut(new PrintStream(out));
        argv = new String[2];
        argv[0] = "-text";
        argv[1] = new Path(root, "file.txt").toString();
        ret = ToolRunner.run(new FsShell(conf), argv);
        assertEquals("'-text " + argv[1] + " returned " + ret, 0, ret);
        assertTrue("Output doesn't match input", Arrays.equals(writebytes, out.toByteArray()));
        out.reset();
    } finally {
        if (null != bak) {
            System.setOut(bak);
        }
    }
}
Also used : GZIPOutputStream(java.util.zip.GZIPOutputStream) DeflaterOutputStream(java.util.zip.DeflaterOutputStream) Text(org.apache.hadoop.io.Text) StringContains.containsString(org.hamcrest.core.StringContains.containsString) Random(java.util.Random) SequenceFile(org.apache.hadoop.io.SequenceFile) GZIPOutputStream(java.util.zip.GZIPOutputStream) DeflaterOutputStream(java.util.zip.DeflaterOutputStream) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec)
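
The extension-based detection behind the deflate and bzip2 cases comes from CompressionCodecFactory; a minimal sketch of that lookup outside the test harness (the path argument is an illustrative assumption):

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class TextByExtension {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path p = new Path(args[0]); // e.g. a file ending in .bz2 or .deflate
        FileSystem fs = p.getFileSystem(conf);
        // getCodec() chooses a codec from the file extension; null means no match.
        CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(p);
        InputStream in = (codec == null) ? fs.open(p) : codec.createInputStream(fs.open(p));
        try (BufferedReader r = new BufferedReader(new InputStreamReader(in))) {
            r.lines().forEach(System.out::println);
        }
    }
}

Note that FsShell's -text additionally detects gzip and SequenceFile containers by their magic bytes, which this sketch does not attempt.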

Example 20 with CompressionCodec

Use of org.apache.hadoop.io.compress.CompressionCodec in project hadoop by apache.

From the class MapFileOutputFormat, method getRecordWriter.

public RecordWriter<WritableComparable<?>, Writable> getRecordWriter(TaskAttemptContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    CompressionCodec codec = null;
    CompressionType compressionType = CompressionType.NONE;
    if (getCompressOutput(context)) {
        // find the kind of compression to do
        compressionType = SequenceFileOutputFormat.getOutputCompressionType(context);
        // find the right codec
        Class<?> codecClass = getOutputCompressorClass(context, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
    }
    Path file = getDefaultWorkFile(context, "");
    FileSystem fs = file.getFileSystem(conf);
    // ignore the progress parameter, since MapFile is local
    final MapFile.Writer out = new MapFile.Writer(conf, fs, file.toString(), context.getOutputKeyClass().asSubclass(WritableComparable.class), context.getOutputValueClass().asSubclass(Writable.class), compressionType, codec, context);
    return new RecordWriter<WritableComparable<?>, Writable>() {

        public void write(WritableComparable<?> key, Writable value) throws IOException {
            out.append(key, value);
        }

        public void close(TaskAttemptContext context) throws IOException {
            out.close();
        }
    };
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) Writable(org.apache.hadoop.io.Writable) MapFile(org.apache.hadoop.io.MapFile) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) RecordWriter(org.apache.hadoop.mapreduce.RecordWriter) WritableComparable(org.apache.hadoop.io.WritableComparable) FileSystem(org.apache.hadoop.fs.FileSystem) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) CompressionType(org.apache.hadoop.io.SequenceFile.CompressionType) RecordWriter(org.apache.hadoop.mapreduce.RecordWriter)
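
As a companion, a minimal new-API driver sketch (illustrative; the class name, Text key/value types and output path are assumptions) of the job configuration that this getRecordWriter reads:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

public class MapFileDriver {

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "mapfile-output");
        job.setJarByClass(MapFileDriver.class);
        job.setOutputFormatClass(MapFileOutputFormat.class);
        // Keys must be WritableComparable and values Writable, matching the
        // asSubclass() casts in getRecordWriter above.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // These three settings feed getCompressOutput(), getOutputCompressionType()
        // and getOutputCompressorClass() in the example.
        FileOutputFormat.setCompressOutput(job, true);
        SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
        FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
        FileOutputFormat.setOutputPath(job, new Path(args[0]));
        // ... set the mapper/reducer and input path, then job.waitForCompletion(true).
    }
}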

Aggregations

CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec): 110
Path (org.apache.hadoop.fs.Path): 53
FileSystem (org.apache.hadoop.fs.FileSystem): 41
Configuration (org.apache.hadoop.conf.Configuration): 37
CompressionCodecFactory (org.apache.hadoop.io.compress.CompressionCodecFactory): 36
InputStream (java.io.InputStream): 17
Test (org.junit.Test): 17
IOException (java.io.IOException): 16
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 14
Text (org.apache.hadoop.io.Text): 14
Configurable (org.apache.hadoop.conf.Configurable): 10
GzipCodec (org.apache.hadoop.io.compress.GzipCodec): 10
JobConf (org.apache.hadoop.mapred.JobConf): 10
SequenceFile (org.apache.hadoop.io.SequenceFile): 9
OutputStream (java.io.OutputStream): 8
DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec): 8
FileInputStream (java.io.FileInputStream): 7
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 6
CompressionInputStream (org.apache.hadoop.io.compress.CompressionInputStream): 6
ByteString (com.google.protobuf.ByteString): 5