Search in sources :

Example 66 with CompressionCodec

use of org.apache.hadoop.io.compress.CompressionCodec in project hadoop by apache.

The getRecordWriter method of the SequenceFileOutputFormat class.

public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress) throws IOException {
    // Resolve the task's temporary output file and the filesystem it lives on.
    Path outputPath = FileOutputFormat.getTaskOutputPath(job, name);
    FileSystem outputFs = outputPath.getFileSystem(job);
    // Compression is disabled unless the job configuration turns it on.
    CompressionType compressionType = CompressionType.NONE;
    CompressionCodec codec = null;
    if (getCompressOutput(job)) {
        // Determine both the compression granularity and the codec to apply.
        compressionType = getOutputCompressionType(job);
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, DefaultCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, job);
    }
    final SequenceFile.Writer writer = SequenceFile.createWriter(outputFs, job, outputPath, job.getOutputKeyClass(), job.getOutputValueClass(), compressionType, codec, progress);
    // Adapt the SequenceFile.Writer to the RecordWriter contract.
    return new RecordWriter<K, V>() {

        @Override
        public void write(K key, V value) throws IOException {
            writer.append(key, value);
        }

        @Override
        public void close(Reporter reporter) throws IOException {
            writer.close();
        }
    };
}
Also used : Path(org.apache.hadoop.fs.Path) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) CompressionType(org.apache.hadoop.io.SequenceFile.CompressionType)

Example 67 with CompressionCodec

use of org.apache.hadoop.io.compress.CompressionCodec in project hadoop by apache.

The getRecordWriter method of the MapFileOutputFormat class.

public RecordWriter<WritableComparable, Writable> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress) throws IOException {
    // Resolve the task's temporary output file and the filesystem it lives on.
    Path outputPath = FileOutputFormat.getTaskOutputPath(job, name);
    FileSystem outputFs = outputPath.getFileSystem(job);
    // Compression is disabled unless the job configuration turns it on.
    CompressionType compressionType = CompressionType.NONE;
    CompressionCodec codec = null;
    if (getCompressOutput(job)) {
        // Determine both the compression granularity and the codec to apply.
        compressionType = SequenceFileOutputFormat.getOutputCompressionType(job);
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, DefaultCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, job);
    }
    // NOTE(review): an earlier comment claimed the progress parameter was
    // ignored, but it is in fact forwarded to the MapFile.Writer below.
    final MapFile.Writer writer = new MapFile.Writer(job, outputFs, outputPath.toString(), job.getOutputKeyClass().asSubclass(WritableComparable.class), job.getOutputValueClass().asSubclass(Writable.class), compressionType, codec, progress);
    // Adapt the MapFile.Writer to the RecordWriter contract.
    return new RecordWriter<WritableComparable, Writable>() {

        @Override
        public void write(WritableComparable key, Writable value) throws IOException {
            writer.append(key, value);
        }

        @Override
        public void close(Reporter reporter) throws IOException {
            writer.close();
        }
    };
}
Also used : Path(org.apache.hadoop.fs.Path) WritableComparable(org.apache.hadoop.io.WritableComparable) FileSystem(org.apache.hadoop.fs.FileSystem) Writable(org.apache.hadoop.io.Writable) MapFile(org.apache.hadoop.io.MapFile) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) CompressionType(org.apache.hadoop.io.SequenceFile.CompressionType)

Example 68 with CompressionCodec

use of org.apache.hadoop.io.compress.CompressionCodec in project hadoop by apache.

The testSequenceFileMetadata method of the TestSequenceFile class.

/**
 * Unit tests for SequenceFile metadata: verifies that metadata written by each
 * writer variant (plain, record-compressed, block-compressed) and by the
 * sorter round-trips intact through a read.
 */
@Test
public void testSequenceFileMetadata() throws Exception {
    LOG.info("Testing SequenceFile with metadata");
    int count = 1024 * 10;
    CompressionCodec codec = new DefaultCodec();
    Path file = new Path(GenericTestUtils.getTempPath("test.seq.metadata"));
    Path sortedFile = new Path(GenericTestUtils.getTempPath("test.sorted.seq.metadata"));
    Path recordCompressedFile = new Path(GenericTestUtils.getTempPath("test.rc.seq.metadata"));
    Path blockCompressedFile = new Path(GenericTestUtils.getTempPath("test.bc.seq.metadata"));
    FileSystem fs = FileSystem.getLocal(conf);
    // Metadata that every writer variant must preserve byte-for-byte.
    SequenceFile.Metadata theMetadata = new SequenceFile.Metadata();
    theMetadata.set(new Text("name_1"), new Text("value_1"));
    theMetadata.set(new Text("name_2"), new Text("value_2"));
    theMetadata.set(new Text("name_3"), new Text("value_3"));
    theMetadata.set(new Text("name_4"), new Text("value_4"));
    int seed = new Random().nextInt();
    try {
        // SequenceFile.Writer
        writeMetadataTest(fs, count, seed, file, CompressionType.NONE, null, theMetadata);
        checkMetadataMatches(theMetadata, readMetadata(fs, file), 1);
        // SequenceFile.RecordCompressWriter
        writeMetadataTest(fs, count, seed, recordCompressedFile, CompressionType.RECORD, codec, theMetadata);
        checkMetadataMatches(theMetadata, readMetadata(fs, recordCompressedFile), 2);
        // SequenceFile.BlockCompressWriter
        writeMetadataTest(fs, count, seed, blockCompressedFile, CompressionType.BLOCK, codec, theMetadata);
        checkMetadataMatches(theMetadata, readMetadata(fs, blockCompressedFile), 3);
        // SequenceFile.Sorter
        sortMetadataTest(fs, file, sortedFile, theMetadata);
        // BUG FIX: this step previously re-read recordCompressedFile, so the
        // sorted file's metadata was never actually verified.
        checkMetadataMatches(theMetadata, readMetadata(fs, sortedFile), 4);
    } finally {
        fs.close();
    }
    LOG.info("Successfully tested SequenceFile with metadata");
}

/**
 * Compares retrieved metadata against the original, logging both and failing
 * with the step number when they differ.
 *
 * @param expected the metadata that was written
 * @param actual the metadata read back from the file
 * @param step 1-based index of the writer variant being checked, used in the
 *        failure message to identify which case diverged
 */
private void checkMetadataMatches(SequenceFile.Metadata expected, SequenceFile.Metadata actual, int step) {
    if (!expected.equals(actual)) {
        LOG.info("The original metadata:\n" + expected.toString());
        LOG.info("The retrieved metadata:\n" + actual.toString());
        throw new RuntimeException("metadata not match:  " + step);
    }
}
Also used : Metadata(org.apache.hadoop.io.SequenceFile.Metadata) Metadata(org.apache.hadoop.io.SequenceFile.Metadata) DefaultCodec(org.apache.hadoop.io.compress.DefaultCodec) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) Test(org.junit.Test)

Example 69 with CompressionCodec

use of org.apache.hadoop.io.compress.CompressionCodec in project hadoop by apache.

The main method of the TestSequenceFile class.

/**
 * For debugging and testing. Parses command-line flags, then runs the
 * selected combination of write/read, sort or merge, and sort-order checks
 * against the given SequenceFile path.
 */
public static void main(String[] args) throws Exception {
    int count = 1024 * 1024;
    int megabytes = 1;
    int factor = 10;
    boolean create = true;
    boolean rwonly = false;
    boolean check = false;
    boolean fast = false;
    boolean merge = false;
    String compressType = "NONE";
    String compressionCodec = "org.apache.hadoop.io.compress.DefaultCodec";
    Path file = null;
    int seed = new Random().nextInt();
    String usage = "Usage: testsequencefile " + "[-count N] " + "[-seed #] [-check] [-compressType <NONE|RECORD|BLOCK>] " + "-codec <compressionCodec> " + "[[-rwonly] | {[-megabytes M] [-factor F] [-nocreate] [-fast] [-merge]}] " + " file";
    if (args.length == 0) {
        System.err.println(usage);
        System.exit(-1);
    }
    FileSystem fs = null;
    try {
        for (int i = 0; i < args.length; ++i) {
            // parse command line
            if (args[i] == null) {
                continue;
            } else if (args[i].equals("-count")) {
                count = Integer.parseInt(args[++i]);
            } else if (args[i].equals("-megabytes")) {
                megabytes = Integer.parseInt(args[++i]);
            } else if (args[i].equals("-factor")) {
                factor = Integer.parseInt(args[++i]);
            } else if (args[i].equals("-seed")) {
                seed = Integer.parseInt(args[++i]);
            } else if (args[i].equals("-rwonly")) {
                rwonly = true;
            } else if (args[i].equals("-nocreate")) {
                create = false;
            } else if (args[i].equals("-check")) {
                check = true;
            } else if (args[i].equals("-fast")) {
                fast = true;
            } else if (args[i].equals("-merge")) {
                merge = true;
            } else if (args[i].equals("-compressType")) {
                compressType = args[++i];
            } else if (args[i].equals("-codec")) {
                compressionCodec = args[++i];
            } else {
                // file is required parameter
                file = new Path(args[i]);
            }
        }
        // BUG FIX: previously a missing file argument caused a
        // NullPointerException at file.getFileSystem() below; fail with the
        // usage message instead.
        if (file == null) {
            System.err.println(usage);
            System.exit(-1);
        }
        TestSequenceFile test = new TestSequenceFile();
        fs = file.getFileSystem(test.conf);
        LOG.info("count = " + count);
        LOG.info("megabytes = " + megabytes);
        LOG.info("factor = " + factor);
        LOG.info("create = " + create);
        LOG.info("seed = " + seed);
        LOG.info("rwonly = " + rwonly);
        LOG.info("check = " + check);
        LOG.info("fast = " + fast);
        LOG.info("merge = " + merge);
        LOG.info("compressType = " + compressType);
        LOG.info("compressionCodec = " + compressionCodec);
        LOG.info("file = " + file);
        // -rwonly is incompatible with -nocreate, -merge and -fast.
        if (rwonly && (!create || merge || fast)) {
            System.err.println(usage);
            System.exit(-1);
        }
        CompressionType compressionType = CompressionType.valueOf(compressType);
        CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(test.conf.getClassByName(compressionCodec), test.conf);
        if (rwonly || (create && !merge)) {
            test.writeTest(fs, count, seed, file, compressionType, codec);
            test.readTest(fs, count, seed, file);
        }
        if (!rwonly) {
            if (merge) {
                test.mergeTest(fs, count, seed, file, compressionType, fast, factor, megabytes);
            } else {
                test.sortTest(fs, count, megabytes, factor, fast, file);
            }
        }
        if (check) {
            test.checkSort(fs, count, seed, file);
        }
    } finally {
        if (fs != null) {
            fs.close();
        }
    }
}
Also used : CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) CompressionType(org.apache.hadoop.io.SequenceFile.CompressionType)

Example 70 with CompressionCodec

use of org.apache.hadoop.io.compress.CompressionCodec in project hadoop by apache.

The getOutputCompressorClass method of the FileOutputFormat class.

/**
   * Get the {@link CompressionCodec} for compressing the job outputs.
   * @param job the {@link Job} to look in
   * @param defaultValue the {@link CompressionCodec} to return if not set
   * @return the {@link CompressionCodec} to be used to compress the 
   *         job outputs
   * @throws IllegalArgumentException if the class was specified, but not found
   */
public static Class<? extends CompressionCodec> getOutputCompressorClass(JobContext job, Class<? extends CompressionCodec> defaultValue) {
    Configuration conf = job.getConfiguration();
    String codecName = conf.get(FileOutputFormat.COMPRESS_CODEC);
    // Fall back to the caller-supplied default when no codec is configured.
    if (codecName == null) {
        return defaultValue;
    }
    try {
        return conf.getClassByName(codecName).asSubclass(CompressionCodec.class);
    } catch (ClassNotFoundException e) {
        // Surface a misconfigured codec name as an argument error, keeping
        // the original failure as the cause.
        throw new IllegalArgumentException("Compression codec " + codecName + " was not found.", e);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec)

Aggregations

CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec)111 Path (org.apache.hadoop.fs.Path)54 FileSystem (org.apache.hadoop.fs.FileSystem)41 Configuration (org.apache.hadoop.conf.Configuration)38 CompressionCodecFactory (org.apache.hadoop.io.compress.CompressionCodecFactory)37 InputStream (java.io.InputStream)18 IOException (java.io.IOException)17 Test (org.junit.Test)17 FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)15 Text (org.apache.hadoop.io.Text)14 Configurable (org.apache.hadoop.conf.Configurable)10 GzipCodec (org.apache.hadoop.io.compress.GzipCodec)10 JobConf (org.apache.hadoop.mapred.JobConf)10 SequenceFile (org.apache.hadoop.io.SequenceFile)9 OutputStream (java.io.OutputStream)8 DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec)8 FileInputStream (java.io.FileInputStream)7 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)6 ByteString (com.google.protobuf.ByteString)5 DataInputStream (java.io.DataInputStream)5