Search in sources:

Example 1 with CompressionType

use of org.apache.hadoop.io.SequenceFile.CompressionType in project hadoop by apache.

The class TestMapRed, method testCompression.

@Test
public void testCompression() throws Exception {
    // Cover every SequenceFile compression mode (NONE, RECORD, BLOCK).
    EnumSet<SequenceFile.CompressionType> compressionTypes =
            EnumSet.allOf(SequenceFile.CompressionType.class);
    for (CompressionType reduceCompression : compressionTypes) {
        // Exercise each mode both without and with the combiner enabled.
        for (boolean useCombiner : new boolean[] { false, true }) {
            checkCompression(false, reduceCompression, useCombiner);
            checkCompression(true, reduceCompression, useCombiner);
        }
    }
}
Also used : SequenceFile(org.apache.hadoop.io.SequenceFile) CompressionType(org.apache.hadoop.io.SequenceFile.CompressionType) Test(org.junit.Test)

Example 2 with CompressionType

use of org.apache.hadoop.io.SequenceFile.CompressionType in project hadoop by apache.

The class SequenceFileAsBinaryOutputFormat, method getRecordWriter.

@Override
public RecordWriter<BytesWritable, BytesWritable> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress) throws IOException {
    // Resolve the task's temporary output file and the filesystem it lives on.
    Path outputPath = FileOutputFormat.getTaskOutputPath(job, name);
    FileSystem outputFs = outputPath.getFileSystem(job);
    // Default: no compression unless the job configuration requests it.
    CompressionType compressionType = CompressionType.NONE;
    CompressionCodec codec = null;
    if (getCompressOutput(job)) {
        // Which kind of compression (RECORD/BLOCK) and which codec to use.
        compressionType = getOutputCompressionType(job);
        Class<? extends CompressionCodec> codecClass =
                getOutputCompressorClass(job, DefaultCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, job);
    }
    final SequenceFile.Writer writer = SequenceFile.createWriter(outputFs, job, outputPath,
            getSequenceFileOutputKeyClass(job), getSequenceFileOutputValueClass(job),
            compressionType, codec, progress);
    return new RecordWriter<BytesWritable, BytesWritable>() {

        // Reused across records so each write avoids a fresh wrapper allocation.
        private final WritableValueBytes valueBytes = new WritableValueBytes();

        public void write(BytesWritable rawKey, BytesWritable rawValue) throws IOException {
            valueBytes.reset(rawValue);
            writer.appendRaw(rawKey.getBytes(), 0, rawKey.getLength(), valueBytes);
            // Drop the reference so the value buffer isn't pinned between records.
            valueBytes.reset(null);
        }

        public void close(Reporter reporter) throws IOException {
            writer.close();
        }
    };
}
Also used : Path(org.apache.hadoop.fs.Path) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) BytesWritable(org.apache.hadoop.io.BytesWritable) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) CompressionType(org.apache.hadoop.io.SequenceFile.CompressionType)

Example 3 with CompressionType

use of org.apache.hadoop.io.SequenceFile.CompressionType in project hadoop by apache.

The class MapFileOutputFormat, method getRecordWriter.

public RecordWriter<WritableComparable<?>, Writable> getRecordWriter(TaskAttemptContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    // Default: no compression unless the job configuration requests it.
    CompressionType compressionType = CompressionType.NONE;
    CompressionCodec codec = null;
    if (getCompressOutput(context)) {
        // Which kind of compression (RECORD/BLOCK) and which codec to use.
        compressionType = SequenceFileOutputFormat.getOutputCompressionType(context);
        Class<?> codecClass = getOutputCompressorClass(context, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
    }
    Path workFile = getDefaultWorkFile(context, "");
    FileSystem fs = workFile.getFileSystem(conf);
    // MapFile writes locally; the TaskAttemptContext serves as the progress hook.
    final MapFile.Writer mapWriter = new MapFile.Writer(conf, fs, workFile.toString(),
            context.getOutputKeyClass().asSubclass(WritableComparable.class),
            context.getOutputValueClass().asSubclass(Writable.class),
            compressionType, codec, context);
    return new RecordWriter<WritableComparable<?>, Writable>() {

        public void write(WritableComparable<?> key, Writable value) throws IOException {
            mapWriter.append(key, value);
        }

        public void close(TaskAttemptContext context) throws IOException {
            mapWriter.close();
        }
    };
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) Writable(org.apache.hadoop.io.Writable) MapFile(org.apache.hadoop.io.MapFile) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) RecordWriter(org.apache.hadoop.mapreduce.RecordWriter) WritableComparable(org.apache.hadoop.io.WritableComparable) FileSystem(org.apache.hadoop.fs.FileSystem) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) CompressionType(org.apache.hadoop.io.SequenceFile.CompressionType) RecordWriter(org.apache.hadoop.mapreduce.RecordWriter)

Example 4 with CompressionType

use of org.apache.hadoop.io.SequenceFile.CompressionType in project hive by apache.

The class HiveFileFormatUtils, method getHiveRecordWriter.

/**
 * Builds a Hive record writer for the table's output format, applying the
 * FileSinkDesc's compression codec and SequenceFile compression type when
 * compression is enabled.
 *
 * @param jc          the caller's job configuration (left unmodified)
 * @param tableInfo   table descriptor used to resolve the HiveOutputFormat
 * @param outputClass Writable class of the values to be written
 * @param conf        file-sink descriptor carrying the compression settings
 * @param outPath     destination path for the writer
 * @param reporter    progress reporter passed through to the output format
 * @throws HiveException wrapping any failure while configuring or creating the writer
 */
public static RecordWriter getHiveRecordWriter(JobConf jc, TableDesc tableInfo, Class<? extends Writable> outputClass, FileSinkDesc conf, Path outPath, Reporter reporter) throws HiveException {
    HiveOutputFormat<?, ?> hiveOutputFormat = getHiveOutputFormat(jc, tableInfo);
    try {
        boolean isCompressed = conf.getCompressed();
        JobConf jc_output = jc;
        if (isCompressed) {
            // Clone the conf so compression settings do not leak into the caller's jc.
            jc_output = new JobConf(jc);
            String codecStr = conf.getCompressCodec();
            if (codecStr != null && !codecStr.trim().isEmpty()) {
                Class<? extends CompressionCodec> codec = JavaUtils.loadClass(codecStr);
                FileOutputFormat.setOutputCompressorClass(jc_output, codec);
            }
            String type = conf.getCompressType();
            if (type != null && !type.trim().isEmpty()) {
                CompressionType style = CompressionType.valueOf(type);
                // FIX: apply the compression type to the cloned jc_output (the conf
                // actually handed to the writer), not the original jc. Setting it on
                // jc mutated the caller's conf and never reached jc_output, since the
                // clone was taken before this call.
                SequenceFileOutputFormat.setOutputCompressionType(jc_output, style);
            }
        }
        return hiveOutputFormat.getHiveRecordWriter(jc_output, outPath, outputClass, isCompressed, tableInfo.getProperties(), reporter);
    } catch (Exception e) {
        // Boundary method: wrap any configuration/creation failure, preserving the cause.
        throw new HiveException(e);
    }
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) JobConf(org.apache.hadoop.mapred.JobConf) CompressionType(org.apache.hadoop.io.SequenceFile.CompressionType) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) FileSystemNotFoundException(java.nio.file.FileSystemNotFoundException) IOException(java.io.IOException)

Example 5 with CompressionType

use of org.apache.hadoop.io.SequenceFile.CompressionType in project hadoop by apache.

The class SequenceFileOutputFormat, method getRecordWriter.

public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress) throws IOException {
    // Resolve the task's temporary output file and the filesystem it lives on.
    Path outputPath = FileOutputFormat.getTaskOutputPath(job, name);
    FileSystem outputFs = outputPath.getFileSystem(job);
    // Default: no compression unless the job configuration requests it.
    CompressionType compressionType = CompressionType.NONE;
    CompressionCodec codec = null;
    if (getCompressOutput(job)) {
        // Which kind of compression (RECORD/BLOCK) and which codec to use.
        compressionType = getOutputCompressionType(job);
        Class<? extends CompressionCodec> codecClass =
                getOutputCompressorClass(job, DefaultCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, job);
    }
    final SequenceFile.Writer writer = SequenceFile.createWriter(outputFs, job, outputPath,
            job.getOutputKeyClass(), job.getOutputValueClass(),
            compressionType, codec, progress);
    return new RecordWriter<K, V>() {

        public void write(K key, V value) throws IOException {
            writer.append(key, value);
        }

        public void close(Reporter reporter) throws IOException {
            writer.close();
        }
    };
}
Also used : Path(org.apache.hadoop.fs.Path) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) CompressionType(org.apache.hadoop.io.SequenceFile.CompressionType)

Aggregations

CompressionType (org.apache.hadoop.io.SequenceFile.CompressionType)11 Path (org.apache.hadoop.fs.Path)7 CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec)7 FileSystem (org.apache.hadoop.fs.FileSystem)6 Configuration (org.apache.hadoop.conf.Configuration)4 MapFile (org.apache.hadoop.io.MapFile)4 SequenceFile (org.apache.hadoop.io.SequenceFile)4 Writable (org.apache.hadoop.io.Writable)3 RecordWriter (org.apache.hadoop.mapreduce.RecordWriter)3 TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext)3 IOException (java.io.IOException)2 Option (org.apache.hadoop.io.MapFile.Writer.Option)2 Text (org.apache.hadoop.io.Text)2 WritableComparable (org.apache.hadoop.io.WritableComparable)2 DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec)2 CrawlDatum (org.apache.nutch.crawl.CrawlDatum)2 MalformedURLException (java.net.MalformedURLException)1 URL (java.net.URL)1 FileSystemNotFoundException (java.nio.file.FileSystemNotFoundException)1 ArrayList (java.util.ArrayList)1