Example 56 with CompressionCodec

use of org.apache.hadoop.io.compress.CompressionCodec in project hive by apache.

the class Utilities method createRCFileWriter.

/**
 * Create an RCFile output stream based on the job configuration. Uses the user-supplied
 * compression flag (rather than obtaining it from the job configuration).
 *
 * @param jc
 *          job configuration
 * @param fs
 *          file system to create the file in
 * @param file
 *          path to be created
 * @param isCompressed
 *          whether the output should be compressed
 * @param progressable
 *          progress reporting callback
 * @return writer over the created RCFile
 */
public static RCFile.Writer createRCFileWriter(JobConf jc, FileSystem fs, Path file, boolean isCompressed, Progressable progressable) throws IOException {
    CompressionCodec codec = null;
    if (isCompressed) {
        Class<?> codecClass = FileOutputFormat.getOutputCompressorClass(jc, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtil.newInstance(codecClass, jc);
    }
    return new RCFile.Writer(fs, jc, file, progressable, codec);
}
Also used : CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter)
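
A minimal caller sketch (not from the source: the codec choice, column count, and output path are assumptions). FileOutputFormat.getOutputCompressorClass reads the codec class set on the JobConf, so compression can be configured the same way as for any MapReduce output, and RCFile.Writer expects the column count to be set before construction:

JobConf jc = new JobConf();
RCFileOutputFormat.setColumnNumber(jc, 4);                      // RCFile needs the column count up front
FileOutputFormat.setOutputCompressorClass(jc, GzipCodec.class); // read back by getOutputCompressorClass
FileSystem fs = FileSystem.get(jc);
// Hypothetical output path; isCompressed=true triggers the codec lookup shown above.
RCFile.Writer writer = Utilities.createRCFileWriter(jc, fs, new Path("/tmp/sample.rc"), true, null);
writer.close();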

Example 57 with CompressionCodec

use of org.apache.hadoop.io.compress.CompressionCodec in project hive by apache.

the class RCFileOutputFormat method getRecordWriter.

/**
 * {@inheritDoc}
 */
@Override
public RecordWriter<WritableComparable, BytesRefArrayWritable> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress) throws IOException {
    Path outputPath = getWorkOutputPath(job);
    FileSystem fs = outputPath.getFileSystem(job);
    Path file = new Path(outputPath, name);
    CompressionCodec codec = null;
    if (getCompressOutput(job)) {
        Class<?> codecClass = getOutputCompressorClass(job, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, job);
    }
    final RCFile.Writer out = new RCFile.Writer(fs, job, file, progress, codec);
    return new RecordWriter<WritableComparable, BytesRefArrayWritable>() {

        @Override
        public void close(Reporter reporter) throws IOException {
            out.close();
        }

        @Override
        public void write(WritableComparable key, BytesRefArrayWritable value) throws IOException {
            out.append(value);
        }
    };
}
Also used : Path(org.apache.hadoop.fs.Path) RecordWriter(org.apache.hadoop.mapred.RecordWriter) BytesRefArrayWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable) WritableComparable(org.apache.hadoop.io.WritableComparable) FileSystem(org.apache.hadoop.fs.FileSystem) Reporter(org.apache.hadoop.mapred.Reporter) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec)
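
Writing through the returned RecordWriter might look like the helper below (a hypothetical sketch; getWorkOutputPath only resolves inside a running MapReduce task, so the writer is assumed to come from that context). Note that the key is discarded by the anonymous wrapper: only the BytesRefArrayWritable value is appended.

// Hypothetical helper, not from the source: append one two-column row and close.
static void writeOneRow(RecordWriter<WritableComparable, BytesRefArrayWritable> writer) throws IOException {
    BytesRefArrayWritable row = new BytesRefArrayWritable(2);
    row.set(0, new BytesRefWritable("col0".getBytes(StandardCharsets.UTF_8)));
    row.set(1, new BytesRefWritable("col1".getBytes(StandardCharsets.UTF_8)));
    writer.write(NullWritable.get(), row); // the key is ignored; only the value lands in the file
    writer.close(Reporter.NULL);
}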

Example 58 with CompressionCodec

use of org.apache.hadoop.io.compress.CompressionCodec in project hive by apache.

the class PerformTestRCFileAndSeqFile method main.

public static void main(String[] args) throws Exception {
    int count = 1000;
    String file = null;
    // parse the command line
    for (int i = 0; i < args.length; ++i) {
        if (args[i] == null) {
            continue;
        } else if (args[i].equals("-count")) {
            count = Integer.parseInt(args[++i]);
        } else {
            // file is a required parameter
            file = args[i];
        }
    }
    // change this to choose the appropriate file system
    boolean isLocalFS = true;
    PerformTestRCFileAndSeqFile testcase = new PerformTestRCFileAndSeqFile(isLocalFS, file);
    // change these parameters to vary the benchmark
    boolean checkCorrect = true;
    CompressionCodec codec = new DefaultCodec();
    testcase.columnMaxSize = 30;
    // testcase.testWithColumnNumber(count, 2, checkCorrect, codec);
    // testcase.testWithColumnNumber(count, 10, checkCorrect, codec);
    // testcase.testWithColumnNumber(count, 25, checkCorrect, codec);
    testcase.testWithColumnNumber(count, 40, checkCorrect, codec);
    // testcase.testWithColumnNumber(count, 50, checkCorrect, codec);
    // testcase.testWithColumnNumber(count, 80, checkCorrect, codec);
}
Also used : DefaultCodec(org.apache.hadoop.io.compress.DefaultCodec) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec)
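
The codec handed to the benchmark drives RCFile's block compression, but the same CompressionCodec interface also works standalone. A minimal round-trip sketch (the file path is illustrative; DefaultCodec produces zlib/deflate output):

Configuration conf = new Configuration();
CompressionCodec codec = ReflectionUtils.newInstance(DefaultCodec.class, conf);
try (OutputStream out = codec.createOutputStream(new FileOutputStream("/tmp/data.deflate"))) {
    out.write("hello rcfile".getBytes(StandardCharsets.UTF_8)); // compressed on the way out
}
try (InputStream in = codec.createInputStream(new FileInputStream("/tmp/data.deflate"))) {
    System.out.println(new String(in.readAllBytes(), StandardCharsets.UTF_8)); // "hello rcfile"
}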

Example 59 with CompressionCodec

use of org.apache.hadoop.io.compress.CompressionCodec in project shifu by ShifuML.

the class ShifuFileUtils method getCompressInputStream.

private static InputStream getCompressInputStream(FSDataInputStream fdis, Path path) throws IOException {
    // Choose the decompression wrapper from the file-name suffix.
    String name = path.getName().toLowerCase();
    if (name.endsWith(".gz")) {
        return new GZIPInputStream(fdis);
    } else if (name.endsWith(".bz2")) {
        return new BZip2CompressorInputStream(fdis);
    } else if (name.endsWith(".snappy")) {
        Configuration conf = new Configuration();
        CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
        CompressionCodec codec = ccf.getCodecByClassName(SnappyCodec.class.getName());
        return codec.createInputStream(fdis);
    } else {
        // No known compression suffix: return the raw stream.
        return fdis;
    }
}
Also used : GZIPInputStream(java.util.zip.GZIPInputStream) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) Configuration(org.apache.hadoop.conf.Configuration) CompressionCodecFactory(org.apache.hadoop.io.compress.CompressionCodecFactory) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) SnappyCodec(org.apache.hadoop.io.compress.SnappyCodec)
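
A caller sketch (hypothetical: the path is an assumption, and since the method is private it would be invoked from inside ShifuFileUtils). Whichever branch matches, the caller just reads plain decompressed bytes; note that, depending on the Hadoop build, the SnappyCodec branch may need the native Snappy library at runtime.

Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
Path path = new Path("/data/train.gz"); // the .gz suffix selects the GZIPInputStream branch
try (BufferedReader reader = new BufferedReader(
        new InputStreamReader(getCompressInputStream(fs.open(path), path), StandardCharsets.UTF_8))) {
    String line;
    while ((line = reader.readLine()) != null) {
        // process one decompressed line at a time
    }
}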

Example 60 with CompressionCodec

use of org.apache.hadoop.io.compress.CompressionCodec in project shifu by ShifuML.

the class HdfsGlobalFile method openPartFileAsStream.

private InputStream openPartFileAsStream(FileStatus fileStatus) throws IOException {
    CompressionCodecFactory compressionFactory = new CompressionCodecFactory(new Configuration());
    InputStream is = null;
    FileSystem fs = ShifuFileUtils.getFileSystemBySourceType(sourceType);
    CompressionCodec codec = compressionFactory.getCodec(fileStatus.getPath());
    if (codec != null) {
        is = codec.createInputStream(fs.open(fileStatus.getPath()));
    } else {
        is = fs.open(fileStatus.getPath());
    }
    return is;
}
Also used : CompressionCodecFactory(org.apache.hadoop.io.compress.CompressionCodecFactory) Configuration(org.apache.hadoop.conf.Configuration) InputStream(java.io.InputStream) FileSystem(org.apache.hadoop.fs.FileSystem) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec)
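
CompressionCodecFactory.getCodec matches the file-name suffix against the registered codecs and returns null for unrecognized extensions, which is exactly what the fall-through branch above handles. A small sketch (the codec list is an assumption; these are the stock Hadoop codec classes):

Configuration conf = new Configuration();
// Codecs are matched by their default file suffixes (.gz, .bz2, .snappy).
conf.set("io.compression.codecs",
        "org.apache.hadoop.io.compress.GzipCodec,"
                + "org.apache.hadoop.io.compress.BZip2Codec,"
                + "org.apache.hadoop.io.compress.SnappyCodec");
CompressionCodecFactory factory = new CompressionCodecFactory(conf);
CompressionCodec codec = factory.getCodec(new Path("/logs/part-00000.gz"));
System.out.println(codec == null ? "no codec registered" : codec.getClass().getSimpleName()); // GzipCodec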

Aggregations

CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec) 111
Path (org.apache.hadoop.fs.Path) 54
FileSystem (org.apache.hadoop.fs.FileSystem) 41
Configuration (org.apache.hadoop.conf.Configuration) 38
CompressionCodecFactory (org.apache.hadoop.io.compress.CompressionCodecFactory) 37
InputStream (java.io.InputStream) 18
IOException (java.io.IOException) 17
Test (org.junit.Test) 17
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream) 15
Text (org.apache.hadoop.io.Text) 14
Configurable (org.apache.hadoop.conf.Configurable) 10
GzipCodec (org.apache.hadoop.io.compress.GzipCodec) 10
JobConf (org.apache.hadoop.mapred.JobConf) 10
SequenceFile (org.apache.hadoop.io.SequenceFile) 9
OutputStream (java.io.OutputStream) 8
DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec) 8
FileInputStream (java.io.FileInputStream) 7
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream) 6
ByteString (com.google.protobuf.ByteString) 5
DataInputStream (java.io.DataInputStream) 5