
Example 31 with CompressionCodec

Use of org.apache.hadoop.io.compress.CompressionCodec in project carbondata by apache.

The class AbstractDFSCarbonFile, method getDataOutputStream:

@Override
public DataOutputStream getDataOutputStream(String path, FileFactory.FileType fileType, int bufferSize, String compressor) throws IOException {
    // Normalize Windows-style separators before building an HDFS path.
    path = path.replace("\\", "/");
    Path pt = new Path(path);
    OutputStream outputStream;
    if (bufferSize <= 0) {
        outputStream = fs.create(pt);
    } else {
        outputStream = fs.create(pt, true, bufferSize);
    }
    // If a compressor is configured, wrap the raw stream in the matching
    // codec's compressing stream; otherwise write uncompressed.
    String codecName = getCodecNameFromCompressor(compressor);
    if (!codecName.isEmpty()) {
        CompressionCodec codec = new CompressionCodecFactory(hadoopConf).getCodecByName(codecName);
        outputStream = codec.createOutputStream(outputStream);
    }
    return new DataOutputStream(new BufferedOutputStream(outputStream));
}
Also used : Path(org.apache.hadoop.fs.Path) CompressionCodecFactory(org.apache.hadoop.io.compress.CompressionCodecFactory) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) DataOutputStream(java.io.DataOutputStream) BufferedOutputStream(java.io.BufferedOutputStream) OutputStream(java.io.OutputStream) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec)
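
For context, a minimal standalone sketch of the write path this method implements. The output path /tmp/demo.txt.gz and the codec alias "gzip" are illustrative assumptions; the rest uses only standard Hadoop APIs (CompressionCodecFactory.getCodecByName resolves aliases such as "gzip" to GzipCodec).

import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.OutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class CompressedWriteSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path out = new Path("/tmp/demo.txt.gz"); // hypothetical output path
        OutputStream raw = fs.create(out, true);
        // Resolve the codec by name, as getDataOutputStream does above.
        CompressionCodec codec = new CompressionCodecFactory(conf).getCodecByName("gzip");
        DataOutputStream dos = new DataOutputStream(
            new BufferedOutputStream(codec.createOutputStream(raw)));
        dos.writeBytes("hello, compressed world\n");
        dos.close();
    }
}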

Example 32 with CompressionCodec

Use of org.apache.hadoop.io.compress.CompressionCodec in project accumulo by apache.

The class CompressionTest, method testManyStartNotNull:

@Test(timeout = 60 * 1000)
public void testManyStartNotNull() throws IOException, InterruptedException, ExecutionException {
    for (final Algorithm al : Algorithm.values()) {
        if (isSupported.get(al) != null && isSupported.get(al)) {
            // the first call to isSupported should be true
            Assert.assertTrue(al + " is not supported, but should be", al.isSupported());
            final CompressionCodec codec = al.getCodec();
            Assert.assertNotNull(al + " should not be null", codec);
            ExecutorService service = Executors.newFixedThreadPool(10);
            ArrayList<Future<Boolean>> results = new ArrayList<>();
            for (int i = 0; i < 30; i++) {
                results.add(service.submit(new Callable<Boolean>() {

                    @Override
                    public Boolean call() throws Exception {
                        Assert.assertNotNull(al + " should not be null", al.getCodec());
                        return true;
                    }
                }));
            }
            service.shutdown();
            Assert.assertNotNull(al + " should not be null", codec);
            while (!service.awaitTermination(1, TimeUnit.SECONDS)) {
                // wait for all submitted lookups to finish
            }
            for (Future<Boolean> result : results) {
                Assert.assertTrue(al + " resulted in a failed call to getCodec within the thread pool", result.get());
            }
        }
    }
}
Also used : ExecutorService(java.util.concurrent.ExecutorService) ArrayList(java.util.ArrayList) Future(java.util.concurrent.Future) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) Algorithm(org.apache.accumulo.core.file.rfile.bcfile.Compression.Algorithm) Callable(java.util.concurrent.Callable) Test(org.junit.Test)
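
The same concurrency check can be written more compactly with Java 8 lambdas and invokeAll, which blocks until every task completes. The self-contained sketch below substitutes GzipCodec instantiation for Accumulo's Algorithm.getCodec(), so it is an approximation of the test, not a drop-in replacement:

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class ConcurrentCodecSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        ExecutorService service = Executors.newFixedThreadPool(10);
        List<Callable<CompressionCodec>> tasks = new ArrayList<>();
        for (int i = 0; i < 30; i++) {
            // Each task creates a codec, standing in for al.getCodec().
            tasks.add(() -> ReflectionUtils.newInstance(GzipCodec.class, conf));
        }
        // invokeAll submits everything and waits for completion.
        for (Future<CompressionCodec> f : service.invokeAll(tasks)) {
            if (f.get() == null) {
                throw new AssertionError("codec lookup returned null");
            }
        }
        service.shutdown();
    }
}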

Example 33 with CompressionCodec

Use of org.apache.hadoop.io.compress.CompressionCodec in project accumulo by apache.

The class CompressionTest, method testSupport:

@Before
public void testSupport() {
    // we can safely assert that GZ exists because GzipCodec ships with Hadoop itself
    isSupported.put(Compression.Algorithm.GZ, true);
    Configuration myConf = new Configuration();
    String extClazz = System.getProperty(Compression.Algorithm.CONF_LZO_CLASS);
    String clazz = (extClazz != null) ? extClazz : "org.apache.hadoop.io.compress.LzoCodec";
    try {
        CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(Class.forName(clazz), myConf);
        Assert.assertNotNull(codec);
        isSupported.put(Compression.Algorithm.LZO, true);
    } catch (ClassNotFoundException e) {
        // LZO codec not on the classpath; leave it marked unsupported
    }
    extClazz = System.getProperty(Compression.Algorithm.CONF_SNAPPY_CLASS);
    clazz = (extClazz != null) ? extClazz : "org.apache.hadoop.io.compress.SnappyCodec";
    try {
        CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(Class.forName(clazz), myConf);
        Assert.assertNotNull(codec);
        isSupported.put(Compression.Algorithm.SNAPPY, true);
    } catch (ClassNotFoundException e) {
        // Snappy codec not on the classpath; leave it marked unsupported
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) Before(org.junit.Before)
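
The reflective probe above can be factored into a small standalone helper, sketched below. Note a caveat the test glosses over: Class.forName only proves the codec class is on the classpath; LZO and Snappy may still fail at stream-creation time if their native libraries are absent. The helper name codecAvailable is ours, not Accumulo's.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class CodecProbeSketch {
    // Returns true if the named codec class loads and instantiates.
    // This does not guarantee the native library (liblzo2, libsnappy)
    // is present; stream creation can still fail later.
    static boolean codecAvailable(String className, Configuration conf) {
        try {
            ReflectionUtils.newInstance(
                Class.forName(className).asSubclass(CompressionCodec.class), conf);
            return true;
        } catch (ClassNotFoundException | RuntimeException e) {
            return false;
        }
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        System.out.println("lzo: "
            + codecAvailable("org.apache.hadoop.io.compress.LzoCodec", conf));
        System.out.println("snappy: "
            + codecAvailable("org.apache.hadoop.io.compress.SnappyCodec", conf));
    }
}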

Example 34 with CompressionCodec

Use of org.apache.hadoop.io.compress.CompressionCodec in project elephant-bird by twitter.

The class LzoRecordReader, method initialize:

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    start_ = split.getStart();
    end_ = start_ + split.getLength();
    final Path file = split.getPath();
    Configuration job = HadoopCompat.getConfiguration(context);
    errorTracker = new InputErrorTracker(job);
    LOG.info("input split: " + file + " " + start_ + ":" + end_);
    FileSystem fs = file.getFileSystem(job);
    CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    if (codec == null) {
        throw new IOException("No codec for file " + file + " found, cannot run");
    }
    // Open the file and seek to the start of the split.
    fileIn_ = fs.open(split.getPath());
    // Creates input stream and also reads the file header.
    createInputReader(codec.createInputStream(fileIn_), job);
    if (start_ != 0) {
        fileIn_.seek(start_);
        skipToNextSyncPoint(false);
        start_ = fileIn_.getPos();
        LOG.info("Start is now " + start_);
    } else {
        skipToNextSyncPoint(true);
    }
    pos_ = start_;
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) CompressionCodecFactory(org.apache.hadoop.io.compress.CompressionCodecFactory) FileSystem(org.apache.hadoop.fs.FileSystem) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) IOException(java.io.IOException) FileSplit(org.apache.hadoop.mapreduce.lib.input.FileSplit)
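
For context, the complementary general-purpose read path looks like the sketch below: CompressionCodecFactory.getCodec picks the codec from the file extension, exactly as initialize does, except that here a file with no matching codec is read uncompressed rather than rejected. The input path is an illustrative assumption.

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class CompressedReadSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path file = new Path("/tmp/demo.txt.gz"); // hypothetical input path
        FileSystem fs = file.getFileSystem(conf);
        // Pick the codec from the file extension, as LzoRecordReader does.
        CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(file);
        InputStream in = (codec == null)
            ? fs.open(file)                            // plain, uncompressed file
            : codec.createInputStream(fs.open(file));  // decompressing wrapper
        try (BufferedReader reader =
                 new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
            System.out.println(reader.readLine());
        }
    }
}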

Example 35 with CompressionCodec

Use of org.apache.hadoop.io.compress.CompressionCodec in project elephant-bird by twitter.

The class RCFileOutputFormat, method createRCFileWriter:

protected RCFile.Writer createRCFileWriter(TaskAttemptContext job, Text columnMetadata) throws IOException {
    Configuration conf = HadoopCompat.getConfiguration(job);
    // override compression codec if set.
    String codecOverride = conf.get(COMPRESSION_CODEC_CONF);
    if (codecOverride != null) {
        conf.setBoolean("mapred.output.compress", true);
        conf.set("mapred.output.compression.codec", codecOverride);
    }
    CompressionCodec codec = null;
    if (getCompressOutput(job)) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
    }
    Metadata metadata = null;
    if (columnMetadata != null) {
        metadata = new Metadata();
        metadata.set(new Text(RCFileUtil.COLUMN_METADATA_PROTOBUF_KEY), columnMetadata);
    }
    String ext = conf.get(EXTENSION_OVERRIDE_CONF, DEFAULT_EXTENSION);
    Path file = getDefaultWorkFile(job, ext.equalsIgnoreCase("none") ? null : ext);
    LOG.info("writing to rcfile " + file.toString());
    return new RCFile.Writer(file.getFileSystem(conf), conf, file, job, metadata, codec);
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) Metadata(org.apache.hadoop.io.SequenceFile.Metadata) Text(org.apache.hadoop.io.Text) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) RecordWriter(org.apache.hadoop.mapreduce.RecordWriter)
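
A minimal driver-side sketch of how output compression gets enabled so that getCompressOutput and getOutputCompressorClass in createRCFileWriter pick up a codec. It uses only the standard mapreduce FileOutputFormat helpers; elephant-bird's COMPRESSION_CODEC_CONF override key is not set here because its value is not given in this excerpt.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class RCFileJobSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "rcfile-write");
        // These two settings are what getCompressOutput(job) and
        // getOutputCompressorClass(job, GzipCodec.class) consult above.
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    }
}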

Aggregations

CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec): 110
Path (org.apache.hadoop.fs.Path): 53
FileSystem (org.apache.hadoop.fs.FileSystem): 41
Configuration (org.apache.hadoop.conf.Configuration): 37
CompressionCodecFactory (org.apache.hadoop.io.compress.CompressionCodecFactory): 36
InputStream (java.io.InputStream): 17
Test (org.junit.Test): 17
IOException (java.io.IOException): 16
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 14
Text (org.apache.hadoop.io.Text): 14
Configurable (org.apache.hadoop.conf.Configurable): 10
GzipCodec (org.apache.hadoop.io.compress.GzipCodec): 10
JobConf (org.apache.hadoop.mapred.JobConf): 10
SequenceFile (org.apache.hadoop.io.SequenceFile): 9
OutputStream (java.io.OutputStream): 8
DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec): 8
FileInputStream (java.io.FileInputStream): 7
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 6
CompressionInputStream (org.apache.hadoop.io.compress.CompressionInputStream): 6
ByteString (com.google.protobuf.ByteString): 5