
Example 61 with CompressionCodec

use of org.apache.hadoop.io.compress.CompressionCodec in project apex-malhar by apache.

the class AbstractFileOutputOperatorTest method checkSnappyFile.

private void checkSnappyFile(File file, List<Long> offsets, int startVal, int totalWindows, int totalRecords) throws IOException {
    FileInputStream fis;
    InputStream gss = null;
    Configuration conf = new Configuration();
    CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(SnappyCodec.class, conf);
    CompressionInputStream snappyIs = null;
    BufferedReader br = null;
    int numWindows = 0;
    try {
        fis = new FileInputStream(file);
        gss = fis;
        long startOffset = 0;
        // Each offset delimits a compressed segment in the file; LimitInputStream restricts
        // decoding to the bytes of the current segment so each one is decoded independently.
        for (long offset : offsets) {
            // Skip initial case in case file is not yet created
            if (offset == 0) {
                continue;
            }
            long limit = offset - startOffset;
            LimitInputStream lis = new LimitInputStream(gss, limit);
            snappyIs = codec.createInputStream(lis);
            br = new BufferedReader(new InputStreamReader(snappyIs));
            // expected line value for the current window
            String eline = "" + (startVal + numWindows * 2);
            int count = 0;
            String line;
            while ((line = br.readLine()) != null) {
                Assert.assertEquals("File line", eline, line);
                ++count;
                if ((count % totalRecords) == 0) {
                    ++numWindows;
                    eline = "" + (startVal + numWindows * 2);
                }
            }
            startOffset = offset;
        }
    } catch (Exception e) {
        // IO problems are only logged; AssertionErrors thrown by the asserts above are
        // Errors, not Exceptions, so assertion failures still propagate to the test runner.
        e.printStackTrace();
    } finally {
        // Closing the outermost wrapper also closes the streams it wraps
        if (br != null) {
            br.close();
        } else {
            if (snappyIs != null) {
                snappyIs.close();
            } else if (gss != null) {
                gss.close();
            }
        }
    }
    Assert.assertEquals("Total", totalWindows, numWindows);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) InputStreamReader(java.io.InputStreamReader) CompressionInputStream(org.apache.hadoop.io.compress.CompressionInputStream) GZIPInputStream(java.util.zip.GZIPInputStream) LimitInputStream(com.google.common.io.LimitInputStream) CipherInputStream(javax.crypto.CipherInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) NoSuchAlgorithmException(java.security.NoSuchAlgorithmException) IOException(java.io.IOException) ConstraintViolationException(javax.validation.ConstraintViolationException) BufferedReader(java.io.BufferedReader) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) SnappyCodec(org.apache.hadoop.io.compress.SnappyCodec)
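For orientation, here is a minimal stand-alone sketch (not taken from apex-malhar) of the decode pattern the test relies on: instantiate the codec through ReflectionUtils and let createInputStream handle the decompression. The helper name printDecompressed is hypothetical, and native Snappy support is assumed to be available at runtime.

private static void printDecompressed(File file) throws IOException {
    Configuration conf = new Configuration();
    CompressionCodec codec = ReflectionUtils.newInstance(SnappyCodec.class, conf);
    try (InputStream in = codec.createInputStream(new FileInputStream(file));
         BufferedReader reader = new BufferedReader(new InputStreamReader(in))) {
        String line;
        while ((line = reader.readLine()) != null) {
            // print each decompressed line as-is
            System.out.println(line);
        }
    }
}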

Example 62 with CompressionCodec

use of org.apache.hadoop.io.compress.CompressionCodec in project hadoop-pcap by RIPE-NCC.

the class PcapInputFormat method initPcapRecordReader.

public static PcapRecordReader initPcapRecordReader(Path path, long start, long length, TaskAttemptContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    FileSystem fs = path.getFileSystem(conf);
    FSDataInputStream baseStream = fs.open(path);
    DataInputStream stream = baseStream;
    CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(conf);
    final CompressionCodec codec = compressionCodecs.getCodec(path);
    // Wrap with a decompressing stream when the path's extension maps to a registered codec
    if (codec != null)
        stream = new DataInputStream(codec.createInputStream(stream));
    PcapReader reader = initPcapReader(stream, conf);
    return new PcapRecordReader(reader, start, length, baseStream, stream, context);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) CompressionCodecFactory(org.apache.hadoop.io.compress.CompressionCodecFactory) PcapReader(net.ripe.hadoop.pcap.PcapReader) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) PcapRecordReader(net.ripe.hadoop.pcap.io.reader.PcapRecordReader) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) DataInputStream(java.io.DataInputStream)
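The codec lookup above generalizes to any input path. A hedged sketch of that pattern (not part of hadoop-pcap; the method name is illustrative):

public static InputStream openMaybeCompressed(FileSystem fs, Path path, Configuration conf) throws IOException {
    FSDataInputStream raw = fs.open(path);
    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
    // getCodec returns null when the extension matches no registered codec,
    // in which case the raw stream is returned unchanged
    return codec != null ? codec.createInputStream(raw) : raw;
}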

Example 63 with CompressionCodec

use of org.apache.hadoop.io.compress.CompressionCodec in project hadoop-pcap by RIPE-NCC.

the class PcapInputFormat method initPcapRecordReader.

public static PcapRecordReader initPcapRecordReader(Path path, long start, long length, Reporter reporter, Configuration conf) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    FSDataInputStream baseStream = fs.open(path);
    DataInputStream stream = baseStream;
    CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(conf);
    final CompressionCodec codec = compressionCodecs.getCodec(path);
    if (codec != null)
        stream = new DataInputStream(codec.createInputStream(stream));
    PcapReader reader = initPcapReader(stream, conf);
    return new PcapRecordReader(reader, start, length, baseStream, stream, reporter);
}
Also used : CompressionCodecFactory(org.apache.hadoop.io.compress.CompressionCodecFactory) PcapReader(net.ripe.hadoop.pcap.PcapReader) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) PcapRecordReader(net.ripe.hadoop.pcap.mr1.io.reader.PcapRecordReader) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) DataInputStream(java.io.DataInputStream)
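Note that Examples 62 and 63 differ only in the MapReduce API they target: the first builds a PcapRecordReader for the new API (TaskAttemptContext, net.ripe.hadoop.pcap.io.reader), the second for the classic mapred API (Reporter, net.ripe.hadoop.pcap.mr1.io.reader). The codec handling is identical in both overloads.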

Example 64 with CompressionCodec

use of org.apache.hadoop.io.compress.CompressionCodec in project druid by druid-io.

the class Utils method makePathAndOutputStream.

public static OutputStream makePathAndOutputStream(JobContext job, Path outputPath, boolean deleteExisting) throws IOException {
    OutputStream retVal;
    FileSystem fs = outputPath.getFileSystem(job.getConfiguration());
    Class<? extends CompressionCodec> codecClass;
    CompressionCodec codec = null;
    // When output compression is enabled, resolve the configured codec (defaulting to gzip)
    // and append its default extension to the output path.
    if (FileOutputFormat.getCompressOutput(job)) {
        codecClass = FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        outputPath = new Path(outputPath.toString() + codec.getDefaultExtension());
    }
    if (fs.exists(outputPath)) {
        if (deleteExisting) {
            fs.delete(outputPath, false);
        } else {
            throw new ISE("outputPath[%s] must not exist.", outputPath);
        }
    }
    if (FileOutputFormat.getCompressOutput(job)) {
        // codec was initialized above when compression is enabled
        retVal = codec.createOutputStream(fs.create(outputPath, false));
    } else {
        retVal = fs.create(outputPath, false);
    }
    return retVal;
}
Also used : Path(org.apache.hadoop.fs.Path) OutputStream(java.io.OutputStream) FileSystem(org.apache.hadoop.fs.FileSystem) GzipCodec(org.apache.hadoop.io.compress.GzipCodec) ISE(io.druid.java.util.common.ISE) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec)
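As a hedged illustration of the compressed branch above (not Druid code; the helper name and the fixed GzipCodec are assumptions), creating a compressed output stream follows the same two steps: append the codec's default extension to the path, then wrap the stream returned by the FileSystem.

public static OutputStream createCompressedOutput(FileSystem fs, Path path, Configuration conf) throws IOException {
    CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, conf);
    // getDefaultExtension() yields ".gz" for GzipCodec
    Path compressedPath = new Path(path.toString() + codec.getDefaultExtension());
    return codec.createOutputStream(fs.create(compressedPath, false));
}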

Example 65 with CompressionCodec

use of org.apache.hadoop.io.compress.CompressionCodec in project presto by prestodb.

the class TestOrcPageSourceMemoryTracking method createTestFile.

public static FileSplit createTestFile(String filePath, HiveOutputFormat<?, ?> outputFormat, @SuppressWarnings("deprecation") SerDe serDe, String compressionCodec, List<TestColumn> testColumns, int numRows) throws Exception {
    // filter out partition keys, which are not written to the file
    testColumns = ImmutableList.copyOf(filter(testColumns, not(TestColumn::isPartitionKey)));
    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", Joiner.on(',').join(transform(testColumns, TestColumn::getName)));
    tableProperties.setProperty("columns.types", Joiner.on(',').join(transform(testColumns, TestColumn::getType)));
    serDe.initialize(CONFIGURATION, tableProperties);
    JobConf jobConf = new JobConf();
    if (compressionCodec != null) {
        // Resolve the requested codec by name and ask the writer for BLOCK compression
        CompressionCodec codec = new CompressionCodecFactory(CONFIGURATION).getCodecByName(compressionCodec);
        jobConf.set(COMPRESS_CODEC, codec.getClass().getName());
        jobConf.set(COMPRESS_TYPE, SequenceFile.CompressionType.BLOCK.toString());
    }
    RecordWriter recordWriter = createRecordWriter(new Path(filePath), CONFIGURATION);
    try {
        SettableStructObjectInspector objectInspector = getStandardStructObjectInspector(ImmutableList.copyOf(transform(testColumns, TestColumn::getName)), ImmutableList.copyOf(transform(testColumns, TestColumn::getObjectInspector)));
        Object row = objectInspector.create();
        List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
        for (int rowNumber = 0; rowNumber < numRows; rowNumber++) {
            for (int i = 0; i < testColumns.size(); i++) {
                Object writeValue = testColumns.get(i).getWriteValue();
                if (writeValue instanceof Slice) {
                    writeValue = ((Slice) writeValue).getBytes();
                }
                objectInspector.setStructFieldData(row, fields.get(i), writeValue);
            }
            Writable record = serDe.serialize(row, objectInspector);
            recordWriter.write(record);
            if (rowNumber % STRIPE_ROWS == STRIPE_ROWS - 1) {
                flushStripe(recordWriter);
            }
        }
    } finally {
        recordWriter.close(false);
    }
    Path path = new Path(filePath);
    path.getFileSystem(CONFIGURATION).setVerifyChecksum(true);
    File file = new File(filePath);
    return new FileSplit(path, 0, file.length(), new String[0]);
}
Also used : Path(org.apache.hadoop.fs.Path) Writable(org.apache.hadoop.io.Writable) Properties(java.util.Properties) FileSplit(org.apache.hadoop.mapred.FileSplit) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) CompressionCodecFactory(org.apache.hadoop.io.compress.CompressionCodecFactory) Slice(io.airlift.slice.Slice) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) JobConf(org.apache.hadoop.mapred.JobConf) SequenceFile(org.apache.hadoop.io.SequenceFile) File(java.io.File)
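The codec-by-name lookup used when compressionCodec is non-null can be sketched in isolation (not Presto code; "gzip" is only an example name, and getCodecByName returns null for names it cannot resolve):

Configuration conf = new Configuration();
CompressionCodec codec = new CompressionCodecFactory(conf).getCodecByName("gzip");
// the test stores the resolved class name in the JobConf so the record writer picks it up
String codecClassName = codec.getClass().getName();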

Aggregations

CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec): 111 usages
Path (org.apache.hadoop.fs.Path): 54 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 41 usages
Configuration (org.apache.hadoop.conf.Configuration): 38 usages
CompressionCodecFactory (org.apache.hadoop.io.compress.CompressionCodecFactory): 37 usages
InputStream (java.io.InputStream): 18 usages
IOException (java.io.IOException): 17 usages
Test (org.junit.Test): 17 usages
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 15 usages
Text (org.apache.hadoop.io.Text): 14 usages
Configurable (org.apache.hadoop.conf.Configurable): 10 usages
GzipCodec (org.apache.hadoop.io.compress.GzipCodec): 10 usages
JobConf (org.apache.hadoop.mapred.JobConf): 10 usages
SequenceFile (org.apache.hadoop.io.SequenceFile): 9 usages
OutputStream (java.io.OutputStream): 8 usages
DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec): 8 usages
FileInputStream (java.io.FileInputStream): 7 usages
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 6 usages
ByteString (com.google.protobuf.ByteString): 5 usages
DataInputStream (java.io.DataInputStream): 5 usages