
Example 6 with CompressionCodecFactory

Use of org.apache.hadoop.io.compress.CompressionCodecFactory in project hadoop by apache.

The class FixedLengthRecordReader, method initialize.

// This is also called from the old FixedLengthRecordReader API implementation
public void initialize(Configuration job, long splitStart, long splitLength, Path file) throws IOException {
    start = splitStart;
    end = start + splitLength;
    long partialRecordLength = start % recordLength;
    long numBytesToSkip = 0;
    if (partialRecordLength != 0) {
        numBytesToSkip = recordLength - partialRecordLength;
    }
    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);
    CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
    if (null != codec) {
        isCompressedInput = true;
        decompressor = CodecPool.getDecompressor(codec);
        CompressionInputStream cIn = codec.createInputStream(fileIn, decompressor);
        filePosition = cIn;
        inputStream = cIn;
        numRecordsRemainingInSplit = Long.MAX_VALUE;
        LOG.info("Compressed input; cannot compute number of records in the split");
    } else {
        fileIn.seek(start);
        filePosition = fileIn;
        inputStream = fileIn;
        long splitSize = end - start - numBytesToSkip;
        numRecordsRemainingInSplit = (splitSize + recordLength - 1) / recordLength;
        if (numRecordsRemainingInSplit < 0) {
            numRecordsRemainingInSplit = 0;
        }
        LOG.info("Expecting " + numRecordsRemainingInSplit + " records each with a length of " + recordLength + " bytes in the split with an effective size of " + splitSize + " bytes");
    }
    if (numBytesToSkip != 0) {
        start += inputStream.skip(numBytesToSkip);
    }
    this.pos = start;
}
Also used : CompressionCodecFactory(org.apache.hadoop.io.compress.CompressionCodecFactory) CompressionInputStream(org.apache.hadoop.io.compress.CompressionInputStream) FileSystem(org.apache.hadoop.fs.FileSystem) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec)
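The important part of this example is the alignment arithmetic at the top: a split rarely begins on a record boundary, so the reader skips forward to the next boundary and then derives how many whole records remain. A minimal standalone sketch of that same arithmetic (the record length and split offsets below are made-up illustration values, not taken from the Hadoop source above):

// Illustration only: reproduces the split-alignment math from
// FixedLengthRecordReader.initialize with hypothetical numbers.
public class FixedLengthSplitMath {
    public static void main(String[] args) {
        long recordLength = 100;    // assumed fixed record size in bytes
        long splitStart = 250;      // assumed split offset; not on a record boundary
        long splitLength = 500;     // assumed split size in bytes

        long partialRecordLength = splitStart % recordLength;                         // 50 bytes into a record
        long numBytesToSkip = (partialRecordLength != 0) ? recordLength - partialRecordLength : 0L;  // skip 50 bytes

        long splitSize = splitLength - numBytesToSkip;                                 // 450 usable bytes
        long numRecords = Math.max(0L, (splitSize + recordLength - 1) / recordLength); // ceil(450/100) = 5 records

        System.out.println("skip=" + numBytesToSkip + ", records=" + numRecords);
    }
}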

Example 7 with CompressionCodecFactory

Use of org.apache.hadoop.io.compress.CompressionCodecFactory in project hadoop by apache.

The class LineRecordReader, method initialize.

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);
    CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
    if (null != codec) {
        isCompressedInput = true;
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            if (start != 0) {
                // We have a split that covers only part of a file stored with
                // a compression codec that cannot be split.
                throw new IOException("Cannot seek in " + codec.getClass().getSimpleName() + " compressed stream");
            }
            in = new SplitLineReader(codec.createInputStream(fileIn, decompressor), job, this.recordDelimiterBytes);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new UncompressedSplitLineReader(fileIn, job, this.recordDelimiterBytes, split.getLength());
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away the first record,
    // because we always (except for the last split) read one extra line in the
    // next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}
Also used : Path (org.apache.hadoop.fs.Path) SplittableCompressionCodec (org.apache.hadoop.io.compress.SplittableCompressionCodec) Configuration (org.apache.hadoop.conf.Configuration) SplitCompressionInputStream (org.apache.hadoop.io.compress.SplitCompressionInputStream) Text (org.apache.hadoop.io.Text) IOException (java.io.IOException) CompressionCodecFactory (org.apache.hadoop.io.compress.CompressionCodecFactory) FileSystem (org.apache.hadoop.fs.FileSystem) CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec)
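Which branch the reader takes above depends entirely on what the CompressionCodecFactory resolves for the file and whether that codec implements SplittableCompressionCodec. A small hedged sketch that only probes file names and reports the outcome; the file names are made-up examples and only the stock codecs registered by default are assumed:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.SplittableCompressionCodec;

// Illustration only: report whether a path's codec (if any) supports splitting.
public class CodecProbe {
    public static void main(String[] args) {
        CompressionCodecFactory factory = new CompressionCodecFactory(new Configuration());
        for (String name : new String[] { "data.txt", "data.gz", "data.bz2" }) {
            CompressionCodec codec = factory.getCodec(new Path(name));   // matched by file extension
            if (codec == null) {
                System.out.println(name + ": uncompressed, splittable as-is");
            } else if (codec instanceof SplittableCompressionCodec) {
                System.out.println(name + ": splittable, " + codec.getClass().getSimpleName());
            } else {
                System.out.println(name + ": not splittable, " + codec.getClass().getSimpleName());
            }
        }
    }
}

With the default codec set, a .gz file would take the non-splittable branch above (the reader may only start at offset 0), while a .bz2 file would take the SplittableCompressionCodec branch with adjusted start and end offsets.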

Example 8 with CompressionCodecFactory

Use of org.apache.hadoop.io.compress.CompressionCodecFactory in project hadoop by apache.

The class HadoopLogsAnalyzer, method maybeUncompressedPath.

private LineReader maybeUncompressedPath(Path p) throws FileNotFoundException, IOException {
    CompressionCodecFactory codecs = new CompressionCodecFactory(getConf());
    inputCodec = codecs.getCodec(p);
    FileSystem fs = p.getFileSystem(getConf());
    FSDataInputStream fileIn = fs.open(p);
    if (inputCodec == null) {
        return new LineReader(fileIn, getConf());
    } else {
        inputDecompressor = CodecPool.getDecompressor(inputCodec);
        return new LineReader(inputCodec.createInputStream(fileIn, inputDecompressor), getConf());
    }
}
Also used : CompressionCodecFactory(org.apache.hadoop.io.compress.CompressionCodecFactory) FileSystem(org.apache.hadoop.fs.FileSystem) LineReader(org.apache.hadoop.util.LineReader) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream)
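This helper borrows a decompressor from CodecPool but leaves returning it to the caller (HadoopLogsAnalyzer does that elsewhere). A hedged, self-contained sketch of the full read cycle with the decompressor returned in a finally block; the input path is a made-up example:

import java.io.IOException;
import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.Decompressor;
import org.apache.hadoop.util.LineReader;

// Illustration only: print a possibly-compressed file line by line,
// returning the pooled decompressor when done.
public class MaybeCompressedCat {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path p = new Path("logs/history.gz");          // assumed input path
        FileSystem fs = p.getFileSystem(conf);
        CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(p);
        Decompressor decompressor = null;
        InputStream in = fs.open(p);
        try {
            if (codec != null) {
                decompressor = CodecPool.getDecompressor(codec);
                in = codec.createInputStream(in, decompressor);
            }
            LineReader reader = new LineReader(in, conf);
            Text line = new Text();
            while (reader.readLine(line) > 0) {
                System.out.println(line);
            }
        } finally {
            in.close();
            if (decompressor != null) {
                CodecPool.returnDecompressor(decompressor);   // hand it back to the pool
            }
        }
    }
}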

Example 9 with CompressionCodecFactory

Use of org.apache.hadoop.io.compress.CompressionCodecFactory in project hadoop by apache.

The class DefaultOutputter, method init.

@Override
public void init(Path path, Configuration conf) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
    OutputStream output;
    if (codec != null) {
        compressor = CodecPool.getCompressor(codec);
        output = codec.createOutputStream(fs.create(path), compressor);
    } else {
        output = fs.create(path);
    }
    writer = new JsonObjectMapperWriter<T>(output, conf.getBoolean("rumen.output.pretty.print", true));
}
Also used : CompressionCodecFactory(org.apache.hadoop.io.compress.CompressionCodecFactory) FileSystem(org.apache.hadoop.fs.FileSystem) OutputStream(java.io.OutputStream) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec)
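The output side mirrors the read-side pattern: the codec is chosen from the output path's extension, it wraps the stream from fs.create, and the compressor comes from (and should eventually go back to) CodecPool. A hedged sketch under those assumptions; the output path and payload are made up:

import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.Compressor;

// Illustration only: write a small compressed file, choosing the codec by extension.
public class CompressedWriterSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path out = new Path("out/trace.json.gz");      // ".gz" resolves to GzipCodec
        FileSystem fs = out.getFileSystem(conf);
        CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(out);
        Compressor compressor = null;
        OutputStream stream = fs.create(out);
        try {
            if (codec != null) {
                compressor = CodecPool.getCompressor(codec);
                stream = codec.createOutputStream(stream, compressor);
            }
            stream.write("{\"example\":true}\n".getBytes(StandardCharsets.UTF_8));
        } finally {
            stream.close();
            if (compressor != null) {
                CodecPool.returnCompressor(compressor);
            }
        }
    }
}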

Example 10 with CompressionCodecFactory

Use of org.apache.hadoop.io.compress.CompressionCodecFactory in project hadoop by apache.

The class ImageLoaderCurrent, method loadImage.

/* (non-Javadoc)
   * @see ImageLoader#processImage(java.io.DataInputStream, ImageVisitor, boolean)
   */
@Override
public void loadImage(DataInputStream in, ImageVisitor v, boolean skipBlocks) throws IOException {
    boolean done = false;
    try {
        v.start();
        v.visitEnclosingElement(ImageElement.FS_IMAGE);
        imageVersion = in.readInt();
        if (!canLoadVersion(imageVersion))
            throw new IOException("Cannot process fslayout version " + imageVersion);
        if (NameNodeLayoutVersion.supports(Feature.ADD_LAYOUT_FLAGS, imageVersion)) {
            LayoutFlags.read(in);
        }
        v.visit(ImageElement.IMAGE_VERSION, imageVersion);
        v.visit(ImageElement.NAMESPACE_ID, in.readInt());
        long numInodes = in.readLong();
        v.visit(ImageElement.GENERATION_STAMP, in.readLong());
        if (NameNodeLayoutVersion.supports(Feature.SEQUENTIAL_BLOCK_ID, imageVersion)) {
            v.visit(ImageElement.GENERATION_STAMP_V2, in.readLong());
            v.visit(ImageElement.GENERATION_STAMP_V1_LIMIT, in.readLong());
            v.visit(ImageElement.LAST_ALLOCATED_BLOCK_ID, in.readLong());
        }
        if (NameNodeLayoutVersion.supports(Feature.STORED_TXIDS, imageVersion)) {
            v.visit(ImageElement.TRANSACTION_ID, in.readLong());
        }
        if (NameNodeLayoutVersion.supports(Feature.ADD_INODE_ID, imageVersion)) {
            v.visit(ImageElement.LAST_INODE_ID, in.readLong());
        }
        boolean supportSnapshot = NameNodeLayoutVersion.supports(Feature.SNAPSHOT, imageVersion);
        if (supportSnapshot) {
            v.visit(ImageElement.SNAPSHOT_COUNTER, in.readInt());
            int numSnapshots = in.readInt();
            v.visit(ImageElement.NUM_SNAPSHOTS_TOTAL, numSnapshots);
            for (int i = 0; i < numSnapshots; i++) {
                processSnapshot(in, v);
            }
        }
        if (NameNodeLayoutVersion.supports(Feature.FSIMAGE_COMPRESSION, imageVersion)) {
            boolean isCompressed = in.readBoolean();
            v.visit(ImageElement.IS_COMPRESSED, String.valueOf(isCompressed));
            if (isCompressed) {
                String codecClassName = Text.readString(in);
                v.visit(ImageElement.COMPRESS_CODEC, codecClassName);
                CompressionCodecFactory codecFac = new CompressionCodecFactory(new Configuration());
                CompressionCodec codec = codecFac.getCodecByClassName(codecClassName);
                if (codec == null) {
                    throw new IOException("Image compression codec not supported: " + codecClassName);
                }
                in = new DataInputStream(codec.createInputStream(in));
            }
        }
        processINodes(in, v, numInodes, skipBlocks, supportSnapshot);
        subtreeMap.clear();
        dirNodeMap.clear();
        processINodesUC(in, v, skipBlocks);
        if (NameNodeLayoutVersion.supports(Feature.DELEGATION_TOKEN, imageVersion)) {
            processDelegationTokens(in, v);
        }
        if (NameNodeLayoutVersion.supports(Feature.CACHING, imageVersion)) {
            processCacheManagerState(in, v);
        }
        // FSImage
        v.leaveEnclosingElement();
        done = true;
    } finally {
        if (done) {
            v.finish();
        } else {
            v.finishAbnormally();
        }
    }
}
Also used : CompressionCodecFactory(org.apache.hadoop.io.compress.CompressionCodecFactory) Configuration(org.apache.hadoop.conf.Configuration) IOException(java.io.IOException) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) DataInputStream(java.io.DataInputStream)
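Unlike the other examples, this one resolves the codec from a class name stored inside the fsimage rather than from a file extension, using getCodecByClassName. That lookup only succeeds for codecs the factory knows about (the built-in set plus anything listed in io.compression.codecs). A hedged sketch of the same lookup with an assumed class name:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

// Illustration only: resolve a codec from its class name given as a string.
public class CodecByNameSketch {
    public static void main(String[] args) {
        String codecClassName = "org.apache.hadoop.io.compress.DefaultCodec";  // assumed stored value
        CompressionCodecFactory factory = new CompressionCodecFactory(new Configuration());
        CompressionCodec codec = factory.getCodecByClassName(codecClassName);  // null if not registered
        if (codec == null) {
            System.out.println("Codec not supported: " + codecClassName);
        } else {
            System.out.println("Resolved codec: " + codec.getClass().getName());
        }
    }
}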

Aggregations

CompressionCodecFactory (org.apache.hadoop.io.compress.CompressionCodecFactory): 22 uses
CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec): 18 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 14 uses
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 9 uses
Path (org.apache.hadoop.fs.Path): 9 uses
Configuration (org.apache.hadoop.conf.Configuration): 7 uses
IOException (java.io.IOException): 6 uses
DataInputStream (java.io.DataInputStream): 4 uses
Text (org.apache.hadoop.io.Text): 3 uses
FileSplit (org.apache.hadoop.mapreduce.lib.input.FileSplit): 3 uses
LineReader (org.apache.hadoop.util.LineReader): 3 uses
InputStream (java.io.InputStream): 2 uses
OutputStream (java.io.OutputStream): 2 uses
PcapReader (net.ripe.hadoop.pcap.PcapReader): 2 uses
CompressionInputStream (org.apache.hadoop.io.compress.CompressionInputStream): 2 uses
JobConf (org.apache.hadoop.mapred.JobConf): 2 uses
RDFParserBuilder (org.apache.jena.riot.RDFParserBuilder): 2 uses
JsonGenerator (com.fasterxml.jackson.core.JsonGenerator): 1 use
Slice (io.airlift.slice.Slice): 1 use
BufferedInputStream (java.io.BufferedInputStream): 1 use