Use of org.apache.hadoop.io.compress.CompressionCodecFactory in project hadoop by apache.
The class FSImageCompression, method createCompression.
/**
 * Create a compression instance using the codec specified by
 * <code>codecClassName</code>.
 */
static FSImageCompression createCompression(Configuration conf, String codecClassName)
    throws IOException {
  CompressionCodecFactory factory = new CompressionCodecFactory(conf);
  CompressionCodec codec = factory.getCodecByClassName(codecClassName);
  if (codec == null) {
    throw new IOException("Not a supported codec: " + codecClassName);
  }
  return new FSImageCompression(codec);
}
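For context, here is a minimal, self-contained sketch of the same lookup outside the FSImage code. The class CodecByClassNameDemo and its resolve helper are illustrative, not part of Hadoop; GzipCodec ships with Hadoop, so the default configuration resolves it.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

// Illustrative class, not part of Hadoop.
public class CodecByClassNameDemo {
  static CompressionCodec resolve(String codecClassName) throws IOException {
    // The factory reads io.compression.codecs (plus built-ins) from the Configuration.
    CompressionCodecFactory factory = new CompressionCodecFactory(new Configuration());
    CompressionCodec codec = factory.getCodecByClassName(codecClassName);
    if (codec == null) {
      throw new IOException("Not a supported codec: " + codecClassName);
    }
    return codec;
  }

  public static void main(String[] args) throws IOException {
    // GzipCodec ships with Hadoop, so this lookup succeeds by default.
    CompressionCodec codec = resolve("org.apache.hadoop.io.compress.GzipCodec");
    System.out.println("Resolved codec, default extension: " + codec.getDefaultExtension());
  }
}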
Use of org.apache.hadoop.io.compress.CompressionCodecFactory in project hadoop by apache.
The class ImageLoaderCurrent, method loadImage.
/* (non-Javadoc)
 * @see ImageLoader#processImage(java.io.DataInputStream, ImageVisitor, boolean)
 */
@Override
public void loadImage(DataInputStream in, ImageVisitor v, boolean skipBlocks)
    throws IOException {
  boolean done = false;
  try {
    v.start();
    v.visitEnclosingElement(ImageElement.FS_IMAGE);
    imageVersion = in.readInt();
    if (!canLoadVersion(imageVersion))
      throw new IOException("Cannot process fslayout version " + imageVersion);
    if (NameNodeLayoutVersion.supports(Feature.ADD_LAYOUT_FLAGS, imageVersion)) {
      LayoutFlags.read(in);
    }
    v.visit(ImageElement.IMAGE_VERSION, imageVersion);
    v.visit(ImageElement.NAMESPACE_ID, in.readInt());
    long numInodes = in.readLong();
    v.visit(ImageElement.GENERATION_STAMP, in.readLong());
    if (NameNodeLayoutVersion.supports(Feature.SEQUENTIAL_BLOCK_ID, imageVersion)) {
      v.visit(ImageElement.GENERATION_STAMP_V2, in.readLong());
      v.visit(ImageElement.GENERATION_STAMP_V1_LIMIT, in.readLong());
      v.visit(ImageElement.LAST_ALLOCATED_BLOCK_ID, in.readLong());
    }
    if (NameNodeLayoutVersion.supports(Feature.STORED_TXIDS, imageVersion)) {
      v.visit(ImageElement.TRANSACTION_ID, in.readLong());
    }
    if (NameNodeLayoutVersion.supports(Feature.ADD_INODE_ID, imageVersion)) {
      v.visit(ImageElement.LAST_INODE_ID, in.readLong());
    }
    boolean supportSnapshot = NameNodeLayoutVersion.supports(Feature.SNAPSHOT, imageVersion);
    if (supportSnapshot) {
      v.visit(ImageElement.SNAPSHOT_COUNTER, in.readInt());
      int numSnapshots = in.readInt();
      v.visit(ImageElement.NUM_SNAPSHOTS_TOTAL, numSnapshots);
      for (int i = 0; i < numSnapshots; i++) {
        processSnapshot(in, v);
      }
    }
    if (NameNodeLayoutVersion.supports(Feature.FSIMAGE_COMPRESSION, imageVersion)) {
      boolean isCompressed = in.readBoolean();
      v.visit(ImageElement.IS_COMPRESSED, String.valueOf(isCompressed));
      if (isCompressed) {
        String codecClassName = Text.readString(in);
        v.visit(ImageElement.COMPRESS_CODEC, codecClassName);
        CompressionCodecFactory codecFac = new CompressionCodecFactory(new Configuration());
        CompressionCodec codec = codecFac.getCodecByClassName(codecClassName);
        if (codec == null) {
          throw new IOException("Image compression codec not supported: " + codecClassName);
        }
        in = new DataInputStream(codec.createInputStream(in));
      }
    }
    processINodes(in, v, numInodes, skipBlocks, supportSnapshot);
    subtreeMap.clear();
    dirNodeMap.clear();
    processINodesUC(in, v, skipBlocks);
    if (NameNodeLayoutVersion.supports(Feature.DELEGATION_TOKEN, imageVersion)) {
      processDelegationTokens(in, v);
    }
    if (NameNodeLayoutVersion.supports(Feature.CACHING, imageVersion)) {
      processCacheManagerState(in, v);
    }
    // FSImage
    v.leaveEnclosingElement();
    done = true;
  } finally {
    if (done) {
      v.finish();
    } else {
      v.finishAbnormally();
    }
  }
}
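The compression handling in the middle of loadImage can be isolated into a small helper. The following sketch is hypothetical (MaybeDecompress is not a Hadoop class); it shows the same idiom: read the compression flag and codec class name from the header, then re-wrap the stream with the codec's decompressing input stream.

import java.io.DataInputStream;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

// Hypothetical helper, not part of Hadoop.
public final class MaybeDecompress {
  static DataInputStream wrap(DataInputStream in) throws IOException {
    // The image header stores a boolean flag, then the codec class name.
    boolean isCompressed = in.readBoolean();
    if (!isCompressed) {
      return in;
    }
    String codecClassName = Text.readString(in);
    CompressionCodec codec = new CompressionCodecFactory(new Configuration())
        .getCodecByClassName(codecClassName);
    if (codec == null) {
      throw new IOException("Image compression codec not supported: " + codecClassName);
    }
    // All subsequent reads are routed through the decompressor.
    return new DataInputStream(codec.createInputStream(in));
  }
}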
Use of org.apache.hadoop.io.compress.CompressionCodecFactory in project hadoop by apache.
The class FixedLengthRecordReader, method initialize.
// This is also called from the old FixedLengthRecordReader API implementation
public void initialize(Configuration job, long splitStart, long splitLength, Path file)
    throws IOException {
  start = splitStart;
  end = start + splitLength;
  long partialRecordLength = start % recordLength;
  long numBytesToSkip = 0;
  if (partialRecordLength != 0) {
    numBytesToSkip = recordLength - partialRecordLength;
  }
  // open the file and seek to the start of the split
  final FileSystem fs = file.getFileSystem(job);
  fileIn = fs.open(file);
  CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
  if (null != codec) {
    isCompressedInput = true;
    decompressor = CodecPool.getDecompressor(codec);
    CompressionInputStream cIn = codec.createInputStream(fileIn, decompressor);
    filePosition = cIn;
    inputStream = cIn;
    numRecordsRemainingInSplit = Long.MAX_VALUE;
    LOG.info("Compressed input; cannot compute number of records in the split");
  } else {
    fileIn.seek(start);
    filePosition = fileIn;
    inputStream = fileIn;
    long splitSize = end - start - numBytesToSkip;
    numRecordsRemainingInSplit = (splitSize + recordLength - 1) / recordLength;
    if (numRecordsRemainingInSplit < 0) {
      numRecordsRemainingInSplit = 0;
    }
    LOG.info("Expecting " + numRecordsRemainingInSplit
        + " records each with a length of " + recordLength
        + " bytes in the split with an effective size of "
        + splitSize + " bytes");
  }
  if (numBytesToSkip != 0) {
    start += inputStream.skip(numBytesToSkip);
  }
  this.pos = start;
}
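The boundary arithmetic in the uncompressed branch is easy to check standalone. This illustrative snippet (SplitAlignmentDemo is not a Hadoop class, and the numbers are made up) walks through the same math: a split starting mid-record skips ahead to the next record boundary, and the record count rounds up over the effective split size.

// Illustrative class, not part of Hadoop.
public class SplitAlignmentDemo {
  public static void main(String[] args) {
    long recordLength = 100;
    long splitStart = 250;  // split begins 50 bytes into a record
    long splitLength = 400;

    long partialRecordLength = splitStart % recordLength;                   // 50
    long numBytesToSkip =
        partialRecordLength == 0 ? 0 : recordLength - partialRecordLength;  // 50
    long splitSize = splitLength - numBytesToSkip;                          // 350
    long records = (splitSize + recordLength - 1) / recordLength;           // 4 (rounds up)

    System.out.println("skip=" + numBytesToSkip + " bytes, expect " + records + " records");
  }
}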
Use of org.apache.hadoop.io.compress.CompressionCodecFactory in project hadoop by apache.
The class LineRecordReader, method initialize.
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
    throws IOException {
  FileSplit split = (FileSplit) genericSplit;
  Configuration job = context.getConfiguration();
  this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
  start = split.getStart();
  end = start + split.getLength();
  final Path file = split.getPath();
  // open the file and seek to the start of the split
  final FileSystem fs = file.getFileSystem(job);
  fileIn = fs.open(file);
  CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
  if (null != codec) {
    isCompressedInput = true;
    decompressor = CodecPool.getDecompressor(codec);
    if (codec instanceof SplittableCompressionCodec) {
      final SplitCompressionInputStream cIn =
          ((SplittableCompressionCodec) codec).createInputStream(
              fileIn, decompressor, start, end,
              SplittableCompressionCodec.READ_MODE.BYBLOCK);
      in = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes);
      start = cIn.getAdjustedStart();
      end = cIn.getAdjustedEnd();
      filePosition = cIn;
    } else {
      if (start != 0) {
        // a compression codec that cannot be split
        throw new IOException("Cannot seek in "
            + codec.getClass().getSimpleName() + " compressed stream");
      }
      in = new SplitLineReader(codec.createInputStream(fileIn, decompressor),
          job, this.recordDelimiterBytes);
      filePosition = fileIn;
    }
  } else {
    fileIn.seek(start);
    in = new UncompressedSplitLineReader(fileIn, job, this.recordDelimiterBytes,
        split.getLength());
    filePosition = fileIn;
  }
  // If this is not the first split, we always throw away the first record
  // because we always (except for the last split) read one extra line in the
  // next() method.
  if (start != 0) {
    start += in.readLine(new Text(), 0, maxBytesToConsume(start));
  }
  this.pos = start;
}
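The key decision above is whether the codec implements SplittableCompressionCodec, since only such codecs (BZip2Codec, for example) can begin reading at a nonzero offset. Below is a minimal sketch of that check; the class SplittableCheck, the file path, and the split boundaries are assumptions for the demo, and a production reader would also return the borrowed decompressor to the CodecPool.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.SplitCompressionInputStream;
import org.apache.hadoop.io.compress.SplittableCompressionCodec;

// Illustrative class, not part of Hadoop.
public class SplittableCheck {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    Path file = new Path(args[0]); // e.g. a .bz2 file; BZip2Codec is splittable
    long start = 1024;             // illustrative nonzero split start
    long end = 4096;

    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(file);
    FSDataInputStream fileIn = file.getFileSystem(conf).open(file);
    if (codec instanceof SplittableCompressionCodec) {
      // A splittable codec may move the boundaries onto its block marks.
      SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec)
          .createInputStream(fileIn, CodecPool.getDecompressor(codec), start, end,
              SplittableCompressionCodec.READ_MODE.BYBLOCK);
      System.out.println("adjusted split: " + cIn.getAdjustedStart()
          + ".." + cIn.getAdjustedEnd());
      cIn.close();
    } else if (codec != null && start != 0) {
      // Same failure mode as the reader above: a non-splittable codec
      // cannot start mid-stream.
      throw new IOException("Cannot seek in "
          + codec.getClass().getSimpleName() + " compressed stream");
    } else {
      fileIn.seek(start); // uncompressed input: seek directly
      fileIn.close();
    }
  }
}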
Use of org.apache.hadoop.io.compress.CompressionCodecFactory in project hadoop by apache.
The class HadoopLogsAnalyzer, method maybeUncompressedPath.
private LineReader maybeUncompressedPath(Path p)
    throws FileNotFoundException, IOException {
  CompressionCodecFactory codecs = new CompressionCodecFactory(getConf());
  inputCodec = codecs.getCodec(p);
  FileSystem fs = p.getFileSystem(getConf());
  FSDataInputStream fileIn = fs.open(p);
  if (inputCodec == null) {
    return new LineReader(fileIn, getConf());
  } else {
    inputDecompressor = CodecPool.getDecompressor(inputCodec);
    return new LineReader(inputCodec.createInputStream(fileIn, inputDecompressor), getConf());
  }
}
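Note that maybeUncompressedPath borrows a decompressor from CodecPool but leaves returning it to the caller (via the inputDecompressor field). A hypothetical standalone version that completes that lifecycle might look like this; MaybeUncompressedRead is illustrative, not a Hadoop class.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.Decompressor;

// Illustrative class, not part of Hadoop.
public class MaybeUncompressedRead {
  public static void printFirstLine(Path p, Configuration conf) throws IOException {
    // Codec is chosen by file extension; null means the file is plain text.
    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(p);
    FileSystem fs = p.getFileSystem(conf);
    Decompressor decompressor = codec != null ? CodecPool.getDecompressor(codec) : null;
    try (InputStream in = codec != null
             ? codec.createInputStream(fs.open(p), decompressor)
             : fs.open(p);
         BufferedReader reader =
             new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
      System.out.println(reader.readLine());
    } finally {
      // Return the borrowed decompressor once the stream is closed.
      if (decompressor != null) {
        CodecPool.returnDecompressor(decompressor);
      }
    }
  }
}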