Use of org.apache.hadoop.io.compress.Decompressor in project hadoop by apache.
From the class TestZlibCompressorDecompressor, the method testZlibCompressorDecompressorWithCompressionLevels:
@Test
public void testZlibCompressorDecompressorWithCompressionLevels() {
  Configuration conf = new Configuration();
  conf.set("zlib.compress.level", "FOUR");
  if (ZlibFactory.isNativeZlibLoaded(conf)) {
    byte[] rawData;
    int tryNumber = 5;
    int BYTE_SIZE = 10 * 1024;
    Compressor zlibCompressor = ZlibFactory.getZlibCompressor(conf);
    Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);
    rawData = generate(BYTE_SIZE);
    try {
      for (int i = 0; i < tryNumber; i++) {
        compressDecompressZlib(rawData, (ZlibCompressor) zlibCompressor,
            (ZlibDecompressor) zlibDecompressor);
      }
      zlibCompressor.reinit(conf);
    } catch (Exception ex) {
      fail("testZlibCompressorDecompressorWithCompressionLevels ex error " + ex);
    }
  } else {
    assertTrue("ZlibFactory is using native libs against request",
        ZlibFactory.isNativeZlibLoaded(conf));
  }
}
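
The helper compressDecompressZlib called above is not shown on this page. Below is a minimal sketch of such a round-trip helper, assuming the standard Compressor/Decompressor streaming calls; the method name and buffer sizes are illustrative assumptions, not taken from the original test.

// Sketch only: the real compressDecompressZlib may differ in its details.
private void compressDecompressRoundTrip(byte[] rawData,
    Compressor compressor, Decompressor decompressor) throws IOException {
  // give the compressor headroom in case the data does not shrink
  byte[] compressed = new byte[rawData.length * 2];
  byte[] decompressed = new byte[rawData.length];

  // feed all input, signal end of input, then drain the compressor
  compressor.setInput(rawData, 0, rawData.length);
  compressor.finish();
  int compressedSize = 0;
  while (!compressor.finished()) {
    compressedSize += compressor.compress(compressed, compressedSize,
        compressed.length - compressedSize);
  }

  // feed the compressed bytes and drain the decompressor
  decompressor.setInput(compressed, 0, compressedSize);
  int decompressedSize = 0;
  while (!decompressor.finished()) {
    decompressedSize += decompressor.decompress(decompressed, decompressedSize,
        decompressed.length - decompressedSize);
  }

  assertEquals(rawData.length, decompressedSize);
  assertArrayEquals(rawData, decompressed);

  // leave both sides reusable for the next iteration of the calling loop
  compressor.reset();
  decompressor.reset();
}

Resetting both ends after each pass is what lets the test loop tryNumber times over the same compressor and decompressor instances.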
Use of org.apache.hadoop.io.compress.Decompressor in project hadoop by apache.
From the class TestZlibCompressorDecompressor, the method testBuiltInGzipDecompressorExceptions:
@Test
public void testBuiltInGzipDecompressorExceptions() {
  BuiltInGzipDecompressor decompresser = new BuiltInGzipDecompressor();
  try {
    decompresser.setInput(null, 0, 1);
  } catch (NullPointerException ex) {
    // expected
  } catch (Exception ex) {
    fail("testBuiltInGzipDecompressorExceptions npe error " + ex);
  }
  try {
    decompresser.setInput(new byte[] { 0 }, 0, -1);
  } catch (ArrayIndexOutOfBoundsException ex) {
    // expected
  } catch (Exception ex) {
    fail("testBuiltInGzipDecompressorExceptions aioob error " + ex);
  }
  assertTrue("decompresser.getBytesRead error", decompresser.getBytesRead() == 0);
  assertTrue("decompresser.getRemaining error", decompresser.getRemaining() == 0);
  decompresser.reset();
  decompresser.end();
  InputStream decompStream = null;
  try {
    // invalid bytes 0 and 1: the gzip magic must be 31, -117
    int buffSize = 1 * 1024;
    byte[] buffer = new byte[buffSize];
    Decompressor decompressor = new BuiltInGzipDecompressor();
    DataInputBuffer gzbuf = new DataInputBuffer();
    decompStream = new DecompressorStream(gzbuf, decompressor);
    gzbuf.reset(new byte[] { 0, 0, 1, 1, 1, 1, 11, 1, 1, 1, 1 }, 11);
    decompStream.read(buffer);
  } catch (IOException ioex) {
    // expected
  } catch (Exception ex) {
    fail("invalid bytes 0 and 1 in gzip stream " + ex);
  }
  // invalid byte 2: the compression method must be 8 (deflate)
  try {
    int buffSize = 1 * 1024;
    byte[] buffer = new byte[buffSize];
    Decompressor decompressor = new BuiltInGzipDecompressor();
    DataInputBuffer gzbuf = new DataInputBuffer();
    decompStream = new DecompressorStream(gzbuf, decompressor);
    gzbuf.reset(new byte[] { 31, -117, 7, 1, 1, 1, 1, 11, 1, 1, 1, 1 }, 11);
    decompStream.read(buffer);
  } catch (IOException ioex) {
    // expected
  } catch (Exception ex) {
    fail("invalid byte 2 in gzip stream " + ex);
  }
  // invalid byte 3: reserved flag bits set
  try {
    int buffSize = 1 * 1024;
    byte[] buffer = new byte[buffSize];
    Decompressor decompressor = new BuiltInGzipDecompressor();
    DataInputBuffer gzbuf = new DataInputBuffer();
    decompStream = new DecompressorStream(gzbuf, decompressor);
    gzbuf.reset(new byte[] { 31, -117, 8, -32, 1, 1, 1, 11, 1, 1, 1, 1 }, 11);
    decompStream.read(buffer);
  } catch (IOException ioex) {
    // expected
  } catch (Exception ex) {
    fail("invalid byte 3 in gzip stream " + ex);
  }
  // byte 3 sets the FEXTRA flag (hasExtraField)
  try {
    int buffSize = 1 * 1024;
    byte[] buffer = new byte[buffSize];
    Decompressor decompressor = new BuiltInGzipDecompressor();
    DataInputBuffer gzbuf = new DataInputBuffer();
    decompStream = new DecompressorStream(gzbuf, decompressor);
    gzbuf.reset(new byte[] { 31, -117, 8, 4, 1, 1, 1, 11, 1, 1, 1, 1 }, 11);
    decompStream.read(buffer);
  } catch (IOException ioex) {
    // expected
  } catch (Exception ex) {
    fail("invalid byte 3 making hasExtraField " + ex);
  }
}
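
For contrast with the malformed headers above, here is a minimal sketch of a stream that BuiltInGzipDecompressor accepts: bytes 0 and 1 must be the gzip magic 31, -117 (0x1f, 0x8b) and byte 2 must be 8 (deflate). Building the data with java.util.zip.GZIPOutputStream is an assumption made for illustration and is not part of the test.

byte[] original = "hello gzip".getBytes(StandardCharsets.UTF_8);
ByteArrayOutputStream bos = new ByteArrayOutputStream();
try (GZIPOutputStream gzos = new GZIPOutputStream(bos)) {
  gzos.write(original);  // GZIPOutputStream emits a valid 31, -117, 8, ... header
}
byte[] gzipped = bos.toByteArray();

DataInputBuffer gzbuf = new DataInputBuffer();
gzbuf.reset(gzipped, gzipped.length);
InputStream decompStream = new DecompressorStream(gzbuf, new BuiltInGzipDecompressor());

// reading should now succeed instead of ending in an IOException
ByteArrayOutputStream decoded = new ByteArrayOutputStream();
byte[] buffer = new byte[1024];
int n;
while ((n = decompStream.read(buffer)) != -1) {
  decoded.write(buffer, 0, n);
}
assertArrayEquals(original, decoded.toByteArray());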
Use of org.apache.hadoop.io.compress.Decompressor in project hadoop by apache.
From the class TestZStandardCompressorDecompressor, the method testReadingWithAStream:
@Test
public void testReadingWithAStream() throws Exception {
  FileInputStream inputStream = FileUtils.openInputStream(compressedFile);
  ZStandardCodec codec = new ZStandardCodec();
  codec.setConf(CONFIGURATION);
  Decompressor decompressor = codec.createDecompressor();
  CompressionInputStream cis = codec.createInputStream(inputStream, decompressor);
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  byte[] resultOfDecompression;
  try {
    byte[] buffer = new byte[100];
    int n;
    while ((n = cis.read(buffer, 0, buffer.length)) != -1) {
      baos.write(buffer, 0, n);
    }
    resultOfDecompression = baos.toByteArray();
  } finally {
    IOUtils.closeQuietly(baos);
    IOUtils.closeQuietly(cis);
  }
  byte[] expected = FileUtils.readFileToByteArray(uncompressedFile);
  assertEquals(bytesToHex(expected), bytesToHex(resultOfDecompression));
}
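
The fixture files compressedFile and uncompressedFile are presumably prepared elsewhere in the test class. A hedged sketch of how such a pair could be produced with the same codec, reusing the commons-io helpers already used above:

ZStandardCodec codec = new ZStandardCodec();
codec.setConf(CONFIGURATION);
try (InputStream in = FileUtils.openInputStream(uncompressedFile);
     OutputStream fileOut = FileUtils.openOutputStream(compressedFile);
     CompressionOutputStream out = codec.createOutputStream(fileOut)) {
  IOUtils.copy(in, out);  // org.apache.commons.io.IOUtils
  out.finish();           // write out the final zstd frame before closing
}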
Use of org.apache.hadoop.io.compress.Decompressor in project hadoop by apache.
From the class TestLineRecordReader, the method testMultipleClose:
@Test
public void testMultipleClose() throws IOException {
  URL testFileUrl = getClass().getClassLoader()
      .getResource("recordSpanningMultipleSplits.txt.bz2");
  assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2", testFileUrl);
  File testFile = new File(testFileUrl.getFile());
  Path testFilePath = new Path(testFile.getAbsolutePath());
  long testFileSize = testFile.length();
  Configuration conf = new Configuration();
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH,
      Integer.MAX_VALUE);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  // read the whole compressed file, then close the reader twice
  FileSplit split = new FileSplit(testFilePath, 0, testFileSize, null);
  LineRecordReader reader = new LineRecordReader();
  reader.initialize(split, context);
  //noinspection StatementWithEmptyBody
  while (reader.nextKeyValue()) ;
  reader.close();
  reader.close();
  BZip2Codec codec = new BZip2Codec();
  codec.setConf(conf);
  Set<Decompressor> decompressors = new HashSet<Decompressor>();
  for (int i = 0; i < 10; ++i) {
    decompressors.add(CodecPool.getDecompressor(codec));
  }
  assertEquals(10, decompressors.size());
}
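
The final assertion presumably guards against the double close returning the reader's decompressor to CodecPool twice: if the same instance were handed out to two of the ten getDecompressor calls, the set would hold fewer than ten distinct decompressors. Outside a record reader, the expected pattern is a single get/return pair; a brief sketch with a hypothetical processing step:

BZip2Codec codec = new BZip2Codec();
codec.setConf(conf);
Decompressor decompressor = CodecPool.getDecompressor(codec);
try {
  // ... hypothetical work, e.g. codec.createInputStream(fileIn, decompressor) ...
} finally {
  // return exactly once; the pool may hand this instance to another caller afterwards
  CodecPool.returnDecompressor(decompressor);
}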
Use of org.apache.hadoop.io.compress.Decompressor in project hadoop by apache.
From the class TestLineRecordReader, the method testMultipleClose:
@Test
public void testMultipleClose() throws IOException {
  URL testFileUrl = getClass().getClassLoader()
      .getResource("recordSpanningMultipleSplits.txt.bz2");
  assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2", testFileUrl);
  File testFile = new File(testFileUrl.getFile());
  Path testFilePath = new Path(testFile.getAbsolutePath());
  long testFileSize = testFile.length();
  Configuration conf = new Configuration();
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH,
      Integer.MAX_VALUE);
  FileSplit split = new FileSplit(testFilePath, 0, testFileSize, (String[]) null);
  LineRecordReader reader = new LineRecordReader(conf, split);
  LongWritable key = new LongWritable();
  Text value = new Text();
  //noinspection StatementWithEmptyBody
  while (reader.next(key, value)) ;
  reader.close();
  reader.close();
  BZip2Codec codec = new BZip2Codec();
  codec.setConf(conf);
  Set<Decompressor> decompressors = new HashSet<Decompressor>();
  for (int i = 0; i < 10; ++i) {
    decompressors.add(CodecPool.getDecompressor(codec));
  }
  assertEquals(10, decompressors.size());
}
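
This variant drives the older mapred LineRecordReader directly. Since the mapred RecordReader interface also exposes createKey() and createValue(), the read loop can be written without constructing the key and value by hand; a brief sketch, with a hypothetical processing step:

RecordReader<LongWritable, Text> rr = new LineRecordReader(conf, split);
LongWritable k = rr.createKey();
Text v = rr.createValue();
while (rr.next(k, v)) {
  // process k and v here
}
rr.close();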