Use of org.apache.hadoop.io.compress.Compressor in project hbase by apache.
The class TestHFileBlock, method createTestV1Block.
public byte[] createTestV1Block(Compression.Algorithm algo) throws IOException {
    Compressor compressor = algo.getCompressor();
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    OutputStream os = algo.createCompressionStream(baos, compressor, 0);
    DataOutputStream dos = new DataOutputStream(os);
    // Let's make this a meta block.
    BlockType.META.write(dos);
    writeTestBlockContents(dos);
    dos.flush();
    algo.returnCompressor(compressor);
    return baos.toByteArray();
}
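The bytes returned above are just the raw compression stream for the chosen algorithm, prefixed with the BlockType.META magic. A minimal sketch of the reverse path, assuming HBase's Compression.Algorithm decompression API (getDecompressor, createDecompressionStream, returnDecompressor); the helper name readTestV1Block is illustrative:
private byte[] readTestV1Block(Compression.Algorithm algo, byte[] compressed) throws IOException {
    Decompressor decompressor = algo.getDecompressor();
    InputStream is = algo.createDecompressionStream(new ByteArrayInputStream(compressed), decompressor, 0);
    try {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        byte[] buf = new byte[4096];
        int n;
        while ((n = is.read(buf)) > 0) {
            out.write(buf, 0, n);
        }
        // The first bytes are the BlockType.META magic written in createTestV1Block,
        // followed by the test block contents.
        return out.toByteArray();
    } finally {
        is.close();
        algo.returnDecompressor(decompressor);
    }
}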
Use of org.apache.hadoop.io.compress.Compressor in project mongo-hadoop by mongodb.
The class BSONSplitter, method run.
/**
* When run as a Tool, BSONSplitter can be used to pre-split and compress
* BSON files. This can be especially useful before uploading large BSON
* files to HDFS to save time. The compressed splits are written to the
* given output path or to the directory containing the input file, if
* the output path is unspecified. A ".splits" file is not generated, since
* each output file is expected to be its own split.
*
* @param args command-line arguments. Run with zero arguments to see usage.
* @return exit status
* @throws Exception
*/
@Override
public int run(final String[] args) throws Exception {
    if (args.length < 1) {
        printUsage();
        return 1;
    }
    // Parse command-line arguments.
    Path filePath = new Path(args[0]);
    String compressorName = null, outputDirectoryStr = null;
    Path outputDirectory;
    CompressionCodec codec;
    Compressor compressor;
    for (int i = 1; i < args.length; ++i) {
        if ("-c".equals(args[i]) && i + 1 < args.length) {
            compressorName = args[++i];
        } else if ("-o".equals(args[i]) && i + 1 < args.length) {
            outputDirectoryStr = args[++i];
        } else {
            // CHECKSTYLE:OFF
            System.err.println("unrecognized option: " + args[i]);
            // CHECKSTYLE:ON
            printUsage();
            return 1;
        }
    }
    // Supply default values for unspecified arguments.
    if (null == outputDirectoryStr) {
        outputDirectory = filePath.getParent();
    } else {
        outputDirectory = new Path(outputDirectoryStr);
    }
    if (null == compressorName) {
        codec = new DefaultCodec();
    } else {
        Class<?> codecClass = Class.forName(compressorName);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, getConf());
    }
    if (codec instanceof Configurable) {
        ((Configurable) codec).setConf(getConf());
    }
    // Do not write a .splits file so as not to confuse BSONSplitter.
    // Each compressed file will be its own split.
    MongoConfigUtil.setBSONWriteSplits(getConf(), false);
    // Open the file.
    FileSystem inputFS = FileSystem.get(filePath.toUri(), getConf());
    FileSystem outputFS = FileSystem.get(outputDirectory.toUri(), getConf());
    FSDataInputStream inputStream = inputFS.open(filePath);
    // Use BSONSplitter to split the file.
    Path splitFilePath = getSplitsFilePath(filePath, getConf());
    try {
        loadSplitsFromSplitFile(inputFS.getFileStatus(filePath), splitFilePath);
    } catch (NoSplitFileException e) {
        LOG.info("did not find .splits file in " + splitFilePath.toUri());
        setInputPath(filePath);
        readSplits();
    }
    List<BSONFileSplit> splits = getAllSplits();
    LOG.info("compressing " + splits.size() + " splits.");
    byte[] buf = new byte[1024 * 1024];
    for (int i = 0; i < splits.size(); ++i) {
        // e.g., hdfs:///user/hive/warehouse/mongo/OutputFile-42.bz2
        Path splitOutputPath = new Path(outputDirectory, filePath.getName() + "-" + i + codec.getDefaultExtension());
        // Compress the split into a new file.
        compressor = CodecPool.getCompressor(codec);
        CompressionOutputStream compressionOutputStream = null;
        try {
            compressionOutputStream = codec.createOutputStream(outputFS.create(splitOutputPath), compressor);
            int totalBytes = 0, bytesRead = 0;
            BSONFileSplit split = splits.get(i);
            inputStream.seek(split.getStart());
            LOG.info("writing " + splitOutputPath.toUri() + ".");
            while (totalBytes < split.getLength() && bytesRead >= 0) {
                bytesRead = inputStream.read(buf, 0, (int) Math.min(buf.length, split.getLength() - totalBytes));
                if (bytesRead > 0) {
                    compressionOutputStream.write(buf, 0, bytesRead);
                    totalBytes += bytesRead;
                }
            }
        } finally {
            if (compressionOutputStream != null) {
                compressionOutputStream.close();
            }
            CodecPool.returnCompressor(compressor);
        }
    }
    LOG.info("done.");
    return 0;
}
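The codec-selection and pooling pattern in run() is independent of BSON splitting. A standalone sketch of the same idea, compressing an in-memory byte array with a codec looked up by class name; the method name compressBytes and its parameters are placeholders, not part of BSONSplitter:
public static void compressBytes(Configuration conf, byte[] data, Path out, String codecClassName) throws Exception {
    // Instantiate the codec reflectively, as run() does for the -c option.
    CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(Class.forName(codecClassName), conf);
    if (codec instanceof Configurable) {
        ((Configurable) codec).setConf(conf);
    }
    FileSystem fs = FileSystem.get(out.toUri(), conf);
    // Borrow a pooled compressor and always return it, even on failure.
    Compressor compressor = CodecPool.getCompressor(codec);
    CompressionOutputStream cos = null;
    try {
        cos = codec.createOutputStream(fs.create(out), compressor);
        cos.write(data);
    } finally {
        if (cos != null) {
            cos.close();
        }
        CodecPool.returnCompressor(compressor);
    }
}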
Use of org.apache.hadoop.io.compress.Compressor in project hadoop by apache.
The class TestZlibCompressorDecompressor, method testZlibCompressorDecompressorSetDictionary.
@Test
public void testZlibCompressorDecompressorSetDictionary() {
    Configuration conf = new Configuration();
    if (ZlibFactory.isNativeZlibLoaded(conf)) {
        Compressor zlibCompressor = ZlibFactory.getZlibCompressor(conf);
        Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);
        checkSetDictionaryNullPointerException(zlibCompressor);
        checkSetDictionaryNullPointerException(zlibDecompressor);
        checkSetDictionaryArrayIndexOutOfBoundsException(zlibDecompressor);
        checkSetDictionaryArrayIndexOutOfBoundsException(zlibCompressor);
    } else {
        assertTrue("ZlibFactory is using native libs against request", ZlibFactory.isNativeZlibLoaded(conf));
    }
}
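The check* helpers are not shown in this snippet. A plausible minimal sketch of the Compressor overload of checkSetDictionaryNullPointerException, assuming it only verifies that a null dictionary buffer is rejected:
private void checkSetDictionaryNullPointerException(Compressor compressor) {
    try {
        // A null dictionary buffer is expected to be rejected with a NullPointerException.
        compressor.setDictionary(null, 0, 1);
        fail("setDictionary(null, ...) should have thrown NullPointerException");
    } catch (NullPointerException expected) {
        // expected
    }
}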
Use of org.apache.hadoop.io.compress.Compressor in project hadoop by apache.
The class TestZlibCompressorDecompressor, method testZlibCompressorDecompressorWithConfiguration.
@Test
public void testZlibCompressorDecompressorWithConfiguration() {
    Configuration conf = new Configuration();
    if (ZlibFactory.isNativeZlibLoaded(conf)) {
        byte[] rawData;
        int tryNumber = 5;
        int BYTE_SIZE = 10 * 1024;
        Compressor zlibCompressor = ZlibFactory.getZlibCompressor(conf);
        Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);
        rawData = generate(BYTE_SIZE);
        try {
            for (int i = 0; i < tryNumber; i++) {
                compressDecompressZlib(rawData, (ZlibCompressor) zlibCompressor, (ZlibDecompressor) zlibDecompressor);
            }
            zlibCompressor.reinit(conf);
        } catch (Exception ex) {
            fail("testZlibCompressorDecompressorWithConfiguration ex error " + ex);
        }
    } else {
        assertTrue("ZlibFactory is using native libs against request", ZlibFactory.isNativeZlibLoaded(conf));
    }
}
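The reinit(conf) call is what lets a compressor pick up changed settings. A short sketch of how the zlib level and strategy could be set on the Configuration before reinitializing, assuming Hadoop's ZlibFactory.setCompressionLevel and setCompressionStrategy setters:
Configuration conf = new Configuration();
// Request a different zlib profile through the configuration ...
ZlibFactory.setCompressionLevel(conf, ZlibCompressor.CompressionLevel.BEST_COMPRESSION);
ZlibFactory.setCompressionStrategy(conf, ZlibCompressor.CompressionStrategy.DEFAULT_STRATEGY);
// ... then have a compressor pick it up via reinit.
Compressor zlibCompressor = ZlibFactory.getZlibCompressor(conf);
zlibCompressor.reinit(conf);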
Use of org.apache.hadoop.io.compress.Compressor in project hadoop by apache.
The class TestZStandardCompressorDecompressor, method testCompressionCompressesCorrectly.
@Test
public void testCompressionCompressesCorrectly() throws Exception {
    int uncompressedSize = (int) FileUtils.sizeOf(uncompressedFile);
    byte[] bytes = FileUtils.readFileToByteArray(uncompressedFile);
    assertEquals(uncompressedSize, bytes.length);
    Configuration conf = new Configuration();
    ZStandardCodec codec = new ZStandardCodec();
    codec.setConf(conf);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    Compressor compressor = codec.createCompressor();
    CompressionOutputStream outputStream = codec.createOutputStream(baos, compressor);
    for (byte aByte : bytes) {
        outputStream.write(aByte);
    }
    outputStream.finish();
    outputStream.close();
    assertEquals(uncompressedSize, compressor.getBytesRead());
    assertTrue(compressor.finished());
    // just make sure we can decompress the file
    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
    Decompressor decompressor = codec.createDecompressor();
    CompressionInputStream inputStream = codec.createInputStream(bais, decompressor);
    byte[] buffer = new byte[100];
    int n = buffer.length;
    while ((n = inputStream.read(buffer, 0, n)) != -1) {
        byteArrayOutputStream.write(buffer, 0, n);
    }
    assertArrayEquals(bytes, byteArrayOutputStream.toByteArray());
}
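The manual read loop above can also be driven by Hadoop's IOUtils.copyBytes. A compact sketch of the same compress-then-decompress round trip, assuming the codec factory methods used in the test; the helper name zstdRoundTrip is illustrative:
private static byte[] zstdRoundTrip(Configuration conf, byte[] input) throws IOException {
    ZStandardCodec codec = new ZStandardCodec();
    codec.setConf(conf);
    // Compress into memory.
    ByteArrayOutputStream compressed = new ByteArrayOutputStream();
    CompressionOutputStream cos = codec.createOutputStream(compressed);
    cos.write(input);
    cos.close();
    // Decompress back, letting copyBytes drive the buffer loop.
    ByteArrayOutputStream restored = new ByteArrayOutputStream();
    CompressionInputStream cis = codec.createInputStream(new ByteArrayInputStream(compressed.toByteArray()));
    IOUtils.copyBytes(cis, restored, 4096);
    cis.close();
    return restored.toByteArray();
}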