Use of org.apache.hadoop.io.DataInputBuffer in project hadoop by apache.
From class TestCodec, method testGzipCompatibility:
@Test
public void testGzipCompatibility() throws IOException {
  Random r = new Random();
  long seed = r.nextLong();
  r.setSeed(seed);
  LOG.info("seed: " + seed);
  DataOutputBuffer dflbuf = new DataOutputBuffer();
  GZIPOutputStream gzout = new GZIPOutputStream(dflbuf);
  byte[] b = new byte[r.nextInt(128 * 1024 + 1)];
  r.nextBytes(b);
  gzout.write(b);
  gzout.close();
  DataInputBuffer gzbuf = new DataInputBuffer();
  gzbuf.reset(dflbuf.getData(), dflbuf.getLength());
  Configuration conf = new Configuration();
  // don't use native libs
  ZlibFactory.setNativeZlibLoaded(false);
  CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, conf);
  Decompressor decom = codec.createDecompressor();
  assertNotNull(decom);
  assertEquals(BuiltInGzipDecompressor.class, decom.getClass());
  InputStream gzin = codec.createInputStream(gzbuf, decom);
  dflbuf.reset();
  IOUtils.copyBytes(gzin, dflbuf, 4096);
  final byte[] dflchk = Arrays.copyOf(dflbuf.getData(), dflbuf.getLength());
  assertArrayEquals(b, dflchk);
}
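The buffer handling above is the pattern every example on this page shares: data is written into a DataOutputBuffer, and its backing array is then handed to a DataInputBuffer via reset(getData(), getLength()) with no copying. A minimal, self-contained sketch of that round trip (standalone illustration, not from the Hadoop source):

import java.io.IOException;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;

public class BufferRoundTrip {
  public static void main(String[] args) throws IOException {
    DataOutputBuffer out = new DataOutputBuffer();
    out.writeUTF("hello");
    out.writeInt(42);
    DataInputBuffer in = new DataInputBuffer();
    // reset() wraps the backing array directly; getLength() (not
    // getData().length) bounds the valid bytes, because the backing
    // array may be larger than what was actually written.
    in.reset(out.getData(), out.getLength());
    System.out.println(in.readUTF()); // hello
    System.out.println(in.readInt()); // 42
  }
}

The same zero-copy trick is why the tests can reuse dflbuf.reset() to collect decompressed output into the buffer they originally compressed from.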
Use of org.apache.hadoop.io.DataInputBuffer in project hadoop by apache.
From class TestCodec, method codecTest:
private static void codecTest(Configuration conf, int seed, int count, String codecClass) throws IOException {
  // Create the codec
  CompressionCodec codec = null;
  try {
    codec = (CompressionCodec) ReflectionUtils.newInstance(conf.getClassByName(codecClass), conf);
  } catch (ClassNotFoundException cnfe) {
    throw new IOException("Illegal codec!", cnfe);
  }
  LOG.info("Created a Codec object of type: " + codecClass);
  // Generate data
  DataOutputBuffer data = new DataOutputBuffer();
  RandomDatum.Generator generator = new RandomDatum.Generator(seed);
  for (int i = 0; i < count; ++i) {
    generator.next();
    RandomDatum key = generator.getKey();
    RandomDatum value = generator.getValue();
    key.write(data);
    value.write(data);
  }
  LOG.info("Generated " + count + " records");
  // Compress data
  DataOutputBuffer compressedDataBuffer = new DataOutputBuffer();
  CompressionOutputStream deflateFilter = codec.createOutputStream(compressedDataBuffer);
  DataOutputStream deflateOut = new DataOutputStream(new BufferedOutputStream(deflateFilter));
  deflateOut.write(data.getData(), 0, data.getLength());
  deflateOut.flush();
  deflateFilter.finish();
  LOG.info("Finished compressing data");
  // De-compress data
  DataInputBuffer deCompressedDataBuffer = new DataInputBuffer();
  deCompressedDataBuffer.reset(compressedDataBuffer.getData(), 0, compressedDataBuffer.getLength());
  CompressionInputStream inflateFilter = codec.createInputStream(deCompressedDataBuffer);
  DataInputStream inflateIn = new DataInputStream(new BufferedInputStream(inflateFilter));
  // Check
  DataInputBuffer originalData = new DataInputBuffer();
  originalData.reset(data.getData(), 0, data.getLength());
  DataInputStream originalIn = new DataInputStream(new BufferedInputStream(originalData));
  for (int i = 0; i < count; ++i) {
    RandomDatum k1 = new RandomDatum();
    RandomDatum v1 = new RandomDatum();
    k1.readFields(originalIn);
    v1.readFields(originalIn);
    RandomDatum k2 = new RandomDatum();
    RandomDatum v2 = new RandomDatum();
    k2.readFields(inflateIn);
    v2.readFields(inflateIn);
    assertTrue("original and compressed-then-decompressed-output not equal", k1.equals(k2) && v1.equals(v2));
    // original and compressed-then-decompressed output should also have the same hashCode
    Map<RandomDatum, String> m = new HashMap<RandomDatum, String>();
    m.put(k1, k1.toString());
    m.put(v1, v1.toString());
    String result = m.get(k2);
    assertEquals("k1 and k2 hashcode not equal", result, k1.toString());
    result = m.get(v2);
    assertEquals("v1 and v2 hashcode not equal", result, v1.toString());
  }
  // De-compress data byte-at-a-time
  originalData.reset(data.getData(), 0, data.getLength());
  deCompressedDataBuffer.reset(compressedDataBuffer.getData(), 0, compressedDataBuffer.getLength());
  inflateFilter = codec.createInputStream(deCompressedDataBuffer);
  // Check
  originalIn = new DataInputStream(new BufferedInputStream(originalData));
  int expected;
  do {
    expected = originalIn.read();
    assertEquals("Inflated stream read by byte does not match", expected, inflateFilter.read());
  } while (expected != -1);
  LOG.info("SUCCESS! Completed checking " + count + " records");
}
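codecTest is a private helper, so callers inside TestCodec supply the codec by fully qualified class name. A hypothetical invocation (the seed and count values are illustrative, not taken from the Hadoop source):

Configuration conf = new Configuration();
// DefaultCodec is Hadoop's zlib-backed codec; any CompressionCodec
// implementation on the classpath can be named here.
codecTest(conf, 0, 10000, "org.apache.hadoop.io.compress.DefaultCodec");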
Use of org.apache.hadoop.io.DataInputBuffer in project hadoop by apache.
From class TestCodec, method GzipConcatTest:
void GzipConcatTest(Configuration conf, Class<? extends Decompressor> decomClass) throws IOException {
  Random r = new Random();
  long seed = r.nextLong();
  r.setSeed(seed);
  LOG.info(decomClass + " seed: " + seed);
  final int CONCAT = r.nextInt(4) + 3;
  final int BUFLEN = 128 * 1024;
  DataOutputBuffer dflbuf = new DataOutputBuffer();
  DataOutputBuffer chkbuf = new DataOutputBuffer();
  byte[] b = new byte[BUFLEN];
  for (int i = 0; i < CONCAT; ++i) {
    GZIPOutputStream gzout = new GZIPOutputStream(dflbuf);
    r.nextBytes(b);
    int len = r.nextInt(BUFLEN);
    int off = r.nextInt(BUFLEN - len);
    chkbuf.write(b, off, len);
    gzout.write(b, off, len);
    gzout.close();
  }
  final byte[] chk = Arrays.copyOf(chkbuf.getData(), chkbuf.getLength());
  CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, conf);
  Decompressor decom = codec.createDecompressor();
  assertNotNull(decom);
  assertEquals(decomClass, decom.getClass());
  DataInputBuffer gzbuf = new DataInputBuffer();
  gzbuf.reset(dflbuf.getData(), dflbuf.getLength());
  InputStream gzin = codec.createInputStream(gzbuf, decom);
  dflbuf.reset();
  IOUtils.copyBytes(gzin, dflbuf, 4096);
  final byte[] dflchk = Arrays.copyOf(dflbuf.getData(), dflbuf.getLength());
  assertArrayEquals(chk, dflchk);
}
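GzipConcatTest parameterizes on the expected Decompressor class, so the same helper can exercise both the pure-Java and the native zlib code paths against concatenated gzip members (valid per RFC 1952). A sketch of driving the pure-Java path, assuming the same ZlibFactory and BuiltInGzipDecompressor imports used in testGzipCompatibility above:

Configuration conf = new Configuration();
// Force the pure-Java gzip implementation, as testGzipCompatibility does.
ZlibFactory.setNativeZlibLoaded(false);
GzipConcatTest(conf, BuiltInGzipDecompressor.class);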
Use of org.apache.hadoop.io.DataInputBuffer in project hadoop by apache.
From class TestCompressionStreamReuse, method resetStateTest:
private void resetStateTest(Configuration conf, int seed, int count, String codecClass) throws IOException {
  // Create the codec
  CompressionCodec codec = null;
  try {
    codec = (CompressionCodec) ReflectionUtils.newInstance(conf.getClassByName(codecClass), conf);
  } catch (ClassNotFoundException cnfe) {
    throw new IOException("Illegal codec!", cnfe);
  }
  LOG.info("Created a Codec object of type: " + codecClass);
  // Generate data
  DataOutputBuffer data = new DataOutputBuffer();
  RandomDatum.Generator generator = new RandomDatum.Generator(seed);
  for (int i = 0; i < count; ++i) {
    generator.next();
    RandomDatum key = generator.getKey();
    RandomDatum value = generator.getValue();
    key.write(data);
    value.write(data);
  }
  LOG.info("Generated " + count + " records");
  // Compress data
  DataOutputBuffer compressedDataBuffer = new DataOutputBuffer();
  DataOutputStream deflateOut = new DataOutputStream(new BufferedOutputStream(compressedDataBuffer));
  CompressionOutputStream deflateFilter = codec.createOutputStream(deflateOut);
  deflateFilter.write(data.getData(), 0, data.getLength());
  deflateFilter.finish();
  deflateFilter.flush();
  LOG.info("Finished compressing data");
  // reset deflater
  deflateFilter.resetState();
  LOG.info("Finished resetting deflater");
  // re-generate data
  data.reset();
  generator = new RandomDatum.Generator(seed);
  for (int i = 0; i < count; ++i) {
    generator.next();
    RandomDatum key = generator.getKey();
    RandomDatum value = generator.getValue();
    key.write(data);
    value.write(data);
  }
  DataInputBuffer originalData = new DataInputBuffer();
  DataInputStream originalIn = new DataInputStream(new BufferedInputStream(originalData));
  originalData.reset(data.getData(), 0, data.getLength());
  // re-compress data
  compressedDataBuffer.reset();
  deflateOut = new DataOutputStream(new BufferedOutputStream(compressedDataBuffer));
  deflateFilter = codec.createOutputStream(deflateOut);
  deflateFilter.write(data.getData(), 0, data.getLength());
  deflateFilter.finish();
  deflateFilter.flush();
  LOG.info("Finished re-compressing data");
  // De-compress data
  DataInputBuffer deCompressedDataBuffer = new DataInputBuffer();
  deCompressedDataBuffer.reset(compressedDataBuffer.getData(), 0, compressedDataBuffer.getLength());
  CompressionInputStream inflateFilter = codec.createInputStream(deCompressedDataBuffer);
  DataInputStream inflateIn = new DataInputStream(new BufferedInputStream(inflateFilter));
  // Check
  for (int i = 0; i < count; ++i) {
    RandomDatum k1 = new RandomDatum();
    RandomDatum v1 = new RandomDatum();
    k1.readFields(originalIn);
    v1.readFields(originalIn);
    RandomDatum k2 = new RandomDatum();
    RandomDatum v2 = new RandomDatum();
    k2.readFields(inflateIn);
    v2.readFields(inflateIn);
    assertTrue("original and compressed-then-decompressed-output not equal", k1.equals(k2) && v1.equals(v2));
  }
  LOG.info("SUCCESS! Completed checking " + count + " records");
}
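The point of this test is that resetState() returns a CompressionOutputStream (and its pooled compressor) to a clean state, so the same stream can be reused for a new compressed block instead of constructing a fresh codec per write; SequenceFile's block compression relies on this. A condensed sketch of the reuse pattern, with hypothetical sink, first, and second placeholders:

CompressionOutputStream out = codec.createOutputStream(sink);
out.write(first, 0, first.length);
out.finish();      // flush the trailer for the first compressed block
out.resetState();  // clear compressor state; the stream is reusable
out.write(second, 0, second.length);
out.finish();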
Use of org.apache.hadoop.io.DataInputBuffer in project hadoop by apache.
From class TestZStandardCompressorDecompressor, method testCompressorDecompressorLogicWithCompressionStreams:
// Test the compress/decompress cycle through the
// CompressionOutputStream/CompressionInputStream API.
@Test
public void testCompressorDecompressorLogicWithCompressionStreams() throws Exception {
  DataOutputStream deflateOut = null;
  DataInputStream inflateIn = null;
  int byteSize = 1024 * 100;
  byte[] bytes = generate(byteSize);
  int bufferSize = IO_FILE_BUFFER_SIZE_DEFAULT;
  try {
    DataOutputBuffer compressedDataBuffer = new DataOutputBuffer();
    CompressionOutputStream deflateFilter = new CompressorStream(compressedDataBuffer, new ZStandardCompressor(), bufferSize);
    deflateOut = new DataOutputStream(new BufferedOutputStream(deflateFilter));
    deflateOut.write(bytes, 0, bytes.length);
    deflateOut.flush();
    deflateFilter.finish();
    DataInputBuffer deCompressedDataBuffer = new DataInputBuffer();
    deCompressedDataBuffer.reset(compressedDataBuffer.getData(), 0, compressedDataBuffer.getLength());
    CompressionInputStream inflateFilter = new DecompressorStream(deCompressedDataBuffer, new ZStandardDecompressor(bufferSize), bufferSize);
    inflateIn = new DataInputStream(new BufferedInputStream(inflateFilter));
    byte[] result = new byte[byteSize];
    // readFully: a single read() is not guaranteed to fill the array
    inflateIn.readFully(result);
    assertArrayEquals("original array not equals compress/decompressed array", bytes, result);
  } finally {
    IOUtils.closeQuietly(deflateOut);
    IOUtils.closeQuietly(inflateIn);
  }
}
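generate(byteSize) is a helper defined elsewhere in the test class and not shown here; a hypothetical stand-in that produces input of the same shape could look like this:

// Hypothetical stand-in for the test class's generate() helper:
// deterministic pseudo-random bytes of the requested size.
private static byte[] generate(int size) {
  byte[] data = new byte[size];
  new java.util.Random(12345L).nextBytes(data);
  return data;
}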