Use of org.apache.hadoop.hbase.nio.MultiByteBuff in project hbase by apache.
The class TestBloomFilterChunk, method testBasicBloom.
@Test
public void testBasicBloom() throws Exception {
  BloomFilterChunk bf1 = new BloomFilterChunk(1000, (float) 0.01, Hash.MURMUR_HASH, 0);
  BloomFilterChunk bf2 = new BloomFilterChunk(1000, (float) 0.01, Hash.MURMUR_HASH, 0);
  bf1.allocBloom();
  bf2.allocBloom();
  // test 1: verify no fundamental false negatives or positives
  byte[] key1 = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
  byte[] key2 = { 1, 2, 3, 4, 5, 6, 7, 8, 7 };
  bf1.add(key1, 0, key1.length);
  bf2.add(key2, 0, key2.length);
  assertTrue(BloomFilterUtil.contains(key1, 0, key1.length, new MultiByteBuff(bf1.bloom), 0,
    (int) bf1.byteSize, bf1.hash, bf1.hashCount));
  assertFalse(BloomFilterUtil.contains(key2, 0, key2.length, new MultiByteBuff(bf1.bloom), 0,
    (int) bf1.byteSize, bf1.hash, bf1.hashCount));
  assertFalse(BloomFilterUtil.contains(key1, 0, key1.length, new MultiByteBuff(bf2.bloom), 0,
    (int) bf2.byteSize, bf2.hash, bf2.hashCount));
  assertTrue(BloomFilterUtil.contains(key2, 0, key2.length, new MultiByteBuff(bf2.bloom), 0,
    (int) bf2.byteSize, bf2.hash, bf2.hashCount));
  byte[] bkey = { 1, 2, 3, 4 };
  byte[] bval = "this is a much larger byte array".getBytes();
  bf1.add(bkey, 0, bkey.length);
  bf1.add(bval, 1, bval.length - 1);
  assertTrue(BloomFilterUtil.contains(bkey, 0, bkey.length, new MultiByteBuff(bf1.bloom), 0,
    (int) bf1.byteSize, bf1.hash, bf1.hashCount));
  assertTrue(BloomFilterUtil.contains(bval, 1, bval.length - 1, new MultiByteBuff(bf1.bloom), 0,
    (int) bf1.byteSize, bf1.hash, bf1.hashCount));
  assertFalse(BloomFilterUtil.contains(bval, 0, bval.length, new MultiByteBuff(bf1.bloom), 0,
    (int) bf1.byteSize, bf1.hash, bf1.hashCount));
  // test 2: serialization & deserialization
  // (convert the bloom to a byte array and read it back in as input)
  ByteArrayOutputStream bOut = new ByteArrayOutputStream();
  bf1.writeBloom(new DataOutputStream(bOut));
  ByteBuffer bb = ByteBuffer.wrap(bOut.toByteArray());
  BloomFilterChunk newBf1 = new BloomFilterChunk(1000, (float) 0.01, Hash.MURMUR_HASH, 0);
  assertTrue(BloomFilterUtil.contains(key1, 0, key1.length, new MultiByteBuff(bb), 0,
    (int) newBf1.byteSize, newBf1.hash, newBf1.hashCount));
  assertFalse(BloomFilterUtil.contains(key2, 0, key2.length, new MultiByteBuff(bb), 0,
    (int) newBf1.byteSize, newBf1.hash, newBf1.hashCount));
  assertTrue(BloomFilterUtil.contains(bkey, 0, bkey.length, new MultiByteBuff(bb), 0,
    (int) newBf1.byteSize, newBf1.hash, newBf1.hashCount));
  assertTrue(BloomFilterUtil.contains(bval, 1, bval.length - 1, new MultiByteBuff(bb), 0,
    (int) newBf1.byteSize, newBf1.hash, newBf1.hashCount));
  assertFalse(BloomFilterUtil.contains(bval, 0, bval.length, new MultiByteBuff(bb), 0,
    (int) newBf1.byteSize, newBf1.hash, newBf1.hashCount));
  assertFalse(BloomFilterUtil.contains(bval, 0, bval.length, new MultiByteBuff(bb), 0,
    (int) newBf1.byteSize, newBf1.hash, newBf1.hashCount));
  System.out.println("Serialized as " + bOut.size() + " bytes");
  // ... allow small padding
  assertTrue(bOut.size() - bf1.byteSize < 10);
}
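For orientation, here is a minimal sketch of the add-then-query round trip the assertions above rely on. It assumes the same package-private access the test itself has (BloomFilterChunk's bloom, byteSize, hash and hashCount fields are not public), and the method name is made up for illustration:

@Test
public void sketchAddThenQuery() throws Exception {
  BloomFilterChunk chunk = new BloomFilterChunk(1000, (float) 0.01, Hash.MURMUR_HASH, 0);
  chunk.allocBloom();                        // allocate the backing ByteBuffer
  byte[] key = Bytes.toBytes("row-1");
  chunk.add(key, 0, key.length);             // sets hashCount bits for this key
  // Queries wrap the chunk's backing ByteBuffer in a MultiByteBuff view;
  // a Bloom filter has no false negatives, so an added key is always found.
  assertTrue(BloomFilterUtil.contains(key, 0, key.length, new MultiByteBuff(chunk.bloom),
    0, (int) chunk.byteSize, chunk.hash, chunk.hashCount));
}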
Use of org.apache.hadoop.hbase.nio.MultiByteBuff in project hbase by apache.
The class TestBloomFilterChunk, method testBloomPerf.
@Test
public void testBloomPerf() throws Exception {
  // add
  float err = (float) 0.01;
  BloomFilterChunk b = new BloomFilterChunk(10 * 1000 * 1000, err, Hash.MURMUR_HASH, 3);
  b.allocBloom();
  long startTime = System.currentTimeMillis();
  long origSize = b.getByteSize();
  for (int i = 0; i < 1 * 1000 * 1000; ++i) {
    byte[] ib = Bytes.toBytes(i);
    b.add(ib, 0, ib.length);
  }
  long endTime = System.currentTimeMillis();
  System.out.println("Total Add time = " + (endTime - startTime) + "ms");
  // fold
  startTime = System.currentTimeMillis();
  b.compactBloom();
  endTime = System.currentTimeMillis();
  System.out.println("Total Fold time = " + (endTime - startTime) + "ms");
  assertTrue(origSize >= b.getByteSize() << 3);
  // test
  startTime = System.currentTimeMillis();
  int falsePositives = 0;
  for (int i = 0; i < 2 * 1000 * 1000; ++i) {
    byte[] bytes = Bytes.toBytes(i);
    if (BloomFilterUtil.contains(bytes, 0, bytes.length, new MultiByteBuff(b.bloom), 0,
      (int) b.byteSize, b.hash, b.hashCount)) {
      if (i >= 1 * 1000 * 1000) {
        falsePositives++;
      }
    } else {
      assertFalse(i < 1 * 1000 * 1000);
    }
  }
  endTime = System.currentTimeMillis();
  System.out.println("Total Contains time = " + (endTime - startTime) + "ms");
  System.out.println("False positives = " + falsePositives);
  assertTrue(falsePositives <= (1 * 1000 * 1000) * err);
  // test: foldFactor > log(max/actual)
}
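The closing assertion is just the false-positive budget: keys 1,000,000 through 1,999,999 were never added, so with err = 0.01 at most 1,000,000 * 0.01 = 10,000 of those probes may report a hit. A worked check of the arithmetic (helper name is illustrative):

// Worked check of the bound 'falsePositives <= (1 * 1000 * 1000) * err':
// 1,000,000 probes for keys that were never added, times err = 0.01,
// gives a budget of 10,000 tolerated false positives.
static int falsePositiveBudget(int negativeProbes, float err) {
  return (int) (negativeProbes * err); // falsePositiveBudget(1_000_000, 0.01f) == 10_000
}

The earlier assertion origSize >= b.getByteSize() << 3 is the matching size check: foldFactor 3 allows compactBloom() to halve the bitmap up to three times, an 8x shrink.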
Use of org.apache.hadoop.hbase.nio.MultiByteBuff in project hbase by apache.
The class TestBloomFilterChunk, method testBloomFold.
@Test
public void testBloomFold() throws Exception {
  // test: foldFactor < log(max/actual)
  BloomFilterChunk b = new BloomFilterChunk(1003, (float) 0.01, Hash.MURMUR_HASH, 2);
  b.allocBloom();
  long origSize = b.getByteSize();
  assertEquals(1204, origSize);
  for (int i = 0; i < 12; ++i) {
    byte[] ib = Bytes.toBytes(i);
    b.add(ib, 0, ib.length);
  }
  b.compactBloom();
  assertEquals(origSize >> 2, b.getByteSize());
  int falsePositives = 0;
  for (int i = 0; i < 25; ++i) {
    byte[] bytes = Bytes.toBytes(i);
    if (BloomFilterUtil.contains(bytes, 0, bytes.length, new MultiByteBuff(b.bloom), 0,
      (int) b.byteSize, b.hash, b.hashCount)) {
      if (i >= 12) {
        falsePositives++;
      }
    } else {
      assertFalse(i < 12);
    }
  }
  assertTrue(falsePositives <= 1);
  // test: foldFactor > log(max/actual)
}
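compactBloom() is HBase's folding step: while the fold factor and the low key count allow, it ORs the upper half of the bit array onto the lower half, which is why a chunk built with foldFactor 2 ends up at origSize >> 2 here. A simplified sketch of the idea over a plain byte[] (the real implementation works on the chunk's ByteBuffer and updates byteSize, so this is illustrative only):

// Simplified fold: OR the upper half onto the lower half, up to foldFactor
// times. Query bit positions are taken modulo the new, smaller size, so every
// key added before the fold still maps to set bits.
static byte[] fold(byte[] bitmap, int foldFactor) {
  byte[] current = bitmap;
  for (int f = 0; f < foldFactor && current.length % 2 == 0; f++) {
    int half = current.length / 2;
    byte[] folded = new byte[half];
    for (int i = 0; i < half; i++) {
      folded[i] = (byte) (current[i] | current[i + half]);
    }
    current = folded;
  }
  return current;
}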
Use of org.apache.hadoop.hbase.nio.MultiByteBuff in project hbase by apache.
The class ByteBufferArray, method asSubByteBuff.
/**
 * Creates a ByteBuff from the given array of ByteBuffers, covering the given offset for the
 * specified length. For example, if the array holds 4 buffers of length 10 each, then
 * asSubByteBuff(5, 10) creates a MultiByteBuff consisting of two ByteBuffers: the first a
 * slice of buffer 0 from position 5 with length 5, and the second a slice of buffer 1 from
 * position 0 with length 5.
 * @param offset the starting offset within the logical array of bytes
 * @param len the number of bytes to cover
 * @return a ByteBuff formed from the underlying ByteBuffers
 */
public ByteBuff asSubByteBuff(long offset, int len) {
  assert len >= 0;
  long end = offset + len;
  int startBuffer = (int) (offset / bufferSize), startBufferOffset = (int) (offset % bufferSize);
  int endBuffer = (int) (end / bufferSize), endBufferOffset = (int) (end % bufferSize);
  // The last buffer in the array is a dummy one with 0 capacity; avoid returning it.
  if (endBuffer == this.bufferCount) {
    endBuffer--;
    endBufferOffset = bufferSize;
  }
  assert startBuffer >= 0 && startBuffer < bufferCount;
  assert endBuffer >= 0 && endBuffer < bufferCount
      || (endBuffer == bufferCount && endBufferOffset == 0);
  if (startBuffer >= buffers.length || startBuffer < 0) {
    String msg = "Failed subArray, start=" + offset + ",startBuffer=" + startBuffer
        + ",bufferSize=" + bufferSize;
    LOG.error(msg);
    throw new RuntimeException(msg);
  }
  int srcIndex = 0, cnt = -1;
  ByteBuffer[] mbb = new ByteBuffer[endBuffer - startBuffer + 1];
  for (int i = startBuffer, j = 0; i <= endBuffer; ++i, j++) {
    ByteBuffer bb = buffers[i].duplicate();
    if (i == startBuffer) {
      cnt = bufferSize - startBufferOffset;
      if (cnt > len) {
        cnt = len;
      }
      bb.limit(startBufferOffset + cnt).position(startBufferOffset);
    } else if (i == endBuffer) {
      cnt = endBufferOffset;
      bb.position(0).limit(cnt);
    } else {
      cnt = bufferSize;
      bb.position(0).limit(cnt);
    }
    mbb[j] = bb.slice();
    srcIndex += cnt;
  }
  assert srcIndex == len;
  if (mbb.length > 1) {
    return new MultiByteBuff(mbb);
  } else {
    return new SingleByteBuff(mbb[0]);
  }
}
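A usage sketch matching the javadoc's example: with 10-byte buffers, a range straddling a buffer boundary comes back as a MultiByteBuff of two slices, while a range contained in one buffer comes back as a SingleByteBuff. The helper and its pre-built array argument are assumptions for illustration:

// Hypothetical demo; 'array' is assumed to wrap four 10-byte ByteBuffers.
static void demoSubByteBuff(ByteBufferArray array) {
  // Offsets 5..14 span buffers 0 and 1 (5 bytes from each),
  // so the result is a MultiByteBuff over two slices.
  ByteBuff straddling = array.asSubByteBuff(5, 10);
  assert straddling.remaining() == 10;
  // Offsets 12..15 fall entirely inside buffer 1 -> a SingleByteBuff.
  ByteBuff contained = array.asSubByteBuff(12, 4);
  assert contained.remaining() == 4;
}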
Use of org.apache.hadoop.hbase.nio.MultiByteBuff in project hbase by apache.
The class TestMultiByteBuffInputStream, method testReads.
@Test
public void testReads() throws Exception {
  ByteArrayOutputStream bos = new ByteArrayOutputStream(100);
  DataOutputStream dos = new DataOutputStream(bos);
  String s = "test";
  int i = 128;
  dos.write(1);
  dos.writeInt(i);
  dos.writeBytes(s);
  dos.writeLong(12345L);
  dos.writeShort(2);
  dos.flush();
  ByteBuffer bb = ByteBuffer.wrap(bos.toByteArray());
  // bbis contains 19 bytes:
  // 1 byte, a 4-byte int, a 4-byte string, an 8-byte long and a 2-byte short
  ByteBuffInputStream bbis = new ByteBuffInputStream(new MultiByteBuff(bb));
  assertEquals(15 + s.length(), bbis.available());
  assertEquals(1, bbis.read());
  byte[] ib = new byte[4];
  bbis.read(ib);
  assertEquals(i, Bytes.toInt(ib));
  byte[] sb = new byte[s.length()];
  bbis.read(sb);
  assertEquals(s, Bytes.toString(sb));
  byte[] lb = new byte[8];
  bbis.read(lb);
  assertEquals(12345, Bytes.toLong(lb));
  assertEquals(2, bbis.available());
  ib = new byte[4];
  int read = bbis.read(ib, 0, ib.length);
  // We don't have 4 bytes remaining, only 2, so only those should be read.
  assertEquals(2, read);
  assertEquals(2, Bytes.toShort(ib));
  assertEquals(0, bbis.available());
  // At the end, read() should return -1.
  assertEquals(-1, bbis.read());
  bbis.close();
  bb = ByteBuffer.wrap(bos.toByteArray());
  bbis = new ByteBuffInputStream(new MultiByteBuff(bb));
  DataInputStream dis = new DataInputStream(bbis);
  dis.read();
  assertEquals(i, dis.readInt());
  dis.close();
}
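The test above wraps a single ByteBuffer, so the multi-buffer seam is never exercised. MultiByteBuff's constructor also accepts several buffers, and ByteBuffInputStream reads straight across the boundary; a small sketch (buffer contents and method name are illustrative):

@Test
public void sketchReadAcrossBuffers() throws Exception {
  // Two fragments forming one logical 8-byte stream.
  ByteBuffer part1 = ByteBuffer.wrap(new byte[] { 0, 1, 2, 3 });
  ByteBuffer part2 = ByteBuffer.wrap(new byte[] { 4, 5, 6, 7 });
  ByteBuffInputStream in = new ByteBuffInputStream(new MultiByteBuff(part1, part2));
  assertEquals(8, in.available());
  byte[] out = new byte[8];
  assertEquals(8, in.read(out, 0, out.length)); // read crosses the buffer boundary
  assertEquals(-1, in.read());                  // stream exhausted
  in.close();
}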