Search in sources :

Example 6 with MultiByteBuff

use of org.apache.hadoop.hbase.nio.MultiByteBuff in project hbase by apache.

the class TestBloomFilterChunk method testBasicBloom.

/**
 * Exercises the basic add/contains behaviour of {@code BloomFilterChunk} and then checks that
 * the bytes produced by {@code writeBloom} can be queried directly.
 *
 * <p>Part 1 fills two chunks with different keys and verifies that each chunk reports only the
 * keys that were added to it. Part 2 serializes the first chunk, wraps the resulting bytes and
 * runs the same lookups against them, using the sizing parameters of a freshly constructed
 * chunk with the same configuration.
 *
 * @throws Exception if writing the bloom to the in-memory stream fails
 */
public void testBasicBloom() throws Exception {
    BloomFilterChunk bf1 = new BloomFilterChunk(1000, (float) 0.01, Hash.MURMUR_HASH, 0);
    BloomFilterChunk bf2 = new BloomFilterChunk(1000, (float) 0.01, Hash.MURMUR_HASH, 0);
    bf1.allocBloom();
    bf2.allocBloom();

    // test 1: verify no fundamental false negatives or positives
    // The two keys differ only in their last byte.
    byte[] key1 = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
    byte[] key2 = { 1, 2, 3, 4, 5, 6, 7, 8, 7 };
    bf1.add(key1, 0, key1.length);
    bf2.add(key2, 0, key2.length);
    assertTrue(BloomFilterUtil.contains(key1, 0, key1.length, new MultiByteBuff(bf1.bloom), 0, (int) bf1.byteSize, bf1.hash, bf1.hashCount));
    assertFalse(BloomFilterUtil.contains(key2, 0, key2.length, new MultiByteBuff(bf1.bloom), 0, (int) bf1.byteSize, bf1.hash, bf1.hashCount));
    assertFalse(BloomFilterUtil.contains(key1, 0, key1.length, new MultiByteBuff(bf2.bloom), 0, (int) bf2.byteSize, bf2.hash, bf2.hashCount));
    assertTrue(BloomFilterUtil.contains(key2, 0, key2.length, new MultiByteBuff(bf2.bloom), 0, (int) bf2.byteSize, bf2.hash, bf2.hashCount));

    byte[] bkey = { 1, 2, 3, 4 };
    // Explicit charset so the hashed bytes do not depend on the platform default encoding.
    byte[] bval = "this is a much larger byte array".getBytes(java.nio.charset.StandardCharsets.UTF_8);
    bf1.add(bkey, 0, bkey.length);
    // Only the sub-range starting at offset 1 is added; the full array must therefore miss.
    bf1.add(bval, 1, bval.length - 1);
    assertTrue(BloomFilterUtil.contains(bkey, 0, bkey.length, new MultiByteBuff(bf1.bloom), 0, (int) bf1.byteSize, bf1.hash, bf1.hashCount));
    assertTrue(BloomFilterUtil.contains(bval, 1, bval.length - 1, new MultiByteBuff(bf1.bloom), 0, (int) bf1.byteSize, bf1.hash, bf1.hashCount));
    assertFalse(BloomFilterUtil.contains(bval, 0, bval.length, new MultiByteBuff(bf1.bloom), 0, (int) bf1.byteSize, bf1.hash, bf1.hashCount));

    // test 2: serialization & deserialization.
    // (convert bloom to byte array & read byte array back in as input)
    ByteArrayOutputStream bOut = new ByteArrayOutputStream();
    bf1.writeBloom(new DataOutputStream(bOut));
    ByteBuffer bb = ByteBuffer.wrap(bOut.toByteArray());
    // newBf1 only supplies byteSize/hash/hashCount; the bits themselves come from bb.
    BloomFilterChunk newBf1 = new BloomFilterChunk(1000, (float) 0.01, Hash.MURMUR_HASH, 0);
    assertTrue(BloomFilterUtil.contains(key1, 0, key1.length, new MultiByteBuff(bb), 0, (int) newBf1.byteSize, newBf1.hash, newBf1.hashCount));
    assertFalse(BloomFilterUtil.contains(key2, 0, key2.length, new MultiByteBuff(bb), 0, (int) newBf1.byteSize, newBf1.hash, newBf1.hashCount));
    assertTrue(BloomFilterUtil.contains(bkey, 0, bkey.length, new MultiByteBuff(bb), 0, (int) newBf1.byteSize, newBf1.hash, newBf1.hashCount));
    assertTrue(BloomFilterUtil.contains(bval, 1, bval.length - 1, new MultiByteBuff(bb), 0, (int) newBf1.byteSize, newBf1.hash, newBf1.hashCount));
    assertFalse(BloomFilterUtil.contains(bval, 0, bval.length, new MultiByteBuff(bb), 0, (int) newBf1.byteSize, newBf1.hash, newBf1.hashCount));

    System.out.println("Serialized as " + bOut.size() + " bytes");
    //... allow small padding
    assertTrue(bOut.size() - bf1.byteSize < 10);
}
Also used : DataOutputStream(java.io.DataOutputStream) MultiByteBuff(org.apache.hadoop.hbase.nio.MultiByteBuff) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ByteBuffer(java.nio.ByteBuffer)

Example 7 with MultiByteBuff

use of org.apache.hadoop.hbase.nio.MultiByteBuff in project hbase by apache.

the class TestBloomFilterChunk method testBloomPerf.

/**
 * Times the add, fold and lookup phases of a large {@code BloomFilterChunk} and checks the
 * observed false-positive count against the configured error rate.
 *
 * <p>One million integer keys are inserted, the chunk is compacted, and then two million
 * integer keys are probed: every inserted key must be reported, and the number of reported
 * keys that were never inserted must not exceed {@code insertedKeys * errorRate}.
 *
 * @throws Exception if any bloom operation fails
 */
public void testBloomPerf() throws Exception {
    final float errorRate = (float) 0.01;
    final int insertedKeys = 1 * 1000 * 1000;
    final int probedKeys = 2 * 1000 * 1000;

    BloomFilterChunk chunk = new BloomFilterChunk(10 * 1000 * 1000, errorRate, Hash.MURMUR_HASH, 3);
    chunk.allocBloom();

    // Phase 1: add
    long phaseStart = System.currentTimeMillis();
    long sizeBeforeFold = chunk.getByteSize();
    for (int key = 0; key < insertedKeys; key++) {
        byte[] encoded = Bytes.toBytes(key);
        chunk.add(encoded, 0, encoded.length);
    }
    long addMillis = System.currentTimeMillis() - phaseStart;
    System.out.println("Total Add time = " + addMillis + "ms");

    // Phase 2: fold
    phaseStart = System.currentTimeMillis();
    chunk.compactBloom();
    long foldMillis = System.currentTimeMillis() - phaseStart;
    System.out.println("Total Fold time = " + foldMillis + "ms");
    // The folded chunk must be at most one eighth of the original size.
    assertTrue(sizeBeforeFold >= chunk.getByteSize() << 3);

    // Phase 3: lookups
    phaseStart = System.currentTimeMillis();
    int falsePositives = 0;
    for (int probe = 0; probe < probedKeys; probe++) {
        byte[] encoded = Bytes.toBytes(probe);
        boolean wasInserted = probe < insertedKeys;
        boolean reported = BloomFilterUtil.contains(encoded, 0, encoded.length, new MultiByteBuff(chunk.bloom), 0, (int) chunk.byteSize, chunk.hash, chunk.hashCount);
        if (!reported) {
            // A miss is only acceptable for keys that were never added.
            assertFalse(wasInserted);
        } else if (!wasInserted) {
            falsePositives++;
        }
    }
    long containsMillis = System.currentTimeMillis() - phaseStart;
    System.out.println("Total Contains time = " + containsMillis + "ms");
    System.out.println("False Positive = " + falsePositives);
    assertTrue(falsePositives <= insertedKeys * errorRate);
}
Also used : MultiByteBuff(org.apache.hadoop.hbase.nio.MultiByteBuff)

Example 8 with MultiByteBuff

use of org.apache.hadoop.hbase.nio.MultiByteBuff in project hbase by apache.

the class TestBloomFilterChunk method testBloomFold.

/**
 * Checks that compacting a sparsely populated {@code BloomFilterChunk} shrinks it to a quarter
 * of its allocated size while still reporting every inserted key.
 *
 * <p>Twelve integer keys are inserted and twenty-five are probed; at most one of the keys that
 * were never inserted may be reported as present.
 *
 * @throws Exception if any bloom operation fails
 */
public void testBloomFold() throws Exception {
    // test: foldFactor < log(max/actual)
    final int insertedKeys = 12;
    final int probedKeys = 25;

    BloomFilterChunk chunk = new BloomFilterChunk(1003, (float) 0.01, Hash.MURMUR_HASH, 2);
    chunk.allocBloom();
    long unfoldedSize = chunk.getByteSize();
    assertEquals(1204, unfoldedSize);

    for (int key = 0; key < insertedKeys; key++) {
        byte[] encoded = Bytes.toBytes(key);
        chunk.add(encoded, 0, encoded.length);
    }

    chunk.compactBloom();
    assertEquals(unfoldedSize >> 2, chunk.getByteSize());

    int falsePositives = 0;
    for (int probe = 0; probe < probedKeys; probe++) {
        byte[] encoded = Bytes.toBytes(probe);
        boolean wasInserted = probe < insertedKeys;
        boolean reported = BloomFilterUtil.contains(encoded, 0, encoded.length, new MultiByteBuff(chunk.bloom), 0, (int) chunk.byteSize, chunk.hash, chunk.hashCount);
        if (!reported) {
            // A miss is only acceptable for keys that were never added.
            assertFalse(wasInserted);
        } else if (!wasInserted) {
            falsePositives++;
        }
    }
    assertTrue(falsePositives <= 1);
}
Also used : MultiByteBuff(org.apache.hadoop.hbase.nio.MultiByteBuff)

Example 9 with MultiByteBuff

use of org.apache.hadoop.hbase.nio.MultiByteBuff in project hbase by apache.

the class ByteBufferArray method asSubByteBuff.

/**
 * Creates a ByteBuff over the region of the underlying ByteBuffers that starts at the given
 * offset and spans the given length. For example, if there are 4 buffers forming an array, each
 * with length 10, and we call asSubByteBuff(5, 10), then we will create an MBB consisting of
 * two BBs: the first one is a BB from 'position' 5 with a 'length' of 5 and the 2nd BB is from
 * 'position' 0 with a 'length' of 5.
 * <p>
 * The returned ByteBuff is built from slices of duplicates of the underlying buffers; no bytes
 * are copied by this method.
 *
 * @param offset absolute start of the region, counted across all buffers as if they were
 *               concatenated (buffer index is {@code offset / bufferSize})
 * @param len number of bytes the region covers; asserted to be non-negative
 * @return a ByteBuff formed from the underlying ByteBuffers: a {@link MultiByteBuff} when more
 *         than one buffer is involved, otherwise a {@link SingleByteBuff}
 * @throws RuntimeException if the buffer index computed from {@code offset} lies outside the
 *         buffers array
 */
public ByteBuff asSubByteBuff(long offset, int len) {
    assert len >= 0;
    // Exclusive end of the requested region, in the same absolute coordinates as offset.
    long end = offset + len;
    // Translate both absolute positions into (buffer index, offset inside that buffer).
    int startBuffer = (int) (offset / bufferSize), startBufferOffset = (int) (offset % bufferSize);
    int endBuffer = (int) (end / bufferSize), endBufferOffset = (int) (end % bufferSize);
    // Last buffer in the array is a dummy one with 0 capacity. Avoid sending back that.
    // When the region ends exactly at bufferCount * bufferSize, step back to the previous buffer
    // and take it all the way to bufferSize instead.
    if (endBuffer == this.bufferCount) {
        endBuffer--;
        endBufferOffset = bufferSize;
    }
    assert startBuffer >= 0 && startBuffer < bufferCount;
    // NOTE(review): after the adjustment above endBuffer can no longer equal bufferCount, so the
    // second alternative of this assertion looks unreachable.
    assert endBuffer >= 0 && endBuffer < bufferCount || (endBuffer == bufferCount && endBufferOffset == 0);
    // Explicit check in addition to the assertions above, so an out-of-range start is still
    // reported (logged and thrown) when assertions are disabled.
    if (startBuffer >= buffers.length || startBuffer < 0) {
        String msg = "Failed subArray, start=" + offset + ",startBuffer=" + startBuffer + ",bufferSize=" + bufferSize;
        LOG.error(msg);
        throw new RuntimeException(msg);
    }
    // srcIndex accumulates the number of bytes covered so far; cnt is the share of the current
    // buffer.
    int srcIndex = 0, cnt = -1;
    // One slot per buffer touched by the region, both ends inclusive.
    ByteBuffer[] mbb = new ByteBuffer[endBuffer - startBuffer + 1];
    for (int i = startBuffer, j = 0; i <= endBuffer; ++i, j++) {
        // Work on a duplicate so the position/limit of the stored buffer are left untouched.
        ByteBuffer bb = buffers[i].duplicate();
        if (i == startBuffer) {
            // First buffer: from startBufferOffset to the end of the buffer, capped at len when
            // the whole region fits inside this one buffer.
            cnt = bufferSize - startBufferOffset;
            if (cnt > len)
                cnt = len;
            bb.limit(startBufferOffset + cnt).position(startBufferOffset);
        } else if (i == endBuffer) {
            // Last buffer: only the leading endBufferOffset bytes.
            // NOTE(review): if the region ends exactly on the boundary of a buffer that is not the
            // final real one, endBufferOffset is 0 and this slot becomes a zero-length slice.
            cnt = endBufferOffset;
            bb.position(0).limit(cnt);
        } else {
            // Buffers strictly between the first and the last are taken in full.
            cnt = bufferSize;
            bb.position(0).limit(cnt);
        }
        // slice() yields a buffer covering exactly the [position, limit) window set above.
        mbb[j] = bb.slice();
        srcIndex += cnt;
    }
    // The per-buffer shares must add up to the requested length.
    assert srcIndex == len;
    if (mbb.length > 1) {
        return new MultiByteBuff(mbb);
    } else {
        return new SingleByteBuff(mbb[0]);
    }
}
Also used : SingleByteBuff(org.apache.hadoop.hbase.nio.SingleByteBuff) MultiByteBuff(org.apache.hadoop.hbase.nio.MultiByteBuff) ByteBuffer(java.nio.ByteBuffer)

Example 10 with MultiByteBuff

use of org.apache.hadoop.hbase.nio.MultiByteBuff in project hbase by apache.

the class TestMultiByteBuffInputStream method testReads.

/**
 * Verifies single-byte and bulk reads, {@code available()} accounting and end-of-stream
 * behaviour of {@code ByteBuffInputStream} backed by a {@code MultiByteBuff}, and that the
 * stream can be consumed through a {@link DataInputStream}.
 *
 * <p>The number of bytes returned by every bulk read is asserted, so a short read fails at the
 * call that caused it instead of surfacing later as a wrong decoded value.
 *
 * @throws Exception if writing the fixture or reading from the stream fails
 */
@Test
public void testReads() throws Exception {
    ByteArrayOutputStream bos = new ByteArrayOutputStream(100);
    DataOutputStream dos = new DataOutputStream(bos);
    String s = "test";
    int i = 128;
    dos.write(1);
    dos.writeInt(i);
    dos.writeBytes(s);
    dos.writeLong(12345L);
    dos.writeShort(2);
    dos.flush();
    ByteBuffer bb = ByteBuffer.wrap(bos.toByteArray());
    // bbis contains 19 bytes
    // 1 byte, 4 bytes int, 4 bytes string, 8 bytes long and 2 bytes short
    ByteBuffInputStream bbis = new ByteBuffInputStream(new MultiByteBuff(bb));
    assertEquals(15 + s.length(), bbis.available());
    assertEquals(1, bbis.read());
    byte[] ib = new byte[4];
    assertEquals(ib.length, bbis.read(ib));
    assertEquals(i, Bytes.toInt(ib));
    byte[] sb = new byte[s.length()];
    assertEquals(sb.length, bbis.read(sb));
    assertEquals(s, Bytes.toString(sb));
    byte[] lb = new byte[8];
    assertEquals(lb.length, bbis.read(lb));
    assertEquals(12345, Bytes.toLong(lb));
    assertEquals(2, bbis.available());
    ib = new byte[4];
    int read = bbis.read(ib, 0, ib.length);
    // We don't have 4 bytes remaining but only 2, so only those should be returned.
    assertEquals(2, read);
    assertEquals(2, Bytes.toShort(ib));
    assertEquals(0, bbis.available());
    // At end. The read() should return -1
    assertEquals(-1, bbis.read());
    bbis.close();
    // Second pass: read the same bytes through a DataInputStream wrapper.
    bb = ByteBuffer.wrap(bos.toByteArray());
    bbis = new ByteBuffInputStream(new MultiByteBuff(bb));
    DataInputStream dis = new DataInputStream(bbis);
    // The leading byte written by dos.write(1) has to be consumed before the int.
    assertEquals(1, dis.read());
    assertEquals(i, dis.readInt());
    dis.close();
}
Also used : DataOutputStream(java.io.DataOutputStream) MultiByteBuff(org.apache.hadoop.hbase.nio.MultiByteBuff) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DataInputStream(java.io.DataInputStream) ByteBuffer(java.nio.ByteBuffer) Test(org.junit.Test)

Aggregations

MultiByteBuff (org.apache.hadoop.hbase.nio.MultiByteBuff)10 ByteBuffer (java.nio.ByteBuffer)8 SingleByteBuff (org.apache.hadoop.hbase.nio.SingleByteBuff)4 Test (org.junit.Test)4 ByteArrayOutputStream (java.io.ByteArrayOutputStream)3 DataOutputStream (java.io.DataOutputStream)3 ByteBuff (org.apache.hadoop.hbase.nio.ByteBuff)2 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 DataInputStream (java.io.DataInputStream)1 ArrayList (java.util.ArrayList)1 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)1 KeyValue (org.apache.hadoop.hbase.KeyValue)1 ByteBufferPool (org.apache.hadoop.hbase.io.ByteBufferPool)1 CallCleanup (org.apache.hadoop.hbase.ipc.RpcServer.CallCleanup)1 Pair (org.apache.hadoop.hbase.util.Pair)1