Search in sources:

Example 16 with ArrayTupleBuilder

Use of org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder in project asterixdb by apache.

From the class BloomFilterTest, method singleFieldTest:

@Test
public void singleFieldTest() throws Exception {
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("TESTING BLOOM FILTER");
    }
    IBufferCache bufferCache = harness.getBufferCache();
    int numElements = 100;
    int[] keyFields = { 0 };
    BloomFilter bf = new BloomFilter(bufferCache, harness.getFileMapProvider(), harness.getFileReference(), keyFields);
    double acceptableFalsePositiveRate = 0.1;
    int maxBucketsPerElement = BloomCalculations.maxBucketsPerElement(numElements);
    BloomFilterSpecification bloomFilterSpec = BloomCalculations.computeBloomSpec(maxBucketsPerElement, acceptableFalsePositiveRate);
    bf.create();
    bf.activate();
    IIndexBulkLoader builder = bf.createBuilder(numElements, bloomFilterSpec.getNumHashes(), bloomFilterSpec.getNumBucketsPerElements());
    int fieldCount = 2;
    ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(fieldCount);
    ArrayTupleReference tuple = new ArrayTupleReference();
    // generate keys
    int maxKey = 1000;
    TreeSet<Integer> uniqueKeys = new TreeSet<>();
    ArrayList<Integer> keys = new ArrayList<>();
    while (uniqueKeys.size() < numElements) {
        int key = rnd.nextInt() % maxKey;
        uniqueKeys.add(key);
    }
    for (Integer i : uniqueKeys) {
        keys.add(i);
    }
    // Insert tuples in the bloom filter
    for (int i = 0; i < keys.size(); ++i) {
        TupleUtils.createIntegerTuple(tupleBuilder, tuple, keys.get(i), i);
        builder.add(tuple);
    }
    builder.end();
    // Check all the inserted tuples can be found.
    // 128-bit hash scratch space passed to contains()
    long[] hashes = new long[2];
    for (int i = 0; i < keys.size(); ++i) {
        TupleUtils.createIntegerTuple(tupleBuilder, tuple, keys.get(i), i);
        Assert.assertTrue(bf.contains(tuple, hashes));
    }
    bf.deactivate();
    bf.destroy();
}
Also used: ArrayTupleReference (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference), ArrayList (java.util.ArrayList), ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder), BloomFilter (org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilter), IIndexBulkLoader (org.apache.hyracks.storage.common.IIndexBulkLoader), TreeSet (java.util.TreeSet), BloomFilterSpecification (org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification), IBufferCache (org.apache.hyracks.storage.common.buffercache.IBufferCache), AbstractBloomFilterTest (org.apache.hyracks.storage.am.bloomfilter.util.AbstractBloomFilterTest), Test (org.junit.Test)
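Taken together, the example shows the full BloomFilter lifecycle: create and activate the filter, bulk-load it through an IIndexBulkLoader, probe it with contains, then deactivate and destroy it. Below is a minimal sketch that condenses that lifecycle into one helper; the parameter types are inferred from the harness getters used above, and only calls that already appear in the example are used.

// Minimal sketch: bulk-load a set of integer keys into a BloomFilter and probe it.
// bufferCache, fileMapProvider, and fileRef are assumed to come from a test harness, as above.
static void buildAndProbe(IBufferCache bufferCache, IFileMapProvider fileMapProvider,
        FileReference fileRef, int[] keys) throws Exception {
    // Hash only field 0 of each tuple.
    int[] keyFields = { 0 };
    BloomFilter bf = new BloomFilter(bufferCache, fileMapProvider, fileRef, keyFields);
    int maxBucketsPerElement = BloomCalculations.maxBucketsPerElement(keys.length);
    BloomFilterSpecification spec = BloomCalculations.computeBloomSpec(maxBucketsPerElement, 0.1);
    bf.create();
    bf.activate();
    IIndexBulkLoader builder = bf.createBuilder(keys.length, spec.getNumHashes(), spec.getNumBucketsPerElements());
    ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(1);
    ArrayTupleReference tuple = new ArrayTupleReference();
    for (int key : keys) {
        TupleUtils.createIntegerTuple(tupleBuilder, tuple, key);
        builder.add(tuple);
    }
    builder.end();
    // 128-bit hash scratch space passed to contains(); no false negatives are expected for inserted keys.
    long[] hashes = new long[2];
    for (int key : keys) {
        TupleUtils.createIntegerTuple(tupleBuilder, tuple, key);
        Assert.assertTrue(bf.contains(tuple, hashes));
    }
    bf.deactivate();
    bf.destroy();
}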

Example 17 with ArrayTupleBuilder

Use of org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder in project asterixdb by apache.

From the class BloomFilterTest, method multiFieldTest:

@Test
public void multiFieldTest() throws Exception {
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("TESTING BLOOM FILTER");
    }
    IBufferCache bufferCache = harness.getBufferCache();
    int numElements = 10000;
    int[] keyFields = { 2, 4, 1 };
    BloomFilter bf = new BloomFilter(bufferCache, harness.getFileMapProvider(), harness.getFileReference(), keyFields);
    double acceptableFalsePositiveRate = 0.1;
    int maxBucketsPerElement = BloomCalculations.maxBucketsPerElement(numElements);
    BloomFilterSpecification bloomFilterSpec = BloomCalculations.computeBloomSpec(maxBucketsPerElement, acceptableFalsePositiveRate);
    bf.create();
    bf.activate();
    IIndexBulkLoader builder = bf.createBuilder(numElements, bloomFilterSpec.getNumHashes(), bloomFilterSpec.getNumBucketsPerElements());
    int fieldCount = 5;
    ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
    ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(fieldCount);
    ArrayTupleReference tuple = new ArrayTupleReference();
    int maxLength = 20;
    ArrayList<String> s1 = new ArrayList<>();
    ArrayList<String> s2 = new ArrayList<>();
    ArrayList<String> s3 = new ArrayList<>();
    ArrayList<String> s4 = new ArrayList<>();
    for (int i = 0; i < numElements; ++i) {
        s1.add(randomString(rnd.nextInt() % maxLength, rnd));
        s2.add(randomString(rnd.nextInt() % maxLength, rnd));
        s3.add(randomString(rnd.nextInt() % maxLength, rnd));
        s4.add(randomString(rnd.nextInt() % maxLength, rnd));
    }
    for (int i = 0; i < numElements; ++i) {
        TupleUtils.createTuple(tupleBuilder, tuple, fieldSerdes, s1.get(i), s2.get(i), i, s3.get(i), s4.get(i));
        builder.add(tuple);
    }
    builder.end();
    // 128-bit hash scratch space passed to contains()
    long[] hashes = new long[2];
    for (int i = 0; i < numElements; ++i) {
        TupleUtils.createTuple(tupleBuilder, tuple, fieldSerdes, s1.get(i), s2.get(i), i, s3.get(i), s4.get(i));
        Assert.assertTrue(bf.contains(tuple, hashes));
    }
    bf.deactivate();
    bf.destroy();
}
Also used: ArrayTupleReference (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference), ArrayList (java.util.ArrayList), ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder), UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer), BloomFilter (org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilter), ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer), IIndexBulkLoader (org.apache.hyracks.storage.common.IIndexBulkLoader), BloomFilterSpecification (org.apache.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification), IBufferCache (org.apache.hyracks.storage.common.buffercache.IBufferCache), AbstractBloomFilterTest (org.apache.hyracks.storage.am.bloomfilter.util.AbstractBloomFilterTest), Test (org.junit.Test)
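TupleUtils.createTuple hides the per-field serialization that ArrayTupleBuilder is responsible for. The sketch below shows the usual hand-rolled pattern for the same 5-field tuple: reset the builder, serialize each value into its DataOutput, and mark every field boundary with addFieldEndOffset. Treat it as an assumption about what the utility does internally rather than a copy of it; the placeholder values are made up.

// Hand-built equivalent of TupleUtils.createTuple(tupleBuilder, tuple, fieldSerdes, s1, s2, i, s3, s4).
ArrayTupleBuilder tb = new ArrayTupleBuilder(5);
ArrayTupleReference tuple = new ArrayTupleReference();
ISerializerDeserializer utf8 = new UTF8StringSerializerDeserializer();
tb.reset();
utf8.serialize("s1-value", tb.getDataOutput());
tb.addFieldEndOffset();
utf8.serialize("s2-value", tb.getDataOutput());
tb.addFieldEndOffset();
IntegerSerializerDeserializer.INSTANCE.serialize(42, tb.getDataOutput());
tb.addFieldEndOffset();
utf8.serialize("s3-value", tb.getDataOutput());
tb.addFieldEndOffset();
utf8.serialize("s4-value", tb.getDataOutput());
tb.addFieldEndOffset();
// Point the reusable reference at the builder's buffer and field-end offsets.
tuple.reset(tb.getFieldEndOffsets(), tb.getByteArray());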

Example 18 with ArrayTupleBuilder

Use of org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder in project asterixdb by apache.

From the class MurmurHashForITupleReferenceTest, method murmurhashTwoIntegerFieldsTest:

@Test
public void murmurhashTwoIntegerFieldsTest() throws Exception {
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("TESTING MURMUR HASH TWO INTEGER FIELDS");
    }
    int fieldCount = 2;
    ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(fieldCount);
    ArrayTupleReference tuple = new ArrayTupleReference();
    TupleUtils.createIntegerTuple(tupleBuilder, tuple, rnd.nextInt(), rnd.nextInt());
    tuple.reset(tupleBuilder.getFieldEndOffsets(), tupleBuilder.getByteArray());
    int[] keyFields = { 0, 1 };
    int length = getTupleSize(tuple, keyFields);
    long[] actuals = new long[NUM_LONG_VARS_FOR_128_BIT_HASH];
    MurmurHash128Bit.hash3_x64_128(tuple, keyFields, 0L, actuals);
    ByteBuffer buffer;
    byte[] array = new byte[length];
    fillArrayWithData(array, keyFields, tuple, length);
    buffer = ByteBuffer.wrap(array);
    long[] expecteds = hash3_x64_128(buffer, 0, length, 0L);
    Assert.assertArrayEquals(expecteds, actuals);
}
Also used: ArrayTupleReference (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference), ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder), ByteBuffer (java.nio.ByteBuffer), AbstractBloomFilterTest (org.apache.hyracks.storage.am.bloomfilter.util.AbstractBloomFilterTest), Test (org.junit.Test)
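hash3_x64_128 fills the caller-supplied long[2] with the two 64-bit halves of a single 128-bit hash, which is why NUM_LONG_VARS_FOR_128_BIT_HASH is 2. A common way consumers such as Bloom filters stretch those two halves into several probe positions is double hashing; the sketch below shows that scheme in isolation, with made-up sizes. Whether this BloomFilter implementation uses exactly this formula is an assumption.

// Illustration only: derive k bucket indices from the two 64-bit halves via double hashing.
// Not necessarily what BloomFilter does internally.
long[] hashes = new long[2];
MurmurHash128Bit.hash3_x64_128(tuple, keyFields, 0L, hashes);
int numHashes = 5;        // hypothetical k
long numBuckets = 1024L;  // hypothetical number of bits in the filter
for (int i = 0; i < numHashes; ++i) {
    long bucket = Math.abs((hashes[0] + i * hashes[1]) % numBuckets);
    // set or test bit 'bucket' in the filter's bit vector
}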

Example 19 with ArrayTupleBuilder

Use of org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder in project asterixdb by apache.

From the class MurmurHashForITupleReferenceTest, method murmurhashONEIntegerFieldTest:

@Test
public void murmurhashONEIntegerFieldTest() throws Exception {
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("TESTING MURMUR HASH ONE INTEGER FIELD");
    }
    int fieldCount = 2;
    ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(fieldCount);
    ArrayTupleReference tuple = new ArrayTupleReference();
    TupleUtils.createIntegerTuple(tupleBuilder, tuple, rnd.nextInt());
    tuple.reset(tupleBuilder.getFieldEndOffsets(), tupleBuilder.getByteArray());
    int[] keyFields = { 0 };
    int length = getTupleSize(tuple, keyFields);
    long[] actuals = new long[NUM_LONG_VARS_FOR_128_BIT_HASH];
    MurmurHash128Bit.hash3_x64_128(tuple, keyFields, 0L, actuals);
    ByteBuffer buffer;
    byte[] array = new byte[length];
    fillArrayWithData(array, keyFields, tuple, length);
    buffer = ByteBuffer.wrap(array);
    long[] expecteds = hash3_x64_128(buffer, 0, length, 0L);
    Assert.assertArrayEquals(expecteds, actuals);
}
Also used: ArrayTupleReference (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference), ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder), ByteBuffer (java.nio.ByteBuffer), AbstractBloomFilterTest (org.apache.hyracks.storage.am.bloomfilter.util.AbstractBloomFilterTest), Test (org.junit.Test)

Example 20 with ArrayTupleBuilder

Use of org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder in project asterixdb by apache.

From the class MurmurHashForITupleReferenceTest, method murmurhashThreeStringFieldsTest:

@Test
public void murmurhashThreeStringFieldsTest() throws Exception {
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("TESTING MURMUR HASH THREE STRING FIELDS");
    }
    int fieldCount = 3;
    ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
    ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(fieldCount);
    ArrayTupleReference tuple = new ArrayTupleReference();
    String s1 = randomString(40, rnd);
    String s2 = randomString(60, rnd);
    String s3 = randomString(20, rnd);
    TupleUtils.createTuple(tupleBuilder, tuple, fieldSerdes, s1, s2, s3);
    int[] keyFields = { 2, 0, 1 };
    int length = getTupleSize(tuple, keyFields);
    long[] actuals = new long[NUM_LONG_VARS_FOR_128_BIT_HASH];
    MurmurHash128Bit.hash3_x64_128(tuple, keyFields, 0L, actuals);
    byte[] array = new byte[length];
    ByteBuffer buffer;
    fillArrayWithData(array, keyFields, tuple, length);
    buffer = ByteBuffer.wrap(array);
    long[] expecteds = hash3_x64_128(buffer, 0, length, 0L);
    Assert.assertArrayEquals(expecteds, actuals);
}
Also used: ArrayTupleReference (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference), ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder), UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer), ByteBuffer (java.nio.ByteBuffer), ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer), AbstractBloomFilterTest (org.apache.hyracks.storage.am.bloomfilter.util.AbstractBloomFilterTest), Test (org.junit.Test)
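All three MurmurHash tests rely on a fillArrayWithData helper (not reproduced in these snippets) to lay the key fields out as one contiguous byte array, so the ByteBuffer-based reference implementation hashes exactly the bytes that the tuple-based MurmurHash128Bit.hash3_x64_128 sees. A plausible reconstruction is sketched below; since the real helper is not shown here, treat it as an assumption.

// Hypothetical reconstruction: copy each key field's bytes, in keyFields order, into a flat array.
static void fillArrayWithData(byte[] array, int[] keyFields, ITupleReference tuple, int length) {
    int pos = 0;
    for (int field : keyFields) {
        int len = tuple.getFieldLength(field);
        System.arraycopy(tuple.getFieldData(field), tuple.getFieldStart(field), array, pos, len);
        pos += len;
    }
    assert pos == length; // the caller sized 'array' with getTupleSize(tuple, keyFields)
}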

Aggregations

ArrayTupleBuilder (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder): 99 usages
ArrayTupleReference (org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference): 45 usages
ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer): 42 usages
Test (org.junit.Test): 40 usages
HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException): 35 usages
DataOutput (java.io.DataOutput): 33 usages
IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory): 25 usages
UTF8StringSerializerDeserializer (org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer): 24 usages
ITypeTraits (org.apache.hyracks.api.dataflow.value.ITypeTraits): 21 usages
RecordDescriptor (org.apache.hyracks.api.dataflow.value.RecordDescriptor): 21 usages
ITreeIndex (org.apache.hyracks.storage.am.common.api.ITreeIndex): 18 usages
FrameTupleAppender (org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender): 17 usages
ConstantTupleSourceOperatorDescriptor (org.apache.hyracks.dataflow.std.misc.ConstantTupleSourceOperatorDescriptor): 17 usages
VSizeFrame (org.apache.hyracks.api.comm.VSizeFrame): 16 usages
JobSpecification (org.apache.hyracks.api.job.JobSpecification): 16 usages
OneToOneConnectorDescriptor (org.apache.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor): 16 usages
IIndexAccessor (org.apache.hyracks.storage.common.IIndexAccessor): 16 usages
IFileSplitProvider (org.apache.hyracks.dataflow.std.file.IFileSplitProvider): 15 usages
BTreeSearchOperatorDescriptor (org.apache.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor): 14 usages
IOperatorDescriptor (org.apache.hyracks.api.dataflow.IOperatorDescriptor): 12 usages