Use of org.apache.hyracks.api.dataflow.value.ISerializerDeserializer in project asterixdb by Apache.
In the class LSMBTreeExamplesTest, method additionalFilteringingExample.
/**
 * Exercises the LSM component filters: creates a two-field integer index whose second field is
 * the filter field, inserts tuples with random keys, scans the index, and finally runs a range
 * search that is bounded by both key tuples and min/max filter tuples.
 */
@Test
public void additionalFilteringingExample() throws Exception {
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("Testing LSMBTree component filters.");
    }

    // Field layout: two integer fields.
    final int fieldCount = 2;
    ITypeTraits[] typeTraits = { IntegerPointable.TYPE_TRAITS, IntegerPointable.TYPE_TRAITS };

    // One serde per field.
    ISerializerDeserializer[] fieldSerdes =
            { IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE };

    // Key layout: a single integer key.
    final int keyFieldCount = 1;
    IBinaryComparatorFactory[] cmpFactories = { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) };

    // Only the LSM-BTree makes use of the bloom filter key fields.
    int[] bloomFilterKeyFields = { 0 };

    // Filter configuration.
    ITypeTraits[] filterTypeTraits = { IntegerPointable.TYPE_TRAITS };
    IBinaryComparatorFactory[] filterCmpFactories =
            { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) };
    int[] filterFields = { 1 };
    int[] btreeFields = { 1 };

    ITreeIndex treeIndex = createTreeIndex(typeTraits, cmpFactories, bloomFilterKeyFields, filterTypeTraits,
            filterCmpFactories, btreeFields, filterFields);
    treeIndex.create();
    treeIndex.activate();

    long start = System.currentTimeMillis();
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("Inserting into tree...");
    }
    ArrayTupleBuilder tb = new ArrayTupleBuilder(fieldCount);
    ArrayTupleReference tuple = new ArrayTupleReference();
    IIndexAccessor indexAccessor =
            treeIndex.createAccessor(TestOperationCallback.INSTANCE, TestOperationCallback.INSTANCE);

    final int numInserts = 10000;
    for (int i = 0; i < numInserts; i++) {
        // Random key in the first field; the insertion index goes into the second (filter) field.
        int key = rnd.nextInt() % numInserts;
        int filterValue = i;
        TupleUtils.createIntegerTuple(tb, tuple, key, filterValue);
        // Log only every 1000th insert.
        if (LOGGER.isLoggable(Level.INFO) && i % 1000 == 0) {
            LOGGER.info("Inserting " + i + " : " + key + " " + filterValue);
        }
        indexAccessor.insert(tuple);
    }
    long elapsedMs = System.currentTimeMillis() - start;
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info(numInserts + " inserts in " + elapsedMs + "ms");
    }

    orderedScan(indexAccessor, fieldSerdes);
    diskOrderScan(indexAccessor, fieldSerdes);

    // Lower bound of the key range.
    ArrayTupleBuilder lowKeyBuilder = new ArrayTupleBuilder(keyFieldCount);
    ArrayTupleReference lowKey = new ArrayTupleReference();
    TupleUtils.createIntegerTuple(lowKeyBuilder, lowKey, -1000);

    // Upper bound of the key range.
    ArrayTupleBuilder highKeyBuilder = new ArrayTupleBuilder(keyFieldCount);
    ArrayTupleReference highKey = new ArrayTupleReference();
    TupleUtils.createIntegerTuple(highKeyBuilder, highKey, 1000);

    // Minimum filter tuple.
    ArrayTupleBuilder minFilterBuilder = new ArrayTupleBuilder(filterFields.length);
    ArrayTupleReference minTuple = new ArrayTupleReference();
    TupleUtils.createIntegerTuple(minFilterBuilder, minTuple, 400);

    // Maximum filter tuple.
    ArrayTupleBuilder maxFilterBuilder = new ArrayTupleBuilder(filterFields.length);
    ArrayTupleReference maxTuple = new ArrayTupleReference();
    TupleUtils.createIntegerTuple(maxFilterBuilder, maxTuple, 500);

    rangeSearch(cmpFactories, indexAccessor, fieldSerdes, lowKey, highKey, minTuple, maxTuple);

    treeIndex.validate();
    treeIndex.deactivate();
    treeIndex.destroy();
}
Use of org.apache.hyracks.api.dataflow.value.ISerializerDeserializer in project asterixdb by Apache.
In the class BloomFilterTest, method multiFieldTest.
/**
 * Bulk-loads a bloom filter with five-field tuples, using fields 2, 4 and 1 as the key fields,
 * and asserts that every loaded tuple is subsequently reported as contained.
 *
 * @throws Exception if creating, loading, or probing the bloom filter fails
 */
@Test
public void multiFieldTest() throws Exception {
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("TESTING BLOOM FILTER");
    }

    IBufferCache bufferCache = harness.getBufferCache();
    int numElements = 10000;
    int[] keyFields = { 2, 4, 1 };
    BloomFilter bf =
            new BloomFilter(bufferCache, harness.getFileMapProvider(), harness.getFileReference(), keyFields);

    // Derive the bloom filter specification from the element count and the tolerated false positive rate.
    double acceptableFalsePositiveRate = 0.1;
    int maxBucketsPerElement = BloomCalculations.maxBucketsPerElement(numElements);
    BloomFilterSpecification bloomFilterSpec =
            BloomCalculations.computeBloomSpec(maxBucketsPerElement, acceptableFalsePositiveRate);

    bf.create();
    bf.activate();
    IIndexBulkLoader builder = bf.createBuilder(numElements, bloomFilterSpec.getNumHashes(),
            bloomFilterSpec.getNumBucketsPerElements());

    // Tuple layout: string, string, int, string, string.
    int fieldCount = 5;
    ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
    ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(fieldCount);
    ArrayTupleReference tuple = new ArrayTupleReference();

    int maxLength = 20;
    ArrayList<String> s1 = new ArrayList<>();
    ArrayList<String> s2 = new ArrayList<>();
    ArrayList<String> s3 = new ArrayList<>();
    ArrayList<String> s4 = new ArrayList<>();
    // Lengths are drawn with nextInt(bound) so they always fall in [0, maxLength);
    // "rnd.nextInt() % maxLength" could produce a negative length.
    // NOTE(review): assumes rnd is a java.util.Random - confirm against the field declaration.
    for (int i = 0; i < numElements; ++i) {
        s1.add(randomString(rnd.nextInt(maxLength), rnd));
        s2.add(randomString(rnd.nextInt(maxLength), rnd));
        s3.add(randomString(rnd.nextInt(maxLength), rnd));
        s4.add(randomString(rnd.nextInt(maxLength), rnd));
    }

    // Load every tuple into the bloom filter.
    for (int i = 0; i < numElements; ++i) {
        TupleUtils.createTuple(tupleBuilder, tuple, fieldSerdes, s1.get(i), s2.get(i), i, s3.get(i), s4.get(i));
        builder.add(tuple);
    }
    builder.end();

    // Rebuild each tuple and check that the filter reports it as present.
    long[] hashes = new long[2];
    for (int i = 0; i < numElements; ++i) {
        TupleUtils.createTuple(tupleBuilder, tuple, fieldSerdes, s1.get(i), s2.get(i), i, s3.get(i), s4.get(i));
        Assert.assertTrue(bf.contains(tuple, hashes));
    }

    bf.deactivate();
    bf.destroy();
}
Use of org.apache.hyracks.api.dataflow.value.ISerializerDeserializer in project asterixdb by Apache.
In the class MurmurHashForITupleReferenceTest, method murmurhashThreeStringFieldsTest.
/**
 * Builds a tuple of three random strings, hashes its key fields (in the order 2, 0, 1) with
 * {@code MurmurHash128Bit.hash3_x64_128}, and compares the outcome with the result of the
 * byte-buffer based {@code hash3_x64_128} helper applied to the bytes written by
 * {@code fillArrayWithData}.
 */
@Test
public void murmurhashThreeStringFieldsTest() throws Exception {
    if (LOGGER.isLoggable(Level.INFO)) {
        LOGGER.info("TESTING MURMUR HASH THREE STRING FIELDS");
    }

    final int fieldCount = 3;
    ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
    ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(fieldCount);
    ArrayTupleReference tuple = new ArrayTupleReference();

    // Three random strings requested with lengths 40, 60 and 20, drawn in that order.
    String first = randomString(40, rnd);
    String second = randomString(60, rnd);
    String third = randomString(20, rnd);
    TupleUtils.createTuple(tupleBuilder, tuple, fieldSerdes, first, second, third);

    int[] keyFields = { 2, 0, 1 };
    int length = getTupleSize(tuple, keyFields);

    // Hash computed directly from the tuple reference.
    long[] actuals = new long[NUM_LONG_VARS_FOR_128_BIT_HASH];
    MurmurHash128Bit.hash3_x64_128(tuple, keyFields, 0L, actuals);

    // Hash computed from a flat byte array wrapped in a buffer.
    byte[] keyBytes = new byte[length];
    fillArrayWithData(keyBytes, keyFields, tuple, length);
    ByteBuffer buffer = ByteBuffer.wrap(keyBytes);
    long[] expecteds = hash3_x64_128(buffer, 0, length, 0L);

    Assert.assertArrayEquals(expecteds, actuals);
}
Use of org.apache.hyracks.api.dataflow.value.ISerializerDeserializer in project asterixdb by Apache.
In the class LSMBTreeTuplesTest, method testLSMBTreeTuple.
/**
 * Writes randomly generated tuples with matter, antimatter and copy tuple writers and verifies
 * that LSM B-tree tuple references can read them back, for every combination of key-field count
 * and total field count taken from prefixes of {@code maxFieldSerdes}.
 *
 * @param maxFieldSerdes serdes of the widest tuple; narrower tuples use a prefix of this array
 * @throws HyracksDataException declared for the tuple creation and writing calls made here
 */
private void testLSMBTreeTuple(ISerializerDeserializer[] maxFieldSerdes) throws HyracksDataException {
    final int maxFieldCount = maxFieldSerdes.length;
    // Number of times the reset/check sequence is repeated for each configuration.
    final int repetitions = 4;

    // Type traits and value generators for the widest tuple.
    ITypeTraits[] fullTypeTraits = SerdeUtils.serdesToTypeTraits(maxFieldSerdes);
    IFieldValueGenerator[] fullFieldGens = DataGenUtils.getFieldGensFromSerdes(maxFieldSerdes, rnd, false);

    // Random values for the widest tuple.
    Object[] fullValues = new Object[maxFieldCount];
    for (int idx = 0; idx < maxFieldCount; idx++) {
        fullValues[idx] = fullFieldGens[idx].next();
    }

    // Vary the number of key fields ...
    for (int keyCount = 1; keyCount < maxFieldCount; keyCount++) {
        // ... and, for each, the total number of fields whose bytes the tuple references must interpret.
        for (int fieldCount = keyCount; fieldCount <= maxFieldCount; fieldCount++) {
            // Write the widest tuple to bytes with a matter writer.
            LSMBTreeTupleWriter fullMatterWriter = new LSMBTreeTupleWriter(fullTypeTraits, keyCount, false);
            ITupleReference fullTuple = TupleUtils.createTuple(maxFieldSerdes, (Object[]) fullValues);
            ByteBuffer fullMatterBytes = writeTuple(fullTuple, fullMatterWriter);
            // The same reference is used below to read both matter and antimatter bytes.
            LSMBTreeTupleReference fullTupleRef =
                    (LSMBTreeTupleReference) fullMatterWriter.createTupleReference();

            // Serdes, traits and generators for the first fieldCount fields only.
            ISerializerDeserializer[] prefixSerdes = Arrays.copyOfRange(maxFieldSerdes, 0, fieldCount);
            ITypeTraits[] prefixTraits = SerdeUtils.serdesToTypeTraits(prefixSerdes);
            IFieldValueGenerator[] prefixGens = DataGenUtils.getFieldGensFromSerdes(prefixSerdes, rnd, false);

            // Random values for the narrower tuple.
            Object[] values = new Object[fieldCount];
            for (int idx = 0; idx < fieldCount; idx++) {
                values[idx] = prefixGens[idx].next();
            }

            // Write the narrower tuple with a matter writer and with an antimatter writer.
            ITupleReference prefixTuple = TupleUtils.createTuple(prefixSerdes, (Object[]) values);
            LSMBTreeTupleWriter matterWriter = new LSMBTreeTupleWriter(prefixTraits, keyCount, false);
            LSMBTreeTupleWriter antimatterWriter = new LSMBTreeTupleWriter(prefixTraits, keyCount, true);
            LSMBTreeCopyTupleWriter copyWriter = new LSMBTreeCopyTupleWriter(prefixTraits, keyCount);
            ByteBuffer matterBytes = writeTuple(prefixTuple, matterWriter);
            ByteBuffer antimatterBytes = writeTuple(prefixTuple, antimatterWriter);

            // Size sanity check: with non-key fields present, the antimatter bytes (keys only)
            // must be shorter than the matter bytes.
            if (fieldCount != keyCount) {
                assertTrue(antimatterBytes.array().length < matterBytes.array().length);
            }

            // The same reference is used below to read both matter and antimatter bytes.
            LSMBTreeTupleReference tupleRef = (LSMBTreeTupleReference) matterWriter.createTupleReference();

            // Repeat the sequence to confirm that resetting back and forth between matter and
            // antimatter tuples keeps working.
            for (int round = 0; round < repetitions; round++) {
                // Matter tuple, read through tupleRef.
                tupleRef.resetByTupleOffset(matterBytes.array(), 0);
                checkTuple(tupleRef, fieldCount, false, prefixSerdes, values);

                // Copy it with the copy writer and check the copy as well.
                ByteBuffer copiedMatterBytes = writeTuple(tupleRef, copyWriter);
                tupleRef.resetByTupleOffset(copiedMatterBytes.array(), 0);
                checkTuple(tupleRef, fieldCount, false, prefixSerdes, values);

                // Antimatter tuple, read through tupleRef; only the keys are expected.
                tupleRef.resetByTupleOffset(antimatterBytes.array(), 0);
                checkTuple(tupleRef, keyCount, true, prefixSerdes, values);

                // Copy it with the copy writer and check the copy; again only the keys are expected.
                ByteBuffer copiedAntimatterBytes = writeTuple(tupleRef, copyWriter);
                tupleRef.resetByTupleOffset(copiedAntimatterBytes.array(), 0);
                checkTuple(tupleRef, keyCount, true, prefixSerdes, values);

                // Matter tuple, read through fullTupleRef: its field count is set manually to a
                // prefix of the type traits its writer was constructed with.
                fullTupleRef.setFieldCount(fieldCount);
                fullTupleRef.resetByTupleOffset(matterBytes.array(), 0);
                checkTuple(fullTupleRef, fieldCount, false, prefixSerdes, values);

                // Antimatter tuple, read through fullTupleRef; only the keys are expected.
                fullTupleRef.resetByTupleOffset(antimatterBytes.array(), 0);
                checkTuple(fullTupleRef, keyCount, true, prefixSerdes, values);

                // Resetting fullTupleRef onto the widest tuple is expected to bring its field
                // count back to maxFieldSerdes.length, as given by its type traits.
                fullTupleRef.resetByTupleOffset(fullMatterBytes.array(), 0);
                checkTuple(fullTupleRef, maxFieldCount, false, maxFieldSerdes, fullValues);
            }
        }
    }
}
Use of org.apache.hyracks.api.dataflow.value.ISerializerDeserializer in project asterixdb by Apache.
In the class SimpleSerializerDeserializerTest, method test.
/**
 * Looks up, via reflection, every {@code ISerializerDeserializer} subtype under
 * {@code org.apache.asterix.dataflow.data.nontagged.serde} and requires each one, apart from the
 * exempted record, list and string serdes, to declare only static fields and only private
 * constructors.
 *
 * @throws IllegalStateException if a checked class declares a non-static field or a
 *         non-private constructor
 */
@SuppressWarnings("rawtypes")
@Test
public void test() {
    Reflections serdePackage = new Reflections("org.apache.asterix.dataflow.data.nontagged.serde");
    Set<Class<? extends ISerializerDeserializer>> serdeClasses =
            serdePackage.getSubTypesOf(ISerializerDeserializer.class);

    for (Class<? extends ISerializerDeserializer> serdeClass : serdeClasses) {
        String name = serdeClass.getName();

        // Serializer/Deserializer for complex types can have (immutable) states, so they are exempt.
        boolean exempt = name.endsWith("ARecordSerializerDeserializer")
                || name.endsWith("AUnorderedListSerializerDeserializer")
                || name.endsWith("AOrderedListSerializerDeserializer")
                || name.endsWith("AStringSerializerDeserializer");
        if (exempt) {
            continue;
        }

        // Statelessness: every declared field must be static.
        for (Field declaredField : serdeClass.getDeclaredFields()) {
            boolean isStatic = java.lang.reflect.Modifier.isStatic(declaredField.getModifiers());
            if (!isStatic) {
                throw new IllegalStateException("The serializer/deserializer " + name + " is not stateless!");
            }
        }

        // Singleton pattern: every declared constructor must be private.
        for (Constructor declaredCtor : serdeClass.getDeclaredConstructors()) {
            boolean isPrivate = java.lang.reflect.Modifier.isPrivate(declaredCtor.getModifiers());
            if (!isPrivate) {
                throw new IllegalStateException(
                        "The serializer/deserializer " + name + " is not implemented as a singleton class!");
            }
        }
    }
}
Aggregations