Search in sources :

Example 1 with IBinaryHashFunction

use of org.apache.hyracks.api.dataflow.value.IBinaryHashFunction in project asterixdb by apache.

the class AMurmurHash3BinaryHashFunctionFamily method createBinaryHashFunction.

// This hash function family promotes every non-DOUBLE numeric value to the DOUBLE type before
// hashing, so that the same numeric value hashes identically regardless of its numeric type
// (e.g., h(int64("1")) == h(double("1.0"))).
@Override
public IBinaryHashFunction createBinaryHashFunction(final int seed) {
    return new IBinaryHashFunction() {

        // Scratch buffer that receives the DOUBLE-promoted form of a numeric field; reset on every call.
        private final ArrayBackedValueStorage fieldValueBuffer = new ArrayBackedValueStorage();

        private final DataOutput fieldValueBufferOutput = fieldValueBuffer.getDataOutput();

        /**
         * Hashes one tagged field. The first byte at {@code offset} is read as the type tag; the
         * remaining {@code length - 1} bytes are the value.
         *
         * @throws HyracksDataException if the numeric promotion fails; the original failure is kept as the cause
         */
        @Override
        public int hash(byte[] bytes, int offset, int length) throws HyracksDataException {
            fieldValueBuffer.reset();
            // Per-call state is kept in locals rather than instance fields.
            final ATypeTag sourceTag = EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(bytes[offset]);
            boolean numericTypePromotionApplied = true;
            try {
                switch (sourceTag) {
                    case TINYINT:
                    case SMALLINT:
                    case INTEGER:
                    case BIGINT:
                        // Skip the tag byte and convert only the value bytes.
                        IntegerToDoubleTypeConvertComputer.getInstance().convertType(bytes, offset + 1, length - 1, fieldValueBufferOutput);
                        break;
                    case FLOAT:
                        FloatToDoubleTypeConvertComputer.getInstance().convertType(bytes, offset + 1, length - 1, fieldValueBufferOutput);
                        break;
                    default:
                        // Non-numeric types and DOUBLE itself are hashed as they are.
                        numericTypePromotionApplied = false;
                        break;
                }
            } catch (IOException e) {
                // Keep the original exception as the cause so its stack trace is not lost.
                throw new HyracksDataException("A numeric type promotion error has occurred before doing hash(). Can't continue process. Detailed Error message:" + e.getMessage(), e);
            }
            if (numericTypePromotionApplied) {
                // Hash the promoted DOUBLE representation held in the scratch buffer.
                return MurmurHash3BinaryHash.hash(fieldValueBuffer.getByteArray(), fieldValueBuffer.getStartOffset(), fieldValueBuffer.getLength(), seed);
            }
            // Usual case for non-numeric types and the DOUBLE numeric type.
            return MurmurHash3BinaryHash.hash(bytes, offset, length, seed);
        }
    };
}
Also used : DataOutput(java.io.DataOutput) IBinaryHashFunction(org.apache.hyracks.api.dataflow.value.IBinaryHashFunction) ArrayBackedValueStorage(org.apache.hyracks.data.std.util.ArrayBackedValueStorage) ATypeTag(org.apache.asterix.om.types.ATypeTag) IOException(java.io.IOException) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException)

Example 2 with IBinaryHashFunction

use of org.apache.hyracks.api.dataflow.value.IBinaryHashFunction in project asterixdb by apache.

the class ListItemBinaryHashFunctionFactory method createBinaryHashFunction.

/**
 * Builds a hash function for list items.
 *
 * @param itemTypeTag declared item type; when it is {@code ANY} the tag is read from the first byte of each item
 * @param ignoreCase  when true, STRING items are hashed with the lowercase string hash
 */
public IBinaryHashFunction createBinaryHashFunction(final ATypeTag itemTypeTag, final boolean ignoreCase) {
    return new IBinaryHashFunction() {

        private IBinaryHashFunction lowerCaseStringHash = new PointableBinaryHashFunctionFactory(UTF8StringLowercasePointable.FACTORY).createBinaryHashFunction();

        private IBinaryHashFunction genericBinaryHash = MurmurHash3BinaryHashFunctionFamily.INSTANCE.createBinaryHashFunction(0);

        // Scratch space used to put the declared type tag in front of untagged item bytes.
        private GrowableArray taggedBytes = new GrowableArray();

        @Override
        public int hash(byte[] bytes, int offset, int length) throws HyracksDataException {
            final boolean tagIsInline = itemTypeTag == ATypeTag.ANY;
            ATypeTag effectiveTag = itemTypeTag;
            int tagBytes = 0;
            if (tagIsInline) {
                // The item carries its own tag byte; read it and remember to step over it.
                effectiveTag = EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(bytes[offset]);
                tagBytes = 1;
            }
            switch (effectiveTag) {
                case STRING:
                    if (ignoreCase) {
                        return lowerCaseStringHash.hash(bytes, offset + tagBytes, length - tagBytes);
                    }
                    // Case-sensitive strings are hashed like every other type below.
                    break;
                default:
                    break;
            }
            if (tagIsInline) {
                // The bytes already start with the tag, so hash them as they are.
                return genericBinaryHash.hash(bytes, offset, length);
            }
            try {
                // Add the itemTypeTag in front of the data, then hash tag + data.
                taggedBytes.reset();
                taggedBytes.getDataOutput().writeByte(itemTypeTag.serialize());
                taggedBytes.getDataOutput().write(bytes, offset, length);
                return genericBinaryHash.hash(taggedBytes.getByteArray(), 0, length + 1);
            } catch (IOException e) {
                throw new HyracksDataException(e);
            }
        }
    };
}
Also used : IBinaryHashFunction(org.apache.hyracks.api.dataflow.value.IBinaryHashFunction) ATypeTag(org.apache.asterix.om.types.ATypeTag) GrowableArray(org.apache.hyracks.data.std.util.GrowableArray) PointableBinaryHashFunctionFactory(org.apache.hyracks.data.std.accessors.PointableBinaryHashFunctionFactory) IOException(java.io.IOException) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException)

Example 3 with IBinaryHashFunction

use of org.apache.hyracks.api.dataflow.value.IBinaryHashFunction in project asterixdb by apache.

the class FieldHashPartitionComputerFamily method createPartitioner.

/**
 * Creates a partitioner that sums the per-field hashes of the configured hash fields and maps
 * the sum onto {@code [0, nParts)}.
 *
 * @param seed seed handed to every hash function factory
 */
@Override
public ITuplePartitionComputer createPartitioner(int seed) {
    final IBinaryHashFunction[] hashFunctions = new IBinaryHashFunction[hashFunctionGeneratorFactories.length];
    int next = 0;
    while (next < hashFunctionGeneratorFactories.length) {
        hashFunctions[next] = hashFunctionGeneratorFactories[next].createBinaryHashFunction(seed);
        next++;
    }
    return new ITuplePartitionComputer() {

        @Override
        public int partition(IFrameTupleAccessor accessor, int tIndex, int nParts) throws HyracksDataException {
            final int tupleStart = accessor.getTupleStartOffset(tIndex);
            final int slotsLength = accessor.getFieldSlotsLength();
            // Field offsets reported by the accessor are added to this base.
            final int fieldBase = tupleStart + slotsLength;
            int combined = 0;
            for (int k = 0; k < hashFields.length; k++) {
                final int fieldIndex = hashFields[k];
                final IBinaryHashFunction fieldHasher = hashFunctions[k];
                final int relativeStart = accessor.getFieldStartOffset(tIndex, fieldIndex);
                final int relativeEnd = accessor.getFieldEndOffset(tIndex, fieldIndex);
                combined += fieldHasher.hash(accessor.getBuffer().array(), fieldBase + relativeStart, relativeEnd - relativeStart);
            }
            // Fold negative sums into the non-negative range; -(x + 1) is safe even for Integer.MIN_VALUE.
            final int nonNegative = combined < 0 ? -(combined + 1) : combined;
            return nonNegative % nParts;
        }
    };
}
Also used : IBinaryHashFunction(org.apache.hyracks.api.dataflow.value.IBinaryHashFunction) IFrameTupleAccessor(org.apache.hyracks.api.comm.IFrameTupleAccessor) ITuplePartitionComputer(org.apache.hyracks.api.dataflow.value.ITuplePartitionComputer)

Example 4 with IBinaryHashFunction

use of org.apache.hyracks.api.dataflow.value.IBinaryHashFunction in project asterixdb by apache.

the class RecordAddFieldsDescriptor method createEvaluatorFactory.

/**
 * Creates the evaluator factory for the record "add fields" function. The produced evaluator
 * takes a record (argument 0) and an ordered list of records (argument 1), each list item
 * supplying a "field-name" and a "field-value", and writes a record containing the original
 * fields plus the added ones.
 */
@Override
public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) {
    return new IScalarEvaluatorFactory() {

        private static final long serialVersionUID = 1L;

        @Override
        public IScalarEvaluator createScalarEvaluator(final IHyracksTaskContext ctx) throws HyracksDataException {
            final PointableAllocator allocator = new PointableAllocator();
            final IVisitablePointable vp0 = allocator.allocateRecordValue(inRecType);
            final IVisitablePointable vp1 = allocator.allocateListValue(inListType);
            final IPointable argPtr0 = new VoidPointable();
            final IPointable argPtr1 = new VoidPointable();
            final IScalarEvaluator eval0 = args[0].createScalarEvaluator(ctx);
            final IScalarEvaluator eval1 = args[1].createScalarEvaluator(ctx);
            // Serialized forms of the two field names expected in every list item.
            final ArrayBackedValueStorage fieldNamePointable = new ArrayBackedValueStorage();
            final ArrayBackedValueStorage fieldValuePointer = new ArrayBackedValueStorage();
            final PointableHelper pointableHelper = new PointableHelper();
            try {
                pointableHelper.serializeString("field-name", fieldNamePointable, true);
                pointableHelper.serializeString("field-value", fieldValuePointer, true);
            } catch (AsterixException e) {
                throw new HyracksDataException(e);
            }
            return new IScalarEvaluator() {

                // the default 32k frame size
                public static final int TABLE_FRAME_SIZE = 32768;

                // table size used for the initially allocated hash map; a bigger map is built on demand
                public static final int TABLE_SIZE = 100;

                private final RecordBuilder recordBuilder = new RecordBuilder();

                private final RuntimeRecordTypeInfo requiredRecordTypeInfo = new RuntimeRecordTypeInfo();

                private final IBinaryHashFunction putHashFunc = ListItemBinaryHashFunctionFactory.INSTANCE.createBinaryHashFunction();

                private final IBinaryHashFunction getHashFunc = ListItemBinaryHashFunctionFactory.INSTANCE.createBinaryHashFunction();

                // Reusable key/value holders for hash map operations.
                private final BinaryEntry keyEntry = new BinaryEntry();

                private final BinaryEntry valEntry = new BinaryEntry();

                private final IVisitablePointable tempValReference = allocator.allocateEmpty();

                private final IBinaryComparator cmp = ListItemBinaryComparatorFactory.INSTANCE.createBinaryComparator();

                // Maps each field name already written to the output record to its value (duplicate detection).
                private BinaryHashMap hashMap = new BinaryHashMap(TABLE_SIZE, TABLE_FRAME_SIZE, putHashFunc, getHashFunc, cmp);

                private ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage();

                private DataOutput out = resultStorage.getDataOutput();

                /**
                 * Evaluates both arguments, checks their type tags, and writes the merged record
                 * into {@code result}.
                 *
                 * @throws HyracksDataException if an argument has the wrong type tag or the fields cannot be added
                 */
                @Override
                public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
                    resultStorage.reset();
                    recordBuilder.reset(outRecType);
                    requiredRecordTypeInfo.reset(outRecType);
                    eval0.evaluate(tuple, argPtr0);
                    eval1.evaluate(tuple, argPtr1);
                    // Make sure we get a valid record
                    byte typeTag0 = argPtr0.getByteArray()[argPtr0.getStartOffset()];
                    if (typeTag0 != ATypeTag.SERIALIZED_RECORD_TYPE_TAG) {
                        throw new TypeMismatchException(getIdentifier(), 0, typeTag0, ATypeTag.SERIALIZED_RECORD_TYPE_TAG);
                    }
                    // Make sure we get a valid list
                    byte typeTag1 = argPtr1.getByteArray()[argPtr1.getStartOffset()];
                    if (typeTag1 != ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG) {
                        throw new TypeMismatchException(getIdentifier(), 1, typeTag1, ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG);
                    }
                    vp0.set(argPtr0);
                    vp1.set(argPtr1);
                    ARecordVisitablePointable recordPointable = (ARecordVisitablePointable) vp0;
                    AListVisitablePointable listPointable = (AListVisitablePointable) vp1;
                    // Initialize our hashmap: one entry per existing field plus one per field to add.
                    int tableSize = recordPointable.getFieldNames().size() + listPointable.getItems().size();
                    // Only build a new map when this input needs more than TABLE_SIZE entries;
                    // otherwise clear and reuse the current one, avoiding unnecessary object construction.
                    if (hashMap == null || tableSize > TABLE_SIZE) {
                        hashMap = new BinaryHashMap(tableSize, TABLE_FRAME_SIZE, putHashFunc, getHashFunc, cmp);
                    } else {
                        hashMap.clear();
                    }
                    addFields(recordPointable, listPointable);
                    recordBuilder.write(out, true);
                    result.set(resultStorage);
                }

                /**
                 * Copies every field of the input record into the record builder, then adds the
                 * fields described by the list items. A list item whose name already exists is
                 * ignored when its value is byte-equal to the existing one and rejected otherwise.
                 *
                 * @throws HyracksDataException on a non-record list item, a list item lacking a
                 *         name or a value, or a conflicting duplicate field name
                 */
                private void addFields(ARecordVisitablePointable inputRecordPointer, AListVisitablePointable listPointable) throws HyracksDataException {
                    List<IVisitablePointable> inputRecordFieldNames = inputRecordPointer.getFieldNames();
                    List<IVisitablePointable> inputRecordFieldValues = inputRecordPointer.getFieldValues();
                    List<IVisitablePointable> inputFields = listPointable.getItems();
                    int numInputRecordFields = inputRecordFieldNames.size();
                    try {
                        // Add original record without duplicate checking
                        for (int i = 0; i < numInputRecordFields; ++i) {
                            IVisitablePointable fnp = inputRecordFieldNames.get(i);
                            IVisitablePointable fvp = inputRecordFieldValues.get(i);
                            // The +1 / -1 step over the first byte of the serialized name.
                            int pos = requiredRecordTypeInfo.getFieldIndex(fnp.getByteArray(), fnp.getStartOffset() + 1, fnp.getLength() - 1);
                            if (pos >= 0) {
                                recordBuilder.addField(pos, fvp);
                            } else {
                                recordBuilder.addField(fnp, fvp);
                            }
                            keyEntry.set(fnp.getByteArray(), fnp.getStartOffset(), fnp.getLength());
                            valEntry.set(fvp.getByteArray(), fvp.getStartOffset(), fvp.getLength());
                            hashMap.put(keyEntry, valEntry);
                        }
                        // Get the fields from a list of records
                        for (int i = 0; i < inputFields.size(); i++) {
                            IVisitablePointable item = inputFields.get(i);
                            if (!PointableHelper.sameType(ATypeTag.OBJECT, item)) {
                                throw new AsterixException("Expected list of record, got " + PointableHelper.getTypeTag(item));
                            }
                            ARecordVisitablePointable fieldSpec = (ARecordVisitablePointable) item;
                            List<IVisitablePointable> names = fieldSpec.getFieldNames();
                            List<IVisitablePointable> values = fieldSpec.getFieldValues();
                            // Declared per list item so that an item lacking a name or a value
                            // cannot silently reuse the one read from a previous item.
                            IVisitablePointable namePointable = null;
                            IVisitablePointable valuePointable = null;
                            // Get name and value of the field to be added
                            // Use loop to account for the cases where users switches the order of the fields
                            for (int j = 0; j < names.size(); j++) {
                                IVisitablePointable fieldName = names.get(j);
                                // if fieldName is "field-name" then read the name
                                if (PointableHelper.byteArrayEqual(fieldNamePointable, fieldName)) {
                                    namePointable = values.get(j);
                                } else {
                                    // otherwise the fieldName is taken to be "field-value". Thus, read the value
                                    valuePointable = values.get(j);
                                }
                            }
                            if (namePointable == null || valuePointable == null) {
                                throw new InvalidDataFormatException(getIdentifier(), "fields to be added");
                            }
                            // Check that the field being added is a valid field
                            int pos = requiredRecordTypeInfo.getFieldIndex(namePointable.getByteArray(), namePointable.getStartOffset() + 1, namePointable.getLength() - 1);
                            keyEntry.set(namePointable.getByteArray(), namePointable.getStartOffset(), namePointable.getLength());
                            // Check if already in our built record
                            BinaryEntry entry = hashMap.get(keyEntry);
                            if (entry != null) {
                                tempValReference.set(entry.getBuf(), entry.getOffset(), entry.getLength());
                                // If value is not equal throw conflicting duplicate field, otherwise ignore
                                if (!PointableHelper.byteArrayEqual(valuePointable, tempValReference)) {
                                    throw new RuntimeDataException(ErrorCode.DUPLICATE_FIELD_NAME, getIdentifier());
                                }
                            } else {
                                if (pos > -1) {
                                    recordBuilder.addField(pos, valuePointable);
                                } else {
                                    recordBuilder.addField(namePointable, valuePointable);
                                }
                                valEntry.set(valuePointable.getByteArray(), valuePointable.getStartOffset(), valuePointable.getLength());
                                hashMap.put(keyEntry, valEntry);
                            }
                        }
                    } catch (AsterixException e) {
                        throw new HyracksDataException(e);
                    }
                }
            };
        }
    };
}
Also used : BinaryEntry(org.apache.hyracks.data.std.util.BinaryEntry) DataOutput(java.io.DataOutput) TypeMismatchException(org.apache.asterix.runtime.exceptions.TypeMismatchException) IBinaryComparator(org.apache.hyracks.api.dataflow.value.IBinaryComparator) IPointable(org.apache.hyracks.data.std.api.IPointable) IScalarEvaluator(org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator) InvalidDataFormatException(org.apache.asterix.runtime.exceptions.InvalidDataFormatException) ARecordVisitablePointable(org.apache.asterix.om.pointables.ARecordVisitablePointable) AListVisitablePointable(org.apache.asterix.om.pointables.AListVisitablePointable) AsterixException(org.apache.asterix.common.exceptions.AsterixException) VoidPointable(org.apache.hyracks.data.std.primitive.VoidPointable) PointableAllocator(org.apache.asterix.om.pointables.PointableAllocator) RuntimeDataException(org.apache.asterix.common.exceptions.RuntimeDataException) IBinaryHashFunction(org.apache.hyracks.api.dataflow.value.IBinaryHashFunction) RecordBuilder(org.apache.asterix.builders.RecordBuilder) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) IScalarEvaluatorFactory(org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory) PointableHelper(org.apache.asterix.runtime.evaluators.functions.PointableHelper) IVisitablePointable(org.apache.asterix.om.pointables.base.IVisitablePointable) ArrayBackedValueStorage(org.apache.hyracks.data.std.util.ArrayBackedValueStorage) IHyracksTaskContext(org.apache.hyracks.api.context.IHyracksTaskContext) IFrameTupleReference(org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference) RuntimeRecordTypeInfo(org.apache.asterix.om.types.runtime.RuntimeRecordTypeInfo) BinaryHashMap(org.apache.asterix.runtime.evaluators.functions.BinaryHashMap)

Example 5 with IBinaryHashFunction

use of org.apache.hyracks.api.dataflow.value.IBinaryHashFunction in project asterixdb by apache.

the class ARecordSerializerDeserializer method getFieldOffsetByName.

/**
 * Looks up a field by name in the open part of a serialized record using a binary search over
 * the (hash code, offset) entries stored there.
 *
 * @param serRecord buffer holding the serialized record
 * @param start     offset of the record's type tag in {@code serRecord}
 * @param len       length of the serialized record
 * @param fieldName buffer holding the serialized field name; the UTF-8 string starts at {@code nstart + 1}
 * @param nstart    offset of the serialized field name in {@code fieldName}
 * @return the offset in {@code serRecord} just past the matching field's name, or -1 when the
 *         bytes are not a record, the record is empty, the byte at {@code start + 5} is not 1,
 *         or no open field has this name
 */
public static int getFieldOffsetByName(byte[] serRecord, int start, int len, byte[] fieldName, int nstart) throws HyracksDataException {
    // a record with len <= 5 is empty
    // NOTE(review): the byte at start + 5 presumably flags that the record has an open part - confirm.
    if (serRecord[start] != ATypeTag.SERIALIZED_RECORD_TYPE_TAG || len <= 5 || serRecord[start + 5] != 1) {
        return -1;
    }
    // 6 is the index of the first byte of the openPartOffset value.
    int openPartOffset = start + AInt32SerializerDeserializer.getInt(serRecord, start + 6);
    int numberOfOpenField = AInt32SerializerDeserializer.getInt(serRecord, openPartOffset);
    int fieldUtflength = UTF8StringUtil.getUTFLength(fieldName, nstart + 1);
    int fieldUtfMetaLen = UTF8StringUtil.getNumBytesToStoreLength(fieldUtflength);
    // Total serialized size of the name: length prefix plus UTF-8 payload.
    int fieldNameLength = fieldUtflength + fieldUtfMetaLen;
    IBinaryHashFunction utf8HashFunction = BinaryHashFunctionFactoryProvider.UTF8STRING_POINTABLE_INSTANCE.createBinaryHashFunction();
    IBinaryComparator utf8BinaryComparator = BinaryComparatorFactoryProvider.UTF8STRING_POINTABLE_INSTANCE.createBinaryComparator();
    int fieldNameHashCode = utf8HashFunction.hash(fieldName, nstart + 1, fieldNameLength);
    // The entries start right after the 4-byte open field count.
    int offset = openPartOffset + 4;
    int low = 0;
    int high = numberOfOpenField - 1;
    while (low <= high) {
        // Unsigned shift keeps the midpoint correct even if low + high overflows.
        int mid = (low + high) >>> 1;
        // 8 = hash code (4) + offset to the (name + tag + value ) of the field (4).
        int h = AInt32SerializerDeserializer.getInt(serRecord, offset + (8 * mid));
        if (h == fieldNameHashCode) {
            int fieldOffset = start + AInt32SerializerDeserializer.getInt(serRecord, offset + (8 * mid) + 4);
            // the utf8 comparator do not require to put the precise length, we can just pass a estimated limit.
            if (utf8BinaryComparator.compare(serRecord, fieldOffset, len, fieldName, nstart + 1, fieldNameLength) == 0) {
                // since they are equal, we can directly use the meta length and the utf length.
                return fieldOffset + fieldNameLength;
            }
            // Hash collision: check the following entries that share the same hash code.
            // The entries before mid are covered by the binary search continuing to the left.
            // (The original author noted that this collision path has not been tested yet.)
            for (int j = mid + 1; j < numberOfOpenField; j++) {
                int candidateHash = AInt32SerializerDeserializer.getInt(serRecord, offset + (8 * j));
                if (candidateHash != fieldNameHashCode) {
                    break;
                }
                fieldOffset = start + AInt32SerializerDeserializer.getInt(serRecord, offset + (8 * j) + 4);
                if (utf8BinaryComparator.compare(serRecord, fieldOffset, len, fieldName, nstart + 1, fieldNameLength) == 0) {
                    return fieldOffset + fieldNameLength;
                }
            }
        }
        // The search relies on the entries being ordered by ascending hash code.
        if (fieldNameHashCode > h) {
            low = mid + 1;
        } else {
            high = mid - 1;
        }
    }
    // no field with this name.
    return -1;
}
Also used : IBinaryHashFunction(org.apache.hyracks.api.dataflow.value.IBinaryHashFunction) IBinaryComparator(org.apache.hyracks.api.dataflow.value.IBinaryComparator)

Aggregations

IBinaryHashFunction (org.apache.hyracks.api.dataflow.value.IBinaryHashFunction)7 IBinaryComparator (org.apache.hyracks.api.dataflow.value.IBinaryComparator)3 HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException)3 DataOutput (java.io.DataOutput)2 IOException (java.io.IOException)2 ATypeTag (org.apache.asterix.om.types.ATypeTag)2 BinaryHashMap (org.apache.asterix.runtime.evaluators.functions.BinaryHashMap)2 IFrameTupleAccessor (org.apache.hyracks.api.comm.IFrameTupleAccessor)2 ITuplePartitionComputer (org.apache.hyracks.api.dataflow.value.ITuplePartitionComputer)2 ArrayBackedValueStorage (org.apache.hyracks.data.std.util.ArrayBackedValueStorage)2 RecordBuilder (org.apache.asterix.builders.RecordBuilder)1 AsterixException (org.apache.asterix.common.exceptions.AsterixException)1 RuntimeDataException (org.apache.asterix.common.exceptions.RuntimeDataException)1 AListVisitablePointable (org.apache.asterix.om.pointables.AListVisitablePointable)1 ARecordVisitablePointable (org.apache.asterix.om.pointables.ARecordVisitablePointable)1 PointableAllocator (org.apache.asterix.om.pointables.PointableAllocator)1 IVisitablePointable (org.apache.asterix.om.pointables.base.IVisitablePointable)1 RuntimeRecordTypeInfo (org.apache.asterix.om.types.runtime.RuntimeRecordTypeInfo)1 PointableHelper (org.apache.asterix.runtime.evaluators.functions.PointableHelper)1 InvalidDataFormatException (org.apache.asterix.runtime.exceptions.InvalidDataFormatException)1