Search in sources :

Example 1 with BinaryEntry

use of org.apache.hyracks.data.std.util.BinaryEntry in project asterixdb by apache.

the class RecordAddFieldsDescriptor method createEvaluatorFactory.

/**
 * Creates the evaluator factory for the add-fields function.
 *
 * <p>Each evaluator produced by the factory takes two arguments: a record (argument 0) and an
 * ordered list (argument 1) whose items are records describing the fields to add. Every list item
 * is expected to carry a {@code "field-name"} entry and a value entry. The result is a record of
 * type {@code outRecType} holding the original fields plus the added ones.
 *
 * @param args the argument evaluator factories; {@code args[0]} yields the input record and
 *             {@code args[1]} yields the list of field descriptors
 * @return a factory creating the add-fields evaluators
 */
@Override
public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) {
    return new IScalarEvaluatorFactory() {

        private static final long serialVersionUID = 1L;

        @Override
        public IScalarEvaluator createScalarEvaluator(final IHyracksTaskContext ctx) throws HyracksDataException {
            final PointableAllocator allocator = new PointableAllocator();
            final IVisitablePointable vp0 = allocator.allocateRecordValue(inRecType);
            final IVisitablePointable vp1 = allocator.allocateListValue(inListType);
            final IPointable argPtr0 = new VoidPointable();
            final IPointable argPtr1 = new VoidPointable();
            final IScalarEvaluator eval0 = args[0].createScalarEvaluator(ctx);
            final IScalarEvaluator eval1 = args[1].createScalarEvaluator(ctx);
            // Serialized forms of the two descriptor keys, built once per evaluator.
            final ArrayBackedValueStorage fieldNamePointable = new ArrayBackedValueStorage();
            final ArrayBackedValueStorage fieldValuePointer = new ArrayBackedValueStorage();
            final PointableHelper pointableHelper = new PointableHelper();
            try {
                pointableHelper.serializeString("field-name", fieldNamePointable, true);
                pointableHelper.serializeString("field-value", fieldValuePointer, true);
            } catch (AsterixException e) {
                throw new HyracksDataException(e);
            }
            return new IScalarEvaluator() {

                // the default 32k frame size
                public static final int TABLE_FRAME_SIZE = 32768;

                // the default table size handed to the BinaryHashMap
                public static final int TABLE_SIZE = 100;

                private final RecordBuilder recordBuilder = new RecordBuilder();

                private final RuntimeRecordTypeInfo requiredRecordTypeInfo = new RuntimeRecordTypeInfo();

                private final IBinaryHashFunction putHashFunc = ListItemBinaryHashFunctionFactory.INSTANCE.createBinaryHashFunction();

                private final IBinaryHashFunction getHashFunc = ListItemBinaryHashFunctionFactory.INSTANCE.createBinaryHashFunction();

                // Reusable key/value holders for hash map puts and lookups.
                private final BinaryEntry keyEntry = new BinaryEntry();

                private final BinaryEntry valEntry = new BinaryEntry();

                private final IVisitablePointable tempValReference = allocator.allocateEmpty();

                private final IBinaryComparator cmp = ListItemBinaryComparatorFactory.INSTANCE.createBinaryComparator();

                // Maps the serialized name of every field already written to its serialized value.
                private BinaryHashMap hashMap = new BinaryHashMap(TABLE_SIZE, TABLE_FRAME_SIZE, putHashFunc, getHashFunc, cmp);

                private ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage();

                private DataOutput out = resultStorage.getDataOutput();

                /**
                 * Evaluates both arguments, validates their type tags, and writes the merged record
                 * into {@code result}.
                 *
                 * @throws HyracksDataException if argument 0 is not a record, argument 1 is not an
                 *                              ordered list, or adding the fields fails
                 */
                @Override
                public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
                    resultStorage.reset();
                    recordBuilder.reset(outRecType);
                    requiredRecordTypeInfo.reset(outRecType);
                    eval0.evaluate(tuple, argPtr0);
                    eval1.evaluate(tuple, argPtr1);
                    // Make sure we get a valid record
                    byte typeTag0 = argPtr0.getByteArray()[argPtr0.getStartOffset()];
                    if (typeTag0 != ATypeTag.SERIALIZED_RECORD_TYPE_TAG) {
                        throw new TypeMismatchException(getIdentifier(), 0, typeTag0, ATypeTag.SERIALIZED_RECORD_TYPE_TAG);
                    }
                    // Make sure we get a valid list
                    byte typeTag1 = argPtr1.getByteArray()[argPtr1.getStartOffset()];
                    if (typeTag1 != ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG) {
                        throw new TypeMismatchException(getIdentifier(), 1, typeTag1, ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG);
                    }
                    vp0.set(argPtr0);
                    vp1.set(argPtr1);
                    ARecordVisitablePointable recordPointable = (ARecordVisitablePointable) vp0;
                    AListVisitablePointable listPointable = (AListVisitablePointable) vp1;
                    // Initialize our hashmap
                    int tableSize = recordPointable.getFieldNames().size() + listPointable.getItems().size();
                    // Only build a new map when this input needs more than the default table size;
                    // otherwise clear and reuse the existing one to avoid unnecessary object construction.
                    if (hashMap == null || tableSize > TABLE_SIZE) {
                        hashMap = new BinaryHashMap(tableSize, TABLE_FRAME_SIZE, putHashFunc, getHashFunc, cmp);
                    } else {
                        hashMap.clear();
                    }
                    addFields(recordPointable, listPointable);
                    recordBuilder.write(out, true);
                    result.set(resultStorage);
                }

                /**
                 * Copies every field of the input record into the record builder, then appends the
                 * fields described by the list items. A described field whose name already exists is
                 * ignored when its value is byte-identical and rejected otherwise.
                 *
                 * @param inputRecordPointer the record whose fields are copied unconditionally
                 * @param listPointable      the list of records describing the fields to add
                 * @throws HyracksDataException if a list item is not a record, lacks a name or a
                 *                              value, or conflicts with an existing field
                 */
                private void addFields(ARecordVisitablePointable inputRecordPointer, AListVisitablePointable listPointable) throws HyracksDataException {
                    List<IVisitablePointable> inputRecordFieldNames = inputRecordPointer.getFieldNames();
                    List<IVisitablePointable> inputRecordFieldValues = inputRecordPointer.getFieldValues();
                    List<IVisitablePointable> inputFields = listPointable.getItems();
                    int numInputRecordFields = inputRecordFieldNames.size();
                    try {
                        // Add original record without duplicate checking
                        for (int i = 0; i < numInputRecordFields; ++i) {
                            IVisitablePointable fnp = inputRecordFieldNames.get(i);
                            IVisitablePointable fvp = inputRecordFieldValues.get(i);
                            // The lookup skips the first byte of the serialized name.
                            int pos = requiredRecordTypeInfo.getFieldIndex(fnp.getByteArray(), fnp.getStartOffset() + 1, fnp.getLength() - 1);
                            if (pos >= 0) {
                                recordBuilder.addField(pos, fvp);
                            } else {
                                recordBuilder.addField(fnp, fvp);
                            }
                            // Remember the field so that later additions can be checked against it.
                            keyEntry.set(fnp.getByteArray(), fnp.getStartOffset(), fnp.getLength());
                            valEntry.set(fvp.getByteArray(), fvp.getStartOffset(), fvp.getLength());
                            hashMap.put(keyEntry, valEntry);
                        }
                        // Get the fields from a list of records
                        for (int i = 0; i < inputFields.size(); i++) {
                            IVisitablePointable inputField = inputFields.get(i);
                            if (!PointableHelper.sameType(ATypeTag.OBJECT, inputField)) {
                                throw new AsterixException("Expected list of record, got " + PointableHelper.getTypeTag(inputField));
                            }
                            List<IVisitablePointable> names = ((ARecordVisitablePointable) inputField).getFieldNames();
                            List<IVisitablePointable> values = ((ARecordVisitablePointable) inputField).getFieldValues();
                            // Scoped to the current list item on purpose: a name or value found in an
                            // earlier item must never leak into an item that is missing one of them.
                            IVisitablePointable namePointable = null;
                            IVisitablePointable valuePointable = null;
                            // Get name and value of the field to be added
                            // Use loop to account for the cases where users switches the order of the fields
                            for (int j = 0; j < names.size(); j++) {
                                IVisitablePointable fieldName = names.get(j);
                                // if fieldName is "field-name" then read the name
                                if (PointableHelper.byteArrayEqual(fieldNamePointable, fieldName)) {
                                    namePointable = values.get(j);
                                } else {
                                    // otherwise the fieldName is assumed to be "field-value". Thus, read the value
                                    // NOTE(review): any field not named "field-name" is taken as the value;
                                    // fieldValuePointer is never compared - confirm whether that is intended.
                                    valuePointable = values.get(j);
                                }
                            }
                            if (namePointable == null || valuePointable == null) {
                                throw new InvalidDataFormatException(getIdentifier(), "fields to be added");
                            }
                            // Check that the field being added is a valid field
                            int pos = requiredRecordTypeInfo.getFieldIndex(namePointable.getByteArray(), namePointable.getStartOffset() + 1, namePointable.getLength() - 1);
                            keyEntry.set(namePointable.getByteArray(), namePointable.getStartOffset(), namePointable.getLength());
                            // Check if already in our built record
                            BinaryEntry entry = hashMap.get(keyEntry);
                            if (entry != null) {
                                tempValReference.set(entry.getBuf(), entry.getOffset(), entry.getLength());
                                // If value is not equal throw conflicting duplicate field, otherwise ignore
                                if (!PointableHelper.byteArrayEqual(valuePointable, tempValReference)) {
                                    throw new RuntimeDataException(ErrorCode.DUPLICATE_FIELD_NAME, getIdentifier());
                                }
                            } else {
                                if (pos > -1) {
                                    recordBuilder.addField(pos, valuePointable);
                                } else {
                                    recordBuilder.addField(namePointable, valuePointable);
                                }
                                valEntry.set(valuePointable.getByteArray(), valuePointable.getStartOffset(), valuePointable.getLength());
                                hashMap.put(keyEntry, valEntry);
                            }
                        }
                    } catch (AsterixException e) {
                        throw new HyracksDataException(e);
                    }
                }
            };
        }
    };
}
Also used : BinaryEntry(org.apache.hyracks.data.std.util.BinaryEntry) DataOutput(java.io.DataOutput) TypeMismatchException(org.apache.asterix.runtime.exceptions.TypeMismatchException) IBinaryComparator(org.apache.hyracks.api.dataflow.value.IBinaryComparator) IPointable(org.apache.hyracks.data.std.api.IPointable) IScalarEvaluator(org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator) InvalidDataFormatException(org.apache.asterix.runtime.exceptions.InvalidDataFormatException) ARecordVisitablePointable(org.apache.asterix.om.pointables.ARecordVisitablePointable) AListVisitablePointable(org.apache.asterix.om.pointables.AListVisitablePointable) AsterixException(org.apache.asterix.common.exceptions.AsterixException) VoidPointable(org.apache.hyracks.data.std.primitive.VoidPointable) PointableAllocator(org.apache.asterix.om.pointables.PointableAllocator) RuntimeDataException(org.apache.asterix.common.exceptions.RuntimeDataException) IBinaryHashFunction(org.apache.hyracks.api.dataflow.value.IBinaryHashFunction) RecordBuilder(org.apache.asterix.builders.RecordBuilder) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) IScalarEvaluatorFactory(org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory) PointableHelper(org.apache.asterix.runtime.evaluators.functions.PointableHelper) IVisitablePointable(org.apache.asterix.om.pointables.base.IVisitablePointable) ArrayBackedValueStorage(org.apache.hyracks.data.std.util.ArrayBackedValueStorage) IHyracksTaskContext(org.apache.hyracks.api.context.IHyracksTaskContext) IFrameTupleReference(org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference) RuntimeRecordTypeInfo(org.apache.asterix.om.types.runtime.RuntimeRecordTypeInfo) BinaryHashMap(org.apache.asterix.runtime.evaluators.functions.BinaryHashMap)

Example 2 with BinaryEntry

use of org.apache.hyracks.data.std.util.BinaryEntry in project asterixdb by apache.

the class ListDeepEqualityChecker method probeHashMap.

/**
 * Probe phase of the list comparison: looks up every item of the right list in the hash map
 * and deep-compares it with the left item whose index is stored in the matching entry.
 *
 * @param itemsLeft         items of the left list
 * @param itemTagTypesLeft  type-tag pointables of the left items
 * @param itemsRight        items of the right list, used as probes
 * @param itemTagTypesRight type-tag pointables of the right items
 * @return {@code true} if every right item has a hash map entry, passes the derived-type tag
 *         check and is reported equal by the visitor; {@code false} at the first failure
 * @throws HyracksDataException propagated from the hash map lookup or the visitor
 */
private boolean probeHashMap(List<IVisitablePointable> itemsLeft, List<IVisitablePointable> itemTagTypesLeft, List<IVisitablePointable> itemsRight, List<IVisitablePointable> itemTagTypesRight) throws HyracksDataException {
    int rightPos = 0;
    while (rightPos < itemsRight.size()) {
        final IVisitablePointable probeItem = itemsRight.get(rightPos);
        keyEntry.set(probeItem.getByteArray(), probeItem.getStartOffset(), probeItem.getLength());

        // No entry for this item: the lists cannot be equal.
        final BinaryEntry match = hashMap.get(keyEntry);
        if (match == null) {
            return false;
        }

        // The entry's payload starts with the index of the corresponding left item.
        final int leftPos = IntegerPointable.getInteger(match.getBuf(), match.getOffset());
        final ATypeTag leftTag = PointableHelper.getTypeTag(itemTagTypesLeft.get(leftPos));
        final boolean derivedTagMismatch =
                leftTag.isDerivedType() && leftTag != PointableHelper.getTypeTag(itemTagTypesRight.get(rightPos));
        if (derivedTagMismatch) {
            return false;
        }

        // Hand the right item to the visitor and let it compare against the left item.
        itemVisitorArg.first = probeItem;
        itemsLeft.get(leftPos).accept(visitor, itemVisitorArg);
        if (!itemVisitorArg.second) {
            return false;
        }
        rightPos++;
    }
    return true;
}
Also used : BinaryEntry(org.apache.hyracks.data.std.util.BinaryEntry) IVisitablePointable(org.apache.asterix.om.pointables.base.IVisitablePointable) ATypeTag(org.apache.asterix.om.types.ATypeTag)

Example 3 with BinaryEntry

use of org.apache.hyracks.data.std.util.BinaryEntry in project asterixdb by apache.

the class RecordDeepEqualityChecker method compareValues.

/**
 * Probe phase of the record comparison: looks up every field name of the right record in the
 * hash map and deep-compares its value with the left field whose index is stored in the entry.
 *
 * @param fieldTypesLeft   type-tag pointables of the left record's fields
 * @param fieldValuesLeft  values of the left record's fields
 * @param fieldNamesRight  field names of the right record, used as probes
 * @param fieldTypesRight  type-tag pointables of the right record's fields
 * @param fieldValuesRight values of the right record's fields
 * @return {@code true} if every right field name has a hash map entry, passes the derived-type
 *         tag check and is reported equal by the visitor; {@code false} at the first failure
 * @throws HyracksDataException propagated from the hash map lookup or the visitor
 */
private boolean compareValues(List<IVisitablePointable> fieldTypesLeft, List<IVisitablePointable> fieldValuesLeft, List<IVisitablePointable> fieldNamesRight, List<IVisitablePointable> fieldTypesRight, List<IVisitablePointable> fieldValuesRight) throws HyracksDataException {
    for (int rightIdx = 0; rightIdx < fieldNamesRight.size(); rightIdx++) {
        final IVisitablePointable probeName = fieldNamesRight.get(rightIdx);
        keyEntry.set(probeName.getByteArray(), probeName.getStartOffset(), probeName.getLength());

        // A name missing from the map means the records differ.
        final BinaryEntry match = hashMap.get(keyEntry);
        if (match == null) {
            return false;
        }

        // The entry's payload starts with the index of the corresponding left field.
        final int leftIdx = AInt32SerializerDeserializer.getInt(match.getBuf(), match.getOffset());
        final ATypeTag leftTag = PointableHelper.getTypeTag(fieldTypesLeft.get(leftIdx));
        final boolean derivedTagMismatch =
                leftTag.isDerivedType() && leftTag != PointableHelper.getTypeTag(fieldTypesRight.get(rightIdx));
        if (derivedTagMismatch) {
            return false;
        }

        // Hand the right value to the visitor and let it compare against the left value.
        nestedVisitorArg.first = fieldValuesRight.get(rightIdx);
        fieldValuesLeft.get(leftIdx).accept(visitor, nestedVisitorArg);
        if (!nestedVisitorArg.second) {
            return false;
        }
    }
    return true;
}
Also used : BinaryEntry(org.apache.hyracks.data.std.util.BinaryEntry) IVisitablePointable(org.apache.asterix.om.pointables.base.IVisitablePointable) ATypeTag(org.apache.asterix.om.types.ATypeTag)

Example 4 with BinaryEntry

use of org.apache.hyracks.data.std.util.BinaryEntry in project asterixdb by apache.

the class FullTextContainsEvaluator method initializeFullTextContains.

/**
 * Sets up the state used by full-text-contains: the hash function, the reusable key entry,
 * the hash set for the right-side (query predicate) tokens, and the tokenizer for the left side.
 */
private void initializeFullTextContains() {
    // Tokens from the right side (query predicate) are kept in a hash set; build its
    // hash function from the lowercase-token string pointable first.
    final PointableBinaryHashFunctionFactory lowercaseTokenHashFactory =
            new PointableBinaryHashFunctionFactory(UTF8StringLowercaseTokenPointable.FACTORY);
    hashFunc = lowercaseTokenHashFactory.createBinaryHashFunction();

    keyEntry = new BinaryEntry();

    // Arguments: number of buckets, frame size, hash function, comparator, and the byte array
    // holding the keys (null here; that array is set later).
    rightHashSet = new BinaryHashSet(
            HASH_SET_SLOT_SIZE,
            HASH_SET_FRAME_SIZE,
            hashFunc,
            strLowerCaseTokenCmp,
            null);

    tokenizerForLeftArray = BinaryTokenizerFactoryProvider.INSTANCE
            .getWordTokenizerFactory(ATypeTag.STRING, false, true)
            .createTokenizer();
}
Also used : BinaryEntry(org.apache.hyracks.data.std.util.BinaryEntry) BinaryHashSet(org.apache.hyracks.data.std.util.BinaryHashSet) PointableBinaryHashFunctionFactory(org.apache.hyracks.data.std.accessors.PointableBinaryHashFunctionFactory)

Example 5 with BinaryEntry

use of org.apache.hyracks.data.std.util.BinaryEntry in project asterixdb by apache.

the class SimilarityJaccardCheckEvaluator method probeHashMap.

/**
 * Probes the hash map with every item of the probe list and computes the size of the
 * intersection with the build list, giving up early when the Jaccard threshold can no longer
 * be reached.
 *
 * <p>Each hash map entry is read as two consecutive 4-byte integers; the second one is
 * incremented on every probe hit (presumably build-side and probe-side occurrence counts -
 * confirm against the build phase).
 *
 * @param probeIter     iterator over the probe list; advanced once per processed item
 * @param buildListSize number of items in the build list
 * @param probeListSize number of items in the probe list
 * @return the intersection size, or {@code -1} if the length filter fails or the threshold
 *         cannot be satisfied
 * @throws HyracksDataException propagated from the hash map lookup
 */
@Override
protected int probeHashMap(AbstractAsterixListIterator probeIter, int buildListSize, int probeListSize) throws HyracksDataException {
    // Apply length filter.
    int lengthLowerBound = (int) Math.ceil(jaccThresh * probeListSize);
    if ((lengthLowerBound > buildListSize) || (buildListSize > (int) Math.floor(1.0f / jaccThresh * probeListSize))) {
        return -1;
    }
    // Probe phase: Probe items from second list, and compute intersection size.
    int intersectionSize = 0;
    int probeListCount = 0;
    int minUnionSize = buildListSize;
    while (probeIter.hasNext()) {
        probeListCount++;
        byte[] buf = probeIter.getData();
        int off = probeIter.getPos();
        int len = probeIter.getItemLen();
        keyEntry.set(buf, off, len);
        BinaryEntry entry = hashMap.get(keyEntry);
        if (entry != null) {
            // Increment second value.
            int firstValInt = IntegerPointable.getInteger(entry.getBuf(), entry.getOffset());
            // Irrelevant for the intersection size.
            if (firstValInt == 0) {
                // Advance before skipping; otherwise the same item would be probed forever.
                probeIter.next();
                continue;
            }
            int secondValInt = IntegerPointable.getInteger(entry.getBuf(), entry.getOffset() + 4);
            // Subtract old min value.
            intersectionSize -= Math.min(firstValInt, secondValInt);
            secondValInt++;
            // Add new min value.
            intersectionSize += Math.min(firstValInt, secondValInt);
            IntegerPointable.setInteger(entry.getBuf(), entry.getOffset() + 4, secondValInt);
        } else {
            // Could not find element in other set. Increase min union size by 1.
            minUnionSize++;
            // Check whether jaccThresh can still be satisfied if there was a mismatch.
            int maxIntersectionSize = Math.min(buildListSize, intersectionSize + (probeListSize - probeListCount));
            int lowerBound = (int) Math.floor(jaccThresh * minUnionSize);
            if (maxIntersectionSize < lowerBound) {
                // Cannot satisfy jaccThresh.
                return -1;
            }
        }
        probeIter.next();
    }
    return intersectionSize;
}
Also used : BinaryEntry(org.apache.hyracks.data.std.util.BinaryEntry)

Aggregations

BinaryEntry (org.apache.hyracks.data.std.util.BinaryEntry)7 IVisitablePointable (org.apache.asterix.om.pointables.base.IVisitablePointable)3 ATypeTag (org.apache.asterix.om.types.ATypeTag)2 DataOutput (java.io.DataOutput)1 RecordBuilder (org.apache.asterix.builders.RecordBuilder)1 AsterixException (org.apache.asterix.common.exceptions.AsterixException)1 RuntimeDataException (org.apache.asterix.common.exceptions.RuntimeDataException)1 AListVisitablePointable (org.apache.asterix.om.pointables.AListVisitablePointable)1 ARecordVisitablePointable (org.apache.asterix.om.pointables.ARecordVisitablePointable)1 PointableAllocator (org.apache.asterix.om.pointables.PointableAllocator)1 RuntimeRecordTypeInfo (org.apache.asterix.om.types.runtime.RuntimeRecordTypeInfo)1 BinaryHashMap (org.apache.asterix.runtime.evaluators.functions.BinaryHashMap)1 PointableHelper (org.apache.asterix.runtime.evaluators.functions.PointableHelper)1 InvalidDataFormatException (org.apache.asterix.runtime.exceptions.InvalidDataFormatException)1 TypeMismatchException (org.apache.asterix.runtime.exceptions.TypeMismatchException)1 IScalarEvaluator (org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator)1 IScalarEvaluatorFactory (org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory)1 IHyracksTaskContext (org.apache.hyracks.api.context.IHyracksTaskContext)1 IBinaryComparator (org.apache.hyracks.api.dataflow.value.IBinaryComparator)1 IBinaryHashFunction (org.apache.hyracks.api.dataflow.value.IBinaryHashFunction)1