Search in sources :

Example 6 with IBinaryComparator

use of org.apache.hyracks.api.dataflow.value.IBinaryComparator in project asterixdb by apache.

the class ABooleanConstructorDescriptor method createEvaluatorFactory.

/**
 * Creates the evaluator factory for this constructor function.
 *
 * The produced evaluator evaluates its first argument, requires it to carry the serialized
 * string type tag, and serializes {@code ABoolean.TRUE} or {@code ABoolean.FALSE} depending on
 * whether the string equals "true" or "false". Any other string is rejected with an
 * {@code InvalidDataFormatException}; a non-string argument is rejected with a
 * {@code TypeMismatchException}.
 *
 * @param args the argument evaluator factories; only {@code args[0]} is used
 * @return a factory producing the evaluator described above
 */
@Override
public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) {
    return new IScalarEvaluatorFactory() {

        private static final long serialVersionUID = 1L;

        @Override
        public IScalarEvaluator createScalarEvaluator(IHyracksTaskContext ctx) throws HyracksDataException {
            return new IScalarEvaluator() {

                // Reusable buffer holding the serialized result of the latest evaluate() call.
                private ArrayBackedValueStorage outputBuffer = new ArrayBackedValueStorage();

                private DataOutput output = outputBuffer.getDataOutput();

                // Receives the evaluated argument.
                private IPointable argValue = new VoidPointable();

                private IScalarEvaluator argEvaluator = args[0].createScalarEvaluator(ctx);

                // Serialized forms of the two accepted literals.
                private final byte[] trueBytes = UTF8StringUtil.writeStringToBytes("true");

                private final byte[] falseBytes = UTF8StringUtil.writeStringToBytes("false");

                private final IBinaryComparator stringComparator = BinaryComparatorFactoryProvider.UTF8STRING_POINTABLE_INSTANCE.createBinaryComparator();

                @SuppressWarnings("unchecked")
                private ISerializerDeserializer<ABoolean> booleanSerde = SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.ABOOLEAN);

                @Override
                public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
                    try {
                        outputBuffer.reset();
                        argEvaluator.evaluate(tuple, argValue);
                        final byte[] data = argValue.getByteArray();
                        final int tagPos = argValue.getStartOffset();
                        final int totalLen = argValue.getLength();

                        // The first byte is the type tag; only strings are accepted.
                        final byte tag = data[tagPos];
                        if (tag != ATypeTag.SERIALIZED_STRING_TYPE_TAG) {
                            throw new TypeMismatchException(getIdentifier(), 0, tag, ATypeTag.SERIALIZED_STRING_TYPE_TAG);
                        }

                        // The string payload follows the tag byte.
                        final int strStart = tagPos + 1;
                        final int strLen = totalLen - 1;
                        final ABoolean parsed;
                        if (stringComparator.compare(data, strStart, strLen, trueBytes, 0, trueBytes.length) == 0) {
                            parsed = ABoolean.TRUE;
                        } else if (stringComparator.compare(data, strStart, strLen, falseBytes, 0, falseBytes.length) == 0) {
                            parsed = ABoolean.FALSE;
                        } else {
                            throw new InvalidDataFormatException(getIdentifier(), ATypeTag.SERIALIZED_BOOLEAN_TYPE_TAG);
                        }

                        booleanSerde.serialize(parsed, output);
                        result.set(outputBuffer);
                    } catch (IOException e) {
                        // NOTE(review): if the exceptions thrown above are IOException subtypes they
                        // are re-wrapped here as well - confirm this is intended.
                        throw new InvalidDataFormatException(getIdentifier(), e, ATypeTag.SERIALIZED_BOOLEAN_TYPE_TAG);
                    }
                }
            };
        }
    };
}
Also used : DataOutput(java.io.DataOutput) TypeMismatchException(org.apache.asterix.runtime.exceptions.TypeMismatchException) IBinaryComparator(org.apache.hyracks.api.dataflow.value.IBinaryComparator) IPointable(org.apache.hyracks.data.std.api.IPointable) IOException(java.io.IOException) IScalarEvaluator(org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer) IScalarEvaluatorFactory(org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory) InvalidDataFormatException(org.apache.asterix.runtime.exceptions.InvalidDataFormatException) ArrayBackedValueStorage(org.apache.hyracks.data.std.util.ArrayBackedValueStorage) IHyracksTaskContext(org.apache.hyracks.api.context.IHyracksTaskContext) VoidPointable(org.apache.hyracks.data.std.primitive.VoidPointable) IFrameTupleReference(org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference)

Example 7 with IBinaryComparator

use of org.apache.hyracks.api.dataflow.value.IBinaryComparator in project asterixdb by apache.

the class RangeMapBuilder method verifyRangeOrder.

/**
 * Verifies that the split values of the first field of a range map all carry the same type tag
 * and that every split compares strictly before its successor under the comparator created
 * for the requested direction.
 *
 * @param rangeMap the range map whose splits are checked
 * @param ascending direction flag handed to the comparator factory
 * @throws CompilationException if two splits have different type tags, if a split does not
 *         compare strictly before the next one, or if the comparator itself fails
 */
public static void verifyRangeOrder(IRangeMap rangeMap, boolean ascending) throws CompilationException {
    // TODO Add support for composite fields.
    final int fieldIndex = 0;
    final int expectedTag = rangeMap.getTag(fieldIndex, 0);
    final IBinaryComparator comparator = BinaryComparatorFactoryProvider.INSTANCE
            .getBinaryComparatorFactory(ATypeTag.VALUE_TYPE_MAPPING[expectedTag], ascending)
            .createBinaryComparator();

    for (int current = 1, previous = 0; current < rangeMap.getSplitCount(); previous = current++) {
        final int currentTag = rangeMap.getTag(fieldIndex, current);
        if (currentTag != expectedTag) {
            throw new CompilationException("Range field contains more than a single type of items (" + expectedTag + " and " + currentTag + ").");
        }

        final int order;
        try {
            order = comparator.compare(
                    rangeMap.getByteArray(fieldIndex, previous),
                    rangeMap.getStartOffset(fieldIndex, previous),
                    rangeMap.getLength(fieldIndex, previous),
                    rangeMap.getByteArray(fieldIndex, current),
                    rangeMap.getStartOffset(fieldIndex, current),
                    rangeMap.getLength(fieldIndex, current));
        } catch (HyracksDataException e) {
            throw new CompilationException(e);
        }

        // Equal neighbours are rejected too: the previous split must compare strictly lower.
        if (order >= 0) {
            throw new CompilationException("Range fields are not in sorted order.");
        }
    }
}
Also used : BinaryComparatorFactoryProvider(org.apache.asterix.formats.nontagged.BinaryComparatorFactoryProvider) CompilationException(org.apache.asterix.common.exceptions.CompilationException) IBinaryComparatorFactory(org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory) IBinaryComparator(org.apache.hyracks.api.dataflow.value.IBinaryComparator) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException)

Example 8 with IBinaryComparator

use of org.apache.hyracks.api.dataflow.value.IBinaryComparator in project asterixdb by apache.

the class RunMergingFrameReader method createEntryComparator.

/**
 * Builds the comparator used to order {@code ReferenceEntry} instances.
 *
 * Entries are ordered first by their normalized key (compared as an unsigned 32-bit value),
 * then field by field using the supplied binary comparators over the enclosing object's
 * {@code sortFields}, and finally by run id.
 *
 * @param comparators one binary comparator per sort field
 * @return the entry comparator; a {@code HyracksDataException} raised by a field comparator
 *         is rethrown as an {@code IllegalArgumentException}
 */
private Comparator<ReferenceEntry> createEntryComparator(final IBinaryComparator[] comparators) {
    return new Comparator<ReferenceEntry>() {

        @Override
        public int compare(ReferenceEntry left, ReferenceEntry right) {
            final int leftKey = left.getNormalizedKey();
            final int rightKey = right.getNormalizedKey();
            if (leftKey != rightKey) {
                // Mask to the low 32 bits so the keys are compared as unsigned values.
                final long unsignedLeft = leftKey & 0xffffffffL;
                final long unsignedRight = rightKey & 0xffffffffL;
                return unsignedLeft < unsignedRight ? -1 : 1;
            }

            final IFrameTupleAccessor leftAccessor = left.getAccessor();
            final IFrameTupleAccessor rightAccessor = right.getAccessor();
            final byte[] leftBytes = leftAccessor.getBuffer().array();
            final byte[] rightBytes = rightAccessor.getBuffer().array();
            final int[] leftPointers = left.getTPointers();
            final int[] rightPointers = right.getTPointers();

            // Field f reads its two values from pointer slots 2f+1 and 2f+2.
            for (int field = 0, firstSlot = 1; field < sortFields.length; ++field, firstSlot += 2) {
                final int secondSlot = firstSlot + 1;
                final int fieldOrder;
                try {
                    fieldOrder = comparators[field].compare(leftBytes, leftPointers[firstSlot], leftPointers[secondSlot], rightBytes, rightPointers[firstSlot], rightPointers[secondSlot]);
                } catch (HyracksDataException e) {
                    throw new IllegalArgumentException(e);
                }
                if (fieldOrder != 0) {
                    return fieldOrder;
                }
            }

            // All sort fields are equal: fall back to the run id.
            final int leftRun = left.getRunid();
            final int rightRun = right.getRunid();
            if (leftRun < rightRun) {
                return -1;
            }
            if (leftRun == rightRun) {
                return 0;
            }
            return 1;
        }
    };
}
Also used : ReferenceEntry(org.apache.hyracks.dataflow.std.util.ReferenceEntry) IFrameTupleAccessor(org.apache.hyracks.api.comm.IFrameTupleAccessor) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) Comparator(java.util.Comparator) IBinaryComparator(org.apache.hyracks.api.dataflow.value.IBinaryComparator)

Example 9 with IBinaryComparator

use of org.apache.hyracks.api.dataflow.value.IBinaryComparator in project asterixdb by apache.

the class FieldPrefixCompressor method getKeyPartitions.

/**
 * Performs an analysis pass over the tuples of a leaf frame to determine the costs and
 * benefits of different compression options.
 *
 * A "key partition" is a range of tuples that has an identical first field. For each key
 * partition we choose a prefix length to use for compression, i.e., all tuples in a key
 * partition will be compressed based on the same prefix length (number of fields). The prefix
 * length may be different for different key partitions.
 *
 * @param frame the leaf frame whose tuples are analyzed
 * @param cmp supplies the per-field comparators; all but the last one are considered
 * @param occurrenceThreshold the minimum number of tuples that must share a common prefix in
 *        order for us to consider compressing them
 * @return the key partitions that span at least {@code occurrenceThreshold} tuples
 * @throws HyracksDataException if a field comparison fails
 */
private ArrayList<KeyPartition> getKeyPartitions(BTreeFieldPrefixNSMLeafFrame frame, MultiComparator cmp, int occurrenceThreshold) throws HyracksDataException {
    IBinaryComparator[] cmps = cmp.getComparators();
    int fieldCount = typeTraits.length;
    int maxCmps = cmps.length - 1;
    ByteBuffer buf = frame.getBuffer();
    byte[] pageArray = buf.array();
    IPrefixSlotManager slotManager = (IPrefixSlotManager) frame.getSlotManager();
    ArrayList<KeyPartition> keyPartitions = new ArrayList<KeyPartition>();
    KeyPartition kp = new KeyPartition(maxCmps);
    keyPartitions.add(kp);
    TypeAwareTupleWriter tupleWriter = new TypeAwareTupleWriter(typeTraits);
    FieldPrefixTupleReference prevTuple = new FieldPrefixTupleReference(tupleWriter.createTupleReference());
    prevTuple.setFieldCount(fieldCount);
    FieldPrefixTupleReference tuple = new FieldPrefixTupleReference(tupleWriter.createTupleReference());
    tuple.setFieldCount(fieldCount);
    kp.firstTupleIndex = 0;
    int tupleCount = frame.getTupleCount();
    for (int i = 1; i < tupleCount; i++) {
        prevTuple.resetByTupleIndex(frame, i - 1);
        tuple.resetByTupleIndex(frame, i);
        int prefixFieldsMatch = 0;
        for (int j = 0; j < maxCmps; j++) {
            // Compare field j of the previous tuple with field j of the current tuple; each
            // side is described by its own start offset and its own length.
            if (cmps[j].compare(pageArray, prevTuple.getFieldStart(j), prevTuple.getFieldLength(j), pageArray, tuple.getFieldStart(j), tuple.getFieldLength(j)) == 0) {
                prefixFieldsMatch++;
                kp.pmi[j].matches++;
                int prefixBytes = tupleWriter.bytesRequired(tuple, 0, prefixFieldsMatch);
                int spaceBenefit = tupleWriter.bytesRequired(tuple) - tupleWriter.bytesRequired(tuple, prefixFieldsMatch, tuple.getFieldCount() - prefixFieldsMatch);
                if (kp.pmi[j].matches == occurrenceThreshold) {
                    // if we compress this prefix, we pay the cost of storing it once, plus
                    // the size for one prefix slot
                    kp.pmi[j].prefixBytes += prefixBytes;
                    kp.pmi[j].spaceCost += prefixBytes + slotManager.getSlotSize();
                    kp.pmi[j].prefixSlotsNeeded++;
                    kp.pmi[j].spaceBenefit += occurrenceThreshold * spaceBenefit;
                } else if (kp.pmi[j].matches > occurrenceThreshold) {
                    // we are beyond the occurrence threshold, every additional tuple with a
                    // matching prefix increases the benefit
                    kp.pmi[j].spaceBenefit += spaceBenefit;
                }
            } else {
                // the run of matching prefixes of this length ends here; restart its counter
                kp.pmi[j].matches = 1;
                break;
            }
        }
        // this means not even the first field matched, so we start to consider a new "key partition"
        if (maxCmps > 0 && prefixFieldsMatch == 0) {
            kp.lastTupleIndex = i - 1;
            // remove keyPartitions that don't have enough tuples
            if ((kp.lastTupleIndex - kp.firstTupleIndex) + 1 < occurrenceThreshold) {
                keyPartitions.remove(keyPartitions.size() - 1);
            }
            kp = new KeyPartition(maxCmps);
            keyPartitions.add(kp);
            kp.firstTupleIndex = i;
        }
    }
    kp.lastTupleIndex = tupleCount - 1;
    // remove keyPartitions that don't have enough tuples
    if ((kp.lastTupleIndex - kp.firstTupleIndex) + 1 < occurrenceThreshold) {
        keyPartitions.remove(keyPartitions.size() - 1);
    }
    return keyPartitions;
}
Also used : ArrayList(java.util.ArrayList) IBinaryComparator(org.apache.hyracks.api.dataflow.value.IBinaryComparator) IPrefixSlotManager(org.apache.hyracks.storage.am.btree.api.IPrefixSlotManager) FieldPrefixTupleReference(org.apache.hyracks.storage.am.btree.impls.FieldPrefixTupleReference) ByteBuffer(java.nio.ByteBuffer) TypeAwareTupleWriter(org.apache.hyracks.storage.am.common.tuples.TypeAwareTupleWriter)

Example 10 with IBinaryComparator

use of org.apache.hyracks.api.dataflow.value.IBinaryComparator in project asterixdb by apache.

the class ARecordSerializerDeserializer method getFieldOffsetByName.

/**
 * Looks up an open field by name inside a serialized record.
 *
 * The open part of the record starts with the number of open fields, followed by one 8-byte
 * entry per field: a 4-byte hash code of the field name and a 4-byte offset (relative to
 * {@code start}) to the field's (name + tag + value). The entries are binary-searched by hash
 * code; on a hash hit the stored name is compared with the requested one.
 *
 * @param serRecord the bytes containing the serialized record
 * @param start the offset of the record's type tag in {@code serRecord}
 * @param len the length of the serialized record
 * @param fieldName the bytes containing the serialized field name
 * @param nstart the offset in {@code fieldName}; the encoded name is read from {@code nstart + 1}
 * @return the offset immediately after the stored name of the matching field, or {@code -1}
 *         if the bytes are not a record, the record has no open part, or no field matches
 * @throws HyracksDataException if hashing or comparing the field name fails
 */
public static int getFieldOffsetByName(byte[] serRecord, int start, int len, byte[] fieldName, int nstart) throws HyracksDataException {
    // Records of 5 bytes or fewer are treated as empty.
    // NOTE(review): byte 5 is presumably the flag marking the presence of an open part - confirm.
    if (serRecord[start] != ATypeTag.SERIALIZED_RECORD_TYPE_TAG || len <= 5 || serRecord[start + 5] != 1) {
        return -1;
    }
    // 6 is the index of the first byte of the openPartOffset value.
    int openPartOffset = start + AInt32SerializerDeserializer.getInt(serRecord, start + 6);
    int numberOfOpenField = AInt32SerializerDeserializer.getInt(serRecord, openPartOffset);
    int fieldUtflength = UTF8StringUtil.getUTFLength(fieldName, nstart + 1);
    int fieldUtfMetaLen = UTF8StringUtil.getNumBytesToStoreLength(fieldUtflength);
    // Full encoded size of the requested name: length prefix plus UTF-8 payload. The same
    // value is used for hashing and for every comparison below.
    int fieldNameLen = fieldUtflength + fieldUtfMetaLen;
    IBinaryHashFunction utf8HashFunction = BinaryHashFunctionFactoryProvider.UTF8STRING_POINTABLE_INSTANCE.createBinaryHashFunction();
    IBinaryComparator utf8BinaryComparator = BinaryComparatorFactoryProvider.UTF8STRING_POINTABLE_INSTANCE.createBinaryComparator();
    int fieldNameHashCode = utf8HashFunction.hash(fieldName, nstart + 1, fieldNameLen);
    // Skip the 4-byte open-field count to reach the first (hash, offset) entry.
    int offset = openPartOffset + 4;
    int low = 0;
    int high = numberOfOpenField - 1;
    while (low <= high) {
        int mid = (low + high) >>> 1;
        // 8 = hash code (4) + offset to the (name + tag + value ) of the field (4).
        int h = AInt32SerializerDeserializer.getInt(serRecord, offset + (8 * mid));
        if (h == fieldNameHashCode) {
            int fieldOffset = start + AInt32SerializerDeserializer.getInt(serRecord, offset + (8 * mid) + 4);
            // the utf8 comparator does not require the precise length of the stored name,
            // so an estimated limit (len) is passed for it.
            if (utf8BinaryComparator.compare(serRecord, fieldOffset, len, fieldName, nstart + 1, fieldNameLen) == 0) {
                // since they are equal, we can directly use the meta length and the utf length.
                return fieldOffset + fieldNameLen;
            }
            // Hash collision: scan the following entries that share the same hash code.
            // (the original author noted that this path has not been tested yet)
            for (int j = mid + 1; j < numberOfOpenField; j++) {
                h = AInt32SerializerDeserializer.getInt(serRecord, offset + (8 * j));
                if (h != fieldNameHashCode) {
                    break;
                }
                fieldOffset = start + AInt32SerializerDeserializer.getInt(serRecord, offset + (8 * j) + 4);
                if (utf8BinaryComparator.compare(serRecord, fieldOffset, len, fieldName, nstart + 1, fieldNameLen) == 0) {
                    return fieldOffset + fieldNameLen;
                }
            }
        }
        // h is the last hash code read above (possibly updated by the collision scan); it
        // decides which half of the entries is searched next.
        if (fieldNameHashCode > h) {
            low = mid + 1;
        } else {
            high = mid - 1;
        }
    }
    // no field with this name.
    return -1;
}
Also used : IBinaryHashFunction(org.apache.hyracks.api.dataflow.value.IBinaryHashFunction) IBinaryComparator(org.apache.hyracks.api.dataflow.value.IBinaryComparator)

Aggregations

IBinaryComparator (org.apache.hyracks.api.dataflow.value.IBinaryComparator)21 IHyracksTaskContext (org.apache.hyracks.api.context.IHyracksTaskContext)7 ArrayBackedValueStorage (org.apache.hyracks.data.std.util.ArrayBackedValueStorage)7 DataOutput (java.io.DataOutput)6 InvalidDataFormatException (org.apache.asterix.runtime.exceptions.InvalidDataFormatException)6 TypeMismatchException (org.apache.asterix.runtime.exceptions.TypeMismatchException)6 IScalarEvaluator (org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator)6 IScalarEvaluatorFactory (org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory)6 HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException)6 IPointable (org.apache.hyracks.data.std.api.IPointable)6 VoidPointable (org.apache.hyracks.data.std.primitive.VoidPointable)6 IFrameTupleReference (org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference)6 IOException (java.io.IOException)5 ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer)5 ATypeTag (org.apache.asterix.om.types.ATypeTag)4 ByteBuffer (java.nio.ByteBuffer)3 IBinaryHashFunction (org.apache.hyracks.api.dataflow.value.IBinaryHashFunction)3 MultiComparator (org.apache.hyracks.storage.common.MultiComparator)3 BinaryHashMap (org.apache.asterix.runtime.evaluators.functions.BinaryHashMap)2 IBinaryComparatorFactory (org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory)2