use of org.apache.hyracks.api.dataflow.value.IBinaryComparator in project asterixdb by apache.
the class ABooleanConstructorDescriptor method createEvaluatorFactory.
/**
 * Builds the evaluator factory for the boolean constructor. The produced evaluator
 * evaluates its single argument ({@code args[0]}), requires the result to carry the
 * serialized string type tag, and converts the string "true" or "false" into the
 * corresponding serialized {@link ABoolean}.
 *
 * @param args argument evaluator factories; only {@code args[0]} is used
 * @return a factory whose evaluators write a serialized boolean into the result pointable
 */
@Override
public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) {
    return new IScalarEvaluatorFactory() {
        private static final long serialVersionUID = 1L;

        @Override
        public IScalarEvaluator createScalarEvaluator(IHyracksTaskContext ctx) throws HyracksDataException {
            return new IScalarEvaluator() {
                // Reusable output buffer; reset at the start of every evaluate() call.
                private final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage();
                private final DataOutput out = resultStorage.getDataOutput();
                // Receives the evaluated argument.
                private final IPointable inputArg = new VoidPointable();
                private final IScalarEvaluator eval = args[0].createScalarEvaluator(ctx);
                // Serialized reference strings the argument is compared against.
                private final byte[] TRUE = UTF8StringUtil.writeStringToBytes("true");
                private final byte[] FALSE = UTF8StringUtil.writeStringToBytes("false");
                private final IBinaryComparator utf8BinaryComparator =
                        BinaryComparatorFactoryProvider.UTF8STRING_POINTABLE_INSTANCE.createBinaryComparator();
                @SuppressWarnings("unchecked")
                private final ISerializerDeserializer<ABoolean> booleanSerde =
                        SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.ABOOLEAN);

                /**
                 * Evaluates the argument for {@code tuple} and stores the parsed boolean in {@code result}.
                 *
                 * @throws HyracksDataException if the argument is not tagged as a string, if the string
                 *             is neither "true" nor "false", or if an I/O error occurs
                 */
                @Override
                public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
                    try {
                        resultStorage.reset();
                        eval.evaluate(tuple, inputArg);
                        final byte[] bytes = inputArg.getByteArray();
                        final int tagOffset = inputArg.getStartOffset();
                        final int totalLength = inputArg.getLength();

                        // Guard: the first byte is the type tag and it must denote a string.
                        if (bytes[tagOffset] != ATypeTag.SERIALIZED_STRING_TYPE_TAG) {
                            throw new TypeMismatchException(getIdentifier(), 0, bytes[tagOffset],
                                    ATypeTag.SERIALIZED_STRING_TYPE_TAG);
                        }

                        // The string payload starts right after the one-byte tag.
                        final int payloadOffset = tagOffset + 1;
                        final int payloadLength = totalLength - 1;
                        final ABoolean parsed;
                        if (utf8BinaryComparator.compare(bytes, payloadOffset, payloadLength, TRUE, 0,
                                TRUE.length) == 0) {
                            parsed = ABoolean.TRUE;
                        } else if (utf8BinaryComparator.compare(bytes, payloadOffset, payloadLength, FALSE, 0,
                                FALSE.length) == 0) {
                            parsed = ABoolean.FALSE;
                        } else {
                            throw new InvalidDataFormatException(getIdentifier(),
                                    ATypeTag.SERIALIZED_BOOLEAN_TYPE_TAG);
                        }

                        booleanSerde.serialize(parsed, out);
                        result.set(resultStorage);
                    } catch (IOException e) {
                        throw new InvalidDataFormatException(getIdentifier(), e,
                                ATypeTag.SERIALIZED_BOOLEAN_TYPE_TAG);
                    }
                }
            };
        }
    };
}
use of org.apache.hyracks.api.dataflow.value.IBinaryComparator in project asterixdb by apache.
the class RangeMapBuilder method verifyRangeOrder.
/**
 * Checks that the split values of field 0 in {@code rangeMap} all share one type tag and
 * that each split compares strictly before its successor under the comparator created for
 * that type and the requested direction.
 *
 * @param rangeMap the range map whose splits are verified
 * @param ascending direction flag handed to the comparator factory
 * @throws CompilationException if two splits have different type tags, if a comparison
 *             fails, or if a split does not compare strictly before the next one
 */
public static void verifyRangeOrder(IRangeMap rangeMap, boolean ascending) throws CompilationException {
    // TODO Add support for composite fields.
    final int fieldIndex = 0;
    final int fieldType = rangeMap.getTag(0, 0);
    final IBinaryComparator splitComparator = BinaryComparatorFactoryProvider.INSTANCE
            .getBinaryComparatorFactory(ATypeTag.VALUE_TYPE_MAPPING[fieldType], ascending)
            .createBinaryComparator();

    for (int current = 1; current < rangeMap.getSplitCount(); current++) {
        if (rangeMap.getTag(fieldIndex, current) != fieldType) {
            throw new CompilationException("Range field contains more than a single type of items (" + fieldType + " and " + rangeMap.getTag(fieldIndex, current) + ").");
        }

        final int previous = current - 1;
        final int order;
        try {
            order = splitComparator.compare(
                    rangeMap.getByteArray(fieldIndex, previous),
                    rangeMap.getStartOffset(fieldIndex, previous),
                    rangeMap.getLength(fieldIndex, previous),
                    rangeMap.getByteArray(fieldIndex, current),
                    rangeMap.getStartOffset(fieldIndex, current),
                    rangeMap.getLength(fieldIndex, current));
        } catch (HyracksDataException e) {
            throw new CompilationException(e);
        }

        // Equal neighbours are rejected as well: the order must be strict.
        if (order >= 0) {
            throw new CompilationException("Range fields are not in sorted order.");
        }
    }
}
use of org.apache.hyracks.api.dataflow.value.IBinaryComparator in project asterixdb by apache.
the class RunMergingFrameReader method createEntryComparator.
/**
 * Creates the comparator used to order {@link ReferenceEntry} instances. Entries are ordered
 * by normalized key (compared as unsigned 32-bit values), then field by field with the given
 * binary comparators, and finally by run id.
 *
 * @param comparators one binary comparator per sort field, indexed like {@code sortFields}
 * @return the entry comparator; it throws {@link IllegalArgumentException} if a field
 *         comparison raises a {@code HyracksDataException}
 */
private Comparator<ReferenceEntry> createEntryComparator(final IBinaryComparator[] comparators) {
    return new Comparator<ReferenceEntry>() {
        @Override
        public int compare(ReferenceEntry left, ReferenceEntry right) {
            final int leftKey = left.getNormalizedKey();
            final int rightKey = right.getNormalizedKey();
            if (leftKey != rightKey) {
                // Widen to long and mask so the keys are compared as unsigned values.
                final long leftUnsigned = ((long) leftKey) & 0xffffffffL;
                final long rightUnsigned = ((long) rightKey) & 0xffffffffL;
                return leftUnsigned < rightUnsigned ? -1 : 1;
            }

            final IFrameTupleAccessor leftAccessor = left.getAccessor();
            final IFrameTupleAccessor rightAccessor = right.getAccessor();
            final byte[] leftBytes = leftAccessor.getBuffer().array();
            final byte[] rightBytes = rightAccessor.getBuffer().array();
            final int[] leftPointers = left.getTPointers();
            final int[] rightPointers = right.getTPointers();

            for (int field = 0; field < sortFields.length; field++) {
                // Each field occupies two consecutive pointer slots, starting at index 1.
                final int first = 2 * field + 1;
                final int second = first + 1;
                final int order;
                try {
                    order = comparators[field].compare(leftBytes, leftPointers[first], leftPointers[second],
                            rightBytes, rightPointers[first], rightPointers[second]);
                } catch (HyracksDataException e) {
                    throw new IllegalArgumentException(e);
                }
                if (order != 0) {
                    return order;
                }
            }

            // All sort fields are equal: fall back to the run id.
            final int leftRun = left.getRunid();
            final int rightRun = right.getRunid();
            if (leftRun < rightRun) {
                return -1;
            }
            if (leftRun == rightRun) {
                return 0;
            }
            return 1;
        }
    };
}
use of org.apache.hyracks.api.dataflow.value.IBinaryComparator in project asterixdb by apache.
the class FieldPrefixCompressor method getKeyPartitions.
/**
 * Performs an analysis pass over the tuples of {@code frame} to determine the costs and
 * benefits of different compression options.
 * <p>
 * A "key partition" is a range of tuples that has an identical first field. For each key
 * partition a prefix length (number of fields) is later chosen for compression, i.e. all
 * tuples in a key partition are compressed based on the same prefix length; the prefix
 * length may differ between key partitions.
 *
 * @param frame the leaf frame whose tuples are analyzed
 * @param cmp supplies the per-field comparators; all but the last one are used
 * @param occurrenceThreshold the minimum number of tuples that must share a common prefix
 *            for compression of that prefix to be considered; key partitions with fewer
 *            tuples are dropped from the result
 * @return the key partitions that contain at least {@code occurrenceThreshold} tuples
 * @throws HyracksDataException if a field comparison fails
 */
private ArrayList<KeyPartition> getKeyPartitions(BTreeFieldPrefixNSMLeafFrame frame, MultiComparator cmp, int occurrenceThreshold) throws HyracksDataException {
    IBinaryComparator[] cmps = cmp.getComparators();
    int fieldCount = typeTraits.length;
    int maxCmps = cmps.length - 1;
    ByteBuffer buf = frame.getBuffer();
    byte[] pageArray = buf.array();
    IPrefixSlotManager slotManager = (IPrefixSlotManager) frame.getSlotManager();
    ArrayList<KeyPartition> keyPartitions = new ArrayList<KeyPartition>();
    KeyPartition kp = new KeyPartition(maxCmps);
    keyPartitions.add(kp);
    TypeAwareTupleWriter tupleWriter = new TypeAwareTupleWriter(typeTraits);
    FieldPrefixTupleReference prevTuple = new FieldPrefixTupleReference(tupleWriter.createTupleReference());
    prevTuple.setFieldCount(fieldCount);
    FieldPrefixTupleReference tuple = new FieldPrefixTupleReference(tupleWriter.createTupleReference());
    tuple.setFieldCount(fieldCount);
    kp.firstTupleIndex = 0;
    int tupleCount = frame.getTupleCount();
    for (int i = 1; i < tupleCount; i++) {
        prevTuple.resetByTupleIndex(frame, i - 1);
        tuple.resetByTupleIndex(frame, i);
        int prefixFieldsMatch = 0;
        for (int j = 0; j < maxCmps; j++) {
            // Compare field j of the previous tuple with field j of the current tuple. Each
            // side must be described by its OWN start and length; using the previous tuple's
            // length for the current tuple would misjudge variable-length fields.
            if (cmps[j].compare(pageArray, prevTuple.getFieldStart(j), prevTuple.getFieldLength(j), pageArray,
                    tuple.getFieldStart(j), tuple.getFieldLength(j)) == 0) {
                prefixFieldsMatch++;
                kp.pmi[j].matches++;
                int prefixBytes = tupleWriter.bytesRequired(tuple, 0, prefixFieldsMatch);
                int spaceBenefit = tupleWriter.bytesRequired(tuple)
                        - tupleWriter.bytesRequired(tuple, prefixFieldsMatch, tuple.getFieldCount() - prefixFieldsMatch);
                if (kp.pmi[j].matches == occurrenceThreshold) {
                    // if we compress this prefix, we pay the cost of storing it once, plus
                    // the size for one prefix slot
                    kp.pmi[j].prefixBytes += prefixBytes;
                    kp.pmi[j].spaceCost += prefixBytes + slotManager.getSlotSize();
                    kp.pmi[j].prefixSlotsNeeded++;
                    kp.pmi[j].spaceBenefit += occurrenceThreshold * spaceBenefit;
                } else if (kp.pmi[j].matches > occurrenceThreshold) {
                    // we are beyond the occurrence threshold, every additional tuple with a
                    // matching prefix increases the benefit
                    kp.pmi[j].spaceBenefit += spaceBenefit;
                }
            } else {
                // the run of matching prefixes of length j + 1 ends here; restart its count
                kp.pmi[j].matches = 1;
                break;
            }
        }
        // this means not even the first field matched, so we start to consider a new "key partition"
        if (maxCmps > 0 && prefixFieldsMatch == 0) {
            kp.lastTupleIndex = i - 1;
            // remove keyPartitions that don't have enough tuples
            if ((kp.lastTupleIndex - kp.firstTupleIndex) + 1 < occurrenceThreshold) {
                keyPartitions.remove(keyPartitions.size() - 1);
            }
            kp = new KeyPartition(maxCmps);
            keyPartitions.add(kp);
            kp.firstTupleIndex = i;
        }
    }
    // close the last key partition, which runs up to the final tuple of the frame
    kp.lastTupleIndex = tupleCount - 1;
    // remove keyPartitions that don't have enough tuples
    if ((kp.lastTupleIndex - kp.firstTupleIndex) + 1 < occurrenceThreshold) {
        keyPartitions.remove(keyPartitions.size() - 1);
    }
    return keyPartitions;
}
use of org.apache.hyracks.api.dataflow.value.IBinaryComparator in project asterixdb by apache.
the class ARecordSerializerDeserializer method getFieldOffsetByName.
/**
 * Looks up an open field of a serialized record by name and returns the offset just past the
 * matching field-name bytes, or -1 if there is no such field.
 * <p>
 * The open part is read as a 4-byte field count followed by 8-byte entries (4-byte name hash +
 * 4-byte offset, relative to {@code start}, of the field's name + tag + value). The entries are
 * binary-searched by hash, which presumes they are stored in ascending hash order (TODO confirm
 * against the record writer).
 *
 * @param serRecord buffer holding the serialized record
 * @param start offset of the record's type tag in {@code serRecord}
 * @param len length of the serialized record
 * @param fieldName buffer holding the serialized field name
 * @param nstart offset of the field name in {@code fieldName}; the string itself is read from
 *            {@code nstart + 1} (presumably skipping a one-byte type tag; confirm with callers)
 * @return {@code fieldOffset + fieldUtfMetaLen + fieldUtflength} for the matching entry, or -1
 * @throws HyracksDataException declared by the hash function and comparator calls
 */
public static int getFieldOffsetByName(byte[] serRecord, int start, int len, byte[] fieldName, int nstart) throws HyracksDataException {
// Not a record, a record with len <= 5, or a record whose byte at start + 5 is not 1:
// nothing to search. NOTE(review): the byte at start + 5 looks like an "has open part" flag; confirm.
if (serRecord[start] != ATypeTag.SERIALIZED_RECORD_TYPE_TAG || len <= 5 || serRecord[start + 5] != 1) {
return -1;
}
// 6 is the index of the first byte of the openPartOffset value.
int openPartOffset = start + AInt32SerializerDeserializer.getInt(serRecord, start + 6);
int numberOfOpenField = AInt32SerializerDeserializer.getInt(serRecord, openPartOffset);
// Length of the name's character bytes and of the length prefix that precedes them.
int fieldUtflength = UTF8StringUtil.getUTFLength(fieldName, nstart + 1);
int fieldUtfMetaLen = UTF8StringUtil.getNumBytesToStoreLength(fieldUtflength);
IBinaryHashFunction utf8HashFunction = BinaryHashFunctionFactoryProvider.UTF8STRING_POINTABLE_INSTANCE.createBinaryHashFunction();
IBinaryComparator utf8BinaryComparator = BinaryComparatorFactoryProvider.UTF8STRING_POINTABLE_INSTANCE.createBinaryComparator();
int fieldNameHashCode = utf8HashFunction.hash(fieldName, nstart + 1, fieldUtflength + fieldUtfMetaLen);
// First (hash, offset) entry starts right after the 4-byte open-field count.
int offset = openPartOffset + 4;
int fieldOffset = -1;
int mid = 0;
int high = numberOfOpenField - 1;
int low = 0;
while (low <= high) {
mid = (high + low) / 2;
// 8 = hash code (4) + offset to the (name + tag + value ) of the field (4).
int h = AInt32SerializerDeserializer.getInt(serRecord, offset + (8 * mid));
if (h == fieldNameHashCode) {
fieldOffset = start + AInt32SerializerDeserializer.getInt(serRecord, offset + (8 * mid) + 4);
// The original author notes that the utf8 comparator does not require the precise length, so an
// estimated limit (len) is passed for the record side.
if (utf8BinaryComparator.compare(serRecord, fieldOffset, len, fieldName, nstart + 1, fieldUtflength + fieldUtfMetaLen) == 0) {
// since they are equal, we can directly use the meta length and the utf length.
return fieldOffset + fieldUtfMetaLen + fieldUtflength;
} else {
// Hash collision: scan forward over the following entries that carry the same hash.
// this else part has not been tested yet
for (int j = mid + 1; j < numberOfOpenField; j++) {
// NOTE: h is overwritten here, so the low/high adjustment after this block compares
// against the hash last read by this scan rather than the hash at mid.
h = AInt32SerializerDeserializer.getInt(serRecord, offset + (8 * j));
if (h == fieldNameHashCode) {
fieldOffset = start + AInt32SerializerDeserializer.getInt(serRecord, offset + (8 * j) + 4);
// NOTE(review): this call passes fieldUtflength as the name length, whereas the comparison
// above passes fieldUtflength + fieldUtfMetaLen; confirm the comparator ignores the length.
if (utf8BinaryComparator.compare(serRecord, fieldOffset, len, fieldName, nstart + 1, fieldUtflength) == 0) {
return fieldOffset + fieldUtfMetaLen + fieldUtflength;
}
} else {
break;
}
}
}
}
// Narrow the search range; when the hashes are equal the search continues in the lower half.
if (fieldNameHashCode > h) {
low = mid + 1;
} else {
high = mid - 1;
}
}
// no field with this name.
return -1;
}
Aggregations