use of org.apache.hyracks.data.std.util.BinaryEntry in project asterixdb by apache.
the class RecordAddFieldsDescriptor method createEvaluatorFactory.
/**
 * Creates the evaluator factory for the record "add fields" function.
 *
 * <p>The produced evaluator expects two arguments: {@code args[0]} must evaluate to a record and
 * {@code args[1]} to an ordered list whose items are records. Each list item describes one field to
 * add: the value stored under the item's {@code "field-name"} field is used as the new field's name,
 * and the value stored under its other field is used as the new field's value. The output record
 * contains every field of the input record followed by the added fields.
 *
 * <p>A field whose name already exists in the record being built is ignored when its value is
 * byte-identical to the existing one; otherwise a duplicate-field-name error is raised.
 *
 * @param args the argument evaluator factories; index 0 yields the record, index 1 the list
 * @return a factory producing the evaluator described above
 */
@Override
public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) {
    return new IScalarEvaluatorFactory() {
        private static final long serialVersionUID = 1L;

        @Override
        public IScalarEvaluator createScalarEvaluator(final IHyracksTaskContext ctx) throws HyracksDataException {
            final PointableAllocator allocator = new PointableAllocator();
            final IVisitablePointable vp0 = allocator.allocateRecordValue(inRecType);
            final IVisitablePointable vp1 = allocator.allocateListValue(inListType);
            final IPointable argPtr0 = new VoidPointable();
            final IPointable argPtr1 = new VoidPointable();
            final IScalarEvaluator eval0 = args[0].createScalarEvaluator(ctx);
            final IScalarEvaluator eval1 = args[1].createScalarEvaluator(ctx);

            // Serialized forms of the two expected field names of every list item.
            final ArrayBackedValueStorage fieldNamePointable = new ArrayBackedValueStorage();
            // NOTE(review): "field-value" is serialized but never compared against; any field of a
            // list item that is not "field-name" is treated as the value (see addFields). Confirm
            // whether an explicit match is wanted before tightening this.
            final ArrayBackedValueStorage fieldValuePointer = new ArrayBackedValueStorage();
            final PointableHelper pointableHelper = new PointableHelper();
            try {
                pointableHelper.serializeString("field-name", fieldNamePointable, true);
                pointableHelper.serializeString("field-value", fieldValuePointer, true);
            } catch (AsterixException e) {
                throw new HyracksDataException(e);
            }

            return new IScalarEvaluator() {
                // the default 32k frame size
                public static final int TABLE_FRAME_SIZE = 32768;
                // the default number of hash table slots
                public static final int TABLE_SIZE = 100;

                private final RecordBuilder recordBuilder = new RecordBuilder();
                private final RuntimeRecordTypeInfo requiredRecordTypeInfo = new RuntimeRecordTypeInfo();
                private final IBinaryHashFunction putHashFunc =
                        ListItemBinaryHashFunctionFactory.INSTANCE.createBinaryHashFunction();
                private final IBinaryHashFunction getHashFunc =
                        ListItemBinaryHashFunctionFactory.INSTANCE.createBinaryHashFunction();
                private final BinaryEntry keyEntry = new BinaryEntry();
                private final BinaryEntry valEntry = new BinaryEntry();
                private final IVisitablePointable tempValReference = allocator.allocateEmpty();
                private final IBinaryComparator cmp =
                        ListItemBinaryComparatorFactory.INSTANCE.createBinaryComparator();
                // Maps the serialized name of every field already written to its serialized value.
                private BinaryHashMap hashMap =
                        new BinaryHashMap(TABLE_SIZE, TABLE_FRAME_SIZE, putHashFunc, getHashFunc, cmp);
                private ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage();
                private DataOutput out = resultStorage.getDataOutput();

                /**
                 * Evaluates both arguments for the given tuple, validates their type tags, and writes
                 * the extended record into {@code result}.
                 *
                 * @param tuple the input tuple handed to the argument evaluators
                 * @param result receives the serialized output record
                 * @throws HyracksDataException if an argument has the wrong type tag or the fields
                 *         cannot be added
                 */
                @Override
                public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
                    resultStorage.reset();
                    recordBuilder.reset(outRecType);
                    requiredRecordTypeInfo.reset(outRecType);
                    eval0.evaluate(tuple, argPtr0);
                    eval1.evaluate(tuple, argPtr1);

                    // Make sure we get a valid record
                    byte typeTag0 = argPtr0.getByteArray()[argPtr0.getStartOffset()];
                    if (typeTag0 != ATypeTag.SERIALIZED_RECORD_TYPE_TAG) {
                        throw new TypeMismatchException(getIdentifier(), 0, typeTag0,
                                ATypeTag.SERIALIZED_RECORD_TYPE_TAG);
                    }

                    // Make sure we get a valid list
                    byte typeTag1 = argPtr1.getByteArray()[argPtr1.getStartOffset()];
                    if (typeTag1 != ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG) {
                        throw new TypeMismatchException(getIdentifier(), 1, typeTag1,
                                ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG);
                    }

                    vp0.set(argPtr0);
                    vp1.set(argPtr1);
                    ARecordVisitablePointable recordPointable = (ARecordVisitablePointable) vp0;
                    AListVisitablePointable listPointable = (AListVisitablePointable) vp1;

                    // Initialize our hashmap: one slot per existing field plus one per list item.
                    int tableSize = recordPointable.getFieldNames().size() + listPointable.getItems().size();
                    // Reuse the existing map whenever the default size suffices,
                    // thus avoiding unnecessary object construction
                    if (hashMap == null || tableSize > TABLE_SIZE) {
                        hashMap = new BinaryHashMap(tableSize, TABLE_FRAME_SIZE, putHashFunc, getHashFunc, cmp);
                    } else {
                        hashMap.clear();
                    }

                    addFields(recordPointable, listPointable);
                    recordBuilder.write(out, true);
                    result.set(resultStorage);
                }

                /**
                 * Copies the fields of the input record into the record builder and then appends
                 * the fields described by the list items, rejecting conflicting duplicates.
                 *
                 * @param inputRecordPointer the record whose fields are copied first
                 * @param listPointable the list of records describing the fields to add
                 * @throws HyracksDataException if a list item is not a record, lacks a name or a
                 *         value, or conflicts with a field that is already present
                 */
                private void addFields(ARecordVisitablePointable inputRecordPointer,
                        AListVisitablePointable listPointable) throws HyracksDataException {
                    List<IVisitablePointable> inputRecordFieldNames = inputRecordPointer.getFieldNames();
                    List<IVisitablePointable> inputRecordFieldValues = inputRecordPointer.getFieldValues();
                    List<IVisitablePointable> inputFields = listPointable.getItems();
                    int numInputRecordFields = inputRecordFieldNames.size();

                    try {
                        // Add original record without duplicate checking
                        for (int i = 0; i < numInputRecordFields; ++i) {
                            IVisitablePointable fnp = inputRecordFieldNames.get(i);
                            IVisitablePointable fvp = inputRecordFieldValues.get(i);
                            // The lookup skips the first byte of the serialized name.
                            int pos = requiredRecordTypeInfo.getFieldIndex(fnp.getByteArray(),
                                    fnp.getStartOffset() + 1, fnp.getLength() - 1);
                            if (pos >= 0) {
                                recordBuilder.addField(pos, fvp);
                            } else {
                                recordBuilder.addField(fnp, fvp);
                            }
                            keyEntry.set(fnp.getByteArray(), fnp.getStartOffset(), fnp.getLength());
                            valEntry.set(fvp.getByteArray(), fvp.getStartOffset(), fvp.getLength());
                            hashMap.put(keyEntry, valEntry);
                        }

                        // Get the fields from a list of records
                        for (int i = 0; i < inputFields.size(); i++) {
                            if (!PointableHelper.sameType(ATypeTag.OBJECT, inputFields.get(i))) {
                                throw new AsterixException(
                                        "Expected list of record, got " + PointableHelper.getTypeTag(inputFields.get(i)));
                            }
                            List<IVisitablePointable> names =
                                    ((ARecordVisitablePointable) inputFields.get(i)).getFieldNames();
                            List<IVisitablePointable> values =
                                    ((ARecordVisitablePointable) inputFields.get(i)).getFieldValues();

                            // Name and value are scoped to the current list item so that an item
                            // missing one of them cannot silently reuse what a previous item set.
                            IVisitablePointable namePointable = null;
                            IVisitablePointable valuePointable = null;

                            // Get name and value of the field to be added
                            // Use loop to account for the cases where users switches the order of the fields
                            for (int j = 0; j < names.size(); j++) {
                                IVisitablePointable fieldName = names.get(j);
                                // if fieldName is "field-name" then read the name
                                if (PointableHelper.byteArrayEqual(fieldNamePointable, fieldName)) {
                                    namePointable = values.get(j);
                                } else {
                                    // otherwise the fieldName is assumed to be "field-value". Thus, read the value
                                    valuePointable = values.get(j);
                                }
                            }
                            if (namePointable == null || valuePointable == null) {
                                throw new InvalidDataFormatException(getIdentifier(), "fields to be added");
                            }

                            // Check that the field being added is a valid field
                            int pos = requiredRecordTypeInfo.getFieldIndex(namePointable.getByteArray(),
                                    namePointable.getStartOffset() + 1, namePointable.getLength() - 1);
                            keyEntry.set(namePointable.getByteArray(), namePointable.getStartOffset(),
                                    namePointable.getLength());

                            // Check if already in our built record
                            BinaryEntry entry = hashMap.get(keyEntry);
                            if (entry != null) {
                                tempValReference.set(entry.getBuf(), entry.getOffset(), entry.getLength());
                                // If value is not equal throw conflicting duplicate field, otherwise ignore
                                if (!PointableHelper.byteArrayEqual(valuePointable, tempValReference)) {
                                    throw new RuntimeDataException(ErrorCode.DUPLICATE_FIELD_NAME, getIdentifier());
                                }
                            } else {
                                if (pos > -1) {
                                    recordBuilder.addField(pos, valuePointable);
                                } else {
                                    recordBuilder.addField(namePointable, valuePointable);
                                }
                                valEntry.set(valuePointable.getByteArray(), valuePointable.getStartOffset(),
                                        valuePointable.getLength());
                                hashMap.put(keyEntry, valEntry);
                            }
                        }
                    } catch (AsterixException e) {
                        throw new HyracksDataException(e);
                    }
                }
            };
        }
    };
}
use of org.apache.hyracks.data.std.util.BinaryEntry in project asterixdb by apache.
the class ListDeepEqualityChecker method probeHashMap.
/**
 * Probe phase of the list comparison: looks up every item of the right list in the hash map and
 * compares it with the left item whose index is stored in the matching entry.
 *
 * @param itemsLeft items of the left list, indexed by the integer stored in each map entry
 * @param itemTagTypesLeft type-tag pointables parallel to {@code itemsLeft}
 * @param itemsRight items of the right list, used as probe keys
 * @param itemTagTypesRight type-tag pointables parallel to {@code itemsRight}
 * @return {@code true} only if every right item finds an entry, passes the type-tag check, and the
 *         visitor reports a match
 * @throws HyracksDataException if the map lookup or the nested visit fails
 */
private boolean probeHashMap(List<IVisitablePointable> itemsLeft, List<IVisitablePointable> itemTagTypesLeft,
        List<IVisitablePointable> itemsRight, List<IVisitablePointable> itemTagTypesRight)
        throws HyracksDataException {
    int rightPos = 0;
    while (rightPos < itemsRight.size()) {
        IVisitablePointable probeItem = itemsRight.get(rightPos);
        keyEntry.set(probeItem.getByteArray(), probeItem.getStartOffset(), probeItem.getLength());

        BinaryEntry match = hashMap.get(keyEntry);
        if (match == null) {
            // The right item has no counterpart in the map.
            return false;
        }

        // The entry's value holds the index of the corresponding left item.
        int leftPos = IntegerPointable.getInteger(match.getBuf(), match.getOffset());
        ATypeTag leftTag = PointableHelper.getTypeTag(itemTagTypesLeft.get(leftPos));
        if (leftTag.isDerivedType()) {
            // Type tags are only required to agree when the left side is a derived type.
            ATypeTag rightTag = PointableHelper.getTypeTag(itemTagTypesRight.get(rightPos));
            if (leftTag != rightTag) {
                return false;
            }
        }

        // Delegate the comparison of the two items to the visitor.
        itemVisitorArg.first = probeItem;
        itemsLeft.get(leftPos).accept(visitor, itemVisitorArg);
        if (!itemVisitorArg.second) {
            return false;
        }
        rightPos++;
    }
    return true;
}
use of org.apache.hyracks.data.std.util.BinaryEntry in project asterixdb by apache.
the class RecordDeepEqualityChecker method compareValues.
/**
 * Probe phase of the record comparison: looks up every field name of the right record in the hash
 * map and compares the field's value with the left value whose index is stored in the matching entry.
 *
 * @param fieldTypesLeft type-tag pointables of the left record's fields
 * @param fieldValuesLeft values of the left record's fields, indexed by the integer stored in each
 *        map entry
 * @param fieldNamesRight field names of the right record, used as probe keys
 * @param fieldTypesRight type-tag pointables parallel to {@code fieldNamesRight}
 * @param fieldValuesRight values parallel to {@code fieldNamesRight}
 * @return {@code true} only if every right field finds an entry, passes the type-tag check, and the
 *         visitor reports a match
 * @throws HyracksDataException if the map lookup or the nested visit fails
 */
private boolean compareValues(List<IVisitablePointable> fieldTypesLeft, List<IVisitablePointable> fieldValuesLeft,
        List<IVisitablePointable> fieldNamesRight, List<IVisitablePointable> fieldTypesRight,
        List<IVisitablePointable> fieldValuesRight) throws HyracksDataException {
    int rightIdx = 0;
    while (rightIdx < fieldNamesRight.size()) {
        IVisitablePointable probeName = fieldNamesRight.get(rightIdx);
        keyEntry.set(probeName.getByteArray(), probeName.getStartOffset(), probeName.getLength());

        BinaryEntry match = hashMap.get(keyEntry);
        if (match == null) {
            // The right field name has no counterpart in the map.
            return false;
        }

        // The entry's value holds the index of the corresponding left field.
        int leftIdx = AInt32SerializerDeserializer.getInt(match.getBuf(), match.getOffset());
        ATypeTag leftTag = PointableHelper.getTypeTag(fieldTypesLeft.get(leftIdx));
        if (leftTag.isDerivedType()) {
            // Type tags are only required to agree when the left side is a derived type.
            ATypeTag rightTag = PointableHelper.getTypeTag(fieldTypesRight.get(rightIdx));
            if (leftTag != rightTag) {
                return false;
            }
        }

        // Delegate the comparison of the two values to the visitor.
        nestedVisitorArg.first = fieldValuesRight.get(rightIdx);
        fieldValuesLeft.get(leftIdx).accept(visitor, nestedVisitorArg);
        if (!nestedVisitorArg.second) {
            return false;
        }
        rightIdx++;
    }
    return true;
}
use of org.apache.hyracks.data.std.util.BinaryEntry in project asterixdb by apache.
the class FullTextContainsEvaluator method initializeFullTextContains.
/**
 * Sets up the state used by the full-text contains evaluation: the reusable key entry, the hash
 * function, the hash set that stores the tokens of the right side (query predicate), and the
 * tokenizer for the left side.
 */
private void initializeFullTextContains() {
    // Reusable probe/insert key for the hash set.
    keyEntry = new BinaryEntry();

    // Hash function built on the lowercase-token string pointable.
    PointableBinaryHashFunctionFactory lowercaseTokenHashFactory =
            new PointableBinaryHashFunctionFactory(UTF8StringLowercaseTokenPointable.FACTORY);
    hashFunc = lowercaseTokenHashFactory.createBinaryHashFunction();

    // Arguments: number of buckets, frame size, hash function, comparator, and the byte array
    // that contains the keys (passed as null here; it is set later).
    rightHashSet = new BinaryHashSet(HASH_SET_SLOT_SIZE, HASH_SET_FRAME_SIZE, hashFunc,
            strLowerCaseTokenCmp, null);

    // Word tokenizer applied to the left side.
    tokenizerForLeftArray = BinaryTokenizerFactoryProvider.INSTANCE
            .getWordTokenizerFactory(ATypeTag.STRING, false, true)
            .createTokenizer();
}
use of org.apache.hyracks.data.std.util.BinaryEntry in project asterixdb by apache.
the class SimilarityJaccardCheckEvaluator method probeHashMap.
/**
 * Probes the hash map with every item of the probe list and computes the size of the intersection
 * with the build list, giving up early when the Jaccard threshold can no longer be met.
 *
 * <p>Each map entry is read as two consecutive integers: the first at the entry offset and the
 * second four bytes later. The second integer is incremented for every probe hit, and the
 * intersection size tracks the minimum of the two. Presumably the first integer is the item's
 * occurrence count in the build list; confirm against the build phase.
 *
 * @param probeIter cursor over the probe list; the current item is read before {@code next()} is
 *        called
 * @param buildListSize number of items in the build list
 * @param probeListSize number of items in the probe list
 * @return the intersection size, or {@code -1} if the length filter rejects the pair or the
 *         threshold cannot be satisfied
 * @throws HyracksDataException if a hash map lookup fails
 */
@Override
protected int probeHashMap(AbstractAsterixListIterator probeIter, int buildListSize, int probeListSize)
        throws HyracksDataException {
    // Apply length filter.
    int lengthLowerBound = (int) Math.ceil(jaccThresh * probeListSize);
    if ((lengthLowerBound > buildListSize)
            || (buildListSize > (int) Math.floor(1.0f / jaccThresh * probeListSize))) {
        return -1;
    }

    // Probe phase: Probe items from second list, and compute intersection size.
    int intersectionSize = 0;
    int probeListCount = 0;
    int minUnionSize = buildListSize;
    while (probeIter.hasNext()) {
        probeListCount++;
        byte[] buf = probeIter.getData();
        int off = probeIter.getPos();
        int len = probeIter.getItemLen();
        keyEntry.set(buf, off, len);
        BinaryEntry entry = hashMap.get(keyEntry);
        if (entry != null) {
            int firstValInt = IntegerPointable.getInteger(entry.getBuf(), entry.getOffset());
            // An entry whose first value is 0 is irrelevant for the intersection size. It is
            // skipped without bypassing the cursor advance below, so the loop cannot stall on it.
            if (firstValInt != 0) {
                // Increment second value.
                int secondValInt = IntegerPointable.getInteger(entry.getBuf(), entry.getOffset() + 4);
                // Subtract old min value.
                intersectionSize -= Math.min(firstValInt, secondValInt);
                secondValInt++;
                // Add new min value.
                intersectionSize += Math.min(firstValInt, secondValInt);
                IntegerPointable.setInteger(entry.getBuf(), entry.getOffset() + 4, secondValInt);
            }
        } else {
            // Could not find element in other set. Increase min union size by 1.
            minUnionSize++;
            // Check whether jaccThresh can still be satisfied if there was a mismatch.
            int maxIntersectionSize =
                    Math.min(buildListSize, intersectionSize + (probeListSize - probeListCount));
            int lowerBound = (int) Math.floor(jaccThresh * minUnionSize);
            if (maxIntersectionSize < lowerBound) {
                // Cannot satisfy jaccThresh.
                return -1;
            }
        }
        probeIter.next();
    }
    return intersectionSize;
}
Aggregations