use of org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference in project asterixdb by apache.
the class SimilarityDescriptor method createEvaluatorFactory.
/**
 * Creates the evaluator factory for this similarity function.
 *
 * <p>The seven argument factories are consumed as follows: {@code args[0]} length of the first token
 * list, {@code args[1]} first token list, {@code args[2]} length of the second token list,
 * {@code args[3]} second token list, {@code args[4]} token prefix, {@code args[5]} similarity name and
 * {@code args[6]} similarity threshold.
 *
 * <p>Each evaluation produces a serialized double. The value is {@code 0} unless the length, position
 * and suffix filters all pass, in which case it is whatever {@code passSimilarityFilter} returns.
 *
 * @param args the argument evaluator factories, in the order described above
 * @return a factory whose evaluators compute the filtered similarity
 */
@Override
public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) {
    return new IScalarEvaluatorFactory() {
        private static final long serialVersionUID = 1L;

        @Override
        public IScalarEvaluator createScalarEvaluator(final IHyracksTaskContext ctx) throws HyracksDataException {
            return new IScalarEvaluator() {
                private final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage();
                private final DataOutput out = resultStorage.getDataOutput();
                // Scratch pointable shared by all argument evaluations; consume it before re-evaluating.
                private final IPointable inputVal = new VoidPointable();
                private final IScalarEvaluator evalLen1 = args[0].createScalarEvaluator(ctx);
                private final IScalarEvaluator evalTokens1 = args[1].createScalarEvaluator(ctx);
                private final IScalarEvaluator evalLen2 = args[2].createScalarEvaluator(ctx);
                private final IScalarEvaluator evalTokens2 = args[3].createScalarEvaluator(ctx);
                private final IScalarEvaluator evalTokenPrefix = args[4].createScalarEvaluator(ctx);
                private final IScalarEvaluator evalSimilarity = args[5].createScalarEvaluator(ctx);
                private final IScalarEvaluator evalThreshold = args[6].createScalarEvaluator(ctx);
                private final SimilarityFiltersCache similarityFiltersCache = new SimilarityFiltersCache();
                private final IntArray tokens1 = new IntArray();
                private final IntArray tokens2 = new IntArray();
                private final PartialIntersect parInter = new PartialIntersect();
                // result
                private final AMutableDouble res = new AMutableDouble(0);
                @SuppressWarnings("unchecked")
                private final ISerializerDeserializer<ADouble> doubleSerde =
                        SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.ADOUBLE);

                // NOTE(review): the argument indices passed to TypeMismatchException below (0..5) follow
                // the evaluation order, not the positions in args[] -- confirm which one is intended.
                @Override
                public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
                    resultStorage.reset();

                    // similarity threshold
                    evalThreshold.evaluate(tuple, inputVal);
                    byte[] data = inputVal.getByteArray();
                    int offset = inputVal.getStartOffset();
                    if (data[offset] != ATypeTag.SERIALIZED_DOUBLE_TYPE_TAG) {
                        throw new TypeMismatchException(getIdentifier(), 0, data[offset],
                                ATypeTag.SERIALIZED_DOUBLE_TYPE_TAG);
                    }
                    float similarityThreshold = (float) ADoubleSerializerDeserializer.getDouble(data, offset + 1);

                    // similarity name
                    evalSimilarity.evaluate(tuple, inputVal);
                    data = inputVal.getByteArray();
                    offset = inputVal.getStartOffset();
                    int len = inputVal.getLength();
                    if (data[offset] != ATypeTag.SERIALIZED_STRING_TYPE_TAG) {
                        // The expected tag is STRING, matching the check above.
                        throw new TypeMismatchException(getIdentifier(), 1, data[offset],
                                ATypeTag.SERIALIZED_STRING_TYPE_TAG);
                    }
                    SimilarityFilters similarityFilters =
                            similarityFiltersCache.get(similarityThreshold, data, offset, len);

                    // length of the first token list
                    evalLen1.evaluate(tuple, inputVal);
                    data = inputVal.getByteArray();
                    offset = inputVal.getStartOffset();
                    if (data[offset] != ATypeTag.SERIALIZED_INT32_TYPE_TAG) {
                        throw new TypeMismatchException(getIdentifier(), 2, data[offset],
                                ATypeTag.SERIALIZED_INT32_TYPE_TAG);
                    }
                    int length1 = IntegerPointable.getInteger(data, offset + 1);

                    // length of the second token list
                    evalLen2.evaluate(tuple, inputVal);
                    data = inputVal.getByteArray();
                    offset = inputVal.getStartOffset();
                    if (data[offset] != ATypeTag.SERIALIZED_INT32_TYPE_TAG) {
                        throw new TypeMismatchException(getIdentifier(), 3, data[offset],
                                ATypeTag.SERIALIZED_INT32_TYPE_TAG);
                    }
                    int length2 = IntegerPointable.getInteger(data, offset + 1);

                    float sim = 0;
                    // The token lists are only read when the cheap length filter passes.
                    if (similarityFilters.passLengthFilter(length1, length2)) {
                        readPaddedTokens(evalTokens1, tuple, 4, tokens1, length1);
                        readPaddedTokens(evalTokens2, tuple, 5, tokens2, length2);

                        // -- - token prefix - --
                        evalTokenPrefix.evaluate(tuple, inputVal);
                        int tokenPrefix = IntegerPointable.getInteger(inputVal.getByteArray(),
                                inputVal.getStartOffset() + 1);

                        // -- - position filter - --
                        SimilarityMetric.getPartialIntersectSize(tokens1.get(), 0, tokens1.length(), tokens2.get(),
                                0, tokens2.length(), tokenPrefix, parInter);
                        if (similarityFilters.passPositionFilter(parInter.intersectSize, parInter.posXStop, length1,
                                parInter.posYStop, length2)
                                && similarityFilters.passSuffixFilter(tokens1.get(), 0, tokens1.length(),
                                        parInter.posXStart, tokens2.get(), 0, tokens2.length(),
                                        parInter.posYStart)) {
                            sim = similarityFilters.passSimilarityFilter(tokens1.get(), 0, tokens1.length(),
                                    parInter.posXStop + 1, tokens2.get(), 0, tokens2.length(),
                                    parInter.posYStop + 1, parInter.intersectSize);
                        }
                    }

                    res.setValue(sim);
                    try {
                        doubleSerde.serialize(res, out);
                    } catch (IOException e) {
                        throw new HyracksDataException(e);
                    }
                    result.set(resultStorage);
                }

                /**
                 * Evaluates a token-list argument into {@code tokens}, then appends
                 * {@code Integer.MAX_VALUE} until {@code paddedLength} entries have been added.
                 *
                 * @param evalTokens evaluator producing an ordered or unordered list
                 * @param tuple the tuple being evaluated
                 * @param argIndex the index reported in a {@link TypeMismatchException}
                 * @param tokens the destination array; reset before being filled
                 * @param paddedLength the minimum number of entries in {@code tokens} afterwards
                 * @throws HyracksDataException if the value is not a list or an item offset cannot be read
                 */
                private void readPaddedTokens(IScalarEvaluator evalTokens, IFrameTupleReference tuple, int argIndex,
                        IntArray tokens, int paddedLength) throws HyracksDataException {
                    tokens.reset();
                    evalTokens.evaluate(tuple, inputVal);
                    byte[] serList = inputVal.getByteArray();
                    int offset = inputVal.getStartOffset();
                    // Always inspect the tag at the value's own start offset in the list buffer.
                    byte listTag = serList[offset];
                    boolean ordered = listTag == ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG;
                    if (!ordered && listTag != ATypeTag.SERIALIZED_UNORDEREDLIST_TYPE_TAG) {
                        throw new TypeMismatchException(getIdentifier(), argIndex, listTag,
                                ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG,
                                ATypeTag.SERIALIZED_UNORDEREDLIST_TYPE_TAG);
                    }
                    int numTokens = ordered ? AOrderedListSerializerDeserializer.getNumberOfItems(serList, offset)
                            : AUnorderedListSerializerDeserializer.getNumberOfItems(serList, offset);
                    // read tokens
                    for (int i = 0; i < numTokens; i++) {
                        int itemOffset;
                        try {
                            itemOffset = ordered
                                    ? AOrderedListSerializerDeserializer.getItemOffset(serList, offset, i)
                                    : AUnorderedListSerializerDeserializer.getItemOffset(serList, offset, i);
                        } catch (AsterixException e) {
                            throw new HyracksDataException(e);
                        }
                        tokens.add(IntegerPointable.getInteger(serList, itemOffset));
                    }
                    // pad tokens
                    for (int i = numTokens; i < paddedLength; i++) {
                        tokens.add(Integer.MAX_VALUE);
                    }
                }
            };
        }
    };
}
use of org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference in project asterixdb by apache.
the class CreatePolygonDescriptor method createEvaluatorFactory.
/**
 * Creates the evaluator factory for the create-polygon function.
 *
 * <p>The single argument ({@code args[0]}) must evaluate to an ordered or unordered list. If every
 * item is a double, the list has at least six items and the item count is even, consecutive pairs of
 * doubles are written as the points of a serialized polygon. If a NULL or MISSING item is met while
 * validating, a serialized null or missing value is returned instead.
 *
 * @param args the argument evaluator factories; only {@code args[0]} is used
 * @return a factory whose evaluators build the polygon
 */
@Override
public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) {
    return new IScalarEvaluatorFactory() {
        private static final long serialVersionUID = 1L;

        @Override
        public IScalarEvaluator createScalarEvaluator(IHyracksTaskContext ctx) throws HyracksDataException {
            return new IScalarEvaluator() {
                private final ListAccessor listAccessor = new ListAccessor();
                private final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage();
                private final DataOutput out = resultStorage.getDataOutput();
                private final IScalarEvaluatorFactory listEvalFactory = args[0];
                private final IPointable inputArgList = new VoidPointable();
                private final IScalarEvaluator evalList = listEvalFactory.createScalarEvaluator(ctx);
                @SuppressWarnings("unchecked")
                private final ISerializerDeserializer<ANull> nullSerde =
                        SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.ANULL);
                @SuppressWarnings("unchecked")
                private final ISerializerDeserializer<AMissing> missingSerde =
                        SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.AMISSING);

                @Override
                public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
                    try {
                        resultStorage.reset();
                        evalList.evaluate(tuple, inputArgList);
                        byte[] listBytes = inputArgList.getByteArray();
                        int offset = inputArgList.getStartOffset();
                        if (listBytes[offset] != ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG
                                && listBytes[offset] != ATypeTag.SERIALIZED_UNORDEREDLIST_TYPE_TAG) {
                            throw new TypeMismatchException(getIdentifier(), 0, listBytes[offset],
                                    ATypeTag.SERIALIZED_ORDEREDLIST_TYPE_TAG,
                                    ATypeTag.SERIALIZED_UNORDEREDLIST_TYPE_TAG);
                        }
                        listAccessor.reset(listBytes, offset);
                        try {
                            final int numItems = listAccessor.size();

                            // First check that the list consists of valid items.
                            for (int i = 0; i < numItems; i++) {
                                int itemOffset = listAccessor.getItemOffset(i);
                                ATypeTag itemType = listAccessor.getItemType(itemOffset);
                                if (itemType == ATypeTag.DOUBLE) {
                                    continue;
                                }
                                if (itemType == ATypeTag.NULL) {
                                    nullSerde.serialize(ANull.NULL, out);
                                    // Publish the serialized value; otherwise the caller's pointable is
                                    // never updated on this path.
                                    result.set(resultStorage);
                                    return;
                                }
                                if (itemType == ATypeTag.MISSING) {
                                    missingSerde.serialize(AMissing.MISSING, out);
                                    result.set(resultStorage);
                                    return;
                                }
                                throw new UnsupportedItemTypeException(BuiltinFunctions.CREATE_POLYGON,
                                        itemType.serialize());
                            }

                            // Reject lists with fewer than six items or with an odd item count.
                            if (numItems < 6 || numItems % 2 != 0) {
                                throw new InvalidDataFormatException(getIdentifier(),
                                        ATypeTag.SERIALIZED_POLYGON_TYPE_TAG);
                            }

                            final int numPoints = numItems / 2;
                            out.writeByte(ATypeTag.SERIALIZED_POLYGON_TYPE_TAG);
                            out.writeShort(numPoints);
                            // Self-describing items carry a leading type tag byte that must be skipped.
                            final int skipTypeTag = listAccessor.itemsAreSelfDescribing() ? 1 : 0;
                            for (int i = 0; i < numPoints; i++) {
                                int firstDoubleOffset = listAccessor.getItemOffset(i * 2) + skipTypeTag;
                                int secondDoubleOffset = listAccessor.getItemOffset((i * 2) + 1) + skipTypeTag;
                                APointSerializerDeserializer.serialize(
                                        ADoubleSerializerDeserializer.getDouble(listBytes, firstDoubleOffset),
                                        ADoubleSerializerDeserializer.getDouble(listBytes, secondDoubleOffset),
                                        out);
                            }
                            result.set(resultStorage);
                        } catch (AsterixException ex) {
                            throw new HyracksDataException(ex);
                        }
                    } catch (IOException e1) {
                        throw new HyracksDataException(e1);
                    }
                }
            };
        }
    };
}
use of org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference in project asterixdb by apache.
the class CreateUUIDDescriptor method createEvaluatorFactory.
/**
 * Creates the evaluator factory for the create-uuid function.
 *
 * <p>The produced evaluators ignore the input tuple: each call to {@code evaluate} advances a
 * per-evaluator {@link AGeneratedUUID} and returns its serialized form.
 *
 * @param args the argument evaluator factories; not used by this function
 * @return a factory whose evaluators emit a newly generated UUID per call
 */
@Override
public IScalarEvaluatorFactory createEvaluatorFactory(IScalarEvaluatorFactory[] args) {
    return new IScalarEvaluatorFactory() {
        private static final long serialVersionUID = 1L;

        @SuppressWarnings("unchecked")
        private final ISerializerDeserializer<AUUID> serde =
                SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.AUUID);

        @Override
        public IScalarEvaluator createScalarEvaluator(IHyracksTaskContext ctx) throws HyracksDataException {
            return new IScalarEvaluator() {
                private final AGeneratedUUID generated = new AGeneratedUUID();
                private final ArrayBackedValueStorage storage = new ArrayBackedValueStorage();
                private final DataOutput sink = storage.getDataOutput();

                @Override
                public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
                    // Advance to the next UUID, then serialize it into a cleared buffer.
                    generated.nextUUID();
                    storage.reset();
                    serde.serialize(generated, sink);
                    result.set(storage);
                }
            };
        }
    };
}
use of org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference in project asterixdb by apache.
the class DeepEqualityDescriptor method createEvaluatorFactory.
/**
 * Creates the evaluator factory for the deep-equality function.
 *
 * <p>The produced evaluators evaluate {@code args[0]} and {@code args[1]} into visitable pointables,
 * compare them with a {@link DeepEqualAssessor} and return a serialized boolean.
 *
 * <p>{@code inputTypeLeft} and {@code inputTypeRight} are declared outside this method and determine
 * which pointables are allocated for the two operands.
 *
 * @param args the argument evaluator factories: left operand, then right operand
 * @return a factory whose evaluators report whether both operands are deeply equal
 */
@Override
public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) {
    final IScalarEvaluatorFactory evalFactoryLeft = args[0];
    final IScalarEvaluatorFactory evalFactoryRight = args[1];
    return new IScalarEvaluatorFactory() {
        private static final long serialVersionUID = 1L;

        @SuppressWarnings("unchecked")
        private final ISerializerDeserializer<ABoolean> boolSerde =
                SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.ABOOLEAN);

        @Override
        public IScalarEvaluator createScalarEvaluator(IHyracksTaskContext ctx) throws HyracksDataException {
            final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage();
            final DataOutput out = resultStorage.getDataOutput();
            final IScalarEvaluator evalLeft = evalFactoryLeft.createScalarEvaluator(ctx);
            final IScalarEvaluator evalRight = evalFactoryRight.createScalarEvaluator(ctx);
            return new IScalarEvaluator() {
                private final DeepEqualAssessor deepEqualAssessor = new DeepEqualAssessor();
                private final PointableAllocator allocator = new PointableAllocator();
                private final IVisitablePointable pointableLeft = allocator.allocateFieldValue(inputTypeLeft);
                private final IVisitablePointable pointableRight = allocator.allocateFieldValue(inputTypeRight);

                @Override
                public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
                    try {
                        evalLeft.evaluate(tuple, pointableLeft);
                        evalRight.evaluate(tuple, pointableRight);
                        // Using deep equality assessment to assess the equality of the two values
                        boolean isEqual = deepEqualAssessor.isEqual(pointableLeft, pointableRight);
                        ABoolean resultBit = isEqual ? ABoolean.TRUE : ABoolean.FALSE;
                        resultStorage.reset();
                        boolSerde.serialize(resultBit, out);
                        result.set(resultStorage);
                    } catch (HyracksDataException e) {
                        // Already the declared exception type: rethrow untouched so the original
                        // subtype and message reach the caller instead of being wrapped again.
                        throw e;
                    } catch (Exception e) {
                        throw new HyracksDataException(e);
                    }
                }
            };
        }
    };
}
use of org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference in project asterixdb by apache.
the class CodePointToStringDescriptor method createEvaluatorFactory.
/**
 * Creates the evaluator factory for the codepoint-to-string function.
 *
 * <p>The single argument ({@code args[0]}) must evaluate to an array whose item type tag is one of
 * TINYINT, SMALLINT, INTEGER, BIGINT, FLOAT, DOUBLE or ANY. Each item is read as an integer code
 * point, and the result is a serialized string made of the UTF-8 encodings of those code points, in
 * list order.
 *
 * @param args the argument evaluator factories; only {@code args[0]} is used
 * @return a factory whose evaluators turn a list of code points into a string
 */
@Override
public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) {
    return new IScalarEvaluatorFactory() {
        private static final long serialVersionUID = 1L;

        @Override
        public IScalarEvaluator createScalarEvaluator(IHyracksTaskContext ctx) throws HyracksDataException {
            return new IScalarEvaluator() {
                private final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage();
                private final DataOutput out = resultStorage.getDataOutput();
                private final IScalarEvaluatorFactory listEvalFactory = args[0];
                private final IPointable inputArgList = new VoidPointable();
                private final IScalarEvaluator evalList = listEvalFactory.createScalarEvaluator(ctx);
                // Scratch buffer that receives the UTF-8 bytes of one code point at a time.
                private final byte[] currentUTF8 = new byte[6];
                // Scratch buffer handed to UTF8StringUtil.writeUTF8Length.
                private final byte[] tempStoreForLength = new byte[5];
                private final byte stringTypeTag = ATypeTag.SERIALIZED_STRING_TYPE_TAG;

                @Override
                public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
                    try {
                        resultStorage.reset();
                        evalList.evaluate(tuple, inputArgList);
                        byte[] serOrderedList = inputArgList.getByteArray();
                        int offset = inputArgList.getStartOffset();

                        if (ATypeTag.VALUE_TYPE_MAPPING[serOrderedList[offset]] != ATypeTag.ARRAY) {
                            throw new TypeMismatchException(getIdentifier().getName(), 0, serOrderedList[offset]);
                        }

                        // The byte after the list tag is the item type tag.
                        int size;
                        switch (ATypeTag.VALUE_TYPE_MAPPING[serOrderedList[offset + 1]]) {
                            case TINYINT:
                            case SMALLINT:
                            case INTEGER:
                            case BIGINT:
                            case FLOAT:
                            case DOUBLE:
                            case ANY:
                                size = AOrderedListSerializerDeserializer.getNumberOfItems(serOrderedList, offset);
                                break;
                            default:
                                // Report the rejected item type tag, not the enclosing list's tag.
                                throw new UnsupportedTypeException(getIdentifier(), serOrderedList[offset + 1]);
                        }

                        try {
                            // First pass: the string header needs the total UTF-8 length up front.
                            int utf_8_len = 0;
                            for (int i = 0; i < size; i++) {
                                int itemOffset =
                                        AOrderedListSerializerDeserializer.getItemOffset(serOrderedList, offset, i);
                                int codePoint = ATypeHierarchy.getIntegerValueWithDifferentTypeTagPosition(
                                        getIdentifier().getName(), 0, serOrderedList, itemOffset, offset + 1);
                                utf_8_len += UTF8StringUtil.codePointToUTF8(codePoint, currentUTF8);
                            }
                            out.writeByte(stringTypeTag);
                            UTF8StringUtil.writeUTF8Length(utf_8_len, tempStoreForLength, out);

                            // Second pass: encode each code point again and emit its bytes.
                            for (int i = 0; i < size; i++) {
                                int itemOffset =
                                        AOrderedListSerializerDeserializer.getItemOffset(serOrderedList, offset, i);
                                int codePoint = ATypeHierarchy.getIntegerValueWithDifferentTypeTagPosition(
                                        getIdentifier().getName(), 0, serOrderedList, itemOffset, offset + 1);
                                utf_8_len = UTF8StringUtil.codePointToUTF8(codePoint, currentUTF8);
                                for (int j = 0; j < utf_8_len; j++) {
                                    out.writeByte(currentUTF8[j]);
                                }
                            }
                            result.set(resultStorage);
                        } catch (AsterixException ex) {
                            throw new HyracksDataException(ex);
                        }
                    } catch (IOException e1) {
                        throw new HyracksDataException(e1);
                    }
                }
            };
        }
    };
}
Aggregations