Use of org.apache.hyracks.data.std.primitive.UTF8StringPointable in the project asterixdb by Apache.
From the class UTF8StringCharacterIteratorTest, method testEachIterator.
/**
 * Checks that the shared iterator yields exactly the characters of {@code testString},
 * first after being reset onto a freshly generated pointable and then again after a
 * plain {@code reset()}.
 *
 * @param testString the string whose characters the iterator is expected to produce
 */
private void testEachIterator(String testString) {
    final UTF8StringPointable pointable = UTF8StringPointable.generateUTF8Pointable(testString);
    final char[] expectedChars = testString.toCharArray();
    // Pass 0 binds the iterator to the pointable; pass 1 only rewinds it.
    for (int pass = 0; pass < 2; pass++) {
        if (pass == 0) {
            iterator.reset(pointable);
        } else {
            iterator.reset();
        }
        for (int i = 0; i < expectedChars.length; i++) {
            assertTrue(iterator.hasNext());
            assertEquals(expectedChars[i], iterator.next());
        }
        // Nothing may be left once every expected character has been consumed.
        assertFalse(iterator.hasNext());
    }
}
Use of org.apache.hyracks.data.std.primitive.UTF8StringPointable in the project asterixdb by Apache.
From the class SimilarityMetricEditDistanceTest, method test.
/**
 * Exercises {@code UTF8StringEditDistance} with a threshold that is met exactly and with
 * three thresholds that are not met, each of which must yield
 * {@code SIMILARITY_THRESHOLD_NOT_SATISFIED_VALUE}.
 */
@Test
public void test() throws Exception {
    // Reference string used as the left operand in every case.
    final UTF8StringPointable reference = generateUTF8Pointable("coupon not available in store");
    // Edit distance from the reference: 3.
    final UTF8StringPointable closeMatch = generateUTF8Pointable("coupon is available in store");
    // Edit distance from the reference: 14.
    final UTF8StringPointable distantMatch = generateUTF8Pointable("coupon in store");

    final byte[] referenceBytes = reference.getByteArray();
    final int referenceStart = reference.getStartOffset();
    final byte[] closeBytes = closeMatch.getByteArray();
    final int closeStart = closeMatch.getStartOffset();
    final byte[] distantBytes = distantMatch.getByteArray();
    final int distantStart = distantMatch.getStartOffset();

    // Case 1 - normal - no early termination: the distance equals the threshold.
    int threshold = 3;
    int distance = ed.UTF8StringEditDistance(referenceBytes, referenceStart, closeBytes, closeStart, threshold);
    assertEquals(threshold, distance);

    // Case 2 - the length difference between the two strings is greater than the threshold.
    // Even without calculating the distance, the method should return the not-satisfied value.
    distance = ed.UTF8StringEditDistance(referenceBytes, referenceStart, distantBytes, distantStart, threshold);
    assertEquals(SimilarityMetricEditDistance.SIMILARITY_THRESHOLD_NOT_SATISFIED_VALUE, distance);

    // Case 3 - the edit distance is 14, but the threshold is 1.
    // Early termination should happen and the not-satisfied value should be returned.
    threshold = 1;
    distance = ed.UTF8StringEditDistance(referenceBytes, referenceStart, distantBytes, distantStart, threshold);
    assertEquals(SimilarityMetricEditDistance.SIMILARITY_THRESHOLD_NOT_SATISFIED_VALUE, distance);

    // Case 4 - the edit distance is 14, but the threshold is 13.
    // Early termination will not happen, yet the resulting distance still exceeds the
    // threshold, so the not-satisfied value should be returned.
    threshold = 13;
    distance = ed.UTF8StringEditDistance(referenceBytes, referenceStart, distantBytes, distantStart, threshold);
    assertEquals(SimilarityMetricEditDistance.SIMILARITY_THRESHOLD_NOT_SATISFIED_VALUE, distance);
}
Use of org.apache.hyracks.data.std.primitive.UTF8StringPointable in the project asterixdb by Apache.
From the class TemporalYearAccessor, method createEvaluatorFactory.
/**
 * Builds the evaluator factory for the year accessor. The produced evaluator reads its
 * single argument, extracts a year from a duration, year-month-duration, date, datetime
 * or string value, and writes that year through the int64 serializer.
 *
 * @param args the argument evaluator factories; only {@code args[0]} is used
 * @return a factory creating one evaluator per task context
 */
@Override
public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) {
    return new IScalarEvaluatorFactory() {
        private static final long serialVersionUID = 1L;

        @Override
        public IScalarEvaluator createScalarEvaluator(IHyracksTaskContext ctx) throws HyracksDataException {
            return new IScalarEvaluator() {
                private final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage();
                private final DataOutput out = resultStorage.getDataOutput();
                private final IPointable argPtr = new VoidPointable();
                private final IScalarEvaluator eval = args[0].createScalarEvaluator(ctx);
                private final GregorianCalendarSystem calSystem = GregorianCalendarSystem.getInstance();
                private final UTF8StringPointable strExprPtr = new UTF8StringPointable();
                private final UTF8StringCharacterIterator strIter = new UTF8StringCharacterIterator();
                // for output: type integer
                @SuppressWarnings("unchecked")
                private final ISerializerDeserializer<AInt64> intSerde =
                        SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.AINT64);
                private final AMutableInt64 aMutableInt64 = new AMutableInt64(0);

                @Override
                public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
                    eval.evaluate(tuple, argPtr);
                    final byte[] data = argPtr.getByteArray();
                    final int tagPos = argPtr.getStartOffset();
                    final int argLength = argPtr.getLength();
                    // The payload starts right after the one-byte type tag.
                    final int valuePos = tagPos + 1;
                    resultStorage.reset();
                    try {
                        final byte typeTag = data[tagPos];

                        if (typeTag == ATypeTag.SERIALIZED_DURATION_TYPE_TAG) {
                            emitYear(calSystem.getDurationYear(
                                    ADurationSerializerDeserializer.getYearMonth(data, valuePos)), result);
                            return;
                        }
                        if (typeTag == ATypeTag.SERIALIZED_YEAR_MONTH_DURATION_TYPE_TAG) {
                            emitYear(calSystem.getDurationYear(
                                    AYearMonthDurationSerializerDeserializer.getYearMonth(data, valuePos)), result);
                            return;
                        }

                        final long chrononTimeInMs;
                        if (typeTag == ATypeTag.SERIALIZED_DATE_TYPE_TAG) {
                            // NOTE(review): overflow-safe only if CHRONON_OF_DAY is declared as a long - confirm.
                            chrononTimeInMs = AInt32SerializerDeserializer.getInt(data, valuePos)
                                    * GregorianCalendarSystem.CHRONON_OF_DAY;
                        } else if (typeTag == ATypeTag.SERIALIZED_DATETIME_TYPE_TAG) {
                            chrononTimeInMs = AInt64SerializerDeserializer.getLong(data, valuePos);
                        } else if (typeTag == ATypeTag.SERIALIZED_STRING_TYPE_TAG) {
                            strExprPtr.set(data, valuePos, argLength - 1);
                            strIter.reset(strExprPtr);
                            final char lead = strIter.next();
                            // A leading '-' marks a negative year; otherwise the lead is the first digit.
                            final boolean negative = lead == '-';
                            int magnitude = (negative ? strIter.next() : lead) - '0';
                            // Fold in the remaining three digits of the four-digit year.
                            for (int i = 0; i < 3; i++) {
                                magnitude = magnitude * 10 + (strIter.next() - '0');
                            }
                            emitYear(negative ? -magnitude : magnitude, result);
                            return;
                        } else {
                            throw new TypeMismatchException(getIdentifier(), 0, typeTag,
                                    ATypeTag.SERIALIZED_DURATION_TYPE_TAG,
                                    ATypeTag.SERIALIZED_YEAR_MONTH_DURATION_TYPE_TAG,
                                    ATypeTag.SERIALIZED_DATE_TYPE_TAG, ATypeTag.SERIALIZED_DATETIME_TYPE_TAG,
                                    ATypeTag.SERIALIZED_STRING_TYPE_TAG);
                        }
                        emitYear(calSystem.getYear(chrononTimeInMs), result);
                    } catch (IOException e) {
                        throw new HyracksDataException(e);
                    }
                }

                // Serializes the given year into the result storage and points the result at it.
                private void emitYear(long year, IPointable result) throws IOException {
                    aMutableInt64.setValue(year);
                    intSerde.serialize(aMutableInt64, out);
                    result.set(resultStorage);
                }
            };
        }
    };
}
Use of org.apache.hyracks.data.std.primitive.UTF8StringPointable in the project asterixdb by Apache.
From the class SubstringBeforeDescriptor, method createEvaluatorFactory.
/**
 * Builds the evaluator factory for substring-before. The produced evaluator evaluates
 * both arguments, requires each to carry the serialized string type tag, and writes a
 * string-tagged value produced by {@code UTF8StringPointable.substrBefore}.
 *
 * @param args the argument evaluator factories: {@code args[0]} is the source string,
 *             {@code args[1]} the pattern
 * @return a factory creating one evaluator per task context
 */
@Override
public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) {
    return new IScalarEvaluatorFactory() {
        private static final long serialVersionUID = 1L;

        @Override
        public IScalarEvaluator createScalarEvaluator(final IHyracksTaskContext ctx) throws HyracksDataException {
            return new IScalarEvaluator() {
                private final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage();
                private final DataOutput out = resultStorage.getDataOutput();
                private final IPointable stringArg = new VoidPointable();
                private final IPointable patternArg = new VoidPointable();
                private final IScalarEvaluator evalString = args[0].createScalarEvaluator(ctx);
                private final IScalarEvaluator evalPattern = args[1].createScalarEvaluator(ctx);
                private final GrowableArray array = new GrowableArray();
                private final UTF8StringBuilder builder = new UTF8StringBuilder();
                private final UTF8StringPointable stringPtr = new UTF8StringPointable();
                private final UTF8StringPointable patternPtr = new UTF8StringPointable();

                @Override
                public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
                    resultStorage.reset();

                    // Both arguments are evaluated before either one is type-checked.
                    evalString.evaluate(tuple, stringArg);
                    final byte[] stringBytes = stringArg.getByteArray();
                    final int stringStart = stringArg.getStartOffset();
                    final int stringLength = stringArg.getLength();

                    evalPattern.evaluate(tuple, patternArg);
                    final byte[] patternBytes = patternArg.getByteArray();
                    final int patternStart = patternArg.getStartOffset();
                    final int patternLength = patternArg.getLength();

                    final byte stringTag = stringBytes[stringStart];
                    if (stringTag != ATypeTag.SERIALIZED_STRING_TYPE_TAG) {
                        throw new TypeMismatchException(getIdentifier(), 0, stringTag,
                                ATypeTag.SERIALIZED_STRING_TYPE_TAG);
                    }
                    final byte patternTag = patternBytes[patternStart];
                    if (patternTag != ATypeTag.SERIALIZED_STRING_TYPE_TAG) {
                        throw new TypeMismatchException(getIdentifier(), 1, patternTag,
                                ATypeTag.SERIALIZED_STRING_TYPE_TAG);
                    }

                    try {
                        // Skip the one-byte type tag in front of each string payload.
                        stringPtr.set(stringBytes, stringStart + 1, stringLength - 1);
                        patternPtr.set(patternBytes, patternStart + 1, patternLength - 1);
                        array.reset();
                        UTF8StringPointable.substrBefore(stringPtr, patternPtr, builder, array);
                        out.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG);
                        out.write(array.getByteArray(), 0, array.getLength());
                    } catch (IOException e) {
                        throw new HyracksDataException(e);
                    }
                    result.set(resultStorage);
                }
            };
        }
    };
}
Use of org.apache.hyracks.data.std.primitive.UTF8StringPointable in the project asterixdb by Apache.
From the class SubstringDescriptor, method createEvaluatorFactory.
/**
 * Builds the evaluator factory for substring. The produced evaluator evaluates the
 * string, start and length arguments, converts start to a zero-based index by
 * subtracting one, and writes a string-tagged value produced by
 * {@code UTF8StringPointable.substr}.
 *
 * @param args the argument evaluator factories: {@code args[0]} is the string,
 *             {@code args[1]} the start position, {@code args[2]} the length
 * @return a factory creating one evaluator per task context
 */
@Override
public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) {
    return new IScalarEvaluatorFactory() {
        private static final long serialVersionUID = 1L;

        @Override
        public IScalarEvaluator createScalarEvaluator(final IHyracksTaskContext ctx) throws HyracksDataException {
            return new IScalarEvaluator() {
                private final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage();
                private final DataOutput out = resultStorage.getDataOutput();
                private final IPointable argString = new VoidPointable();
                private final IPointable argStart = new VoidPointable();
                private final IPointable argLen = new VoidPointable();
                private final IScalarEvaluator evalString = args[0].createScalarEvaluator(ctx);
                private final IScalarEvaluator evalStart = args[1].createScalarEvaluator(ctx);
                private final IScalarEvaluator evalLen = args[2].createScalarEvaluator(ctx);
                private final GrowableArray array = new GrowableArray();
                private final UTF8StringBuilder builder = new UTF8StringBuilder();
                private final UTF8StringPointable string = new UTF8StringPointable();

                @Override
                public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
                    resultStorage.reset();

                    // All three arguments are evaluated before any of them is interpreted.
                    evalString.evaluate(tuple, argString);
                    evalStart.evaluate(tuple, argStart);
                    evalLen.evaluate(tuple, argLen);

                    final String functionName = getIdentifier().getName();
                    // The start argument has one subtracted to give the index passed to substr.
                    final int start = ATypeHierarchy.getIntegerValue(functionName, 0, argStart.getByteArray(),
                            argStart.getStartOffset()) - 1;
                    final int len = ATypeHierarchy.getIntegerValue(getIdentifier().getName(), 1,
                            argLen.getByteArray(), argLen.getStartOffset());

                    final byte[] stringBytes = argString.getByteArray();
                    final int stringStart = argString.getStartOffset();
                    final int stringLength = argString.getLength();
                    final byte stringTag = stringBytes[stringStart];
                    if (stringTag != ATypeTag.SERIALIZED_STRING_TYPE_TAG) {
                        throw new TypeMismatchException(getIdentifier(), 0, stringTag,
                                ATypeTag.SERIALIZED_STRING_TYPE_TAG);
                    }

                    // Skip the one-byte type tag in front of the string payload.
                    string.set(stringBytes, stringStart + 1, stringLength - 1);
                    array.reset();
                    try {
                        UTF8StringPointable.substr(string, start, len, builder, array);
                    } catch (StringIndexOutOfBoundsException e) {
                        throw new RuntimeDataException(ErrorCode.OUT_OF_BOUND, getIdentifier(), 1, start + len - 1);
                    } catch (IOException e) {
                        throw new HyracksDataException(e);
                    }

                    try {
                        out.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG);
                        out.write(array.getByteArray(), 0, array.getLength());
                    } catch (IOException e) {
                        throw new HyracksDataException(e);
                    }
                    result.set(resultStorage);
                }
            };
        }
    };
}
Aggregations