Search in sources:

Example 21 with UTF8StringPointable

use of org.apache.hyracks.data.std.primitive.UTF8StringPointable in project asterixdb by apache.

the class UTF8StringCharacterIteratorTest method testEachIterator.

/**
 * Checks that the shared {@code iterator} yields exactly the chars of {@code testString}.
 * The check is performed twice: once right after {@code reset(ptr)} and once more after a
 * parameterless {@code reset()}, so that rewinding the iterator is covered as well.
 *
 * @param testString the string that is encoded into a UTF8StringPointable and iterated over
 */
private void testEachIterator(String testString) {
    final char[] expectedChars = testString.toCharArray();
    iterator.reset(UTF8StringPointable.generateUTF8Pointable(testString));
    for (int pass = 0; pass < 2; pass++) {
        if (pass > 0) {
            // Second pass: rewind over the same pointable.
            iterator.reset();
        }
        for (int i = 0; i < expectedChars.length; i++) {
            assertTrue(iterator.hasNext());
            assertEquals(expectedChars[i], iterator.next());
        }
        // The iterator must be exhausted once every expected char was consumed.
        assertFalse(iterator.hasNext());
    }
}
Also used : UTF8StringPointable(org.apache.hyracks.data.std.primitive.UTF8StringPointable)

Example 22 with UTF8StringPointable

use of org.apache.hyracks.data.std.primitive.UTF8StringPointable in project asterixdb by apache.

the class SimilarityMetricEditDistanceTest method test.

/**
 * Exercises {@code ed.UTF8StringEditDistance} with one left string and two right strings:
 * one at edit distance 3 (expected to be returned as-is with threshold 3) and one at edit
 * distance 14 (expected to yield SIMILARITY_THRESHOLD_NOT_SATISFIED_VALUE for every
 * threshold tried below).
 */
@Test
public void test() throws Exception {
    // These two strings are at edit distance 3 from each other.
    final UTF8StringPointable left = generateUTF8Pointable("coupon not available in store");
    final UTF8StringPointable closeRight = generateUTF8Pointable("coupon is available in store");
    // This string is at edit distance 14 from the left string.
    final UTF8StringPointable distantRight = generateUTF8Pointable("coupon in store");

    final byte[] leftData = left.getByteArray();
    final int leftStart = left.getStartOffset();
    final byte[] closeData = closeRight.getByteArray();
    final int closeStart = closeRight.getStartOffset();
    final byte[] distantData = distantRight.getByteArray();
    final int distantStart = distantRight.getStartOffset();

    // Case 1 - normal - no early termination: the distance equals the threshold.
    assertEquals(3, ed.UTF8StringEditDistance(leftData, leftStart, closeData, closeStart, 3));

    // Cases 2-4 all compare against the distant string (edit distance 14) and must all be rejected:
    //   threshold 3  - the length difference alone already exceeds the threshold,
    //   threshold 1  - early termination should happen,
    //   threshold 13 - no early termination, but the final distance still exceeds the threshold.
    for (int threshold : new int[] { 3, 1, 13 }) {
        int distance = ed.UTF8StringEditDistance(leftData, leftStart, distantData, distantStart, threshold);
        assertEquals(SimilarityMetricEditDistance.SIMILARITY_THRESHOLD_NOT_SATISFIED_VALUE, distance);
    }
}
Also used : UTF8StringPointable(org.apache.hyracks.data.std.primitive.UTF8StringPointable) Test(org.junit.Test)

Example 23 with UTF8StringPointable

use of org.apache.hyracks.data.std.primitive.UTF8StringPointable in project asterixdb by apache.

the class TemporalYearAccessor method createEvaluatorFactory.

/**
 * Builds the evaluator factory for the year accessor.
 * <p>
 * Each evaluator evaluates {@code args[0]}, inspects the serialized type tag in its first byte and
 * writes the year as a serialized AInt64 into {@code result}. Supported tags are duration,
 * year-month-duration, date, datetime and string; any other tag raises a TypeMismatchException.
 * For strings, an optional leading '-' followed by four characters is read and interpreted as
 * decimal digits (the characters are not validated).
 */
@Override
public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) {
    return new IScalarEvaluatorFactory() {

        private static final long serialVersionUID = 1L;

        @Override
        public IScalarEvaluator createScalarEvaluator(IHyracksTaskContext ctx) throws HyracksDataException {
            return new IScalarEvaluator() {

                private final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage();

                private final DataOutput out = resultStorage.getDataOutput();

                private final IPointable argPtr = new VoidPointable();

                private final IScalarEvaluator eval = args[0].createScalarEvaluator(ctx);

                private final GregorianCalendarSystem calSystem = GregorianCalendarSystem.getInstance();

                private final UTF8StringPointable strExprPtr = new UTF8StringPointable();

                private final UTF8StringCharacterIterator strIter = new UTF8StringCharacterIterator();

                // for output: type integer
                @SuppressWarnings("unchecked")
                private final ISerializerDeserializer<AInt64> intSerde = SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.AINT64);

                private final AMutableInt64 aMutableInt64 = new AMutableInt64(0);

                @Override
                public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
                    eval.evaluate(tuple, argPtr);
                    final byte[] data = argPtr.getByteArray();
                    final int tagOffset = argPtr.getStartOffset();
                    final int argLength = argPtr.getLength();
                    // The value payload starts right after the one-byte type tag.
                    final int payloadOffset = tagOffset + 1;
                    resultStorage.reset();
                    try {
                        final byte tag = data[tagOffset];
                        final long yearValue;
                        if (tag == ATypeTag.SERIALIZED_DURATION_TYPE_TAG) {
                            yearValue = calSystem.getDurationYear(ADurationSerializerDeserializer.getYearMonth(data, payloadOffset));
                        } else if (tag == ATypeTag.SERIALIZED_YEAR_MONTH_DURATION_TYPE_TAG) {
                            yearValue = calSystem.getDurationYear(AYearMonthDurationSerializerDeserializer.getYearMonth(data, payloadOffset));
                        } else if (tag == ATypeTag.SERIALIZED_DATE_TYPE_TAG) {
                            // Dates are stored as a day count; scale it to a chronon before asking for the year.
                            long chrononTimeInMs = AInt32SerializerDeserializer.getInt(data, payloadOffset) * GregorianCalendarSystem.CHRONON_OF_DAY;
                            yearValue = calSystem.getYear(chrononTimeInMs);
                        } else if (tag == ATypeTag.SERIALIZED_DATETIME_TYPE_TAG) {
                            long chrononTimeInMs = AInt64SerializerDeserializer.getLong(data, payloadOffset);
                            yearValue = calSystem.getYear(chrononTimeInMs);
                        } else if (tag == ATypeTag.SERIALIZED_STRING_TYPE_TAG) {
                            strExprPtr.set(data, payloadOffset, argLength - 1);
                            strIter.reset(strExprPtr);
                            yearValue = readYearFromIterator();
                        } else {
                            throw new TypeMismatchException(getIdentifier(), 0, tag, ATypeTag.SERIALIZED_DURATION_TYPE_TAG, ATypeTag.SERIALIZED_YEAR_MONTH_DURATION_TYPE_TAG, ATypeTag.SERIALIZED_DATE_TYPE_TAG, ATypeTag.SERIALIZED_DATETIME_TYPE_TAG, ATypeTag.SERIALIZED_STRING_TYPE_TAG);
                        }
                        // Single exit: every supported input type ends up serialized the same way.
                        aMutableInt64.setValue(yearValue);
                        intSerde.serialize(aMutableInt64, out);
                        result.set(resultStorage);
                    } catch (IOException e) {
                        throw new HyracksDataException(e);
                    }
                }

                // Reads an optional '-' sign plus four characters from strIter and combines them as decimal digits.
                private int readYearFromIterator() {
                    final char lead = strIter.next();
                    final boolean negative = lead == '-';
                    // With a sign present, the thousands digit is the next character; otherwise it is the lead itself.
                    final char thousands = negative ? strIter.next() : lead;
                    int magnitude = (thousands - '0') * 1000;
                    magnitude += (strIter.next() - '0') * 100;
                    magnitude += (strIter.next() - '0') * 10;
                    magnitude += strIter.next() - '0';
                    return negative ? -1 * magnitude : magnitude;
                }
            };
        }
    };
}
Also used : DataOutput(java.io.DataOutput) GregorianCalendarSystem(org.apache.asterix.om.base.temporal.GregorianCalendarSystem) UTF8StringPointable(org.apache.hyracks.data.std.primitive.UTF8StringPointable) TypeMismatchException(org.apache.asterix.runtime.exceptions.TypeMismatchException) IPointable(org.apache.hyracks.data.std.api.IPointable) IOException(java.io.IOException) IScalarEvaluator(org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator) ISerializerDeserializer(org.apache.hyracks.api.dataflow.value.ISerializerDeserializer) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) IScalarEvaluatorFactory(org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory) UTF8StringCharacterIterator(org.apache.hyracks.data.std.util.UTF8StringCharacterIterator) ArrayBackedValueStorage(org.apache.hyracks.data.std.util.ArrayBackedValueStorage) IHyracksTaskContext(org.apache.hyracks.api.context.IHyracksTaskContext) VoidPointable(org.apache.hyracks.data.std.primitive.VoidPointable) IFrameTupleReference(org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference) AMutableInt64(org.apache.asterix.om.base.AMutableInt64)

Example 24 with UTF8StringPointable

use of org.apache.hyracks.data.std.primitive.UTF8StringPointable in project asterixdb by apache.

the class SubstringBeforeDescriptor method createEvaluatorFactory.

/**
 * Builds the evaluator factory for the substring-before function.
 * <p>
 * Each evaluator evaluates {@code args[0]} (the source string) and {@code args[1]} (the pattern),
 * requires both to carry the serialized string type tag, delegates to
 * {@code UTF8StringPointable.substrBefore} and writes the outcome, prefixed with the string
 * type tag, into {@code result}.
 */
@Override
public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) {
    return new IScalarEvaluatorFactory() {

        private static final long serialVersionUID = 1L;

        @Override
        public IScalarEvaluator createScalarEvaluator(final IHyracksTaskContext ctx) throws HyracksDataException {
            return new IScalarEvaluator() {

                private final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage();

                private final DataOutput out = resultStorage.getDataOutput();

                private final IPointable sourceArg = new VoidPointable();

                private final IPointable patternArg = new VoidPointable();

                private final IScalarEvaluator evalString = args[0].createScalarEvaluator(ctx);

                private final IScalarEvaluator evalPattern = args[1].createScalarEvaluator(ctx);

                private final GrowableArray scratch = new GrowableArray();

                private final UTF8StringBuilder builder = new UTF8StringBuilder();

                private final UTF8StringPointable stringPtr = new UTF8StringPointable();

                private final UTF8StringPointable patternPtr = new UTF8StringPointable();

                @Override
                public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
                    resultStorage.reset();

                    evalString.evaluate(tuple, sourceArg);
                    final byte[] sourceBytes = sourceArg.getByteArray();
                    final int sourceStart = sourceArg.getStartOffset();
                    final int sourceLength = sourceArg.getLength();

                    evalPattern.evaluate(tuple, patternArg);
                    final byte[] patternBytes = patternArg.getByteArray();
                    final int patternStart = patternArg.getStartOffset();
                    final int patternLength = patternArg.getLength();

                    // Both arguments are evaluated first; only then are their type tags validated, in order.
                    requireStringTag(0, sourceBytes[sourceStart]);
                    requireStringTag(1, patternBytes[patternStart]);

                    try {
                        // Skip the one-byte type tag so the pointables cover only the string payload.
                        stringPtr.set(sourceBytes, sourceStart + 1, sourceLength - 1);
                        patternPtr.set(patternBytes, patternStart + 1, patternLength - 1);
                        scratch.reset();
                        UTF8StringPointable.substrBefore(stringPtr, patternPtr, builder, scratch);
                        out.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG);
                        out.write(scratch.getByteArray(), 0, scratch.getLength());
                    } catch (IOException e) {
                        throw new HyracksDataException(e);
                    }
                    result.set(resultStorage);
                }

                // Rejects an argument whose serialized type tag is not the string tag.
                private void requireStringTag(int argIndex, byte actualTag) throws HyracksDataException {
                    if (actualTag != ATypeTag.SERIALIZED_STRING_TYPE_TAG) {
                        throw new TypeMismatchException(getIdentifier(), argIndex, actualTag, ATypeTag.SERIALIZED_STRING_TYPE_TAG);
                    }
                }
            };
        }
    };
}
Also used : DataOutput(java.io.DataOutput) UTF8StringPointable(org.apache.hyracks.data.std.primitive.UTF8StringPointable) TypeMismatchException(org.apache.asterix.runtime.exceptions.TypeMismatchException) GrowableArray(org.apache.hyracks.data.std.util.GrowableArray) IPointable(org.apache.hyracks.data.std.api.IPointable) IOException(java.io.IOException) IScalarEvaluator(org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator) UTF8StringBuilder(org.apache.hyracks.data.std.util.UTF8StringBuilder) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) IScalarEvaluatorFactory(org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory) ArrayBackedValueStorage(org.apache.hyracks.data.std.util.ArrayBackedValueStorage) IHyracksTaskContext(org.apache.hyracks.api.context.IHyracksTaskContext) VoidPointable(org.apache.hyracks.data.std.primitive.VoidPointable) IFrameTupleReference(org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference)

Example 25 with UTF8StringPointable

use of org.apache.hyracks.data.std.primitive.UTF8StringPointable in project asterixdb by apache.

the class SubstringDescriptor method createEvaluatorFactory.

/**
 * Builds the evaluator factory for the three-argument substring function.
 * <p>
 * Each evaluator evaluates {@code args[0]} (the string), {@code args[1]} (the start position) and
 * {@code args[2]} (the length). The start position is decremented by one before it is handed to
 * {@code UTF8StringPointable.substr}. The outcome is written, prefixed with the string type tag,
 * into {@code result}.
 * <p>
 * Errors: a non-string first argument raises a TypeMismatchException for argument 0; a
 * {@code StringIndexOutOfBoundsException} from {@code substr} is translated into a
 * RuntimeDataException with {@code ErrorCode.OUT_OF_BOUND}; IOExceptions are wrapped in a
 * HyracksDataException.
 */
@Override
public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) {
    return new IScalarEvaluatorFactory() {

        private static final long serialVersionUID = 1L;

        @Override
        public IScalarEvaluator createScalarEvaluator(final IHyracksTaskContext ctx) throws HyracksDataException {
            return new IScalarEvaluator() {

                private final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage();

                private final DataOutput out = resultStorage.getDataOutput();

                private IPointable argString = new VoidPointable();

                private IPointable argStart = new VoidPointable();

                private IPointable argLen = new VoidPointable();

                private final IScalarEvaluator evalString = args[0].createScalarEvaluator(ctx);

                private final IScalarEvaluator evalStart = args[1].createScalarEvaluator(ctx);

                private final IScalarEvaluator evalLen = args[2].createScalarEvaluator(ctx);

                private final GrowableArray array = new GrowableArray();

                private final UTF8StringBuilder builder = new UTF8StringBuilder();

                private final UTF8StringPointable string = new UTF8StringPointable();

                @Override
                public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
                    resultStorage.reset();
                    evalString.evaluate(tuple, argString);
                    evalStart.evaluate(tuple, argStart);
                    evalLen.evaluate(tuple, argLen);
                    byte[] bytes = argStart.getByteArray();
                    int offset = argStart.getStartOffset();
                    // The start position is args[1]; pass index 1 so a type error is attributed to the
                    // right argument (index 0 is the string, see the check below).
                    int start = ATypeHierarchy.getIntegerValue(getIdentifier().getName(), 1, bytes, offset) - 1;
                    bytes = argLen.getByteArray();
                    offset = argLen.getStartOffset();
                    // The length is args[2]; pass index 2 for the same reason.
                    int len = ATypeHierarchy.getIntegerValue(getIdentifier().getName(), 2, bytes, offset);
                    bytes = argString.getByteArray();
                    offset = argString.getStartOffset();
                    int length = argString.getLength();
                    if (bytes[offset] != ATypeTag.SERIALIZED_STRING_TYPE_TAG) {
                        throw new TypeMismatchException(getIdentifier(), 0, bytes[offset], ATypeTag.SERIALIZED_STRING_TYPE_TAG);
                    }
                    // Skip the one-byte type tag so the pointable covers only the string payload.
                    string.set(bytes, offset + 1, length - 1);
                    array.reset();
                    try {
                        UTF8StringPointable.substr(string, start, len, builder, array);
                    } catch (StringIndexOutOfBoundsException e) {
                        throw new RuntimeDataException(ErrorCode.OUT_OF_BOUND, getIdentifier(), 1, start + len - 1);
                    } catch (IOException e) {
                        throw new HyracksDataException(e);
                    }
                    try {
                        out.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG);
                        out.write(array.getByteArray(), 0, array.getLength());
                    } catch (IOException e) {
                        throw new HyracksDataException(e);
                    }
                    result.set(resultStorage);
                }
            };
        }
    };
}
Also used : DataOutput(java.io.DataOutput) UTF8StringPointable(org.apache.hyracks.data.std.primitive.UTF8StringPointable) TypeMismatchException(org.apache.asterix.runtime.exceptions.TypeMismatchException) GrowableArray(org.apache.hyracks.data.std.util.GrowableArray) IPointable(org.apache.hyracks.data.std.api.IPointable) IOException(java.io.IOException) IScalarEvaluator(org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator) UTF8StringBuilder(org.apache.hyracks.data.std.util.UTF8StringBuilder) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) IScalarEvaluatorFactory(org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory) ArrayBackedValueStorage(org.apache.hyracks.data.std.util.ArrayBackedValueStorage) IHyracksTaskContext(org.apache.hyracks.api.context.IHyracksTaskContext) VoidPointable(org.apache.hyracks.data.std.primitive.VoidPointable) IFrameTupleReference(org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference) RuntimeDataException(org.apache.asterix.common.exceptions.RuntimeDataException)

Aggregations

UTF8StringPointable (org.apache.hyracks.data.std.primitive.UTF8StringPointable): 44; IScalarEvaluatorFactory (org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory): 40; IHyracksTaskContext (org.apache.hyracks.api.context.IHyracksTaskContext): 40; IPointable (org.apache.hyracks.data.std.api.IPointable): 37; IFrameTupleReference (org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference): 37; DataOutput (java.io.DataOutput): 35; IScalarEvaluator (org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator): 35; VoidPointable (org.apache.hyracks.data.std.primitive.VoidPointable): 35; ArrayBackedValueStorage (org.apache.hyracks.data.std.util.ArrayBackedValueStorage): 35; TypeMismatchException (org.apache.asterix.runtime.exceptions.TypeMismatchException): 33; IOException (java.io.IOException): 31; InvalidDataFormatException (org.apache.asterix.runtime.exceptions.InvalidDataFormatException): 27; ISerializerDeserializer (org.apache.hyracks.api.dataflow.value.ISerializerDeserializer): 26; HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException): 13; UTF8StringWriter (org.apache.hyracks.util.string.UTF8StringWriter): 5; AMutableDuration (org.apache.asterix.om.base.AMutableDuration): 4; AMutablePoint (org.apache.asterix.om.base.AMutablePoint): 4; GrowableArray (org.apache.hyracks.data.std.util.GrowableArray): 4; UTF8StringBuilder (org.apache.hyracks.data.std.util.UTF8StringBuilder): 4; AMutableInterval (org.apache.asterix.om.base.AMutableInterval): 3