Search in sources :

Example 6 with GrowableArray

use of org.apache.hyracks.data.std.util.GrowableArray in project asterixdb by apache.

the class UTF8StringPointableTest method testSubstr.

/**
 * Exercises {@code substr} on two sample strings and checks each extracted
 * substring against a reference pointable via {@code compareTo}.
 */
@Test
public void testSubstr() throws Exception {
    final GrowableArray buffer = new GrowableArray();
    final UTF8StringBuilder stringBuilder = new UTF8StringBuilder();

    // Case 1: substr(1, 127) of the 128 sample must compare equal to STRING_LEN_127.
    STRING_LEN_128.substr(1, 127, stringBuilder, buffer);
    final UTF8StringPointable extracted = new UTF8StringPointable();
    extracted.set(buffer.getByteArray(), 0, buffer.getLength());
    assertEquals(0, STRING_LEN_127.compareTo(extracted));

    // Case 2: the same buffer is reset and reused; a substring spanning the whole
    // mixed sample must compare equal to the sample itself.
    buffer.reset();
    STRING_UTF8_MIX.substr(0, UTF8StringSample.STRING_UTF8_MIX.length(), stringBuilder, buffer);
    extracted.set(buffer.getByteArray(), 0, buffer.getLength());
    assertEquals(0, STRING_UTF8_MIX.compareTo(extracted));
}
Also used : GrowableArray(org.apache.hyracks.data.std.util.GrowableArray) UTF8StringBuilder(org.apache.hyracks.data.std.util.UTF8StringBuilder) Test(org.junit.Test)

Example 7 with GrowableArray

use of org.apache.hyracks.data.std.util.GrowableArray in project asterixdb by apache.

the class NGramTokenizerTest method runTestNGramTokenizerWithHashedUTF8Tokens.

/**
 * Tokenizes {@code inputBuffer} into hashed n-grams and checks every serialized token
 * against the hash of the corresponding expected gram.
 *
 * <p>Expected grams come from {@code getExpectedGrams(str, gramLength, ..., prePost)} and
 * are hashed with {@code tokenHash(gram, 1)}. Each token produced by the tokenizer is
 * serialized, read back as a single {@code int}, and compared positionally.
 *
 * <p>NOTE(review): the number of tokens produced is not compared with the number of
 * expected grams, so a tokenizer that yields fewer tokens would still pass — confirm
 * whether a count assertion is wanted.
 *
 * @param prePost passed through to both the tokenizer constructor and {@code getExpectedGrams}
 * @throws IOException if reading the serialized token fails (including a token shorter
 *                     than four bytes)
 */
void runTestNGramTokenizerWithHashedUTF8Tokens(boolean prePost) throws IOException {
    HashedUTF8NGramTokenFactory tokenFactory = new HashedUTF8NGramTokenFactory();
    NGramUTF8StringBinaryTokenizer tokenizer = new NGramUTF8StringBinaryTokenizer(gramLength, prePost, true, false, tokenFactory);
    tokenizer.reset(inputBuffer, 0, inputBuffer.length);

    ArrayList<String> expectedGrams = new ArrayList<String>();
    getExpectedGrams(str, gramLength, expectedGrams, prePost);
    ArrayList<Integer> expectedHashedGrams = new ArrayList<Integer>(expectedGrams.size());
    for (String gram : expectedGrams) {
        expectedHashedGrams.add(tokenHash(gram, 1));
    }

    // One buffer is reused for every token; it is reset before each serialization.
    GrowableArray tokenData = new GrowableArray();
    int tokenCount = 0;
    while (tokenizer.hasNext()) {
        tokenizer.next();
        // serialize hashed token
        tokenData.reset();
        IToken token = tokenizer.getToken();
        token.serializeToken(tokenData);
        // deserialize token: expose only the bytes actually written, so a short token
        // fails with an EOF instead of silently reading bytes beyond the valid length.
        ByteArrayInputStream bais = new ByteArrayInputStream(tokenData.getByteArray(), 0, tokenData.getLength());
        DataInput in = new DataInputStream(bais);
        // Kept boxed so the assertion compares two Integer objects.
        Integer hashedGram = in.readInt();
        Assert.assertEquals(expectedHashedGrams.get(tokenCount), hashedGram);
        tokenCount++;
    }
}
Also used : ArrayList(java.util.ArrayList) GrowableArray(org.apache.hyracks.data.std.util.GrowableArray) DataInputStream(java.io.DataInputStream) DataInput(java.io.DataInput) ByteArrayInputStream(java.io.ByteArrayInputStream)

Example 8 with GrowableArray

use of org.apache.hyracks.data.std.util.GrowableArray in project asterixdb by apache.

the class WordTokenizerTest method testWordTokenizerWithCountedHashedUTF8Tokens.

/**
 * Tokenizes {@code inputBuffer} with a delimited tokenizer backed by a hashed word token
 * factory and checks each serialized token against
 * {@code expectedCountedHashedUTF8Tokens}, position by position.
 *
 * <p>Each token is serialized, read back as a single {@code int}, and compared with the
 * expected value at the same index.
 *
 * @throws IOException if reading the serialized token fails (including a token shorter
 *                     than four bytes)
 */
@Test
public void testWordTokenizerWithCountedHashedUTF8Tokens() throws IOException {
    HashedUTF8WordTokenFactory tokenFactory = new HashedUTF8WordTokenFactory();
    DelimitedUTF8StringBinaryTokenizer tokenizer = new DelimitedUTF8StringBinaryTokenizer(false, false, tokenFactory);
    tokenizer.reset(inputBuffer, 0, inputBuffer.length);

    // One buffer is reused for every token; it is reset before each serialization.
    GrowableArray tokenData = new GrowableArray();
    int tokenCount = 0;
    while (tokenizer.hasNext()) {
        tokenizer.next();
        // serialize hashed token
        tokenData.reset();
        IToken token = tokenizer.getToken();
        token.serializeToken(tokenData);
        // deserialize token: expose only the bytes actually written, so a short token
        // fails with an EOF instead of silently reading bytes beyond the valid length.
        ByteArrayInputStream bais = new ByteArrayInputStream(tokenData.getByteArray(), 0, tokenData.getLength());
        DataInput in = new DataInputStream(bais);
        Integer hashedToken = in.readInt();
        // JUnit's contract is assertEquals(expected, actual); keeping that order makes
        // failure messages report the right side as "expected".
        Assert.assertEquals(expectedCountedHashedUTF8Tokens.get(tokenCount), hashedToken);
        tokenCount++;
    }
}
Also used : DataInput(java.io.DataInput) ByteArrayInputStream(java.io.ByteArrayInputStream) GrowableArray(org.apache.hyracks.data.std.util.GrowableArray) DataInputStream(java.io.DataInputStream) Test(org.junit.Test)

Example 9 with GrowableArray

use of org.apache.hyracks.data.std.util.GrowableArray in project asterixdb by apache.

the class LSMInvertedIndexTestUtils method getExpectedResults.

/**
 * Computes the expected search results for {@code searchDocument} from the reference
 * set of check tuples.
 *
 * <p>Field 0 of the search document is tokenized. For every token, the matching range of
 * {@code checkTuples} is looked up and the counter in {@code scanCountArray} indexed by
 * each matching tuple's element field is incremented. Every index whose counter reaches
 * the search modifier's occurrence threshold is then added to {@code expectedResults}.
 *
 * @param scanCountArray  scratch counters; zeroed on entry and overwritten
 * @param checkTuples     reference tuples the expected inverted lists are taken from
 * @param searchDocument  tuple whose field 0 is tokenized as the query
 * @param tokenizer       tokenizer applied to the query field (reset twice by this method)
 * @param tokenSerde      deserializer turning a serialized token into a comparable object
 * @param searchModifier  supplies the occurrence threshold and, for partitioned indexes,
 *                        the token-count bounds
 * @param expectedResults output list; cleared, then filled in ascending index order
 * @param isPartitioned   when true, token-count bounds are requested from the search
 *                        modifier and the element is read from field 2 instead of field 1
 * @throws IOException if token serialization or deserialization fails
 */
@SuppressWarnings("unchecked")
public static void getExpectedResults(int[] scanCountArray, TreeSet<CheckTuple> checkTuples, ITupleReference searchDocument, IBinaryTokenizer tokenizer, ISerializerDeserializer tokenSerde, IInvertedIndexSearchModifier searchModifier, List<Integer> expectedResults, boolean isPartitioned) throws IOException {
    // Start from a clean state: zero the counters and drop any previous results.
    Arrays.fill(scanCountArray, 0);
    expectedResults.clear();
    GrowableArray serializedToken = new GrowableArray();

    // First pass over the query: only count the tokens.
    tokenizer.reset(searchDocument.getFieldData(0), searchDocument.getFieldStart(0), searchDocument.getFieldLength(0));
    int queryTokenCount = 0;
    for (; tokenizer.hasNext(); queryTokenCount++) {
        tokenizer.next();
    }

    // Negative bounds mean "no length filtering"; they stay negative for a
    // non-partitioned index.
    short lowerBound = -1;
    short upperBound = -1;
    int elementFieldIndex = 1;
    if (isPartitioned) {
        lowerBound = searchModifier.getNumTokensLowerBound((short) queryTokenCount);
        upperBound = searchModifier.getNumTokensUpperBound((short) queryTokenCount);
        elementFieldIndex = 2;
    }
    int occurrenceThreshold = searchModifier.getOccurrenceThreshold(queryTokenCount);

    // Second pass over the query: look up each token's inverted list.
    tokenizer.reset(searchDocument.getFieldData(0), searchDocument.getFieldStart(0), searchDocument.getFieldLength(0));
    while (tokenizer.hasNext()) {
        tokenizer.next();
        IToken currentToken = tokenizer.getToken();
        serializedToken.reset();
        currentToken.serializeToken(serializedToken);
        DataInput tokenInput = new DataInputStream(new ByteArrayInputStream(serializedToken.getByteArray(), 0, serializedToken.getLength()));
        Comparable tokenKey = (Comparable) tokenSerde.deserialize(tokenInput);

        // Low key: the token alone when there is no usable lower bound (index not
        // partitioned, or the search modifier offers no length filtering); otherwise
        // the token followed by the lower bound.
        CheckTuple lowKey = lowerBound < 0 ? new CheckTuple(1, 1) : new CheckTuple(2, 2);
        lowKey.appendField(tokenKey);
        if (lowerBound >= 0) {
            lowKey.appendField(Short.valueOf(lowerBound));
        }

        // High key: built the same way from the upper bound.
        CheckTuple highKey = upperBound < 0 ? new CheckTuple(1, 1) : new CheckTuple(2, 2);
        highKey.appendField(tokenKey);
        if (upperBound >= 0) {
            highKey.appendField(Short.valueOf(upperBound));
        }

        // View over the check tuples that make up this token's inverted list.
        SortedSet<CheckTuple> invertedList = OrderedIndexTestUtils.getPrefixExpectedSubset(checkTuples, lowKey, highKey);
        // Bump the scan count of every element found in the list.
        for (CheckTuple entry : invertedList) {
            Integer elementId = (Integer) entry.getField(elementFieldIndex);
            scanCountArray[elementId]++;
        }
    }

    // Collect every index whose scan count satisfies the occurrence threshold.
    expectedResults.clear();
    for (int idx = 0; idx < scanCountArray.length; idx++) {
        if (scanCountArray[idx] < occurrenceThreshold) {
            continue;
        }
        expectedResults.add(idx);
    }
}
Also used : DataInput(java.io.DataInput) CheckTuple(org.apache.hyracks.storage.am.common.CheckTuple) IToken(org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IToken) ByteArrayInputStream(java.io.ByteArrayInputStream) GrowableArray(org.apache.hyracks.data.std.util.GrowableArray) DataInputStream(java.io.DataInputStream)

Example 10 with GrowableArray

use of org.apache.hyracks.data.std.util.GrowableArray in project asterixdb by apache.

the class TestTupleGenerator method next.

/**
 * Generates the next tuple with one randomly valued field per entry in {@code types}.
 *
 * <p>When {@code reuseObject} is set, the existing field buffers are reset and the
 * current {@code tuple} is refilled. Otherwise new field buffers and a new
 * {@code TestTupleReference} are created first.
 *
 * @return the tuple whose fields were just written
 * @throws HyracksDataException declared for the field serializers used to write the values
 */
public ITupleReference next() throws HyracksDataException {
    final int fieldCount = types.length;

    if (!reuseObject) {
        // Fresh storage and a fresh tuple wrapper for every call.
        this.fields = new GrowableArray[fieldCount];
        for (int idx = 0; idx < fieldCount; idx++) {
            fields[idx] = new GrowableArray();
        }
        tuple = new TestTupleReference(fields);
    } else {
        // Recycle the existing buffers.
        for (int idx = 0; idx < fieldCount; idx++) {
            fields[idx].reset();
        }
    }

    // Write one random value of the declared type into each field.
    for (int idx = 0; idx < fieldCount; idx++) {
        switch (types[idx]) {
            case Boolean:
                BooleanSerializerDeserializer.INSTANCE.serialize(random.nextBoolean(), fields[idx].getDataOutput());
                break;
            case Double:
                DoubleSerializerDeserializer.INSTANCE.serialize(random.nextDouble(), fields[idx].getDataOutput());
                break;
            case Integer64:
                Integer64SerializerDeserializer.INSTANCE.serialize(random.nextLong(), fields[idx].getDataOutput());
                break;
            case String:
                stringSerde.serialize(RandomStringUtils.randomAlphanumeric(stringFieldSizes), fields[idx].getDataOutput());
                break;
            default:
                // Any other field type is left empty.
                break;
        }
    }
    return tuple;
}
Also used : GrowableArray(org.apache.hyracks.data.std.util.GrowableArray)

Aggregations

GrowableArray (org.apache.hyracks.data.std.util.GrowableArray): 23; UTF8StringBuilder (org.apache.hyracks.data.std.util.UTF8StringBuilder): 14; Test (org.junit.Test): 12; ByteArrayInputStream (java.io.ByteArrayInputStream): 7; DataInput (java.io.DataInput): 7; DataInputStream (java.io.DataInputStream): 7; IOException (java.io.IOException): 6; DataOutput (java.io.DataOutput): 5; IScalarEvaluator (org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator): 5; IScalarEvaluatorFactory (org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory): 5; IHyracksTaskContext (org.apache.hyracks.api.context.IHyracksTaskContext): 5; HyracksDataException (org.apache.hyracks.api.exceptions.HyracksDataException): 5; IPointable (org.apache.hyracks.data.std.api.IPointable): 5; VoidPointable (org.apache.hyracks.data.std.primitive.VoidPointable): 5; ArrayBackedValueStorage (org.apache.hyracks.data.std.util.ArrayBackedValueStorage): 5; IFrameTupleReference (org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference): 5; TypeMismatchException (org.apache.asterix.runtime.exceptions.TypeMismatchException): 4; UTF8StringPointable (org.apache.hyracks.data.std.primitive.UTF8StringPointable): 4; ArrayList (java.util.ArrayList): 3; RuntimeDataException (org.apache.asterix.common.exceptions.RuntimeDataException): 2