Search in sources :

Example 91 with DataInput

use of java.io.DataInput in project asterixdb by apache.

the class FixedSizeElementInvertedListCursor method printInvList.

/**
 * Builds a textual dump of this inverted list.
 * <p>
 * The cursor is repositioned to {@code startOff - elementSize} on page 0 and then iterated with
 * {@code hasNext()}/{@code next()}. For every element, each field of {@code tuple} is deserialized
 * with the serde at the same index; fields are joined with {@code ","} and every element is
 * followed by a single space.
 * <p>
 * The cursor position that was current on entry is restored before this method returns, including
 * when iteration or deserialization throws.
 *
 * @param serdes one deserializer per tuple field, indexed by field position
 * @return the printed elements
 * @throws HyracksDataException if a field cannot be deserialized
 */
@SuppressWarnings("rawtypes")
@Override
public String printInvList(ISerializerDeserializer[] serdes) throws HyracksDataException {
    // Remember the current position so that printing has no lasting effect on the cursor.
    final int oldCurrentOff = currentOff;
    final int oldCurrentPageIx = currentPageIx;
    final int oldCurrentElementIx = currentElementIx;
    StringBuilder strBuilder = new StringBuilder();
    try {
        // One element before startOff, so that the first next() lands on the first element
        // (presumably next() advances by elementSize -- confirm against next()).
        currentOff = startOff - elementSize;
        currentPageIx = 0;
        currentElementIx = 0;
        while (hasNext()) {
            next();
            final int fieldCount = tuple.getFieldCount();
            for (int i = 0; i < fieldCount; i++) {
                ByteArrayInputStream inStream = new ByteArrayInputStream(tuple.getFieldData(i), tuple.getFieldStart(i), tuple.getFieldLength(i));
                DataInput dataIn = new DataInputStream(inStream);
                Object o = serdes[i].deserialize(dataIn);
                strBuilder.append(o.toString());
                if (i + 1 < fieldCount) {
                    strBuilder.append(",");
                }
            }
            strBuilder.append(" ");
        }
    } finally {
        // Restore the previous state even if iteration or deserialization failed; otherwise a
        // failed debug print would leave the cursor rewound for its real consumer.
        currentOff = oldCurrentOff;
        currentPageIx = oldCurrentPageIx;
        currentElementIx = oldCurrentElementIx;
    }
    return strBuilder.toString();
}
Also used : DataInput(java.io.DataInput) ByteArrayInputStream(java.io.ByteArrayInputStream) DataInputStream(java.io.DataInputStream)

Example 92 with DataInput

use of java.io.DataInput in project asterixdb by apache.

the class NGramTokenizerTest method runTestNGramTokenizerWithHashedUTF8Tokens.

/**
 * Tokenizes {@code inputBuffer} with an n-gram tokenizer backed by a hashed-token factory and
 * checks every emitted token against the hash of the corresponding expected gram.
 * <p>
 * Expected grams come from {@code getExpectedGrams} and are hashed with {@code tokenHash(s, 1)}.
 *
 * @param prePost forwarded to both the tokenizer and {@code getExpectedGrams}
 * @throws IOException if a serialized token cannot be read back
 */
void runTestNGramTokenizerWithHashedUTF8Tokens(boolean prePost) throws IOException {
    HashedUTF8NGramTokenFactory tokenFactory = new HashedUTF8NGramTokenFactory();
    NGramUTF8StringBinaryTokenizer tokenizer = new NGramUTF8StringBinaryTokenizer(gramLength, prePost, true, false, tokenFactory);
    tokenizer.reset(inputBuffer, 0, inputBuffer.length);
    ArrayList<String> expectedGrams = new ArrayList<String>();
    getExpectedGrams(str, gramLength, expectedGrams, prePost);
    ArrayList<Integer> expectedHashedGrams = new ArrayList<Integer>();
    for (String s : expectedGrams) {
        int hash = tokenHash(s, 1);
        expectedHashedGrams.add(hash);
    }
    int tokenCount = 0;
    while (tokenizer.hasNext()) {
        tokenizer.next();
        // serialize hashed token
        GrowableArray tokenData = new GrowableArray();
        IToken token = tokenizer.getToken();
        token.serializeToken(tokenData);
        // deserialize token; limit the stream to the bytes actually written, because the
        // backing array of the GrowableArray may be larger than its logical length
        ByteArrayInputStream bais = new ByteArrayInputStream(tokenData.getByteArray(), 0, tokenData.getLength());
        DataInput in = new DataInputStream(bais);
        // Kept boxed so that assertEquals resolves to the (Object, Object) overload.
        Integer hashedGram = in.readInt();
        Assert.assertEquals(expectedHashedGrams.get(tokenCount), hashedGram);
        tokenCount++;
    }
}
Also used : ArrayList(java.util.ArrayList) GrowableArray(org.apache.hyracks.data.std.util.GrowableArray) DataInputStream(java.io.DataInputStream) DataInput(java.io.DataInput) ByteArrayInputStream(java.io.ByteArrayInputStream)

Example 93 with DataInput

use of java.io.DataInput in project asterixdb by apache.

the class WordTokenizerTest method testWordTokenizerWithCountedHashedUTF8Tokens.

/**
 * Tokenizes {@code inputBuffer} with a delimited-word tokenizer backed by a hashed-token factory
 * and checks each emitted token, in order, against {@code expectedCountedHashedUTF8Tokens}.
 *
 * @throws IOException if a serialized token cannot be read back
 */
@Test
public void testWordTokenizerWithCountedHashedUTF8Tokens() throws IOException {
    HashedUTF8WordTokenFactory tokenFactory = new HashedUTF8WordTokenFactory();
    DelimitedUTF8StringBinaryTokenizer tokenizer = new DelimitedUTF8StringBinaryTokenizer(false, false, tokenFactory);
    tokenizer.reset(inputBuffer, 0, inputBuffer.length);
    int tokenCount = 0;
    while (tokenizer.hasNext()) {
        tokenizer.next();
        // serialize hashed token
        GrowableArray tokenData = new GrowableArray();
        IToken token = tokenizer.getToken();
        token.serializeToken(tokenData);
        // deserialize token; limit the stream to the bytes actually written, because the
        // backing array of the GrowableArray may be larger than its logical length
        ByteArrayInputStream bais = new ByteArrayInputStream(tokenData.getByteArray(), 0, tokenData.getLength());
        DataInput in = new DataInputStream(bais);
        Integer hashedToken = in.readInt();
        // JUnit's contract is assertEquals(expected, actual); the order matters for the
        // failure message.
        Assert.assertEquals(expectedCountedHashedUTF8Tokens.get(tokenCount), hashedToken);
        tokenCount++;
    }
}
Also used : DataInput(java.io.DataInput) ByteArrayInputStream(java.io.ByteArrayInputStream) GrowableArray(org.apache.hyracks.data.std.util.GrowableArray) DataInputStream(java.io.DataInputStream) Test(org.junit.Test)

Example 94 with DataInput

use of java.io.DataInput in project asterixdb by apache.

the class LSMInvertedIndexTestUtils method getExpectedResults.

/**
 * Computes the expected result of an inverted-index search directly from the check tuples.
 * <p>
 * The first field of {@code searchDocument} is tokenized; for each token the matching range of
 * {@code checkTuples} is looked up and the element stored in each matching tuple gets its slot in
 * {@code scanCountArray} incremented. Every index whose count reaches the search modifier's
 * occurrence threshold is then added to {@code expectedResults}.
 *
 * @param scanCountArray scratch counters, indexed by element; zeroed on entry
 * @param checkTuples the tuples to search
 * @param searchDocument tuple whose field 0 holds the query to tokenize
 * @param tokenizer tokenizer applied to the query; it is reset and consumed twice
 * @param tokenSerde deserializer turning a serialized token into a comparable key
 * @param searchModifier supplies the occurrence threshold and, if partitioned, the token-count bounds
 * @param expectedResults output list; cleared and then filled with the qualifying indexes
 * @param isPartitioned whether keys carry a token-count field in addition to the token
 * @throws IOException if a token cannot be serialized or deserialized
 */
@SuppressWarnings("unchecked")
public static void getExpectedResults(int[] scanCountArray, TreeSet<CheckTuple> checkTuples, ITupleReference searchDocument, IBinaryTokenizer tokenizer, ISerializerDeserializer tokenSerde, IInvertedIndexSearchModifier searchModifier, List<Integer> expectedResults, boolean isPartitioned) throws IOException {
    // Start from zeroed counters and an empty result list.
    Arrays.fill(scanCountArray, 0);
    expectedResults.clear();
    GrowableArray tokenData = new GrowableArray();

    // First pass over the query: only count its tokens.
    tokenizer.reset(searchDocument.getFieldData(0), searchDocument.getFieldStart(0), searchDocument.getFieldLength(0));
    int numQueryTokens = 0;
    for (; tokenizer.hasNext(); numQueryTokens++) {
        tokenizer.next();
    }

    // Negative bounds mean "no token-count filtering"; the element is then read from field 1.
    short numTokensLowerBound = -1;
    short numTokensUpperBound = -1;
    int invListElementField = 1;
    if (isPartitioned) {
        numTokensLowerBound = searchModifier.getNumTokensLowerBound((short) numQueryTokens);
        numTokensUpperBound = searchModifier.getNumTokensUpperBound((short) numQueryTokens);
        invListElementField = 2;
    }
    int occurrenceThreshold = searchModifier.getOccurrenceThreshold(numQueryTokens);

    // Second pass over the query: look up each token's inverted list.
    tokenizer.reset(searchDocument.getFieldData(0), searchDocument.getFieldStart(0), searchDocument.getFieldLength(0));
    while (tokenizer.hasNext()) {
        tokenizer.next();
        IToken token = tokenizer.getToken();
        tokenData.reset();
        token.serializeToken(tokenData);
        DataInput dataIn = new DataInputStream(new ByteArrayInputStream(tokenData.getByteArray(), 0, tokenData.getLength()));
        Comparable tokenObj = (Comparable) tokenSerde.deserialize(dataIn);

        // Low key: the token alone when the index is not partitioned or the search modifier
        // allows no length filtering, otherwise the token plus the lower token-count bound.
        final boolean filterLow = numTokensLowerBound >= 0;
        CheckTuple lowKey = filterLow ? new CheckTuple(2, 2) : new CheckTuple(1, 1);
        lowKey.appendField(tokenObj);
        if (filterLow) {
            lowKey.appendField(Short.valueOf(numTokensLowerBound));
        }

        // High key: same shape, using the upper token-count bound.
        final boolean filterHigh = numTokensUpperBound >= 0;
        CheckTuple highKey = filterHigh ? new CheckTuple(2, 2) : new CheckTuple(1, 1);
        highKey.appendField(tokenObj);
        if (filterHigh) {
            highKey.appendField(Short.valueOf(numTokensUpperBound));
        }

        // View over the check tuples that make up this token's inverted list.
        SortedSet<CheckTuple> invList = OrderedIndexTestUtils.getPrefixExpectedSubset(checkTuples, lowKey, highKey);
        // Bump the scan count of every element that appears in the list.
        for (CheckTuple checkTuple : invList) {
            Integer element = (Integer) checkTuple.getField(invListElementField);
            scanCountArray[element]++;
        }
    }

    // Keep the elements whose scan count satisfies the occurrence threshold.
    expectedResults.clear();
    for (int elementIx = 0; elementIx < scanCountArray.length; elementIx++) {
        if (scanCountArray[elementIx] >= occurrenceThreshold) {
            expectedResults.add(elementIx);
        }
    }
}
Also used : DataInput(java.io.DataInput) CheckTuple(org.apache.hyracks.storage.am.common.CheckTuple) IToken(org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IToken) ByteArrayInputStream(java.io.ByteArrayInputStream) GrowableArray(org.apache.hyracks.data.std.util.GrowableArray) DataInputStream(java.io.DataInputStream)

Example 95 with DataInput

use of java.io.DataInput in project geode by apache.

the class InternalDataSerializer method invokeFromData.

/**
   * For backward compatibility this method should be used to invoke fromData on a DSFID or
   * DataSerializable. It picks the fromData variant that matches the version of the data stream:
   * when the stream comes from a version older than one of the object's serialization versions,
   * the corresponding {@code fromDataPre_<suffix>} method is invoked reflectively; otherwise the
   * regular {@code fromData} is called. This method does not read information about the class of
   * the object. When serializing use the method invokeToData to write the contents of the object.
   * 
   * @param ds the object to populate from the stream
   * @param in the input stream.
   * @throws IOException if an {@link EOFException} occurs while reading
   * @throws ClassNotFoundException if it is raised while reading
   */
public static void invokeFromData(Object ds, DataInput in) throws IOException, ClassNotFoundException {
    try {
        final Version streamVersion = InternalDataSerializer.getVersionForDataStreamOrNull(in);
        final boolean fromOtherVersion = streamVersion != null && streamVersion != Version.CURRENT;
        if (fromOtherVersion && ds instanceof SerializationVersions) {
            // versions at which this class changed its serialized form
            final Version[] upgradeVersions = ((SerializationVersions) ds).getSerializationVersions();
            if (upgradeVersions != null) {
                for (Version upgradeVersion : upgradeVersions) {
                    if (streamVersion.compareTo(upgradeVersion) >= 0) {
                        continue;
                    }
                    // The first upgrade the peer has not seen decides which legacy reader is used.
                    final String legacyReader = "fromDataPre_" + upgradeVersion.getMethodSuffix();
                    ds.getClass().getMethod(legacyReader, DataInput.class).invoke(ds, in);
                    return;
                }
            }
        }
        // No legacy reader applied: use the current format.
        if (ds instanceof DataSerializableFixedID) {
            ((DataSerializableFixedID) ds).fromData(in);
        } else {
            ((DataSerializable) ds).fromData(in);
        }
    } catch (EOFException | ClassNotFoundException | CacheClosedException ex) {
        // These are passed to the caller unchanged rather than wrapped (the original note
        // here attributes them to the client having gone away).
        throw ex;
    } catch (Exception ex) {
        // Everything else, including failures of the reflective call, is reported as a
        // SerializationException carrying the original cause.
        throw new SerializationException(LocalizedStrings.DataSerializer_COULD_NOT_CREATE_AN_INSTANCE_OF_0.toLocalizedString(ds.getClass().getName()), ex);
    }
}
Also used : SerializationException(org.apache.geode.SerializationException) CacheClosedException(org.apache.geode.cache.CacheClosedException) DataSerializable(org.apache.geode.DataSerializable) InvocationTargetException(java.lang.reflect.InvocationTargetException) NonPortableClassException(org.apache.geode.pdx.NonPortableClassException) IOException(java.io.IOException) CancelException(org.apache.geode.CancelException) EOFException(java.io.EOFException) UTFDataFormatException(java.io.UTFDataFormatException) GemFireIOException(org.apache.geode.GemFireIOException) SerializationException(org.apache.geode.SerializationException) CacheClosedException(org.apache.geode.cache.CacheClosedException) NotSerializableException(java.io.NotSerializableException) ToDataException(org.apache.geode.ToDataException) DataInput(java.io.DataInput) EOFException(java.io.EOFException) ObjectStreamClass(java.io.ObjectStreamClass)

Aggregations

DataInput (java.io.DataInput)247 Test (org.junit.Test)132 DataOutputStream (java.io.DataOutputStream)117 DataInputStream (java.io.DataInputStream)112 ByteArrayInputStream (java.io.ByteArrayInputStream)106 UnitTest (org.apache.geode.test.junit.categories.UnitTest)96 ByteArrayOutputStream (java.io.ByteArrayOutputStream)29 DataOutput (java.io.DataOutput)23 IOException (java.io.IOException)21 ArrayList (java.util.ArrayList)16 ARecord (org.apache.asterix.om.base.ARecord)14 EOFException (java.io.EOFException)9 File (java.io.File)9 BufferedInputStream (java.io.BufferedInputStream)8 HashMap (java.util.HashMap)7 HeapDataOutputStream (org.apache.geode.internal.HeapDataOutputStream)7 MembershipTest (org.apache.geode.test.junit.categories.MembershipTest)7 GrowableArray (org.apache.hyracks.data.std.util.GrowableArray)7 FileInputStream (java.io.FileInputStream)6 ByteBuffer (java.nio.ByteBuffer)6