use of java.io.DataInput in project asterixdb by apache.
the class FixedSizeElementInvertedListCursor method printInvList.
/**
 * Renders every element reachable by this cursor as text, mainly for debugging.
 *
 * <p>The fields of one element are separated by commas and every element is followed by a
 * single space. For the duration of the call the cursor position is reset to
 * {@code startOff - elementSize} on page 0, element 0; the position the cursor had on entry
 * is restored before the method exits, whether it returns normally or throws.
 *
 * @param serdes deserializers indexed by field position; {@code serdes[i]} decodes field
 *        {@code i} of each element
 * @return the textual rendering of all elements
 * @throws HyracksDataException propagated from the cursor or from a deserializer
 */
@SuppressWarnings("rawtypes")
@Override
public String printInvList(ISerializerDeserializer[] serdes) throws HyracksDataException {
    // Remember the caller-visible position so the scan below is transparent to the caller.
    int oldCurrentOff = currentOff;
    int oldCurrentPageId = currentPageIx;
    int oldCurrentElementIx = currentElementIx;

    currentOff = startOff - elementSize;
    currentPageIx = 0;
    currentElementIx = 0;

    StringBuilder strBuilder = new StringBuilder();
    try {
        while (hasNext()) {
            next();
            int fieldCount = tuple.getFieldCount();
            for (int i = 0; i < fieldCount; i++) {
                ByteArrayInputStream inStream = new ByteArrayInputStream(tuple.getFieldData(i),
                        tuple.getFieldStart(i), tuple.getFieldLength(i));
                DataInput dataIn = new DataInputStream(inStream);
                Object o = serdes[i].deserialize(dataIn);
                // append(Object) prints "null" for a null value instead of throwing.
                strBuilder.append(o);
                if (i + 1 < fieldCount) {
                    strBuilder.append(",");
                }
            }
            strBuilder.append(" ");
        }
        return strBuilder.toString();
    } finally {
        // Restore the previous state even if iteration or deserialization failed, so a
        // failed print cannot leave the cursor at an unexpected position.
        currentOff = oldCurrentOff;
        currentPageIx = oldCurrentPageId;
        currentElementIx = oldCurrentElementIx;
    }
}
use of java.io.DataInput in project asterixdb by apache.
the class NGramTokenizerTest method runTestNGramTokenizerWithHashedUTF8Tokens.
/**
 * Tokenizes the shared test input into hashed UTF-8 n-grams and checks every produced token,
 * in order, against the hash of the expected gram at the same position.
 *
 * @param prePost flag forwarded both to the tokenizer and to {@code getExpectedGrams}
 *        (presumably toggles the pre/post padding grams; confirm against the tokenizer)
 * @throws IOException if a serialized token cannot be read back as an int
 */
void runTestNGramTokenizerWithHashedUTF8Tokens(boolean prePost) throws IOException {
    HashedUTF8NGramTokenFactory tokenFactory = new HashedUTF8NGramTokenFactory();
    NGramUTF8StringBinaryTokenizer tokenizer =
            new NGramUTF8StringBinaryTokenizer(gramLength, prePost, true, false, tokenFactory);
    tokenizer.reset(inputBuffer, 0, inputBuffer.length);

    // Build the expected hashes from the expected grams.
    ArrayList<String> expectedGrams = new ArrayList<String>();
    getExpectedGrams(str, gramLength, expectedGrams, prePost);
    ArrayList<Integer> expectedHashedGrams = new ArrayList<Integer>();
    for (String s : expectedGrams) {
        int hash = tokenHash(s, 1);
        expectedHashedGrams.add(hash);
    }

    int tokenCount = 0;
    while (tokenizer.hasNext()) {
        tokenizer.next();

        // serialize hashed token
        GrowableArray tokenData = new GrowableArray();
        IToken token = tokenizer.getToken();
        token.serializeToken(tokenData);

        // deserialize token; only the bytes actually written are exposed to the reader, so a
        // short write surfaces as an EOFException instead of reading unrelated buffer content
        ByteArrayInputStream bais =
                new ByteArrayInputStream(tokenData.getByteArray(), 0, tokenData.getLength());
        DataInput in = new DataInputStream(bais);
        // Kept boxed so that assertEquals(Object, Object) is selected.
        Integer hashedGram = in.readInt();

        Assert.assertEquals(expectedHashedGrams.get(tokenCount), hashedGram);
        tokenCount++;
    }
}
use of java.io.DataInput in project asterixdb by apache.
the class WordTokenizerTest method testWordTokenizerWithCountedHashedUTF8Tokens.
/**
 * Tokenizes the shared test input with a delimited tokenizer producing hashed word tokens and
 * checks every token, in order, against {@code expectedCountedHashedUTF8Tokens}.
 *
 * @throws IOException if a serialized token cannot be read back as an int
 */
@Test
public void testWordTokenizerWithCountedHashedUTF8Tokens() throws IOException {
    HashedUTF8WordTokenFactory tokenFactory = new HashedUTF8WordTokenFactory();
    DelimitedUTF8StringBinaryTokenizer tokenizer =
            new DelimitedUTF8StringBinaryTokenizer(false, false, tokenFactory);
    tokenizer.reset(inputBuffer, 0, inputBuffer.length);

    int tokenCount = 0;
    while (tokenizer.hasNext()) {
        tokenizer.next();

        // serialize hashed token
        GrowableArray tokenData = new GrowableArray();
        IToken token = tokenizer.getToken();
        token.serializeToken(tokenData);

        // deserialize token; only the bytes actually written are exposed to the reader, so a
        // short write surfaces as an EOFException instead of reading unrelated buffer content
        ByteArrayInputStream bais =
                new ByteArrayInputStream(tokenData.getByteArray(), 0, tokenData.getLength());
        DataInput in = new DataInputStream(bais);
        Integer hashedToken = in.readInt();

        // JUnit expects (expected, actual); the order matters for the failure message.
        Assert.assertEquals(expectedCountedHashedUTF8Tokens.get(tokenCount), hashedToken);
        tokenCount++;
    }
}
use of java.io.DataInput in project asterixdb by apache.
the class LSMInvertedIndexTestUtils method getExpectedResults.
/**
 * Computes the expected answer of an inverted-index search directly from the check tuples.
 *
 * <p>The first field of {@code searchDocument} is tokenized. For every token, the check tuples
 * lying between a low key and a high key built from that token are looked up, and for each of
 * them the counter in {@code scanCountArray} addressed by the tuple's element field is
 * incremented. Finally every index whose counter reaches the search modifier's occurrence
 * threshold is added to {@code expectedResults}.
 *
 * <p>When {@code isPartitioned} is set, the token-count bounds supplied by the search modifier
 * are appended to the keys (unless a bound is negative) and the element is read from field 2
 * instead of field 1.
 *
 * @param scanCountArray scratch counters; zeroed on entry and left holding the per-index counts
 * @param checkTuples ordered check tuples to search
 * @param searchDocument tuple whose field 0 holds the query document
 * @param tokenizer tokenizer applied to the query document
 * @param tokenSerde deserializer turning a serialized token into a comparable key object
 * @param searchModifier supplies the occurrence threshold and the token-count bounds
 * @param expectedResults output list; cleared, then filled with the qualifying indexes in
 *        ascending order
 * @param isPartitioned whether the token-count bounds and element field 2 are used
 * @throws IOException propagated from token serialization or deserialization
 */
@SuppressWarnings("unchecked")
public static void getExpectedResults(int[] scanCountArray, TreeSet<CheckTuple> checkTuples,
        ITupleReference searchDocument, IBinaryTokenizer tokenizer, ISerializerDeserializer tokenSerde,
        IInvertedIndexSearchModifier searchModifier, List<Integer> expectedResults, boolean isPartitioned)
        throws IOException {
    // Start from a clean slate.
    Arrays.fill(scanCountArray, 0);
    expectedResults.clear();
    GrowableArray serializedToken = new GrowableArray();

    // First pass over the query document: only count its tokens.
    tokenizer.reset(searchDocument.getFieldData(0), searchDocument.getFieldStart(0),
            searchDocument.getFieldLength(0));
    int queryTokenCount = 0;
    for (; tokenizer.hasNext(); queryTokenCount++) {
        tokenizer.next();
    }

    // A negative bound means "no bound": the keys then consist of the token alone.
    final short lowerBound =
            isPartitioned ? searchModifier.getNumTokensLowerBound((short) queryTokenCount) : (short) -1;
    final short upperBound =
            isPartitioned ? searchModifier.getNumTokensUpperBound((short) queryTokenCount) : (short) -1;
    final int elementFieldIndex = isPartitioned ? 2 : 1;
    final int threshold = searchModifier.getOccurrenceThreshold(queryTokenCount);

    final boolean boundedBelow = lowerBound >= 0;
    final boolean boundedAbove = upperBound >= 0;

    // Second pass: look up the inverted list of every token and count the elements seen.
    tokenizer.reset(searchDocument.getFieldData(0), searchDocument.getFieldStart(0),
            searchDocument.getFieldLength(0));
    while (tokenizer.hasNext()) {
        tokenizer.next();
        IToken current = tokenizer.getToken();
        serializedToken.reset();
        current.serializeToken(serializedToken);

        DataInput tokenInput = new DataInputStream(
                new ByteArrayInputStream(serializedToken.getByteArray(), 0, serializedToken.getLength()));
        Comparable tokenKey = (Comparable) tokenSerde.deserialize(tokenInput);

        // Low key: <token> or <token, lowerBound>.
        CheckTuple lowKey = boundedBelow ? new CheckTuple(2, 2) : new CheckTuple(1, 1);
        lowKey.appendField(tokenKey);
        if (boundedBelow) {
            lowKey.appendField(Short.valueOf(lowerBound));
        }

        // High key: <token> or <token, upperBound>.
        CheckTuple highKey = boundedAbove ? new CheckTuple(2, 2) : new CheckTuple(1, 1);
        highKey.appendField(tokenKey);
        if (boundedAbove) {
            highKey.appendField(Short.valueOf(upperBound));
        }

        // View over the check tuples that make up the inverted list of this token.
        SortedSet<CheckTuple> matches =
                OrderedIndexTestUtils.getPrefixExpectedSubset(checkTuples, lowKey, highKey);
        for (CheckTuple match : matches) {
            Integer slot = (Integer) match.getField(elementFieldIndex);
            scanCountArray[slot]++;
        }
    }

    // Collect every index whose count satisfies the occurrence threshold.
    // (The list is cleared again here exactly as before; nothing has been added in between.)
    expectedResults.clear();
    for (int candidate = 0; candidate < scanCountArray.length; candidate++) {
        if (scanCountArray[candidate] >= threshold) {
            expectedResults.add(candidate);
        }
    }
}
use of java.io.DataInput in project geode by apache.
the class InternalDataSerializer method invokeFromData.
/**
 * Fills an existing DSFID or DataSerializable instance from a stream, for backward
 * compatibility selecting the {@code fromData} variant that matches the version of the data.
 *
 * <p>If the stream reports a version other than {@link Version#CURRENT} and {@code ds}
 * implements {@link SerializationVersions}, the versions it lists are scanned in order; for the
 * first one that is greater than the stream's version, the method
 * {@code fromDataPre_<methodSuffix>(DataInput)} is invoked reflectively on {@code ds}. In every
 * other case the regular {@code fromData} is called. This method does not read information
 * about the class of the object; when serializing, use {@code invokeToData} to write the
 * contents of the object.
 *
 * <p>{@link EOFException}, {@link ClassNotFoundException} and {@link CacheClosedException} are
 * rethrown unchanged; any other exception is wrapped in a {@link SerializationException}.
 *
 * @param ds the object whose state is read from the stream
 * @param in the input stream.
 */
public static void invokeFromData(Object ds, DataInput in) throws IOException, ClassNotFoundException {
  try {
    final Version streamVersion = InternalDataSerializer.getVersionForDataStreamOrNull(in);
    final boolean differsFromCurrent = streamVersion != null && streamVersion != Version.CURRENT;

    if (differsFromCurrent && ds instanceof SerializationVersions) {
      // versions at which the serialized form of this class was changed
      final Version[] upgradePoints = ((SerializationVersions) ds).getSerializationVersions();
      if (upgradePoints != null) {
        for (int idx = 0; idx < upgradePoints.length; idx++) {
          final Version upgradePoint = upgradePoints[idx];
          // the data predates this change: read it with the matching legacy reader
          if (streamVersion.compareTo(upgradePoint) < 0) {
            final String legacyReader = "fromDataPre_" + upgradePoint.getMethodSuffix();
            ds.getClass().getMethod(legacyReader, DataInput.class).invoke(ds, in);
            return;
          }
        }
      }
    }

    // no legacy reader applied: use the current format
    if (ds instanceof DataSerializableFixedID) {
      ((DataSerializableFixedID) ds).fromData(in);
    } else {
      ((DataSerializable) ds).fromData(in);
    }
  } catch (EOFException | ClassNotFoundException | CacheClosedException ex) {
    // client went away - ignore
    throw ex;
  } catch (Exception ex) {
    throw new SerializationException(
        LocalizedStrings.DataSerializer_COULD_NOT_CREATE_AN_INSTANCE_OF_0
            .toLocalizedString(ds.getClass().getName()),
        ex);
  }
}
Aggregations