Use of org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.TokenizerInfo.TokenizerType in the project asterixdb by apache.
The method tokenizeQuery of the class AbstractTOccurrenceSearcher.
/**
 * Tokenizes the query field of the given search predicate and appends each serialized token to
 * {@code queryTokenFrame} through {@code queryTokenAppender}.
 *
 * <p>For full-text queries an additional validation rejects phrases:
 * <ul>
 *   <li>with a {@code STRING} tokenizer, the field may produce at most one token;</li>
 *   <li>with a {@code LIST} tokenizer, several tokens are allowed but no token may contain a
 *       separator character.</li>
 * </ul>
 *
 * @param searchPred the predicate supplying the query tuple, the query field index, the query
 *                   tokenizer and the full-text flag
 * @throws HyracksDataException with {@code ErrorCode.FULLTEXT_PHRASE_FOUND} when a full-text
 *                              query contains a phrase, or wrapping any {@link IOException}
 *                              raised while serializing a token
 */
protected void tokenizeQuery(InvertedIndexSearchPredicate searchPred) throws HyracksDataException {
    ITupleReference queryTuple = searchPred.getQueryTuple();
    int queryFieldIndex = searchPred.getQueryFieldIndex();
    IBinaryTokenizer queryTokenizer = searchPred.getQueryTokenizer();
    // Full-text queries get the extra phrase validation inside the loop below.
    boolean isFullTextSearchQuery = searchPred.getIsFullTextSearchQuery();
    // The tokenizer type decides which phrase check applies.
    TokenizerType queryTokenizerType = queryTokenizer.getTokenizerType();
    int tokenCountInOneField = 0;
    queryTokenAppender.reset(queryTokenFrame, true);
    queryTokenizer.reset(queryTuple.getFieldData(queryFieldIndex), queryTuple.getFieldStart(queryFieldIndex),
            queryTuple.getFieldLength(queryFieldIndex));
    while (queryTokenizer.hasNext()) {
        queryTokenizer.next();
        queryTokenBuilder.reset();
        tokenCountInOneField++;
        try {
            IToken token = queryTokenizer.getToken();
            // If it's a list, it can have multiple keywords in it. But, each keyword should not be a phrase.
            if (isFullTextSearchQuery) {
                if (queryTokenizerType == TokenizerType.STRING && tokenCountInOneField > 1) {
                    // A second token from a single string field means the query text was a phrase.
                    throw HyracksDataException.create(ErrorCode.FULLTEXT_PHRASE_FOUND);
                } else if (queryTokenizerType == TokenizerType.LIST) {
                    // NOTE(review): the scan starts at offset 1, so the first byte of the token is
                    // never tested - presumably a header byte; confirm against the list tokenizer.
                    for (int j = 1; j < token.getTokenLength(); j++) {
                        if (DelimitedUTF8StringBinaryTokenizer
                                .isSeparator((char) token.getData()[token.getStartOffset() + j])) {
                            throw HyracksDataException.create(ErrorCode.FULLTEXT_PHRASE_FOUND);
                        }
                    }
                }
            }
            token.serializeToken(queryTokenBuilder.getFieldData());
            queryTokenBuilder.addFieldEndOffset();
            // WARNING: assuming one frame is big enough to hold all tokens
            // NOTE(review): the result of append(...) is not checked here.
            queryTokenAppender.append(queryTokenBuilder.getFieldEndOffsets(), queryTokenBuilder.getByteArray(), 0,
                    queryTokenBuilder.getSize());
        } catch (HyracksDataException e) {
            // HyracksDataException is an IOException subtype in Hyracks. Rethrow it untouched so the
            // handler below does not bury its error code (e.g. FULLTEXT_PHRASE_FOUND) in a wrapper.
            throw e;
        } catch (IOException e) {
            throw new HyracksDataException(e);
        }
    }
}
Aggregations