Usage example of org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute in the Apache lucene-solr project: class QueryBuilder, method createFieldQuery.
/**
 * Creates a query from the token stream produced by the analysis chain.
 * <p>
 * The stream is cached and consumed once up front to classify it (token count,
 * position count, stacked synonyms, graph structure); the matching
 * {@code analyze*} helper then replays the cache to build the query.
 *
 * @param source the token stream to create the query from
 * @param operator default boolean operator used for this query
 * @param field field to create queries against
 * @param quoted true if phrases should be generated when terms occur at more than one position
 * @param phraseSlop slop factor for phrase/multiphrase queries
 * @return the constructed query, or {@code null} if the stream yields no terms
 */
protected Query createFieldQuery(TokenStream source, BooleanClause.Occur operator, String field, boolean quoted, int phraseSlop) {
  assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST;
  try (CachingTokenFilter stream = new CachingTokenFilter(source)) {
    TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
    PositionLengthAttribute posLenAtt = stream.addAttribute(PositionLengthAttribute.class);
    if (termAtt == null) {
      // No term attribute: the chain produces nothing query-able.
      return null;
    }

    // Pass 1: read through the cached stream and assess the situation.
    int tokenCount = 0;          // total tokens emitted
    int posCount = 0;            // distinct positions occupied
    boolean sawSynonyms = false; // any token stacked on an existing position?
    boolean sawGraph = false;    // any token spanning more than one position?
    stream.reset();
    while (stream.incrementToken()) {
      tokenCount++;
      int increment = posIncAtt.getPositionIncrement();
      if (increment == 0) {
        // Zero increment stacks this token on the previous position: a synonym.
        sawSynonyms = true;
      } else {
        posCount += increment;
      }
      if (enableGraphQueries && posLenAtt.getPositionLength() > 1) {
        sawGraph = true;
      }
    }

    // Pass 2 happens inside the analyze* helpers, replaying the cache;
    // here we only dispatch on what pass 1 found.
    if (tokenCount == 0) {
      return null;
    }
    if (tokenCount == 1) {
      // Exactly one term: simplest case.
      return analyzeTerm(field, stream);
    }
    if (sawGraph) {
      // The tokens form a graph (multi-position tokens present).
      return quoted
          ? analyzeGraphPhrase(stream, field, phraseSlop)
          : analyzeGraphBoolean(field, stream, operator);
    }
    if (quoted && posCount > 1) {
      // Phrase query; stacked synonyms need the multi-phrase variant.
      return sawSynonyms
          ? analyzeMultiPhrase(field, stream, phraseSlop)
          : analyzePhrase(field, stream, phraseSlop);
    }
    // Boolean query: one position with synonyms, or several positions.
    return posCount == 1
        ? analyzeBoolean(field, stream)
        : analyzeMultiBoolean(field, stream, operator);
  } catch (IOException e) {
    throw new RuntimeException("Error analyzing query text", e);
  }
}
Usage example of org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute in the Apache lucene-solr project: class NGramTokenizerTest, method testNGrams.
/**
 * Exhaustively checks the tokens produced by {@link NGramTokenizer} against a
 * brute-force enumeration of every expected gram of the input string.
 *
 * @param minGram smallest gram length to expect
 * @param maxGram largest gram length to expect
 * @param s input string to tokenize
 * @param nonTokenChars characters treated as non-token characters (gram breaks)
 * @param edgesOnly if true, only grams that start at a token edge are expected
 * @throws IOException if the tokenizer fails
 */
static void testNGrams(int minGram, int maxGram, String s, final String nonTokenChars, boolean edgesOnly) throws IOException {
  // Work in code points so supplementary characters are handled correctly.
  final int[] cp = toCodePoints(s);
  // charOffset[i] = char offset of code point i; charOffset[cp.length] = s.length().
  final int[] charOffset = new int[cp.length + 1];
  for (int i = 0; i < cp.length; ++i) {
    charOffset[i + 1] = charOffset[i] + Character.charCount(cp[i]);
  }
  final Tokenizer grams = new NGramTokenizer(minGram, maxGram, edgesOnly) {
    @Override
    protected boolean isTokenChar(int chr) {
      return nonTokenChars.indexOf(chr) < 0;
    }
  };
  grams.setReader(new StringReader(s));
  final CharTermAttribute termAtt = grams.addAttribute(CharTermAttribute.class);
  final PositionIncrementAttribute posIncAtt = grams.addAttribute(PositionIncrementAttribute.class);
  final PositionLengthAttribute posLenAtt = grams.addAttribute(PositionLengthAttribute.class);
  final OffsetAttribute offsetAtt = grams.addAttribute(OffsetAttribute.class);
  grams.reset();
  // Enumerate every candidate gram [start, end) and assert the tokenizer emits
  // exactly those made purely of token chars (and, when edgesOnly, only those
  // whose start is at the beginning or right after a non-token char).
  for (int start = 0; start < cp.length; ++start) {
    candidate: for (int end = start + minGram; end <= start + maxGram && end <= cp.length; ++end) {
      if (edgesOnly && start > 0 && isTokenChar(nonTokenChars, cp[start - 1])) {
        continue candidate; // preceded by a token char: not on an edge
      }
      for (int j = start; j < end; ++j) {
        if (!isTokenChar(nonTokenChars, cp[j])) {
          continue candidate; // contains a non-token char: never emitted
        }
      }
      assertTrue(grams.incrementToken());
      assertArrayEquals(Arrays.copyOfRange(cp, start, end), toCodePoints(termAtt));
      assertEquals(1, posIncAtt.getPositionIncrement());
      assertEquals(1, posLenAtt.getPositionLength());
      assertEquals(charOffset[start], offsetAtt.startOffset());
      assertEquals(charOffset[end], offsetAtt.endOffset());
    }
  }
  assertFalse(grams.incrementToken());
  grams.end();
  // After end(), both offsets must point to the end of the input.
  assertEquals(s.length(), offsetAtt.startOffset());
  assertEquals(s.length(), offsetAtt.endOffset());
}
Aggregations