Use of org.apache.lucene.queries.intervals.IntervalsSource in the OpenSearch project by opensearch-project.
Class IntervalBuilder, method analyzeSynonyms.
/**
 * Builds an interval source from a token stream in which several tokens may share a position.
 * <p>
 * A token with a position increment of 0 is grouped with the tokens collected before it; a token with a
 * positive increment closes the current group and starts a new one. A group with one member contributes that
 * term source directly, a group with several members contributes an {@code Intervals.or} over them. Each
 * group's source is passed to {@code extend} together with the number of positions skipped in front of the
 * group (its first token's position increment minus one), and the collected sources are handed to
 * {@code combineSources}.
 * <p>
 * The stream is reset here but neither {@code end()} nor {@code close()} is called on it.
 * NOTE(review): for a stream that yields no tokens the final step still adds an {@code Intervals.or} over zero
 * sources; callers presumably only pass streams with at least one token - confirm.
 *
 * @param ts      the token stream to consume
 * @param maxGaps forwarded unchanged to {@code combineSources}
 * @param mode    forwarded unchanged to {@code combineSources}
 * @return the source produced by {@code combineSources} for the collected per-position sources
 * @throws IOException if reading the token stream fails
 */
protected IntervalsSource analyzeSynonyms(TokenStream ts, int maxGaps, IntervalMode mode) throws IOException {
    final List<IntervalsSource> perPosition = new ArrayList<>();
    final List<IntervalsSource> stacked = new ArrayList<>();
    final TermToBytesRefAttribute termAttribute = ts.addAttribute(TermToBytesRefAttribute.class);
    final PositionIncrementAttribute incrementAttribute = ts.addAttribute(PositionIncrementAttribute.class);
    ts.reset();

    int skippedBefore = 0;
    while (ts.incrementToken()) {
        final int increment = incrementAttribute.getPositionIncrement();
        if (increment > 0) {
            // A new position begins: emit whatever was gathered for the previous one (nothing before the first token).
            if (stacked.isEmpty() == false) {
                final IntervalsSource previous = stacked.size() == 1
                    ? stacked.get(0)
                    : Intervals.or(stacked.toArray(new IntervalsSource[0]));
                perPosition.add(extend(previous, skippedBefore));
            }
            stacked.clear();
            skippedBefore = increment - 1;
        }
        // Copy the bytes: the attribute's BytesRef is read again on the next incrementToken().
        stacked.add(Intervals.term(BytesRef.deepCopyOf(termAttribute.getBytesRef())));
    }

    // Emit the trailing group; unlike inside the loop, an empty group is not skipped here.
    final IntervalsSource trailing = stacked.size() == 1
        ? stacked.get(0)
        : Intervals.or(stacked.toArray(new IntervalsSource[0]));
    perPosition.add(extend(trailing, skippedBefore));

    return combineSources(perPosition, maxGaps, mode);
}
Use of org.apache.lucene.queries.intervals.IntervalsSource in the OpenSearch project by opensearch-project.
Class IntervalBuilder, method analyzeGraph.
/**
 * Converts a token graph into a list of interval sources, one segment at a time.
 * <p>
 * The graph is cut at the states returned by {@code articulationPoints()}; the first segment starts at state 0
 * and the last one is requested with an end state of -1. For a segment whose start state has a side path, every
 * finite string of the segment is analyzed with {@code analyzeTerms}, combined through {@code combineSources}
 * with {@code 0} and {@link IntervalMode#ORDERED}, and the alternatives are wrapped in a single
 * {@code Intervals.or}. For a segment without a side path, the sources returned by {@code analyzeTerms} for its
 * only finite string are appended directly.
 *
 * @param source the token stream describing the graph; it is reset before being read
 * @return the sources collected across all segments, in segment order
 * @throws IOException                  if reading a token stream fails
 * @throws BooleanQuery.TooManyClauses if one segment yields more alternatives than
 *                                      {@code BooleanQuery.getMaxClauseCount()}
 */
protected List<IntervalsSource> analyzeGraph(TokenStream source) throws IOException {
    source.reset();
    final GraphTokenStreamFiniteStrings tokenGraph = new GraphTokenStreamFiniteStrings(source);
    final List<IntervalsSource> collected = new ArrayList<>();
    final int[] cutStates = tokenGraph.articulationPoints();
    final int clauseLimit = BooleanQuery.getMaxClauseCount();

    int from = 0;
    for (int segment = 0; segment <= cutStates.length; segment++) {
        // One more segment than there are cut states; the last one runs to the end of the graph (-1).
        final int to = segment < cutStates.length ? cutStates[segment] : -1;

        if (tokenGraph.hasSidePath(from) == false) {
            final Iterator<TokenStream> onlyPath = tokenGraph.getFiniteStrings(from, to);
            collected.addAll(analyzeTerms(onlyPath.next()));
            assert onlyPath.hasNext() == false;
        } else {
            final List<IntervalsSource> alternatives = new ArrayList<>();
            for (Iterator<TokenStream> pathIt = tokenGraph.getFiniteStrings(from, to); pathIt.hasNext();) {
                final TokenStream path = pathIt.next();
                final IntervalsSource asPhrase = combineSources(analyzeTerms(path), 0, IntervalMode.ORDERED);
                if (alternatives.size() >= clauseLimit) {
                    throw new BooleanQuery.TooManyClauses();
                }
                alternatives.add(asPhrase);
            }
            if (alternatives.isEmpty() == false) {
                collected.add(Intervals.or(alternatives.toArray(new IntervalsSource[0])));
            }
        }

        // The next segment starts where this one ended.
        from = to;
    }
    return collected;
}
Use of org.apache.lucene.queries.intervals.IntervalsSource in the OpenSearch project by opensearch-project.
Class IntervalBuilderTests, method testGraphSynonyms.
/**
 * Verifies that {@code analyzeText} turns the canned token graph below into an ordered source whose middle
 * element is a disjunction of {@code term2} and the phrase {@code term3 term4}.
 */
public void testGraphSynonyms() throws IOException {
    // term1 term2:2/term3 term4 term5
    final Token term1 = new Token("term1", 1, 2);
    final Token term2 = new Token("term2", 1, 3, 4, 2);
    final Token term3 = new Token("term3", 0, 3, 4);
    final Token term4 = new Token("term4", 5, 6);
    final Token term5 = new Token("term5", 6, 7);
    final CannedTokenStream tokens = new CannedTokenStream(term1, term2, term3, term4, term5);

    final IntervalsSource actual = BUILDER.analyzeText(new CachingTokenFilter(tokens), -1, true);

    final IntervalsSource alternatives = Intervals.or(Intervals.term("term2"), Intervals.phrase("term3", "term4"));
    final IntervalsSource wanted = Intervals.ordered(Intervals.term("term1"), alternatives, Intervals.term("term5"));
    assertEquals(wanted, actual);
}
Use of org.apache.lucene.queries.intervals.IntervalsSource in the OpenSearch project by opensearch-project.
Class IntervalBuilderTests, method testGraphTerminatesOnGap.
/**
 * Verifies the source built by {@code analyzeText} when the token after the graph section skips a position:
 * the expected result wraps {@code term5} in {@code Intervals.extend(..., 1, 0)} after the disjunction.
 */
public void testGraphTerminatesOnGap() throws IOException {
    // term1 term2:2/term3 term4 [] term5
    final Token term1 = new Token("term1", 1, 2);
    final Token term2 = new Token("term2", 1, 2, 3, 2);
    final Token term3 = new Token("term3", 0, 2, 3);
    final Token term4 = new Token("term4", 2, 3);
    final Token term5 = new Token("term5", 2, 6, 7);
    final CannedTokenStream tokens = new CannedTokenStream(term1, term2, term3, term4, term5);

    final IntervalsSource actual = BUILDER.analyzeText(new CachingTokenFilter(tokens), -1, true);

    final IntervalsSource alternatives = Intervals.or(Intervals.term("term2"), Intervals.phrase("term3", "term4"));
    final IntervalsSource afterGap = Intervals.extend(Intervals.term("term5"), 1, 0);
    final IntervalsSource wanted = Intervals.ordered(Intervals.term("term1"), alternatives, afterGap);
    assertEquals(wanted, actual);
}
Use of org.apache.lucene.queries.intervals.IntervalsSource in the OpenSearch project by opensearch-project.
Class IntervalBuilderTests, method testPhraseWithStopword.
/**
 * Verifies that {@code analyzeText}, called with {@code 0} as its second argument, yields a phrase in which the
 * second term is wrapped in {@code Intervals.extend(..., 1, 0)} when that token has a position increment of 2.
 */
public void testPhraseWithStopword() throws IOException {
    final Token term1 = new Token("term1", 1, 1, 2);
    final Token term3 = new Token("term3", 2, 5, 6);
    final CannedTokenStream tokens = new CannedTokenStream(term1, term3);

    final IntervalsSource actual = BUILDER.analyzeText(new CachingTokenFilter(tokens), 0, true);

    final IntervalsSource afterGap = Intervals.extend(Intervals.term("term3"), 1, 0);
    final IntervalsSource wanted = Intervals.phrase(Intervals.term("term1"), afterGap);
    assertEquals(wanted, actual);
}
Aggregations