Search in sources:

Example 11 with IntervalsSource

use of org.apache.lucene.queries.intervals.IntervalsSource in project OpenSearch by opensearch-project.

the class IntervalBuilder method analyzeSynonyms.

/**
 * Builds an interval source from a token stream in which several tokens may share a position.
 *
 * <p>Tokens are grouped by position: a token with a position increment of zero joins the
 * current group, while a positive increment closes the current group and opens a new one.
 * A closed group with one member is used directly; a group with several members is wrapped
 * in {@code Intervals.or}. Each group is passed through {@code extend} together with the
 * number of positions skipped before it (position increment minus one), and the resulting
 * list is handed to {@code combineSources}.
 *
 * @param ts      the token stream to consume; it is reset here before iteration
 * @param maxGaps forwarded unchanged to {@code combineSources}
 * @param mode    forwarded unchanged to {@code combineSources}
 * @return whatever {@code combineSources} produces for the collected per-position sources
 * @throws IOException if the token stream fails while being reset or advanced
 */
protected IntervalsSource analyzeSynonyms(TokenStream ts, int maxGaps, IntervalMode mode) throws IOException {
    final List<IntervalsSource> positions = new ArrayList<>();
    final List<IntervalsSource> stacked = new ArrayList<>();
    final TermToBytesRefAttribute termAttribute = ts.addAttribute(TermToBytesRefAttribute.class);
    final PositionIncrementAttribute incrementAttribute = ts.addAttribute(PositionIncrementAttribute.class);
    ts.reset();
    int skippedBefore = 0;
    while (ts.incrementToken()) {
        final int increment = incrementAttribute.getPositionIncrement();
        if (increment > 0) {
            // A new position starts: flush whatever was stacked at the previous one.
            if (!stacked.isEmpty()) {
                final IntervalsSource group = stacked.size() == 1
                    ? stacked.get(0)
                    : Intervals.or(stacked.toArray(new IntervalsSource[0]));
                positions.add(extend(group, skippedBefore));
            }
            stacked.clear();
            skippedBefore = increment - 1;
        }
        // Copy the bytes: the attribute's BytesRef is read again on the next token.
        stacked.add(Intervals.term(BytesRef.deepCopyOf(termAttribute.getBytesRef())));
    }
    // Flush the final group; anything other than exactly one member goes through Intervals.or.
    final IntervalsSource trailing = stacked.size() == 1
        ? stacked.get(0)
        : Intervals.or(stacked.toArray(new IntervalsSource[0]));
    positions.add(extend(trailing, skippedBefore));
    return combineSources(positions, maxGaps, mode);
}
Also used : TermToBytesRefAttribute(org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute) ArrayList(java.util.ArrayList) IntervalsSource(org.apache.lucene.queries.intervals.IntervalsSource) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute)

Example 12 with IntervalsSource

use of org.apache.lucene.queries.intervals.IntervalsSource in project OpenSearch by opensearch-project.

the class IntervalBuilder method analyzeGraph.

/**
 * Converts a graph token stream into a list of interval sources, one or more per graph segment.
 *
 * <p>The stream is split at the articulation points reported by
 * {@link GraphTokenStreamFiniteStrings}. For every segment (including the one that runs from
 * the last articulation point to the end, denoted by an end state of {@code -1}):
 * <ul>
 *   <li>if the segment's start state has a side path, each finite string of the segment is
 *       analyzed with {@code analyzeTerms}, combined via
 *       {@code combineSources(..., 0, IntervalMode.ORDERED)}, and the alternatives are joined
 *       with {@code Intervals.or} into a single clause;</li>
 *   <li>otherwise the segment's single finite string is analyzed with {@code analyzeTerms}
 *       and all of its sources are appended as individual clauses.</li>
 * </ul>
 *
 * @param source the token stream to analyze; it is reset here before the graph is built
 * @return the clauses collected across all segments, in segment order
 * @throws IOException if reading the token stream fails
 * @throws BooleanQuery.TooManyClauses if a segment yields more alternative paths than
 *         {@code BooleanQuery.getMaxClauseCount()}
 */
protected List<IntervalsSource> analyzeGraph(TokenStream source) throws IOException {
    source.reset();
    final GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(source);
    final List<IntervalsSource> clauses = new ArrayList<>();
    final int[] articulationPoints = graph.articulationPoints();
    final int clauseLimit = BooleanQuery.getMaxClauseCount();
    int start = 0;
    // One extra iteration handles the tail segment after the last articulation point.
    for (int i = 0; i <= articulationPoints.length; i++) {
        final int end = i < articulationPoints.length ? articulationPoints[i] : -1;
        if (!graph.hasSidePath(start)) {
            final Iterator<TokenStream> only = graph.getFiniteStrings(start, end);
            clauses.addAll(analyzeTerms(only.next()));
            assert only.hasNext() == false;
        } else {
            final List<IntervalsSource> alternatives = new ArrayList<>();
            for (Iterator<TokenStream> it = graph.getFiniteStrings(start, end); it.hasNext();) {
                final IntervalsSource phrase = combineSources(analyzeTerms(it.next()), 0, IntervalMode.ORDERED);
                if (alternatives.size() >= clauseLimit) {
                    throw new BooleanQuery.TooManyClauses();
                }
                alternatives.add(phrase);
            }
            if (!alternatives.isEmpty()) {
                clauses.add(Intervals.or(alternatives.toArray(new IntervalsSource[0])));
            }
        }
        // The next segment begins where this one ended.
        start = end;
    }
    return clauses;
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) GraphTokenStreamFiniteStrings(org.apache.lucene.util.graph.GraphTokenStreamFiniteStrings) ArrayList(java.util.ArrayList) IntervalsSource(org.apache.lucene.queries.intervals.IntervalsSource)

Example 13 with IntervalsSource

use of org.apache.lucene.queries.intervals.IntervalsSource in project OpenSearch by opensearch-project.

the class IntervalBuilderTests method testGraphSynonyms.

/**
 * Checks that a token stream containing a multi-position synonym ("term2" spanning two
 * positions, stacked with "term3" followed by "term4") is turned into an ordered interval
 * whose middle element is a disjunction of the single term and the two-term phrase.
 */
public void testGraphSynonyms() throws IOException {
    // term1 term2:2/term3 term4 term5
    final Token[] tokens = {
        new Token("term1", 1, 2),
        new Token("term2", 1, 3, 4, 2),
        new Token("term3", 0, 3, 4),
        new Token("term4", 5, 6),
        new Token("term5", 6, 7)
    };
    final CannedTokenStream canned = new CannedTokenStream(tokens);
    final IntervalsSource actual = BUILDER.analyzeText(new CachingTokenFilter(canned), -1, true);

    final IntervalsSource alternatives = Intervals.or(Intervals.term("term2"), Intervals.phrase("term3", "term4"));
    final IntervalsSource wanted = Intervals.ordered(Intervals.term("term1"), alternatives, Intervals.term("term5"));
    assertEquals(wanted, actual);
}
Also used : CachingTokenFilter(org.apache.lucene.analysis.CachingTokenFilter) Token(org.apache.lucene.analysis.Token) IntervalsSource(org.apache.lucene.queries.intervals.IntervalsSource) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream)

Example 14 with IntervalsSource

use of org.apache.lucene.queries.intervals.IntervalsSource in project OpenSearch by opensearch-project.

the class IntervalBuilderTests method testGraphTerminatesOnGap.

/**
 * Checks graph analysis when a position gap follows the synonym segment: the final token
 * carries a position increment of 2, and the expected source wraps it in
 * {@code Intervals.extend(..., 1, 0)}.
 */
public void testGraphTerminatesOnGap() throws IOException {
    // term1 term2:2/term3 term4 [] term5
    final Token[] tokens = {
        new Token("term1", 1, 2),
        new Token("term2", 1, 2, 3, 2),
        new Token("term3", 0, 2, 3),
        new Token("term4", 2, 3),
        new Token("term5", 2, 6, 7)
    };
    final CannedTokenStream canned = new CannedTokenStream(tokens);
    final IntervalsSource actual = BUILDER.analyzeText(new CachingTokenFilter(canned), -1, true);

    final IntervalsSource alternatives = Intervals.or(Intervals.term("term2"), Intervals.phrase("term3", "term4"));
    final IntervalsSource gapThenTerm5 = Intervals.extend(Intervals.term("term5"), 1, 0);
    final IntervalsSource wanted = Intervals.ordered(Intervals.term("term1"), alternatives, gapThenTerm5);
    assertEquals(wanted, actual);
}
Also used : CachingTokenFilter(org.apache.lucene.analysis.CachingTokenFilter) Token(org.apache.lucene.analysis.Token) IntervalsSource(org.apache.lucene.queries.intervals.IntervalsSource) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream)

Example 15 with IntervalsSource

use of org.apache.lucene.queries.intervals.IntervalsSource in project OpenSearch by opensearch-project.

the class IntervalBuilderTests method testPhraseWithStopword.

/**
 * Checks phrase analysis when a position is skipped between two tokens: the second token has
 * a position increment of 2, and the expected phrase wraps it in
 * {@code Intervals.extend(..., 1, 0)}.
 */
public void testPhraseWithStopword() throws IOException {
    final Token first = new Token("term1", 1, 1, 2);
    final Token afterGap = new Token("term3", 2, 5, 6);
    final CannedTokenStream canned = new CannedTokenStream(first, afterGap);
    final IntervalsSource actual = BUILDER.analyzeText(new CachingTokenFilter(canned), 0, true);

    final IntervalsSource extendedTerm3 = Intervals.extend(Intervals.term("term3"), 1, 0);
    final IntervalsSource wanted = Intervals.phrase(Intervals.term("term1"), extendedTerm3);
    assertEquals(wanted, actual);
}
Also used : CachingTokenFilter(org.apache.lucene.analysis.CachingTokenFilter) Token(org.apache.lucene.analysis.Token) IntervalsSource(org.apache.lucene.queries.intervals.IntervalsSource) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream)

Aggregations

IntervalsSource (org.apache.lucene.queries.intervals.IntervalsSource): 18; CachingTokenFilter (org.apache.lucene.analysis.CachingTokenFilter): 12; CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream): 12; Token (org.apache.lucene.analysis.Token): 11; ArrayList (java.util.ArrayList): 3; PositionIncrementAttribute (org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute): 2; TermToBytesRefAttribute (org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute): 2; BytesRef (org.apache.lucene.util.BytesRef): 2; TokenStream (org.apache.lucene.analysis.TokenStream): 1; StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer): 1; Term (org.apache.lucene.index.Term): 1; IntervalQuery (org.apache.lucene.queries.intervals.IntervalQuery): 1; GraphTokenStreamFiniteStrings (org.apache.lucene.util.graph.GraphTokenStreamFiniteStrings): 1; NamedAnalyzer (org.opensearch.index.analysis.NamedAnalyzer): 1; MappedFieldType (org.opensearch.index.mapper.MappedFieldType): 1; Result (org.opensearch.percolator.QueryAnalyzer.Result): 1; QueryAnalyzer.selectBestResult (org.opensearch.percolator.QueryAnalyzer.selectBestResult): 1