Search in sources :

Example 11 with SpanQuery

use of org.apache.lucene.search.spans.SpanQuery in project lucene-solr by apache.

the class TokenSourcesTest method testOverlapWithOffsetExactPhrase.

public void testOverlapWithOffsetExactPhrase() throws IOException, InvalidTokenOffsetsException {
    final String TEXT = "the fox did not jump";
    final Directory directory = newDirectory();
    final IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(null));
    try {
        final Document document = new Document();
        FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
        customType.setStoreTermVectors(true);
        // no positions!
        customType.setStoreTermVectorOffsets(true);
        document.add(new Field(FIELD, new OverlappingTokenStream(), customType));
        indexWriter.addDocument(document);
    } finally {
        indexWriter.close();
    }
    final IndexReader indexReader = DirectoryReader.open(directory);
    try {
        assertEquals(1, indexReader.numDocs());
        final IndexSearcher indexSearcher = newSearcher(indexReader);
        // final DisjunctionMaxQuery query = new DisjunctionMaxQuery(1);
        // query.add(new SpanTermQuery(new Term(FIELD, "{fox}")));
        // query.add(new SpanTermQuery(new Term(FIELD, "fox")));
        final Query phraseQuery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term(FIELD, "the")), new SpanTermQuery(new Term(FIELD, "fox")) }, 0, true);
        TopDocs hits = indexSearcher.search(phraseQuery, 1);
        assertEquals(1, hits.totalHits);
        final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(phraseQuery));
        final TokenStream tokenStream = TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
        assertEquals("<B>the fox</B> did not jump", highlighter.getBestFragment(tokenStream, TEXT));
    } finally {
        indexReader.close();
        directory.close();
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Query(org.apache.lucene.search.Query) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) IndexReader(org.apache.lucene.index.IndexReader) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) Directory(org.apache.lucene.store.Directory)

Example 12 with SpanQuery

use of org.apache.lucene.search.spans.SpanQuery in project lucene-solr by apache.

the class TokenSourcesTest method testOverlapWithPositionsAndOffsetExactPhrase.

public void testOverlapWithPositionsAndOffsetExactPhrase() throws IOException, InvalidTokenOffsetsException {
    final String TEXT = "the fox did not jump";
    final Directory directory = newDirectory();
    final IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(null));
    try {
        final Document document = new Document();
        FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
        customType.setStoreTermVectors(true);
        customType.setStoreTermVectorPositions(true);
        customType.setStoreTermVectorOffsets(true);
        document.add(new Field(FIELD, new OverlappingTokenStream(), customType));
        indexWriter.addDocument(document);
    } finally {
        indexWriter.close();
    }
    final IndexReader indexReader = DirectoryReader.open(directory);
    try {
        assertEquals(1, indexReader.numDocs());
        final IndexSearcher indexSearcher = newSearcher(indexReader);
        // final DisjunctionMaxQuery query = new DisjunctionMaxQuery(1);
        // query.add(new SpanTermQuery(new Term(FIELD, "the")));
        // query.add(new SpanTermQuery(new Term(FIELD, "fox")));
        final Query phraseQuery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term(FIELD, "the")), new SpanTermQuery(new Term(FIELD, "fox")) }, 0, true);
        TopDocs hits = indexSearcher.search(phraseQuery, 1);
        assertEquals(1, hits.totalHits);
        final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(phraseQuery));
        final TokenStream tokenStream = TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
        assertEquals("<B>the fox</B> did not jump", highlighter.getBestFragment(tokenStream, TEXT));
    } finally {
        indexReader.close();
        directory.close();
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Query(org.apache.lucene.search.Query) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) IndexReader(org.apache.lucene.index.IndexReader) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) Directory(org.apache.lucene.store.Directory)

Example 13 with SpanQuery

use of org.apache.lucene.search.spans.SpanQuery in project lucene-solr by apache.

the class SpanNearClauseFactory method makeSpanClause.

public SpanQuery makeSpanClause() {
    SpanQuery[] spanQueries = new SpanQuery[size()];
    Iterator<SpanQuery> sqi = weightBySpanQuery.keySet().iterator();
    int i = 0;
    while (sqi.hasNext()) {
        SpanQuery sq = sqi.next();
        float boost = weightBySpanQuery.get(sq);
        if (boost != 1f) {
            sq = new SpanBoostQuery(sq, boost);
        }
        spanQueries[i++] = sq;
    }
    if (spanQueries.length == 1)
        return spanQueries[0];
    else
        return new SpanOrQuery(spanQueries);
}
Also used : SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery)

Example 14 with SpanQuery

use of org.apache.lucene.search.spans.SpanQuery in project lucene-solr by apache.

the class PayloadSpanUtil method queryToSpanQuery.

private void queryToSpanQuery(Query query, Collection<byte[]> payloads) throws IOException {
    if (query instanceof BooleanQuery) {
        for (BooleanClause clause : (BooleanQuery) query) {
            if (!clause.isProhibited()) {
                queryToSpanQuery(clause.getQuery(), payloads);
            }
        }
    } else if (query instanceof PhraseQuery) {
        Term[] phraseQueryTerms = ((PhraseQuery) query).getTerms();
        SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
        for (int i = 0; i < phraseQueryTerms.length; i++) {
            clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
        }
        int slop = ((PhraseQuery) query).getSlop();
        boolean inorder = false;
        if (slop == 0) {
            inorder = true;
        }
        SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder);
        getPayloads(payloads, sp);
    } else if (query instanceof TermQuery) {
        SpanTermQuery stq = new SpanTermQuery(((TermQuery) query).getTerm());
        getPayloads(payloads, stq);
    } else if (query instanceof SpanQuery) {
        getPayloads(payloads, (SpanQuery) query);
    } else if (query instanceof DisjunctionMaxQuery) {
        for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext(); ) {
            queryToSpanQuery(iterator.next(), payloads);
        }
    } else if (query instanceof MultiPhraseQuery) {
        final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
        final Term[][] termArrays = mpq.getTermArrays();
        final int[] positions = mpq.getPositions();
        if (positions.length > 0) {
            int maxPosition = positions[positions.length - 1];
            for (int i = 0; i < positions.length - 1; ++i) {
                if (positions[i] > maxPosition) {
                    maxPosition = positions[i];
                }
            }
            @SuppressWarnings({ "rawtypes", "unchecked" }) final List<Query>[] disjunctLists = new List[maxPosition + 1];
            int distinctPositions = 0;
            for (int i = 0; i < termArrays.length; ++i) {
                final Term[] termArray = termArrays[i];
                List<Query> disjuncts = disjunctLists[positions[i]];
                if (disjuncts == null) {
                    disjuncts = (disjunctLists[positions[i]] = new ArrayList<>(termArray.length));
                    ++distinctPositions;
                }
                for (final Term term : termArray) {
                    disjuncts.add(new SpanTermQuery(term));
                }
            }
            int positionGaps = 0;
            int position = 0;
            final SpanQuery[] clauses = new SpanQuery[distinctPositions];
            for (int i = 0; i < disjunctLists.length; ++i) {
                List<Query> disjuncts = disjunctLists[i];
                if (disjuncts != null) {
                    clauses[position++] = new SpanOrQuery(disjuncts.toArray(new SpanQuery[disjuncts.size()]));
                } else {
                    ++positionGaps;
                }
            }
            final int slop = mpq.getSlop();
            final boolean inorder = (slop == 0);
            SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
            getPayloads(payloads, sp);
        }
    }
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) Query(org.apache.lucene.search.Query) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) ArrayList(java.util.ArrayList) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) Term(org.apache.lucene.index.Term) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) BooleanClause(org.apache.lucene.search.BooleanClause) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) ArrayList(java.util.ArrayList) List(java.util.List) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery)

Example 15 with SpanQuery

use of org.apache.lucene.search.spans.SpanQuery in project lucene-solr by apache.

the class TestUnifiedHighlighterExtensibility method testUnifiedHighlighterExtensibility.

/**
   * This test is for maintaining the extensibility of the UnifiedHighlighter
   * for customizations out of package.
   */
@Test
public void testUnifiedHighlighterExtensibility() {
    final int maxLength = 1000;
    UnifiedHighlighter uh = new UnifiedHighlighter(null, new MockAnalyzer(random())) {

        @Override
        protected Map<String, Object[]> highlightFieldsAsObjects(String[] fieldsIn, Query query, int[] docIdsIn, int[] maxPassagesIn) throws IOException {
            return super.highlightFieldsAsObjects(fieldsIn, query, docIdsIn, maxPassagesIn);
        }

        @Override
        protected OffsetSource getOffsetSource(String field) {
            return super.getOffsetSource(field);
        }

        @Override
        protected BreakIterator getBreakIterator(String field) {
            return super.getBreakIterator(field);
        }

        @Override
        protected PassageScorer getScorer(String field) {
            return super.getScorer(field);
        }

        @Override
        protected PassageFormatter getFormatter(String field) {
            return super.getFormatter(field);
        }

        @Override
        public Analyzer getIndexAnalyzer() {
            return super.getIndexAnalyzer();
        }

        @Override
        public IndexSearcher getIndexSearcher() {
            return super.getIndexSearcher();
        }

        @Override
        protected int getMaxNoHighlightPassages(String field) {
            return super.getMaxNoHighlightPassages(field);
        }

        @Override
        protected Boolean requiresRewrite(SpanQuery spanQuery) {
            return super.requiresRewrite(spanQuery);
        }

        @Override
        protected LimitedStoredFieldVisitor newLimitedStoredFieldsVisitor(String[] fields) {
            return super.newLimitedStoredFieldsVisitor(fields);
        }

        @Override
        protected List<CharSequence[]> loadFieldValues(String[] fields, DocIdSetIterator docIter, int cacheCharsThreshold) throws IOException {
            return super.loadFieldValues(fields, docIter, cacheCharsThreshold);
        }

        @Override
        protected FieldHighlighter getFieldHighlighter(String field, Query query, Set<Term> allTerms, int maxPassages) {
            // THIS IS A COPY of the superclass impl; but use CustomFieldHighlighter
            BytesRef[] terms = filterExtractedTerms(getFieldMatcher(field), allTerms);
            Set<HighlightFlag> highlightFlags = getFlags(field);
            PhraseHelper phraseHelper = getPhraseHelper(field, query, highlightFlags);
            CharacterRunAutomaton[] automata = getAutomata(field, query, highlightFlags);
            OffsetSource offsetSource = getOptimizedOffsetSource(field, terms, phraseHelper, automata);
            return new CustomFieldHighlighter(field, getOffsetStrategy(offsetSource, field, terms, phraseHelper, automata, highlightFlags), new SplittingBreakIterator(getBreakIterator(field), UnifiedHighlighter.MULTIVAL_SEP_CHAR), getScorer(field), maxPassages, getMaxNoHighlightPassages(field), getFormatter(field));
        }

        @Override
        protected FieldOffsetStrategy getOffsetStrategy(OffsetSource offsetSource, String field, BytesRef[] terms, PhraseHelper phraseHelper, CharacterRunAutomaton[] automata, Set<HighlightFlag> highlightFlags) {
            return super.getOffsetStrategy(offsetSource, field, terms, phraseHelper, automata, highlightFlags);
        }

        @Override
        public int getMaxLength() {
            return maxLength;
        }
    };
    assertEquals(uh.getMaxLength(), maxLength);
}
Also used : Set(java.util.Set) Query(org.apache.lucene.search.Query) SpanQuery(org.apache.lucene.search.spans.SpanQuery) CharacterRunAutomaton(org.apache.lucene.util.automaton.CharacterRunAutomaton) PhraseHelper(org.apache.lucene.search.uhighlight.PhraseHelper) SpanQuery(org.apache.lucene.search.spans.SpanQuery) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) UnifiedHighlighter(org.apache.lucene.search.uhighlight.UnifiedHighlighter) SplittingBreakIterator(org.apache.lucene.search.uhighlight.SplittingBreakIterator) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) BytesRef(org.apache.lucene.util.BytesRef) Test(org.junit.Test)

Aggregations

SpanQuery (org.apache.lucene.search.spans.SpanQuery)316 Test (org.junit.Test)217 Term (org.apache.lucene.index.Term)127 SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)126 KrillIndex (de.ids_mannheim.korap.KrillIndex)110 SpanQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanQueryWrapper)65 SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery)52 Result (de.ids_mannheim.korap.response.Result)48 QueryBuilder (de.ids_mannheim.korap.query.QueryBuilder)39 SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery)35 ArrayList (java.util.ArrayList)34 SpanElementQuery (de.ids_mannheim.korap.query.SpanElementQuery)32 SpanNextQuery (de.ids_mannheim.korap.query.SpanNextQuery)31 Query (org.apache.lucene.search.Query)30 IndexReader (org.apache.lucene.index.IndexReader)19 BooleanQuery (org.apache.lucene.search.BooleanQuery)19 IndexSearcher (org.apache.lucene.search.IndexSearcher)18 SpanBoostQuery (org.apache.lucene.search.spans.SpanBoostQuery)18 SpanMultiTermQueryWrapper (org.apache.lucene.search.spans.SpanMultiTermQueryWrapper)18 Document (org.apache.lucene.document.Document)17