Search in sources :

Example 6 with SpanNotQuery

Use of org.apache.lucene.search.spans.SpanNotQuery in the lucene-solr project by Apache.

From the class TestPayloadSpans, method testSpanNot:

public void testSpanNot() throws Exception {
    // Match "one" followed by "three" within a slop of 5 (in order), but drop
    // any such span that also contains "two".
    SpanQuery near = new SpanNearQuery(new SpanQuery[] {
        new SpanTermQuery(new Term(PayloadHelper.FIELD, "one")),
        new SpanTermQuery(new Term(PayloadHelper.FIELD, "three"))
    }, 5, true);
    SpanNotQuery spanNot = new SpanNotQuery(near, new SpanTermQuery(new Term(PayloadHelper.FIELD, "two")));
    // Index a single document; the first "one ... three" window contains "two"
    // and should therefore be excluded by the SpanNot.
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
        newIndexWriterConfig(new PayloadAnalyzer()).setSimilarity(similarity));
    Document doc = new Document();
    doc.add(newTextField(PayloadHelper.FIELD, "one two three one four three", Field.Store.YES));
    writer.addDocument(doc);
    IndexReader reader = getOnlyLeafReader(writer.getReader());
    writer.close();
    // Expect exactly one surviving span, carrying 2 payloads.
    checkSpans(spanNot.createWeight(newSearcher(reader, false), false, 1f)
        .getSpans(reader.leaves().get(0), SpanWeight.Postings.PAYLOADS), 1, new int[] { 2 });
    reader.close();
    dir.close();
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) IndexReader(org.apache.lucene.index.IndexReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) Directory(org.apache.lucene.store.Directory)

Example 7 with SpanNotQuery

Use of org.apache.lucene.search.spans.SpanNotQuery in the lucene-solr project by Apache.

From the class SynonymTokenizer, method testNotSpanSimpleQuery:

public void testNotSpanSimpleQuery() throws Exception {
    // Highlight matches of: "shot" within 3 positions of "kennedy" (any order),
    // excluding spans that also contain "john".
    doSearching(new SpanNotQuery(new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term(FIELD_NAME, "shot")), new SpanTermQuery(new Term(FIELD_NAME, "kennedy")) }, 3, false), new SpanTermQuery(new Term(FIELD_NAME, "john"))));
    TestHighlightRunner helper = new TestHighlightRunner() {

        @Override
        public void run() throws Exception {
            mode = QUERY;
            doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);
        }
    };
    helper.run();
    // assertEquals reports expected vs. actual on failure; the former
    // assertTrue(msg, numHighlights == 4) only embedded the count in the message.
    assertEquals("Failed to find correct number of highlights", 4, numHighlights);
}
Also used : TestHighlightRunner(org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) Term(org.apache.lucene.index.Term) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery)

Example 8 with SpanNotQuery

Use of org.apache.lucene.search.spans.SpanNotQuery in the lucene-solr project by Apache.

From the class MultiTermHighlighting, method extractAutomata:

/**
 * Extracts MultiTermQueries that match the provided field predicate.
 * Returns equivalent automata that will match terms.
 *
 * @param query          the (possibly composite) query to decompose
 * @param fieldMatcher   predicate deciding which fields' multi-term queries are kept
 * @param lookInSpan     whether to descend into span-query wrappers
 * @param preRewriteFunc hook that may supply custom sub-queries for a query before
 *                       the built-in decomposition is attempted; a null result
 *                       means "no custom handling, use the built-in branches"
 * @return automata equivalent to the matched multi-term queries (possibly empty)
 */
public static CharacterRunAutomaton[] extractAutomata(Query query, Predicate<String> fieldMatcher, boolean lookInSpan, Function<Query, Collection<Query>> preRewriteFunc) {
    // TODO Lucene needs a Query visitor API!  LUCENE-3041
    List<CharacterRunAutomaton> list = new ArrayList<>();
    // The custom hook takes precedence over every built-in branch below.
    Collection<Query> customSubQueries = preRewriteFunc.apply(query);
    if (customSubQueries != null) {
        for (Query sub : customSubQueries) {
            list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
        }
    } else if (query instanceof BooleanQuery) {
        // Recurse into each clause, skipping prohibited (negated) ones.
        for (BooleanClause clause : (BooleanQuery) query) {
            if (!clause.isProhibited()) {
                list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
            }
        }
    } else if (query instanceof ConstantScoreQuery) {
        // Scoring wrappers are transparent for highlighting purposes.
        list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (query instanceof BoostQuery) {
        list.addAll(Arrays.asList(extractAutomata(((BoostQuery) query).getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (query instanceof DisjunctionMaxQuery) {
        for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) {
            list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
        }
    } else if (lookInSpan && query instanceof SpanOrQuery) {
        for (Query sub : ((SpanOrQuery) query).getClauses()) {
            list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
        }
    } else if (lookInSpan && query instanceof SpanNearQuery) {
        for (Query sub : ((SpanNearQuery) query).getClauses()) {
            list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
        }
    } else if (lookInSpan && query instanceof SpanNotQuery) {
        // Only the include clause contributes highlightable terms; the
        // exclude clause is intentionally ignored.
        list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (lookInSpan && query instanceof SpanPositionCheckQuery) {
        list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (lookInSpan && query instanceof SpanBoostQuery) {
        list.addAll(Arrays.asList(extractAutomata(((SpanBoostQuery) query).getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
        list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
    } else if (query instanceof PrefixQuery) {
        // prefix -> automaton: the literal prefix followed by any string.
        final PrefixQuery pq = (PrefixQuery) query;
        Term prefix = pq.getPrefix();
        if (fieldMatcher.test(prefix.field())) {
            list.add(new CharacterRunAutomaton(Operations.concatenate(Automata.makeString(prefix.text()), Automata.makeAnyString())) {

                @Override
                public String toString() {
                    return pq.toString();
                }
            });
        }
    } else if (query instanceof FuzzyQuery) {
        final FuzzyQuery fq = (FuzzyQuery) query;
        if (fieldMatcher.test(fq.getField())) {
            // Decode the term's UTF-16 text into code points so the
            // Levenshtein automaton operates on full characters.
            String utf16 = fq.getTerm().text();
            int[] termText = new int[utf16.codePointCount(0, utf16.length())];
            for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp)) {
                termText[j++] = cp = utf16.codePointAt(i);
            }
            int termLength = termText.length;
            // The fuzzy prefix is matched exactly; only the suffix is subject to edits.
            int prefixLength = Math.min(fq.getPrefixLength(), termLength);
            String suffix = UnicodeUtil.newString(termText, prefixLength, termText.length - prefixLength);
            LevenshteinAutomata builder = new LevenshteinAutomata(suffix, fq.getTranspositions());
            String prefix = UnicodeUtil.newString(termText, 0, prefixLength);
            Automaton automaton = builder.toAutomaton(fq.getMaxEdits(), prefix);
            list.add(new CharacterRunAutomaton(automaton) {

                @Override
                public String toString() {
                    return fq.toString();
                }
            });
        }
    } else if (query instanceof TermRangeQuery) {
        final TermRangeQuery tq = (TermRangeQuery) query;
        if (fieldMatcher.test(tq.getField())) {
            // null bound means the range is open on that side.
            final CharsRef lowerBound;
            if (tq.getLowerTerm() == null) {
                lowerBound = null;
            } else {
                lowerBound = new CharsRef(tq.getLowerTerm().utf8ToString());
            }
            final CharsRef upperBound;
            if (tq.getUpperTerm() == null) {
                upperBound = null;
            } else {
                upperBound = new CharsRef(tq.getUpperTerm().utf8ToString());
            }
            final boolean includeLower = tq.includesLower();
            final boolean includeUpper = tq.includesUpper();
            final CharsRef scratch = new CharsRef();
            @SuppressWarnings("deprecation") final Comparator<CharsRef> comparator = CharsRef.getUTF16SortedAsUTF8Comparator();
            // this is *not* an automaton, but it's very simple: run() is
            // overridden to do a direct bounds comparison instead.
            list.add(new CharacterRunAutomaton(Automata.makeEmpty()) {

                @Override
                public boolean run(char[] s, int offset, int length) {
                    // Wrap the candidate term without copying, then check both bounds.
                    scratch.chars = s;
                    scratch.offset = offset;
                    scratch.length = length;
                    if (lowerBound != null) {
                        int cmp = comparator.compare(scratch, lowerBound);
                        if (cmp < 0 || (!includeLower && cmp == 0)) {
                            return false;
                        }
                    }
                    if (upperBound != null) {
                        int cmp = comparator.compare(scratch, upperBound);
                        if (cmp > 0 || (!includeUpper && cmp == 0)) {
                            return false;
                        }
                    }
                    return true;
                }

                @Override
                public String toString() {
                    return tq.toString();
                }
            });
        }
    } else if (query instanceof AutomatonQuery) {
        // Wildcard, regexp, etc. already carry an automaton; reuse it directly.
        final AutomatonQuery aq = (AutomatonQuery) query;
        if (fieldMatcher.test(aq.getField())) {
            list.add(new CharacterRunAutomaton(aq.getAutomaton()) {

                @Override
                public String toString() {
                    return aq.toString();
                }
            });
        }
    }
    return list.toArray(new CharacterRunAutomaton[list.size()]);
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) AutomatonQuery(org.apache.lucene.search.AutomatonQuery) SpanPositionCheckQuery(org.apache.lucene.search.spans.SpanPositionCheckQuery) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) CharacterRunAutomaton(org.apache.lucene.util.automaton.CharacterRunAutomaton) ArrayList(java.util.ArrayList) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) BoostQuery(org.apache.lucene.search.BoostQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) Comparator(java.util.Comparator) AutomatonQuery(org.apache.lucene.search.AutomatonQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) SpanMultiTermQueryWrapper(org.apache.lucene.search.spans.SpanMultiTermQueryWrapper) CharacterRunAutomaton(org.apache.lucene.util.automaton.CharacterRunAutomaton) Automaton(org.apache.lucene.util.automaton.Automaton) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) LevenshteinAutomata(org.apache.lucene.util.automaton.LevenshteinAutomata) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) Term(org.apache.lucene.index.Term) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) CharsRef(org.apache.lucene.util.CharsRef) BooleanClause(org.apache.lucene.search.BooleanClause) SpanPositionCheckQuery(org.apache.lucene.search.spans.SpanPositionCheckQuery) 
PrefixQuery(org.apache.lucene.search.PrefixQuery) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery)

Example 9 with SpanNotQuery

Use of org.apache.lucene.search.spans.SpanNotQuery in the lucene-solr project by Apache.

From the class TestUnifiedHighlighterMTQ, method testSpanNot:

public void testSpanNot() throws Exception {
    // Index two small documents; both bodies contain a term matching "te*".
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Field body = new Field("body", "", fieldType);
    Document doc = new Document();
    doc.add(body);
    body.setStringValue("This is a test.");
    writer.addDocument(doc);
    body.setStringValue("Test a one sentence document.");
    writer.addDocument(doc);
    IndexReader reader = writer.getReader();
    writer.close();

    IndexSearcher searcher = newSearcher(reader);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);

    // Wildcard "te*" spans minus spans matching the absent term "bogus":
    // the exclusion must not disturb highlighting of the include clause.
    Query query = new SpanNotQuery(
            new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*"))),
            new SpanTermQuery(new Term("body", "bogus")));

    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.totalHits);
    String[] snippets = highlighter.highlight("body", query, topDocs);
    assertEquals(2, snippets.length);
    assertEquals("This is a <b>test</b>.", snippets[0]);
    assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
    reader.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) WildcardQuery(org.apache.lucene.search.WildcardQuery) SpanMultiTermQueryWrapper(org.apache.lucene.search.spans.SpanMultiTermQueryWrapper) Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) RegexpQuery(org.apache.lucene.search.RegexpQuery) SpanFirstQuery(org.apache.lucene.search.spans.SpanFirstQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 10 with SpanNotQuery

Use of org.apache.lucene.search.spans.SpanNotQuery in the Krill project by KorAP.

From the class TestIndex, method indexLucene:

/**
 * End-to-end check of plain Lucene indexing and querying over the fixture
 * documents supplied by initIndexer(): term, boolean, regexp, wildcard and
 * span queries (near/or/not), plus payload retrieval via Spans iteration.
 */
@Test
public void indexLucene() throws Exception {
    // Base analyzer for searching and indexing
    StandardAnalyzer analyzer = new StandardAnalyzer();
    // Based on
    // http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/
    // analysis/Analyzer.html?is-external=true
    // Create configuration with base analyzer
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    // Add a document 1 with the correct fields
    IndexWriter w = new IndexWriter(index, config);
    Collection docs = initIndexer();
    @SuppressWarnings("unchecked") Iterator<Map<String, String>> i = (Iterator<Map<String, String>>) docs.iterator();
    for (; i.hasNext(); ) {
        addDoc(w, i.next());
    }
    ;
    // The fixture is expected to contain exactly 3 documents.
    assertEquals(3, w.numDocs());
    w.close();
    // Check directory
    DirectoryReader reader = DirectoryReader.open(index);
    assertEquals(docs.size(), reader.maxDoc());
    assertEquals(docs.size(), reader.numDocs());
    // Check searcher
    IndexSearcher searcher = new IndexSearcher(reader);
    // textClass
    // All texts of text class "news"
    assertEquals(2, searcher.search(new TermQuery(new Term("textClass", "news")), 10).totalHits);
    // textClass
    // All texts of text class "sports"
    assertEquals(2, searcher.search(new TermQuery(new Term("textClass", "sports")), 10).totalHits);
    // TextIndex
    // All docs containing "l:nehmen"
    assertEquals(1, searcher.search(new TermQuery(new Term("text", "l:nehmen")), 10).totalHits);
    // TextIndex
    // All docs containing "s:den"
    assertEquals(2, searcher.search(new TermQuery(new Term("text", "s:den")), 10).totalHits);
    /*
        assertEquals(3,
              searcher.search(
                new TermQuery(
                  new Term("text", "T")
            ), 10
          ).totalHits
            );
        */
    // BooleanQuery
    // All docs containing "s:den" and "l:sie"
    TermQuery s_den = new TermQuery(new Term("text", "s:den"));
    TermQuery l_sie = new TermQuery(new Term("text", "l:sie"));
    BooleanQuery bool = new BooleanQuery();
    bool.add(s_den, BooleanClause.Occur.MUST);
    bool.add(l_sie, BooleanClause.Occur.MUST);
    assertEquals(1, searcher.search(bool, 10).totalHits);
    // BooleanQuery
    // All docs containing "s:den" or "l:sie"
    bool = new BooleanQuery();
    bool.add(s_den, BooleanClause.Occur.SHOULD);
    bool.add(l_sie, BooleanClause.Occur.SHOULD);
    assertEquals(2, searcher.search(bool, 10).totalHits);
    // RegexpQuery
    // All docs containing ".{4}en" (liefen und Hunden)
    RegexpQuery srquery = new RegexpQuery(new Term("text", "s:.{4}en"));
    assertEquals(2, searcher.search(srquery, 10).totalHits);
    // RegexpQuery
    // All docs containing "E." (Er) (2x)
    srquery = new RegexpQuery(new Term("text", "s:E."));
    assertEquals(2, searcher.search(srquery, 10).totalHits);
    SpanRegexQueryWrapper ssrquery = new SpanRegexQueryWrapper("text", "s:E.");
    assertEquals(2, searcher.search(ssrquery.toQuery(), 10).totalHits);
    // RegexpQuery
    // All docs containing "E." (er) (0x)
    srquery = new RegexpQuery(new Term("text", "s:e."));
    assertEquals(0, searcher.search(srquery, 10).totalHits);
    ssrquery = new SpanRegexQueryWrapper("text", "s:e.");
    assertEquals(0, searcher.search(ssrquery.toQuery(), 10).totalHits);
    // RegexpQuery
    // All docs containing "E."/i ([Ee]r) (2x)
    // The case-insensitive variant is indexed under the "i:" prefix.
    srquery = new RegexpQuery(new Term("text", "i:e."));
    assertEquals(2, searcher.search(srquery, 10).totalHits);
    ssrquery = new SpanRegexQueryWrapper("text", "s:e.", true);
    assertEquals("SpanMultiTermQueryWrapper(text:/i:e./)", ssrquery.toQuery().toString());
    assertEquals(2, searcher.search(ssrquery.toQuery(), 10).totalHits);
    // All docs containing "ng"/x (Angst) (2x)
    srquery = new RegexpQuery(new Term("text", "s:.*ng.*"));
    assertEquals(2, searcher.search(srquery, 10).totalHits);
    // Check http://comments.gmane.org/gmane.comp.jakarta.lucene.user/52283
    // for Carsten's question on wildcards
    // Wildcardquery
    // All docs containing ".{4}en" (liefen und Hunden)
    WildcardQuery swquery = new WildcardQuery(new Term("text", "s:*ng*"));
    assertEquals("text:s:*ng*", swquery.toString());
    assertEquals(2, searcher.search(swquery, 10).totalHits);
    // [base=angst]
    // NOTE(review): stq is built but never searched; the following assertion
    // reuses srquery from above — confirm intent.
    SpanTermQuery stq = new SpanTermQuery(new Term("text", "l:angst"));
    assertEquals(2, searcher.search(srquery, 10).totalHits);
    // vor Angst
    // [orth=vor][orth=Angst]
    SpanNearQuery snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "s:vor")), new SpanTermQuery(new Term("text", "s:Angst")) }, 1, true);
    assertEquals(1, searcher.search(snquery, 10).totalHits);
    // Spannearquery [p:VVFIN][]{,5}[m:nom:sg:fem]
    snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "p:VVFIN")), new SpanSegmentQueryWrapper("text", "m:c:nom", "m:n:sg", "m:g:fem").toQuery() }, // slop
    5, // inOrder
    true);
    assertEquals(1, searcher.search(snquery, 10).totalHits);
    // Spannearquery [p:VVFIN][m:acc:sg:masc]
    // Nested slop -1 near-queries emulate co-occurrence at the same position.
    snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "p:VVFIN")), new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "m:c:acc")), new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "m:n:sg")), new SpanTermQuery(new Term("text", "m:g:masc")) }, -1, false) }, // slop
    -1, // inOrder
    false) // new SpanTermQuery(new Term("text", "m:-acc:--sg:masc"))
    }, // slop
    0, // inOrder
    true);
    assertEquals(1, searcher.search(snquery, 10).totalHits);
    // Spannearquery [p:VVFIN|m:3:sg:past:ind]
    // Exact match!
    snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "p:VVFIN")), new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "m:p:3")), new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "m:n:sg")), new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "m:t:past")), new SpanTermQuery(new Term("text", "m:m:ind")) }, -1, false) }, -1, false) }, -1, false) }, // slop
    -1, // inOrder
    false);
    assertEquals(2, searcher.search(snquery, 10).totalHits);
    // To make sure, this is not equal:
    // Spannearquery [p:VVFIN & m:3:sg:past:ind]
    // Exact match!
    // Maybe it IS equal
    snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "p:VVFIN")), new SpanTermQuery(new Term("text", "m:p:3")), new SpanTermQuery(new Term("text", "m:n:sg")), new SpanTermQuery(new Term("text", "m:t:past")), new SpanTermQuery(new Term("text", "m:m:ind")) }, // slop
    -1, // inOrder
    false);
    assertNotEquals(2, searcher.search(snquery, 10).totalHits);
    // assertEquals(2, searcher.search(snquery, 10).totalHits);
    // Spannearquery [p:VVFIN & m:3:sg & past:ind]
    SpanSegmentQueryWrapper sniquery = new SpanSegmentQueryWrapper("text", "p:VVFIN", "m:p:3", "m:n:sg", "m:t:past", "m:m:ind");
    assertEquals(2, searcher.search(sniquery.toQuery(), 10).totalHits);
    // Todo:
    /*
        sniquery = new SpanSegmentQuery(
              "text",
          "p:VVFIN",
          "m:p:3",
          "m:n:sg",
          "m:t:past",
          "m:m:ind"
            );
        */
    // Spannearquery [p:VVFIN][]{,5}[m:nom:sg:fem]
    snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "p:VVFIN")), new SpanSegmentQueryWrapper("text", "m:c:nom", "m:n:sg", "m:g:fem").toQuery() }, // slop
    5, // inOrder
    true);
    assertEquals(1, searcher.search(snquery, 10).totalHits);
    sniquery = new SpanSegmentQueryWrapper("text", "p:VVFIN", "m:p:3", "m:t:past", "m:m:ind", "m:n:sg");
    assertEquals(2, searcher.search(sniquery.toQuery(), 10).totalHits);
    // [p = VVFIN & m:p = 3 & m:t = past & m:n != pl] or
    // [p = VVFIN & m:p = 3 & m:t = past & !m:n = pl]
    // TODO: Problem: What should happen in case the category does not exist?
    // possible solution: & ( m:n != pl & exists(m:n))
    sniquery = new SpanSegmentQueryWrapper("text", "p:VVFIN", "m:p:3", "m:t:past");
    SpanQuery snqquery = new SpanNotQuery(sniquery.toQuery(), new SpanTermQuery(new Term("text", "m:n:pl")));
    assertEquals(2, searcher.search(snqquery, 10).totalHits);
    // [p = NN & (m:c: = dat | m:c = acc)]
    snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "p:NN")), new SpanOrQuery(new SpanTermQuery(new Term("text", "m:c:nom")), new SpanTermQuery(new Term("text", "m:c:acc"))) }, -1, false);
    // NOTE(review): this asserts on snqquery (the SpanNotQuery above), not on
    // the snquery just built — the span-near/or query is never searched; also
    // the comment above says m:c=dat while the code uses m:c:nom. Confirm intent.
    assertEquals(2, searcher.search(snqquery, 10).totalHits);
    // [p = NN & !(m:c: = nom | m:c = acc)]
    snqquery = new SpanNotQuery(new SpanTermQuery(new Term("text", "p:NN")), new SpanOrQuery(new SpanTermQuery(new Term("text", "m:c:nom")), new SpanTermQuery(new Term("text", "m:c:acc"))));
    assertEquals(1, searcher.search(snqquery, 10).totalHits);
    // [p = NN & !(m:c = nom)]
    snqquery = new SpanNotQuery(new SpanTermQuery(new Term("text", "p:NN")), new SpanTermQuery(new Term("text", "m:c:nom")));
    assertEquals(3, searcher.search(snqquery, 10).totalHits);
    // [p=NN & !(m:c = acc)]
    snqquery = new SpanNotQuery(new SpanTermQuery(new Term("text", "p:NN")), new SpanTermQuery(new Term("text", "m:c:acc")));
    assertEquals(2, searcher.search(snqquery, 10).totalHits);
    // [p=PPER][][p=ART]
    // "T" appears to act as an any-token placeholder in this index — TODO confirm.
    snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "p:PPER")), new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "T")), new SpanTermQuery(new Term("text", "p:ART")) }, 0, true) }, 0, true);
    assertEquals(1, searcher.search(snquery, 10).totalHits);
    // Todo:
    // [orth=się][]{2,4}[base=bać]
    // [orth=się][orth!="[.!?,:]"]{,5}[base=bać]|[base=bać][base="on|ja|ty|my|wy"]?[orth=się]
    // [pos=subst & orth="a.*"]{2}
    // [tag=subst:sg:nom:n]
    // [case==acc & case==gen] ??
    // [case~acc & case~gen]
    // [case~~acc]
    // [base=bać][orth!=się]+[orth=się] within s
    // [][][p:VAFIN] within s
    // [][p:VAFIN] within s
    // [][][p:VAFIN]
    snquery = new SpanNearQuery(new SpanQuery[] { new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "T")), new SpanTermQuery(new Term("text", "T")) }, 0, true), new SpanTermQuery(new Term("text", "p:VAFIN")) }, 0, true);
    assertEquals(1, searcher.search(snquery, 10).totalHits);
    /*
        http://stackoverflow.com/questions/1311199/finding-the-position-of-search-hits-from-lucene
        */
    // Manually iterate the matching spans and concatenate their payloads.
    StringBuilder payloadString = new StringBuilder();
    Map<Term, TermContext> termContexts = new HashMap<>();
    for (LeafReaderContext atomic : reader.leaves()) {
        Bits bitset = atomic.reader().getLiveDocs();
        // Spans spans = NearSpansOrdered();
        Spans spans = snquery.getSpans(atomic, bitset, termContexts);
        while (spans.next()) {
            int docid = atomic.docBase + spans.doc();
            if (spans.isPayloadAvailable()) {
                for (byte[] payload : spans.getPayload()) {
                    /* retrieve payload for current matching span */
                    payloadString.append(new String(payload));
                    payloadString.append(" | ");
                }
                ;
            }
            ;
        }
        ;
    }
    ;
    // assertEquals(33, payloadString.length());
    assertEquals(0, payloadString.length());
    // [][][p:VAFIN]
    // without collecting payloads
    snquery = new SpanNearQuery(new SpanQuery[] { new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "T")), new SpanTermQuery(new Term("text", "T")) }, 0, true, false), new SpanTermQuery(new Term("text", "p:VAFIN")) }, 0, true, false);
    assertEquals(1, searcher.search(snquery, 10).totalHits);
    payloadString = new StringBuilder();
    termContexts = new HashMap<>();
    for (LeafReaderContext atomic : reader.leaves()) {
        Bits bitset = atomic.reader().getLiveDocs();
        // Spans spans = NearSpansOrdered();
        Spans spans = snquery.getSpans(atomic, bitset, termContexts);
        while (spans.next()) {
            int docid = atomic.docBase + spans.doc();
            for (byte[] payload : spans.getPayload()) {
                /* retrieve payload for current matching span */
                payloadString.append(new String(payload));
                payloadString.append(" | ");
            }
            ;
        }
        ;
    }
    ;
    assertEquals(0, payloadString.length());
    // [][][p:VAFIN] in s
    // ([e:s:<][]*[T] | [T & e:s:<]) [T] ([p:VAFIN & e:s:>] | [T][]*[e:s:>]
    /*
        
        SpanSegmentWithinQuery ssequery = new SpanSegmentWithinQuery(
            "text","s", new SpanSegmentSequenceQuery("text", "T", "T", "p:VAFIN")
            );
        assertEquals(0, searcher.search(ssequery.toQuery(), 10).totalHits);
        
        payloadString = new StringBuilder();
        termContexts = new HashMap<>();
        for (LeafReaderContext atomic : reader.leaves()) {
            Bits bitset = atomic.reader().getLiveDocs();
            // Spans spans = NearSpansOrdered();
            Spans spans = ssequery.toQuery().getSpans(atomic, bitset, termContexts);
        
            while (spans.next()) {
        	int docid = atomic.docBase + spans.doc();
        	for (byte[] payload : spans.getPayload()) {
        	/// retrieve payload for current matching span
        	    payloadString.append(new String(payload));
        	    payloadString.append(" | ");
        	};
            };
        };
        assertEquals(0, payloadString.length(), 1);
        
        ssequery = new SpanSegmentWithinQuery(
            "text","s", new SpanSegmentSequenceQuery("text", "T", "p:VAFIN")
            );
        
        assertEquals("for " + ssequery.toQuery(),
        	     1, searcher.search(ssequery.toQuery(), 10).totalHits);
        
        payloadString = new StringBuilder();
        termContexts = new HashMap<>();
        for (LeafReaderContext atomic : reader.leaves()) {
            Bits bitset = atomic.reader().getLiveDocs();
            // Spans spans = NearSpansOrdered();
            Spans spans = ssequery.toQuery().getSpans(atomic, bitset, termContexts);
        
            while (spans.next()) {
        	int docid = atomic.docBase + spans.doc();
        	for (byte[] payload : spans.getPayload()) {
        	    // retrieve payload for current matching span
        	    payloadString.append(new String(payload));
        	    payloadString.append(" | ");
        	};
        	fail("Doc: " + docid + " with " + spans.start() + "-" + spans.end() + " || " + payloadString.toString());
            };
        };
        assertEquals(20, payloadString.length());
        
        */
    // --------------------______>
    // Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), ssequery.toQuery());
    /*
        TopDocs topDocs = is.search(snq, 1);
        Set<String> payloadSet = new HashSet<String>();
        for (int i = 0; i < topDocs.scoreDocs.length; i++) {
          while (spans.next()) {
            Collection<byte[]> payloads = spans.getPayload();
        
            for (final byte [] payload : payloads) {
              payloadSet.add(new String(payload, "UTF-8"));
            }
          }
        }
        */
    /*
        Alternative:
        IndexReader reader = writer.getReader();
        writer.close();
        IndexSearcher searcher = newSearcher(reader);
        
        PayloadSpanUtil psu = new PayloadSpanUtil(searcher.getTopReaderContext());
        
        Collection<byte[]> payloads = psu.getPayloadsForQuery(new TermQuery(new Term(PayloadHelper.FIELD, "rr")));
        if(VERBOSE)
          System.out.println("Num payloads:" + payloads.size());
        for (final byte [] bytes : payloads) {
          if(VERBOSE)
            System.out.println(new String(bytes, "UTF-8"));
        }
        */
    /* new: */
    // PayloadHelper helper = new PayloadHelper();
    // Map<Term, TermContext> termContexts = new HashMap<>();
    // Spans spans;
    // spans = snquery.getSpans(searcher.getIndexReader());
    // searcher = helper.setUp(similarity, 1000);
    /*
        IndexReader reader = search.getReader(querycontainer.getFoundry());
        Spans luceneSpans;
        Bits bitset = atomic.reader().getLiveDocs();
        for (byte[] payload : luceneSpans.getPayload())
        
        /* Iterate over all matching documents */
    /*
            while (luceneSpans.next() && total < config.getMaxhits()) {
        	Span matchSpan;
        	StringBuilder payloadString = new StringBuilder();
        	int docid = atomic.docBase + luceneSpans.doc();
        	String docname = search.retrieveDocname(docid,
        					querycontainer.getFoundry());
        					total++;
        
        	for (byte[] payload : luceneSpans.getPayload())
        */
    /* retrieve payload for current matching span */
    // payloadString.append(new String(payload));
    /* create span containing result */
    /*
        		matchSpan = new Span(docname);
        		matchSpan.setIndexdocid(docid);
        		matchSpan.setLayer(querycontainer.getLayer());
        		matchSpan.storePayloads(payloadString.toString());
        		matchSpans.add(matchSpan);
        */
    /*
         * topdocs = searcher.search(new ConstantScoreQuery(corpusQ add
         * position to list of positions to be considered for later
         * searches
         */
    /*
        validValues.put(docname,
        		matchSpan.getPayload(config.getPrefix()));
        }
        */
    // Todo: API built via add(), typed for queries and strings
    // SpanPayloadCheckQuery for sentences!
    /* Support regular expression in SpanSegmentQuery */
    // new Regexp();
    // new Term();
    /*
          Maybe: spanSegmentQuery(new Term(), new Wildcard(), new Regex());
         */
    // And Not ->
    // SpanTermDiffQuery
    /*
        SpanNearQuery poquery = new SpanNearQuery(
        
        );
        */
    reader.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) TermContext(org.apache.lucene.index.TermContext) RegexpQuery(org.apache.lucene.search.RegexpQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) Spans(org.apache.lucene.search.spans.Spans) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) MultiTerm(de.ids_mannheim.korap.index.MultiTerm) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanRegexQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanRegexQueryWrapper) SpanSegmentQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper) IndexWriter(org.apache.lucene.index.IndexWriter) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Bits(org.apache.lucene.util.Bits) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test) Test(de.ids_mannheim.korap.Test)

Aggregations

SpanNotQuery (org.apache.lucene.search.spans.SpanNotQuery)10 Term (org.apache.lucene.index.Term)7 SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery)6 SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)6 SpanQuery (org.apache.lucene.search.spans.SpanQuery)5 SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery)4 Document (org.apache.lucene.document.Document)3 BooleanQuery (org.apache.lucene.search.BooleanQuery)3 IndexSearcher (org.apache.lucene.search.IndexSearcher)3 Query (org.apache.lucene.search.Query)3 SpanBoostQuery (org.apache.lucene.search.spans.SpanBoostQuery)3 ArrayList (java.util.ArrayList)2 IndexReader (org.apache.lucene.index.IndexReader)2 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)2 BoostQuery (org.apache.lucene.search.BoostQuery)2 ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery)2 DisjunctionMaxQuery (org.apache.lucene.search.DisjunctionMaxQuery)2 FuzzyQuery (org.apache.lucene.search.FuzzyQuery)2 PrefixQuery (org.apache.lucene.search.PrefixQuery)2 RegexpQuery (org.apache.lucene.search.RegexpQuery)2