Search in sources :

Example 81 with WildcardQuery

use of org.apache.lucene.search.WildcardQuery in project rubia-forums by flashboss.

In the class ForumsSearchModuleImpl, the method findTopics:

/**
 * Runs a full-text search over indexed posts and returns one page of the
 * distinct topics those posts belong to.
 *
 * @param criteria keywords, filters, sorting and paging options; must not be null
 * @return a page of topics plus the total number of distinct matching topics,
 *         or {@code null} when the keyword string cannot be parsed by Lucene
 * @throws ModuleException when the underlying search or persistence layer fails
 * @throws IllegalArgumentException when {@code criteria} is null
 */
@SuppressWarnings("unchecked")
public ResultPage<Topic> findTopics(SearchCriteria criteria) throws ModuleException {
    if (criteria != null) {
        try {
            EntityManager session = getSession();
            FullTextSession fullTextSession = getFullTextSession((Session) session.getDelegate());
            Builder builder = new Builder();
            // Keyword clause: parsed against the fields selected by the Searching mode.
            String keywords = criteria.getKeywords();
            if (keywords != null && keywords.length() != 0) {
                String[] fields = null;
                Searching searching = Searching.valueOf(criteria.getSearching());
                switch(searching) {
                    case TITLE_MSG:
                        fields = new String[] { "message.text", "topic.subject" };
                        break;
                    case MSG:
                        fields = new String[] { "message.text" };
                        break;
                }
                // NOTE(review): fields stays null for any other Searching value, which
                // would make MultiFieldQueryParser fail — assumes the enum only has
                // TITLE_MSG and MSG; confirm if new values are ever added.
                MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, new StandardAnalyzer());
                builder.add(parser.parse(keywords), MUST);
            }
            // Optional exact-match filters on forum and category.
            String forumId = criteria.getForum();
            if (forumId != null && forumId.length() != 0) {
                builder.add(new TermQuery(new Term("topic.forum.id", forumId)), MUST);
            }
            String categoryId = criteria.getCategory();
            if (categoryId != null && categoryId.length() != 0) {
                builder.add(new TermQuery(new Term("topic.forum.category.id", categoryId)), MUST);
            }
            // Author filter supports Lucene wildcards ('*' / '?') in the user id.
            String userName = criteria.getAuthor();
            if (userName != null && userName.length() != 0) {
                builder.add(new WildcardQuery(new Term("poster.userId", userName)), MUST);
            }
            String timePeriod = criteria.getTimePeriod();
            if (timePeriod != null && timePeriod.length() != 0) {
                addPostTimeQuery(builder, TimePeriod.valueOf(timePeriod));
            }
            FullTextQuery fullTextQuery = fullTextSession.createFullTextQuery(builder.build(), Post.class);
            SortOrder sortOrder = SortOrder.valueOf(criteria.getSortOrder());
            // FIX: guard against a missing sort-by value, consistently with findPosts,
            // instead of letting valueOf(null) fail and surface as a ModuleException.
            String sortByStr = criteria.getSortBy();
            SortBy sortBy = null;
            if (sortByStr != null)
                sortBy = valueOf(sortByStr);
            fullTextQuery.setSort(getSort(sortBy, sortOrder));
            // Only the owning topic id is projected; topics are loaded separately below.
            fullTextQuery.setProjection("topic.id");
            LinkedHashSet<Integer> topicIds = new LinkedHashSet<Integer>();
            LinkedHashSet<Integer> topicToDispIds = new LinkedHashSet<Integer>();
            int start = criteria.getPageSize() * criteria.getPageNumber();
            int end = start + criteria.getPageSize();
            int index = 0;
            // De-duplicate topic ids (many posts share a topic) while preserving the
            // sort order, and keep only the ids that fall inside the requested page.
            for (Object o : fullTextQuery.list()) {
                Integer id = (Integer) ((Object[]) o)[0];
                if (topicIds.add(id)) {
                    if (index >= start && index < end) {
                        topicToDispIds.add(id);
                    }
                    index++;
                }
            }
            List<Topic> topics = null;
            if (topicToDispIds.size() > 0) {
                Query q = session.createQuery("from Topic as t join fetch t.poster where t.id IN ( :topicIds )");
                q.setParameter("topicIds", topicToDispIds);
                List<Topic> results = q.getResultList();
                // Re-order the fetched topics to match the search-result order,
                // since the IN query gives no ordering guarantee.
                topics = new LinkedList<Topic>();
                for (Integer id : topicToDispIds) {
                    for (Topic topic : results) {
                        if (id.equals(topic.getId())) {
                            topics.add(topic);
                            break;
                        }
                    }
                }
            }
            ResultPage<Topic> resultPage = new ResultPage<Topic>();
            resultPage.setPage(topics);
            resultPage.setResultSize(topicIds.size());
            return resultPage;
        } catch (ParseException e) {
            // Unparsable keyword syntax is deliberately reported as "no result page".
            return null;
        } catch (Exception e) {
            throw new ModuleException(e.getMessage(), e);
        }
    } else {
        throw new IllegalArgumentException("criteria cannot be null");
    }
}
Also used : LinkedHashSet(java.util.LinkedHashSet) WildcardQuery(org.apache.lucene.search.WildcardQuery) FullTextSession(org.hibernate.search.FullTextSession) Search.getFullTextSession(org.hibernate.search.Search.getFullTextSession) FullTextQuery(org.hibernate.search.FullTextQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) Query(javax.persistence.Query) TermQuery(org.apache.lucene.search.TermQuery) SortBy(it.vige.rubia.search.SortBy) Builder(org.apache.lucene.search.BooleanQuery.Builder) ResultPage(it.vige.rubia.search.ResultPage) Topic(it.vige.rubia.model.Topic) ModuleException(it.vige.rubia.ModuleException) TermQuery(org.apache.lucene.search.TermQuery) MultiFieldQueryParser(org.apache.lucene.queryparser.classic.MultiFieldQueryParser) SortOrder(it.vige.rubia.search.SortOrder) Term(org.apache.lucene.index.Term) ParseException(org.apache.lucene.queryparser.classic.ParseException) ModuleException(it.vige.rubia.ModuleException) EntityManager(javax.persistence.EntityManager) Searching(it.vige.rubia.search.Searching) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) ParseException(org.apache.lucene.queryparser.classic.ParseException) FullTextQuery(org.hibernate.search.FullTextQuery)

Example 82 with WildcardQuery

use of org.apache.lucene.search.WildcardQuery in project rubia-forums by flashboss.

In the class ForumsSearchModuleImpl, the method findPosts:

/**
 * Full-text search over indexed posts, returning one page of matching posts.
 *
 * @param criteria keywords, filters, sorting and paging options; must not be null
 * @return one page of matching posts and the total hit count, or {@code null}
 *         when the keyword string cannot be parsed by Lucene
 * @throws ModuleException when the underlying search layer fails
 * @throws IllegalArgumentException when {@code criteria} is null
 */
@SuppressWarnings("unchecked")
public ResultPage<Post> findPosts(SearchCriteria criteria) throws ModuleException {
    if (criteria == null) {
        throw new IllegalArgumentException("criteria cannot be null");
    }
    try {
        EntityManager em = getSession();
        FullTextSession textSession = getFullTextSession((Session) em.getDelegate());
        Builder boolQuery = new Builder();
        String keywords = criteria.getKeywords();
        if (keywords != null && !keywords.isEmpty()) {
            // Pick the indexed fields the keywords are matched against.
            String[] searchFields;
            switch (Searching.valueOf(criteria.getSearching())) {
                case TITLE_MSG:
                    searchFields = new String[] { "message.text", "topic.subject" };
                    break;
                case MSG:
                    searchFields = new String[] { "message.text" };
                    break;
                default:
                    searchFields = null;
                    break;
            }
            MultiFieldQueryParser keywordParser = new MultiFieldQueryParser(searchFields, new StandardAnalyzer());
            boolQuery.add(keywordParser.parse(keywords), MUST);
        }
        // Optional exact-match filters on forum and category.
        String forumId = criteria.getForum();
        if (forumId != null && !forumId.isEmpty()) {
            boolQuery.add(new TermQuery(new Term("topic.forum.id", forumId)), MUST);
        }
        String categoryId = criteria.getCategory();
        if (categoryId != null && !categoryId.isEmpty()) {
            boolQuery.add(new TermQuery(new Term("topic.forum.category.id", categoryId)), MUST);
        }
        // Author filter may contain Lucene wildcards ('*' / '?').
        String author = criteria.getAuthor();
        if (author != null && !author.isEmpty()) {
            boolQuery.add(new WildcardQuery(new Term("poster.userId", author)), MUST);
        }
        String timePeriod = criteria.getTimePeriod();
        if (timePeriod != null && !timePeriod.isEmpty()) {
            addPostTimeQuery(boolQuery, TimePeriod.valueOf(timePeriod));
        }
        FullTextQuery query = textSession.createFullTextQuery(boolQuery.build(), Post.class);
        SortOrder sortOrder = SortOrder.valueOf(criteria.getSortOrder());
        String sortByName = criteria.getSortBy();
        SortBy sortBy = sortByName == null ? null : valueOf(sortByName);
        query.setSort(getSort(sortBy, sortOrder));
        // Paging is delegated to the full-text query itself.
        query.setFirstResult(criteria.getPageSize() * criteria.getPageNumber());
        query.setMaxResults(criteria.getPageSize());
        ResultPage<Post> page = new ResultPage<Post>();
        page.setPage(query.list());
        page.setResultSize(query.getResultSize());
        return page;
    } catch (ParseException e) {
        // Unparsable keyword syntax is deliberately reported as "no result page".
        return null;
    } catch (Exception e) {
        throw new ModuleException(e.getMessage(), e);
    }
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) FullTextSession(org.hibernate.search.FullTextSession) Search.getFullTextSession(org.hibernate.search.Search.getFullTextSession) MultiFieldQueryParser(org.apache.lucene.queryparser.classic.MultiFieldQueryParser) SortBy(it.vige.rubia.search.SortBy) Post(it.vige.rubia.model.Post) Builder(org.apache.lucene.search.BooleanQuery.Builder) SortOrder(it.vige.rubia.search.SortOrder) Term(org.apache.lucene.index.Term) ParseException(org.apache.lucene.queryparser.classic.ParseException) ModuleException(it.vige.rubia.ModuleException) EntityManager(javax.persistence.EntityManager) Searching(it.vige.rubia.search.Searching) ResultPage(it.vige.rubia.search.ResultPage) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) ParseException(org.apache.lucene.queryparser.classic.ParseException) FullTextQuery(org.hibernate.search.FullTextQuery) ModuleException(it.vige.rubia.ModuleException)

Example 83 with WildcardQuery

use of org.apache.lucene.search.WildcardQuery in project rubia-forums by flashboss.

In the class ForumsSearchModuleImpl, the method findTopics:

@SuppressWarnings("unchecked")
public ResultPage<Topic> findTopics(SearchCriteria criteria) throws ModuleException {
    if (criteria != null) {
        try {
            EntityManager session = getSession();
            FullTextSession fullTextSession = getFullTextSession((Session) session.getDelegate());
            Builder builder = new Builder();
            String keywords = criteria.getKeywords();
            if (keywords != null && keywords.length() != 0) {
                String[] fields = null;
                Searching searching = Searching.valueOf(criteria.getSearching());
                switch(searching) {
                    case TITLE_MSG:
                        fields = new String[] { "message.text", "topic.subject" };
                        break;
                    case MSG:
                        fields = new String[] { "message.text" };
                        break;
                }
                MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, new StandardAnalyzer());
                builder.add(parser.parse(keywords), MUST);
            }
            String forumId = criteria.getForum();
            if (forumId != null && forumId.length() != 0) {
                builder.add(new TermQuery(new Term("topic.forum.id", forumId)), MUST);
            }
            String categoryId = criteria.getCategory();
            if (categoryId != null && categoryId.length() != 0) {
                builder.add(new TermQuery(new Term("topic.forum.category.id", categoryId)), MUST);
            }
            String userName = criteria.getAuthor();
            if (userName != null && userName.length() != 0) {
                builder.add(new WildcardQuery(new Term("poster.userId", userName)), MUST);
            }
            String timePeriod = criteria.getTimePeriod();
            if (timePeriod != null && timePeriod.length() != 0) {
                addPostTimeQuery(builder, TimePeriod.valueOf(timePeriod));
            }
            FullTextQuery fullTextQuery = fullTextSession.createFullTextQuery(builder.build(), Post.class);
            SortOrder sortOrder = SortOrder.valueOf(criteria.getSortOrder());
            SortBy sortBy = valueOf(criteria.getSortBy());
            fullTextQuery.setSort(getSort(sortBy, sortOrder));
            fullTextQuery.setProjection("topic.id");
            LinkedHashSet<Integer> topicIds = new LinkedHashSet<Integer>();
            LinkedHashSet<Integer> topicToDispIds = new LinkedHashSet<Integer>();
            int start = criteria.getPageSize() * criteria.getPageNumber();
            int end = start + criteria.getPageSize();
            int index = 0;
            for (Object o : fullTextQuery.list()) {
                Integer id = (Integer) ((Object[]) o)[0];
                if (topicIds.add(id)) {
                    if (index >= start && index < end) {
                        topicToDispIds.add(id);
                    }
                    index++;
                }
            }
            List<Topic> topics = null;
            if (topicToDispIds.size() > 0) {
                Query q = session.createQuery("from Topic as t join fetch t.poster where t.id IN ( :topicIds )");
                q.setParameter("topicIds", topicToDispIds);
                List<Topic> results = q.getResultList();
                topics = new LinkedList<Topic>();
                for (Integer id : topicToDispIds) {
                    for (Topic topic : results) {
                        if (id.equals(topic.getId())) {
                            topics.add(topic);
                            break;
                        }
                    }
                }
            }
            ResultPage<Topic> resultPage = new ResultPage<Topic>();
            resultPage.setPage(topics);
            resultPage.setResultSize(topicIds.size());
            return resultPage;
        } catch (ParseException e) {
            return null;
        } catch (Exception e) {
            throw new ModuleException(e.getMessage(), e);
        }
    } else {
        throw new IllegalArgumentException("criteria cannot be null");
    }
}
Also used : LinkedHashSet(java.util.LinkedHashSet) WildcardQuery(org.apache.lucene.search.WildcardQuery) FullTextSession(org.hibernate.search.FullTextSession) Search.getFullTextSession(org.hibernate.search.Search.getFullTextSession) FullTextQuery(org.hibernate.search.FullTextQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) Query(javax.persistence.Query) TermQuery(org.apache.lucene.search.TermQuery) SortBy(it.vige.rubia.search.SortBy) Builder(org.apache.lucene.search.BooleanQuery.Builder) ResultPage(it.vige.rubia.search.ResultPage) Topic(it.vige.rubia.model.Topic) ModuleException(it.vige.rubia.ModuleException) TermQuery(org.apache.lucene.search.TermQuery) MultiFieldQueryParser(org.apache.lucene.queryparser.classic.MultiFieldQueryParser) SortOrder(it.vige.rubia.search.SortOrder) Term(org.apache.lucene.index.Term) ParseException(org.apache.lucene.queryparser.classic.ParseException) ModuleException(it.vige.rubia.ModuleException) EntityManager(javax.persistence.EntityManager) Searching(it.vige.rubia.search.Searching) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) ParseException(org.apache.lucene.queryparser.classic.ParseException) FullTextQuery(org.hibernate.search.FullTextQuery)

Example 84 with WildcardQuery

use of org.apache.lucene.search.WildcardQuery in project jackrabbit-oak by apache.

In the class LuceneIndex, the method addNonFullTextConstraints:

/**
 * Translates the non-fulltext parts of an Oak query filter (node type, path
 * restriction and property restrictions) into Lucene queries appended to
 * {@code qs}.
 *
 * @param qs              collector the generated Lucene queries are added to
 * @param filter          the Oak query filter being translated
 * @param reader          index reader, used to resolve reference ("*") constraints
 * @param analyzer        analyzer used to tokenize property values
 * @param indexDefinition index definition; only its nt:base indexing rule is used
 */
private static void addNonFullTextConstraints(List<Query> qs, Filter filter, IndexReader reader, Analyzer analyzer, IndexDefinition indexDefinition) {
    if (!filter.matchesAllTypes()) {
        addNodeTypeConstraints(qs, filter);
    }
    String path = filter.getPath();
    switch(filter.getPathRestriction()) {
        case ALL_CHILDREN:
            if (USE_PATH_RESTRICTION) {
                if ("/".equals(path)) {
                    // every path is a descendant of the root: no clause needed
                    break;
                }
                if (!path.endsWith("/")) {
                    path += "/";
                }
                qs.add(new PrefixQuery(newPathTerm(path)));
            }
            break;
        case DIRECT_CHILDREN:
            if (USE_PATH_RESTRICTION) {
                if (!path.endsWith("/")) {
                    path += "/";
                }
                qs.add(new PrefixQuery(newPathTerm(path)));
            }
            break;
        case EXACT:
            qs.add(new TermQuery(newPathTerm(path)));
            break;
        case PARENT:
            if (denotesRoot(path)) {
                // there's no parent of the root node
                // we add a path that can not possibly occur because there
                // is no way to say "match no documents" in Lucene
                qs.add(new TermQuery(new Term(FieldNames.PATH, "///")));
            } else {
                qs.add(new TermQuery(newPathTerm(getParentPath(path))));
            }
            break;
        case NO_RESTRICTION:
            break;
    }
    // Fulltext index definition used by LuceneIndex only works with old format
    // which is not nodeType based. So just use the nt:base index
    IndexingRule rule = indexDefinition.getApplicableIndexingRule(JcrConstants.NT_BASE);
    for (PropertyRestriction pr : filter.getPropertyRestrictions()) {
        if (pr.first == null && pr.last == null) {
            // open-ended "is not null" restrictions are handled by other
            // queries (OAK-1208)
            continue;
        }
        // check excluded properties and types
        if (isExcludedProperty(pr, rule)) {
            continue;
        }
        String name = pr.propertyName;
        // pseudo-properties that the query engine resolves elsewhere
        if (QueryConstants.REP_EXCERPT.equals(name) || QueryConstants.OAK_SCORE_EXPLANATION.equals(name) || QueryConstants.REP_FACET.equals(name)) {
            continue;
        }
        if (JCR_PRIMARYTYPE.equals(name)) {
            continue;
        }
        if (QueryConstants.RESTRICTION_LOCAL_NAME.equals(name)) {
            continue;
        }
        if (skipTokenization(name)) {
            // NOTE(review): assumes pr.first is non-null here — confirm that
            // untokenized properties never arrive as upper-bound-only ranges.
            qs.add(new TermQuery(new Term(name, pr.first.getValue(STRING))));
            continue;
        }
        String first = null;
        String last = null;
        boolean isLike = pr.isLike;
        // TODO what to do with escaped tokens?
        if (pr.first != null) {
            first = pr.first.getValue(STRING);
            first = first.replace("\\", "");
        }
        if (pr.last != null) {
            last = pr.last.getValue(STRING);
            last = last.replace("\\", "");
        }
        if (isLike) {
            // Map JCR LIKE wildcards to Lucene ones: '%' -> '*', '_' -> '?'.
            first = first.replace('%', WildcardQuery.WILDCARD_STRING);
            first = first.replace('_', WildcardQuery.WILDCARD_CHAR);
            int indexOfWS = first.indexOf(WildcardQuery.WILDCARD_STRING);
            int indexOfWC = first.indexOf(WildcardQuery.WILDCARD_CHAR);
            int len = first.length();
            // FIX: String.indexOf can never return len (its maximum is len - 1),
            // so the previous "== len" comparison made this branch unreachable and
            // every LIKE ran as a full WildcardQuery. Compare against len - 1 so a
            // pattern whose first wildcard is the trailing character uses the
            // cheaper PrefixQuery, as the comment below intends.
            // NOTE(review): if an earlier wildcard of the other kind preceded the
            // trailing one it would remain verbatim in the prefix — assumed not to
            // occur in practice; verify against callers.
            if (indexOfWS == len - 1 || indexOfWC == len - 1) {
                // remove trailing "*" for prefixquery
                first = first.substring(0, first.length() - 1);
                if (JCR_PATH.equals(name)) {
                    qs.add(new PrefixQuery(newPathTerm(first)));
                } else {
                    qs.add(new PrefixQuery(new Term(name, first)));
                }
            } else {
                if (JCR_PATH.equals(name)) {
                    qs.add(new WildcardQuery(newPathTerm(first)));
                } else {
                    qs.add(new WildcardQuery(new Term(name, first)));
                }
            }
            continue;
        }
        // Equality restriction (first == last, both inclusive): exact terms.
        if (first != null && first.equals(last) && pr.firstIncluding && pr.lastIncluding) {
            if (JCR_PATH.equals(name)) {
                qs.add(new TermQuery(newPathTerm(first)));
            } else {
                if ("*".equals(name)) {
                    addReferenceConstraint(first, qs, reader);
                } else {
                    for (String t : tokenize(first, analyzer)) {
                        qs.add(new TermQuery(new Term(name, t)));
                    }
                }
            }
            continue;
        }
        // General range restriction on the analyzed form of the bounds.
        first = tokenizeAndPoll(first, analyzer);
        last = tokenizeAndPoll(last, analyzer);
        qs.add(TermRangeQuery.newStringRange(name, first, last, pr.firstIncluding, pr.lastIncluding));
    }
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) PropertyRestriction(org.apache.jackrabbit.oak.spi.query.Filter.PropertyRestriction) WildcardQuery(org.apache.lucene.search.WildcardQuery) IndexingRule(org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.IndexingRule) PrefixQuery(org.apache.lucene.search.PrefixQuery) Term(org.apache.lucene.index.Term) FullTextTerm(org.apache.jackrabbit.oak.spi.query.fulltext.FullTextTerm) TermFactory.newPathTerm(org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newPathTerm) TermFactory.newFulltextTerm(org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newFulltextTerm)

Example 85 with WildcardQuery

use of org.apache.lucene.search.WildcardQuery in project Krill by KorAP.

In the class TestIndex, the method indexLucene:

@Test
public void indexLucene() throws Exception {
    // Base analyzer for searching and indexing
    StandardAnalyzer analyzer = new StandardAnalyzer();
    // Based on
    // http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/
    // analysis/Analyzer.html?is-external=true
    // Create configuration with base analyzer
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    // Add a document 1 with the correct fields
    IndexWriter w = new IndexWriter(index, config);
    Collection docs = initIndexer();
    @SuppressWarnings("unchecked") Iterator<Map<String, String>> i = (Iterator<Map<String, String>>) docs.iterator();
    for (; i.hasNext(); ) {
        addDoc(w, i.next());
    }
    ;
    assertEquals(3, w.numDocs());
    w.close();
    // Check directory
    DirectoryReader reader = DirectoryReader.open(index);
    assertEquals(docs.size(), reader.maxDoc());
    assertEquals(docs.size(), reader.numDocs());
    // Check searcher
    IndexSearcher searcher = new IndexSearcher(reader);
    // textClass
    // All texts of text class "news"
    assertEquals(2, searcher.search(new TermQuery(new Term("textClass", "news")), 10).totalHits);
    // textClass
    // All texts of text class "sports"
    assertEquals(2, searcher.search(new TermQuery(new Term("textClass", "sports")), 10).totalHits);
    // TextIndex
    // All docs containing "l:nehmen"
    assertEquals(1, searcher.search(new TermQuery(new Term("text", "l:nehmen")), 10).totalHits);
    // TextIndex
    // All docs containing "s:den"
    assertEquals(2, searcher.search(new TermQuery(new Term("text", "s:den")), 10).totalHits);
    /*
        assertEquals(3,
              searcher.search(
                new TermQuery(
                  new Term("text", "T")
            ), 10
          ).totalHits
            );
        */
    // BooleanQuery
    // All docs containing "s:den" and "l:sie"
    TermQuery s_den = new TermQuery(new Term("text", "s:den"));
    TermQuery l_sie = new TermQuery(new Term("text", "l:sie"));
    BooleanQuery bool = new BooleanQuery();
    bool.add(s_den, BooleanClause.Occur.MUST);
    bool.add(l_sie, BooleanClause.Occur.MUST);
    assertEquals(1, searcher.search(bool, 10).totalHits);
    // BooleanQuery
    // All docs containing "s:den" or "l:sie"
    bool = new BooleanQuery();
    bool.add(s_den, BooleanClause.Occur.SHOULD);
    bool.add(l_sie, BooleanClause.Occur.SHOULD);
    assertEquals(2, searcher.search(bool, 10).totalHits);
    // RegexpQuery
    // All docs containing ".{4}en" (liefen und Hunden)
    RegexpQuery srquery = new RegexpQuery(new Term("text", "s:.{4}en"));
    assertEquals(2, searcher.search(srquery, 10).totalHits);
    // RegexpQuery
    // All docs containing "E." (Er) (2x)
    srquery = new RegexpQuery(new Term("text", "s:E."));
    assertEquals(2, searcher.search(srquery, 10).totalHits);
    SpanRegexQueryWrapper ssrquery = new SpanRegexQueryWrapper("text", "s:E.");
    assertEquals(2, searcher.search(ssrquery.toQuery(), 10).totalHits);
    // RegexpQuery
    // All docs containing "E." (er) (0x)
    srquery = new RegexpQuery(new Term("text", "s:e."));
    assertEquals(0, searcher.search(srquery, 10).totalHits);
    ssrquery = new SpanRegexQueryWrapper("text", "s:e.");
    assertEquals(0, searcher.search(ssrquery.toQuery(), 10).totalHits);
    // RegexpQuery
    // All docs containing "E."/i ([Ee]r) (2x)
    srquery = new RegexpQuery(new Term("text", "i:e."));
    assertEquals(2, searcher.search(srquery, 10).totalHits);
    ssrquery = new SpanRegexQueryWrapper("text", "s:e.", true);
    assertEquals("SpanMultiTermQueryWrapper(text:/i:e./)", ssrquery.toQuery().toString());
    assertEquals(2, searcher.search(ssrquery.toQuery(), 10).totalHits);
    // All docs containing "ng"/x (Angst) (2x)
    srquery = new RegexpQuery(new Term("text", "s:.*ng.*"));
    assertEquals(2, searcher.search(srquery, 10).totalHits);
    // Check http://comments.gmane.org/gmane.comp.jakarta.lucene.user/52283
    // for Carstens question on wildcards
    // Wildcardquery
    // All docs containing ".{4}en" (liefen und Hunden)
    WildcardQuery swquery = new WildcardQuery(new Term("text", "s:*ng*"));
    assertEquals("text:s:*ng*", swquery.toString());
    assertEquals(2, searcher.search(swquery, 10).totalHits);
    // [base=angst]
    SpanTermQuery stq = new SpanTermQuery(new Term("text", "l:angst"));
    assertEquals(2, searcher.search(srquery, 10).totalHits);
    // vor Angst
    // [orth=vor][orth=Angst]
    SpanNearQuery snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "s:vor")), new SpanTermQuery(new Term("text", "s:Angst")) }, 1, true);
    assertEquals(1, searcher.search(snquery, 10).totalHits);
    // Spannearquery [p:VVFIN][]{,5}[m:nom:sg:fem]
    snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "p:VVFIN")), new SpanSegmentQueryWrapper("text", "m:c:nom", "m:n:sg", "m:g:fem").toQuery() }, // slop
    5, // inOrder
    true);
    assertEquals(1, searcher.search(snquery, 10).totalHits);
    // Spannearquery [p:VVFIN][m:acc:sg:masc]
    snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "p:VVFIN")), new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "m:c:acc")), new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "m:n:sg")), new SpanTermQuery(new Term("text", "m:g:masc")) }, -1, false) }, // slop
    -1, // inOrder
    false) // new SpanTermQuery(new Term("text", "m:-acc:--sg:masc"))
    }, // slop
    0, // inOrder
    true);
    assertEquals(1, searcher.search(snquery, 10).totalHits);
    // Spannearquery [p:VVFIN|m:3:sg:past:ind]
    // Exact match!
    snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "p:VVFIN")), new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "m:p:3")), new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "m:n:sg")), new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "m:t:past")), new SpanTermQuery(new Term("text", "m:m:ind")) }, -1, false) }, -1, false) }, -1, false) }, // slop
    -1, // inOrder
    false);
    assertEquals(2, searcher.search(snquery, 10).totalHits);
    // To make sure, this is not equal:
    // Spannearquery [p:VVFIN & m:3:sg:past:ind]
    // Exact match!
    // Maybe it IS equal
    snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "p:VVFIN")), new SpanTermQuery(new Term("text", "m:p:3")), new SpanTermQuery(new Term("text", "m:n:sg")), new SpanTermQuery(new Term("text", "m:t:past")), new SpanTermQuery(new Term("text", "m:m:ind")) }, // slop
    -1, // inOrder
    false);
    assertNotEquals(2, searcher.search(snquery, 10).totalHits);
    // assertEquals(2, searcher.search(snquery, 10).totalHits);
    // Spannearquery [p:VVFIN & m:3:sg & past:ind]
    SpanSegmentQueryWrapper sniquery = new SpanSegmentQueryWrapper("text", "p:VVFIN", "m:p:3", "m:n:sg", "m:t:past", "m:m:ind");
    assertEquals(2, searcher.search(sniquery.toQuery(), 10).totalHits);
    // Todo:
    /*
        sniquery = new SpanSegmentQuery(
              "text",
          "p:VVFIN",
          "m:p:3",
          "m:n:sg",
          "m:t:past",
          "m:m:ind"
            );
        */
    // Spannearquery [p:VVFIN][]{,5}[m:nom:sg:fem]
    snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "p:VVFIN")), new SpanSegmentQueryWrapper("text", "m:c:nom", "m:n:sg", "m:g:fem").toQuery() }, // slop
    5, // inOrder
    true);
    assertEquals(1, searcher.search(snquery, 10).totalHits);
    sniquery = new SpanSegmentQueryWrapper("text", "p:VVFIN", "m:p:3", "m:t:past", "m:m:ind", "m:n:sg");
    assertEquals(2, searcher.search(sniquery.toQuery(), 10).totalHits);
    // [p = VVFIN & m:p = 3 & m:t = past & m:n != pl] or
    // [p = VVFIN & m:p = 3 & m:t = past & !m:n = pl]
    // TODO: Problem: What should happen in case the category does not exist?
    // pssible solution: & ( m:n != pl & exists(m:n))
    sniquery = new SpanSegmentQueryWrapper("text", "p:VVFIN", "m:p:3", "m:t:past");
    SpanQuery snqquery = new SpanNotQuery(sniquery.toQuery(), new SpanTermQuery(new Term("text", "m:n:pl")));
    assertEquals(2, searcher.search(snqquery, 10).totalHits);
    // [p = NN & (m:c: = dat | m:c = acc)]
    snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "p:NN")), new SpanOrQuery(new SpanTermQuery(new Term("text", "m:c:nom")), new SpanTermQuery(new Term("text", "m:c:acc"))) }, -1, false);
    assertEquals(2, searcher.search(snqquery, 10).totalHits);
    // [p = NN & !(m:c: = nom | m:c = acc)]
    snqquery = new SpanNotQuery(new SpanTermQuery(new Term("text", "p:NN")), new SpanOrQuery(new SpanTermQuery(new Term("text", "m:c:nom")), new SpanTermQuery(new Term("text", "m:c:acc"))));
    assertEquals(1, searcher.search(snqquery, 10).totalHits);
    // [p = NN & !(m:c = nom)]
    snqquery = new SpanNotQuery(new SpanTermQuery(new Term("text", "p:NN")), new SpanTermQuery(new Term("text", "m:c:nom")));
    assertEquals(3, searcher.search(snqquery, 10).totalHits);
    // [p=NN & !(m:c = acc)]
    snqquery = new SpanNotQuery(new SpanTermQuery(new Term("text", "p:NN")), new SpanTermQuery(new Term("text", "m:c:acc")));
    assertEquals(2, searcher.search(snqquery, 10).totalHits);
    // [p=PPER][][p=ART]
    snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "p:PPER")), new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "T")), new SpanTermQuery(new Term("text", "p:ART")) }, 0, true) }, 0, true);
    assertEquals(1, searcher.search(snquery, 10).totalHits);
    // Todo:
    // [orth=się][]{2,4}[base=bać]
    // [orth=się][orth!="[.!?,:]"]{,5}[base=bać]|[base=bać][base="on|ja|ty|my|wy"]?[orth=się]
    // [pos=subst & orth="a.*"]{2}
    // [tag=subst:sg:nom:n]
    // [case==acc & case==gen] ??
    // [case~acc & case~gen]
    // [case~~acc]
    // [base=bać][orth!=się]+[orth=się] within s
    // [][][p:VAFIN] within s
    // [][p:VAFIN] within s
    // [][][p:VAFIN]
    snquery = new SpanNearQuery(new SpanQuery[] { new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "T")), new SpanTermQuery(new Term("text", "T")) }, 0, true), new SpanTermQuery(new Term("text", "p:VAFIN")) }, 0, true);
    assertEquals(1, searcher.search(snquery, 10).totalHits);
    /*
        http://stackoverflow.com/questions/1311199/finding-the-position-of-search-hits-from-lucene
        */
    StringBuilder payloadString = new StringBuilder();
    Map<Term, TermContext> termContexts = new HashMap<>();
    for (LeafReaderContext atomic : reader.leaves()) {
        Bits bitset = atomic.reader().getLiveDocs();
        // Spans spans = NearSpansOrdered();
        Spans spans = snquery.getSpans(atomic, bitset, termContexts);
        while (spans.next()) {
            int docid = atomic.docBase + spans.doc();
            if (spans.isPayloadAvailable()) {
                for (byte[] payload : spans.getPayload()) {
                    /* retrieve payload for current matching span */
                    payloadString.append(new String(payload));
                    payloadString.append(" | ");
                }
                ;
            }
            ;
        }
        ;
    }
    ;
    // assertEquals(33, payloadString.length());
    assertEquals(0, payloadString.length());
    // [][][p:VAFIN]
    // without collecting payloads
    snquery = new SpanNearQuery(new SpanQuery[] { new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "T")), new SpanTermQuery(new Term("text", "T")) }, 0, true, false), new SpanTermQuery(new Term("text", "p:VAFIN")) }, 0, true, false);
    assertEquals(1, searcher.search(snquery, 10).totalHits);
    payloadString = new StringBuilder();
    termContexts = new HashMap<>();
    for (LeafReaderContext atomic : reader.leaves()) {
        Bits bitset = atomic.reader().getLiveDocs();
        // Spans spans = NearSpansOrdered();
        Spans spans = snquery.getSpans(atomic, bitset, termContexts);
        while (spans.next()) {
            int docid = atomic.docBase + spans.doc();
            for (byte[] payload : spans.getPayload()) {
                /* retrieve payload for current matching span */
                payloadString.append(new String(payload));
                payloadString.append(" | ");
            }
            ;
        }
        ;
    }
    ;
    assertEquals(0, payloadString.length());
    // [][][p:VAFIN] in s
    // ([e:s:<][]*[T] | [T & e:s:<]) [T] ([p:VAFIN & e:s:>] | [T][]*[e:s:>]
    /*
        
        SpanSegmentWithinQuery ssequery = new SpanSegmentWithinQuery(
            "text","s", new SpanSegmentSequenceQuery("text", "T", "T", "p:VAFIN")
            );
        assertEquals(0, searcher.search(ssequery.toQuery(), 10).totalHits);
        
        payloadString = new StringBuilder();
        termContexts = new HashMap<>();
        for (LeafReaderContext atomic : reader.leaves()) {
            Bits bitset = atomic.reader().getLiveDocs();
            // Spans spans = NearSpansOrdered();
            Spans spans = ssequery.toQuery().getSpans(atomic, bitset, termContexts);
        
            while (spans.next()) {
        	int docid = atomic.docBase + spans.doc();
        	for (byte[] payload : spans.getPayload()) {
        	/// retrieve payload for current matching span
        	    payloadString.append(new String(payload));
        	    payloadString.append(" | ");
        	};
            };
        };
        assertEquals(0, payloadString.length(), 1);
        
        ssequery = new SpanSegmentWithinQuery(
            "text","s", new SpanSegmentSequenceQuery("text", "T", "p:VAFIN")
            );
        
        assertEquals("for " + ssequery.toQuery(),
        	     1, searcher.search(ssequery.toQuery(), 10).totalHits);
        
        payloadString = new StringBuilder();
        termContexts = new HashMap<>();
        for (LeafReaderContext atomic : reader.leaves()) {
            Bits bitset = atomic.reader().getLiveDocs();
            // Spans spans = NearSpansOrdered();
            Spans spans = ssequery.toQuery().getSpans(atomic, bitset, termContexts);
        
            while (spans.next()) {
        	int docid = atomic.docBase + spans.doc();
        	for (byte[] payload : spans.getPayload()) {
        	    // retrieve payload for current matching span
        	    payloadString.append(new String(payload));
        	    payloadString.append(" | ");
        	};
        	fail("Doc: " + docid + " with " + spans.start() + "-" + spans.end() + " || " + payloadString.toString());
            };
        };
        assertEquals(20, payloadString.length());
        
        */
    // --------------------______>
    // Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), ssequery.toQuery());
    /*
        TopDocs topDocs = is.search(snq, 1);
        Set<String> payloadSet = new HashSet<String>();
        for (int i = 0; i < topDocs.scoreDocs.length; i++) {
          while (spans.next()) {
            Collection<byte[]> payloads = spans.getPayload();
        
            for (final byte [] payload : payloads) {
              payloadSet.add(new String(payload, "UTF-8"));
            }
          }
        }
        */
    /*
        Alternativ:
        IndexReader reader = writer.getReader();
        writer.close();
        IndexSearcher searcher = newSearcher(reader);
        
        PayloadSpanUtil psu = new PayloadSpanUtil(searcher.getTopReaderContext());
        
        Collection<byte[]> payloads = psu.getPayloadsForQuery(new TermQuery(new Term(PayloadHelper.FIELD, "rr")));
        if(VERBOSE)
          System.out.println("Num payloads:" + payloads.size());
        for (final byte [] bytes : payloads) {
          if(VERBOSE)
            System.out.println(new String(bytes, "UTF-8"));
        }
        */
    /* new: */
    // PayloadHelper helper = new PayloadHelper();
    // Map<Term, TermContext> termContexts = new HashMap<>();
    // Spans spans;
    // spans = snquery.getSpans(searcher.getIndexReader());
    // searcher = helper.setUp(similarity, 1000);
    /*
        IndexReader reader = search.getReader(querycontainer.getFoundry());
        Spans luceneSpans;
        Bits bitset = atomic.reader().getLiveDocs();
        for (byte[] payload : luceneSpans.getPayload())
        
        /* Iterate over all matching documents */
    /*
            while (luceneSpans.next() && total < config.getMaxhits()) {
        	Span matchSpan;
        	StringBuilder payloadString = new StringBuilder();
        	int docid = atomic.docBase + luceneSpans.doc();
        	String docname = search.retrieveDocname(docid,
        					querycontainer.getFoundry());
        					total++;
        
        	for (byte[] payload : luceneSpans.getPayload())
        */
    /* retrieve payload for current matching span */
    // payloadString.append(new String(payload));
    /* create span containing result */
    /*
        		matchSpan = new Span(docname);
        		matchSpan.setIndexdocid(docid);
        		matchSpan.setLayer(querycontainer.getLayer());
        		matchSpan.storePayloads(payloadString.toString());
        		matchSpans.add(matchSpan);
        */
    /*
         * topdocs = searcher.search(new ConstantScoreQuery(corpusQ add
         * position to list of positions to be considered for later
         * searches
         */
    /*
        validValues.put(docname,
        		matchSpan.getPayload(config.getPrefix()));
        }
        */
    // Todo: API made by add() typisiert für queries, strings
    // SpanPayloadCheckQuery for sentences!
    /* Support regular expression in SpanSegmentQuery */
    // new Regexp();
    // new Term();
    /*
          Vielleicht: spanSegmentQuery(new Term(), new Wildcard(), new Regex());
         */
    // And Not ->
    // SpanTermDiffQuery
    /*
        SpanNearQuery poquery = new SpanNearQuery(
        
        );
        */
    reader.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) TermContext(org.apache.lucene.index.TermContext) RegexpQuery(org.apache.lucene.search.RegexpQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) Spans(org.apache.lucene.search.spans.Spans) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) MultiTerm(de.ids_mannheim.korap.index.MultiTerm) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanRegexQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanRegexQueryWrapper) SpanSegmentQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper) IndexWriter(org.apache.lucene.index.IndexWriter) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Bits(org.apache.lucene.util.Bits) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test) Test(de.ids_mannheim.korap.Test)

Aggregations

WildcardQuery (org.apache.lucene.search.WildcardQuery)102 Term (org.apache.lucene.index.Term)94 BooleanQuery (org.apache.lucene.search.BooleanQuery)38 TermQuery (org.apache.lucene.search.TermQuery)38 Query (org.apache.lucene.search.Query)29 PrefixQuery (org.apache.lucene.search.PrefixQuery)27 FuzzyQuery (org.apache.lucene.search.FuzzyQuery)23 Document (org.apache.lucene.document.Document)19 IndexSearcher (org.apache.lucene.search.IndexSearcher)17 RegexpQuery (org.apache.lucene.search.RegexpQuery)17 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)16 PhraseQuery (org.apache.lucene.search.PhraseQuery)16 SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)16 BoostQuery (org.apache.lucene.search.BoostQuery)15 TermRangeQuery (org.apache.lucene.search.TermRangeQuery)15 SpanMultiTermQueryWrapper (org.apache.lucene.search.spans.SpanMultiTermQueryWrapper)15 SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery)15 SpanQuery (org.apache.lucene.search.spans.SpanQuery)15 IndexReader (org.apache.lucene.index.IndexReader)14 ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery)14