Search in sources :

Example 11 with WildcardQuery

use of org.apache.lucene.search.WildcardQuery in project lucene-solr by apache.

the class KNearestNeighborClassifier method knnSearch.

/**
 * Runs the nearest-neighbour lookup for the given text.
 * <p>
 * For every entry of {@code textFieldNames} a "more like this" query is added as an optional
 * ({@code SHOULD}) clause. An entry may be written as {@code field^boost}: the part before the
 * caret is used as the field name and the part after it is parsed as a float boost factor that
 * applies to that field only. Matching documents must also satisfy a {@code "*"} wildcard on
 * {@code classFieldName} and, when it is non-null, the additional {@code query}.
 *
 * @param text the text to find similar documents for
 * @return the result of searching the combined query with {@code k} as the hit limit
 * @throws IOException propagated from building the MLT query or from the index search
 */
private TopDocs knnSearch(String text) throws IOException {
    BooleanQuery.Builder combined = new BooleanQuery.Builder();
    for (String fieldSpec : textFieldNames) {
        // terms boost actually helps in MLT queries
        mlt.setBoost(true);
        String targetField = fieldSpec;
        if (fieldSpec.contains("^")) {
            // "field^boost" notation: split off the per-field boost and apply it
            String[] nameAndBoost = fieldSpec.split("\\^");
            targetField = nameAndBoost[0];
            String fieldBoost = nameAndBoost[1];
            mlt.setBoostFactor(Float.parseFloat(fieldBoost));
        }
        Query likeThis = mlt.like(targetField, new StringReader(text));
        combined.add(new BooleanClause(likeThis, BooleanClause.Occur.SHOULD));
        // back to the neutral boost so the next field is not affected
        mlt.setBoostFactor(1);
    }
    // only documents that have some value in the class field are eligible
    Term anyClassValue = new Term(classFieldName, "*");
    combined.add(new BooleanClause(new WildcardQuery(anyClassValue), BooleanClause.Occur.MUST));
    if (query != null) {
        combined.add(query, BooleanClause.Occur.MUST);
    }
    return indexSearcher.search(combined.build(), k);
}
Also used : BooleanClause(org.apache.lucene.search.BooleanClause) BooleanQuery(org.apache.lucene.search.BooleanQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) Query(org.apache.lucene.search.Query) WildcardQuery(org.apache.lucene.search.WildcardQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) StringReader(java.io.StringReader) Term(org.apache.lucene.index.Term)

Example 12 with WildcardQuery

use of org.apache.lucene.search.WildcardQuery in project lucene-solr by apache.

the class KNearestFuzzyClassifier method knnSearch.

/**
 * Runs the fuzzy nearest-neighbour lookup for the given text.
 * <p>
 * A single {@code FuzzyLikeThisQuery} receives the text once per entry of
 * {@code textFieldNames} and is added as a required clause, together with a {@code "*"}
 * wildcard on {@code classFieldName} and, when it is non-null, the additional {@code query}.
 *
 * @param text the text to find similar documents for
 * @return the result of searching the combined query with {@code k} as the hit limit
 * @throws IOException propagated from the index search
 */
private TopDocs knnSearch(String text) throws IOException {
    FuzzyLikeThisQuery fuzzyLike = new FuzzyLikeThisQuery(300, analyzer);
    for (String field : textFieldNames) {
        // TODO: make these parameters configurable
        fuzzyLike.addTerms(text, field, 1f, 2);
    }

    BooleanQuery.Builder combined = new BooleanQuery.Builder();
    combined.add(fuzzyLike, BooleanClause.Occur.MUST);
    // only documents that have some value in the class field are eligible
    Term anyClassValue = new Term(classFieldName, "*");
    combined.add(new BooleanClause(new WildcardQuery(anyClassValue), BooleanClause.Occur.MUST));
    if (query != null) {
        combined.add(query, BooleanClause.Occur.MUST);
    }
    return indexSearcher.search(combined.build(), k);
}
Also used : BooleanClause(org.apache.lucene.search.BooleanClause) BooleanQuery(org.apache.lucene.search.BooleanQuery) FuzzyLikeThisQuery(org.apache.lucene.sandbox.queries.FuzzyLikeThisQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) Query(org.apache.lucene.search.Query) FuzzyLikeThisQuery(org.apache.lucene.sandbox.queries.FuzzyLikeThisQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) Term(org.apache.lucene.index.Term)

Example 13 with WildcardQuery

use of org.apache.lucene.search.WildcardQuery in project lucene-solr by apache.

the class TestSpanMultiTermQueryWrapper method testNoSuchMultiTermsInSpanFirst.

/**
 * Checks that a {@code SpanFirstQuery} wrapping a multi-term query with the text
 * {@code "noSuch"} reports zero hits, for each of the fuzzy, wildcard, regexp and prefix
 * variants in turn.
 */
public void testNoSuchMultiTermsInSpanFirst() throws Exception {
    SpanQuery wrapped;
    SpanQuery first;

    // fuzzy: this hasn't been a problem
    FuzzyQuery fuzzy = new FuzzyQuery(new Term("field", "noSuch"), 1, 0, 1, false);
    wrapped = new SpanMultiTermQueryWrapper<>(fuzzy);
    first = new SpanFirstQuery(wrapped, 10);
    assertEquals(0, searcher.search(first, 10).totalHits);

    // wildcard
    WildcardQuery wildcard = new WildcardQuery(new Term("field", "noSuch*"));
    wrapped = new SpanMultiTermQueryWrapper<>(wildcard);
    first = new SpanFirstQuery(wrapped, 10);
    assertEquals(0, searcher.search(first, 10).totalHits);

    // regexp
    RegexpQuery regexp = new RegexpQuery(new Term("field", "noSuch"));
    wrapped = new SpanMultiTermQueryWrapper<>(regexp);
    first = new SpanFirstQuery(wrapped, 10);
    assertEquals(0, searcher.search(first, 10).totalHits);

    // prefix
    PrefixQuery prefix = new PrefixQuery(new Term("field", "noSuch"));
    wrapped = new SpanMultiTermQueryWrapper<>(prefix);
    first = new SpanFirstQuery(wrapped, 10);
    assertEquals(0, searcher.search(first, 10).totalHits);
}
Also used : WildcardQuery(org.apache.lucene.search.WildcardQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) Term(org.apache.lucene.index.Term) RegexpQuery(org.apache.lucene.search.RegexpQuery)

Example 14 with WildcardQuery

use of org.apache.lucene.search.WildcardQuery in project lucene-solr by apache.

the class HighlighterTest method testConstantScoreMultiTermQuery.

/**
 * Highlights the hits of a constant-score-rewritten wildcard query ({@code "ken*"}) three
 * times, each time with a differently configured {@code QueryScorer}: bound to
 * {@code FIELD_NAME}, bound to a {@code null} field, and bound to {@code "random_field"} with
 * {@code FIELD_NAME} as the default field. After each pass {@code numHighlights} must be 5.
 */
public void testConstantScoreMultiTermQuery() throws Exception {
    // identical for all three passes, so declared once
    final int maxFragments = 2;
    final String separator = "...";

    numHighlights = 0;
    WildcardQuery wildcard = new WildcardQuery(new Term(FIELD_NAME, "ken*"));
    query = wildcard;
    wildcard.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE);
    searcher = newSearcher(reader);
    // query = unReWrittenQuery.rewrite(reader);
    if (VERBOSE) {
        System.out.println("Searching for: " + query.toString(FIELD_NAME));
    }

    // pass 1: scorer restricted to the searched field
    hits = searcher.search(query, 1000);
    for (int hit = 0; hit < hits.totalHits; hit++) {
        final int docId = hits.scoreDocs[hit].doc;
        final Document stored = searcher.doc(docId);
        String content = stored.get(FIELD_NAME);
        TokenStream tokens = getAnyTokenStream(FIELD_NAME, docId);
        QueryScorer fieldScorer = new QueryScorer(query, HighlighterTest.FIELD_NAME);
        Highlighter fieldHighlighter = new Highlighter(this, fieldScorer);
        fieldHighlighter.setTextFragmenter(new SimpleFragmenter(20));
        String fragments = fieldHighlighter.getBestFragments(tokens, content, maxFragments, separator);
        if (VERBOSE) {
            System.out.println("\t" + fragments);
        }
    }
    assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 5);

    // pass 2: try null field
    hits = searcher.search(query, 1000);
    numHighlights = 0;
    for (int hit = 0; hit < hits.totalHits; hit++) {
        final int docId = hits.scoreDocs[hit].doc;
        final Document stored = searcher.doc(docId);
        String content = stored.get(FIELD_NAME);
        TokenStream tokens = getAnyTokenStream(FIELD_NAME, docId);
        QueryScorer nullFieldScorer = new QueryScorer(query, null);
        Highlighter nullFieldHighlighter = new Highlighter(this, nullFieldScorer);
        nullFieldHighlighter.setTextFragmenter(new SimpleFragmenter(20));
        String fragments = nullFieldHighlighter.getBestFragments(tokens, content, maxFragments, separator);
        if (VERBOSE) {
            System.out.println("\t" + fragments);
        }
    }
    assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 5);

    // pass 3: try default field
    hits = searcher.search(query, 1000);
    numHighlights = 0;
    for (int hit = 0; hit < hits.totalHits; hit++) {
        final int docId = hits.scoreDocs[hit].doc;
        final Document stored = searcher.doc(docId);
        String content = stored.get(FIELD_NAME);
        TokenStream tokens = getAnyTokenStream(FIELD_NAME, docId);
        QueryScorer defaultFieldScorer = new QueryScorer(query, "random_field", HighlighterTest.FIELD_NAME);
        Highlighter defaultFieldHighlighter = new Highlighter(this, defaultFieldScorer);
        defaultFieldHighlighter.setTextFragmenter(new SimpleFragmenter(20));
        String fragments = defaultFieldHighlighter.getBestFragments(tokens, content, maxFragments, separator);
        if (VERBOSE) {
            System.out.println("\t" + fragments);
        }
    }
    assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 5);
}
Also used : WildcardQuery(org.apache.lucene.search.WildcardQuery) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) IntPoint(org.apache.lucene.document.IntPoint)

Example 15 with WildcardQuery

use of org.apache.lucene.search.WildcardQuery in project lucene-solr by apache.

the class HighlighterTest method testUnRewrittenQuery.

/**
 * Searches with a boolean query made of two optional wildcard clauses ({@code "jf?"} and
 * {@code "kenned*"}) that is never rewritten, highlights every hit, and asserts that
 * {@code numHighlights} stays at zero.
 */
public void testUnRewrittenQuery() throws Exception {
    final TestHighlightRunner runner = new TestHighlightRunner() {

        @Override
        public void run() throws Exception {
            numHighlights = 0;
            // test to show how rewritten query can still be used
            searcher = newSearcher(reader);

            BooleanQuery.Builder wildcards = new BooleanQuery.Builder();
            wildcards.add(new WildcardQuery(new Term(FIELD_NAME, "jf?")), Occur.SHOULD);
            wildcards.add(new WildcardQuery(new Term(FIELD_NAME, "kenned*")), Occur.SHOULD);
            if (VERBOSE) {
                System.out.println("Searching with primitive query");
            }
            // the query is deliberately NOT rewritten before it is used:
            // query=query.rewrite(reader);
            TopDocs found = searcher.search(wildcards.build(), 1000);

            final int maxFragments = 3;
            for (int hit = 0; hit < found.totalHits; hit++) {
                final int docId = found.scoreDocs[hit].doc;
                final Document stored = searcher.doc(docId);
                String content = stored.get(FIELD_NAME);
                TokenStream tokens = getAnyTokenStream(FIELD_NAME, docId);
                Highlighter hl = getHighlighter(wildcards.build(), FIELD_NAME, HighlighterTest.this, false);
                hl.setTextFragmenter(new SimpleFragmenter(40));
                String fragments = hl.getBestFragments(tokens, content, maxFragments, "...");
                if (VERBOSE) {
                    System.out.println(fragments);
                }
            }
            // We expect to have zero highlights if the query is multi-terms and is
            // not rewritten!
            assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 0);
        }
    };
    runner.start();
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) TestHighlightRunner(org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Builder(org.apache.lucene.search.PhraseQuery.Builder) DocumentBuilder(javax.xml.parsers.DocumentBuilder) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) IntPoint(org.apache.lucene.document.IntPoint) TopDocs(org.apache.lucene.search.TopDocs)

Aggregations

WildcardQuery (org.apache.lucene.search.WildcardQuery)69 Term (org.apache.lucene.index.Term)61 BooleanQuery (org.apache.lucene.search.BooleanQuery)28 PrefixQuery (org.apache.lucene.search.PrefixQuery)24 Query (org.apache.lucene.search.Query)21 FuzzyQuery (org.apache.lucene.search.FuzzyQuery)20 TermQuery (org.apache.lucene.search.TermQuery)18 Document (org.apache.lucene.document.Document)17 BoostQuery (org.apache.lucene.search.BoostQuery)15 IndexSearcher (org.apache.lucene.search.IndexSearcher)15 RegexpQuery (org.apache.lucene.search.RegexpQuery)15 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)14 SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)14 Field (org.apache.lucene.document.Field)13 IndexReader (org.apache.lucene.index.IndexReader)13 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)13 PhraseQuery (org.apache.lucene.search.PhraseQuery)13 TermRangeQuery (org.apache.lucene.search.TermRangeQuery)13 TopDocs (org.apache.lucene.search.TopDocs)13 SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery)13