Search in sources :

Example 66 with TermQuery

use of org.apache.lucene.search.TermQuery in project elasticsearch by elastic.

the class XMoreLikeThis method addToQuery.

/**
 * Drains the given priority queue and appends one optional (SHOULD) term clause
 * per popped entry to the supplied boolean query builder.
 *
 * <p>When boosting is enabled, each clause is wrapped in a {@code BoostQuery} whose
 * boost is {@code boostFactor} scaled by the entry's score relative to the score of
 * the first entry popped. Draining stops as soon as the builder rejects a clause
 * with {@code TooManyClauses}.
 *
 * @param q     queue of scored terms; it is consumed by this call
 * @param query builder that receives the generated clauses
 */
private void addToQuery(PriorityQueue<ScoreTerm> q, BooleanQuery.Builder query) {
    // -1 marks "no reference score captured yet".
    float referenceScore = -1;
    for (ScoreTerm current = q.pop(); current != null; current = q.pop()) {
        Query clause = new TermQuery(new Term(current.topField, current.word));
        if (boost) {
            final float currentScore = current.score;
            if (referenceScore == -1) {
                // The first popped entry becomes the normalisation reference.
                referenceScore = currentScore;
            }
            clause = new BoostQuery(clause, boostFactor * currentScore / referenceScore);
        }
        try {
            query.add(clause, BooleanClause.Occur.SHOULD);
        } catch (BooleanQuery.TooManyClauses ignore) {
            // The builder is full: stop adding terms, remaining entries are dropped.
            return;
        }
    }
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) BoostQuery(org.apache.lucene.search.BoostQuery) Term(org.apache.lucene.index.Term)

Example 67 with TermQuery

use of org.apache.lucene.search.TermQuery in project elasticsearch by elastic.

the class AllTermQuery method rewrite.

/**
 * Rewrites this query against the given reader.
 *
 * <p>If the parent rewrite yields a different query, that query is returned. Otherwise
 * this query is kept only when at least one leaf reports payloads for the term's field;
 * when no leaf does, a plain {@code TermQuery} on the same term is returned instead.
 *
 * @param reader the reader whose leaves are inspected for payloads
 * @return the rewritten query, this query, or a {@code TermQuery} on the same term
 * @throws IOException if the parent rewrite or the terms lookup fails
 */
@Override
public Query rewrite(IndexReader reader) throws IOException {
    final Query superRewritten = super.rewrite(reader);
    if (superRewritten != this) {
        return superRewritten;
    }
    for (LeafReaderContext leaf : reader.leaves()) {
        final Terms fieldTerms = leaf.reader().terms(term.field());
        if (fieldTerms != null && fieldTerms.hasPayloads()) {
            // At least one leaf has payloads for this field: keep this query.
            return this;
        }
    }
    // NOTE(review): the original comment here was truncated ("... which rewrites query
    // with an empty reader."); presumably it explained why a TermQuery is returned even
    // when the field has no terms at all - confirm against the upstream source.
    return new TermQuery(term);
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) Terms(org.apache.lucene.index.Terms) LeafReaderContext(org.apache.lucene.index.LeafReaderContext)

Example 68 with TermQuery

use of org.apache.lucene.search.TermQuery in project elasticsearch by elastic.

the class XMoreLikeThisTests method testTopN.

/**
 * Checks that a MoreLikeThis query capped at {@code topN} terms contains exactly
 * {@code topN} clauses and that every clause term belongs to the expected set of
 * terms built from {@code generateStrSeq(numDocs - topN, topN)}.
 *
 * @throws Exception if indexing or building the query fails
 */
public void testTopN() throws Exception {
    int numDocs = 100;
    int topN = 25;
    // add series of docs with terms of decreasing df
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    for (int i = 0; i < numDocs; i++) {
        addDoc(writer, generateStrSeq(0, i + 1));
    }
    IndexReader reader = writer.getReader();
    writer.close();
    // setup MLT query
    MoreLikeThis mlt = new MoreLikeThis(reader);
    mlt.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false));
    mlt.setMaxQueryTerms(topN);
    mlt.setMinDocFreq(1);
    mlt.setMinTermFreq(1);
    mlt.setMinWordLen(1);
    mlt.setFieldNames(new String[] { "text" });
    // perform MLT query; the like-text is every generated term separated by a space
    // (built with a StringBuilder to avoid quadratic string concatenation)
    StringBuilder likeText = new StringBuilder();
    for (String text : generateStrSeq(0, numDocs)) {
        likeText.append(text).append(' ');
    }
    BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader(likeText.toString()));
    // check best terms are topN of highest idf
    List<BooleanClause> clauses = query.clauses();
    assertEquals("Expected " + topN + " clauses only!", topN, clauses.size());
    Term[] expectedTerms = new Term[topN];
    int idx = 0;
    for (String text : generateStrSeq(numDocs - topN, topN)) {
        expectedTerms[idx++] = new Term("text", text);
    }
    // Wrap the array once instead of on every loop iteration.
    List<Term> expected = Arrays.asList(expectedTerms);
    for (BooleanClause clause : clauses) {
        Term term = ((TermQuery) clause.getQuery()).getTerm();
        assertTrue("Unexpected term in MLT query: " + term, expected.contains(term));
    }
    // clean up
    reader.close();
    dir.close();
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) MoreLikeThis(org.apache.lucene.queries.mlt.MoreLikeThis) Term(org.apache.lucene.index.Term) BooleanClause(org.apache.lucene.search.BooleanClause) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) StringReader(java.io.StringReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 69 with TermQuery

use of org.apache.lucene.search.TermQuery in project OpenGrok by OpenGrok.

the class HistoryContextTest method testGetContext_3args.

/**
 * Exercises {@code HistoryContext.getContext(filename, path, hits)} with four queries
 * on the "hist" field: a single term, a phrase, a term with no match, and a boolean
 * OR of two terms. Each scenario checks the returned flag, the number of hits and,
 * where hits are expected, the highlighted line text.
 */
@Test
public void testGetContext_3args() throws Exception {
    final String path = "/mercurial/Makefile";
    final String filename = repositories.getSourceRoot() + path;
    final ArrayList<Hit> hits = new ArrayList<>();

    // Scenario 1: single term, equivalent to hist:dummy
    TermQuery singleTerm = new TermQuery(new Term("hist", "dummy"));
    boolean found = new HistoryContext(singleTerm).getContext(filename, path, hits);
    assertTrue(found);
    assertEquals(1, hits.size());
    String firstLine = hits.get(0).getLine();
    assertTrue(firstLine.contains("Created a small <b>dummy</b> program"));

    // Scenario 2: phrase, equivalent to hist:"dummy program"
    hits.clear();
    PhraseQuery.Builder phrase = new PhraseQuery.Builder();
    phrase.add(new Term("hist", "dummy"));
    phrase.add(new Term("hist", "program"));
    found = new HistoryContext(phrase.build()).getContext(filename, path, hits);
    assertTrue(found);
    assertEquals(1, hits.size());
    firstLine = hits.get(0).getLine();
    assertTrue(firstLine.contains("Created a small <b>dummy program</b>"));

    // Scenario 3: a term that doesn't exist
    hits.clear();
    TermQuery missingTerm = new TermQuery(new Term("hist", "term_does_not_exist"));
    found = new HistoryContext(missingTerm).getContext(filename, path, hits);
    assertFalse(found);
    assertEquals(0, hits.size());

    // Scenario 4: multiple hits - hist:small OR hist:target
    hits.clear();
    BooleanQuery.Builder eitherTerm = new BooleanQuery.Builder();
    eitherTerm.add(new TermQuery(new Term("hist", "small")), Occur.SHOULD);
    eitherTerm.add(new TermQuery(new Term("hist", "target")), Occur.SHOULD);
    found = new HistoryContext(eitherTerm.build()).getContext(filename, path, hits);
    assertTrue(found);
    assertEquals(2, hits.size());
    firstLine = hits.get(0).getLine();
    String secondLine = hits.get(1).getLine();
    assertTrue(firstLine.contains("Add lint make <b>target</b> and fix lint warnings"));
    assertTrue(secondLine.contains("Created a <b>small</b> dummy program"));
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) Hit(org.opensolaris.opengrok.search.Hit) PhraseQuery(org.apache.lucene.search.PhraseQuery) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) Test(org.junit.Test)

Example 70 with TermQuery

use of org.apache.lucene.search.TermQuery in project ansj_seg by NLPchina.

the class PhraseTest method main.

/**
 * Demo entry point: registers two dictionary words, prints the tokens the Ansj
 * analyzer produces for a sample sentence, indexes that sentence into an in-memory
 * index, and prints the hit counts for a term query and a parsed phrase query.
 *
 * @param args unused
 * @throws IOException    if analysis, indexing or searching fails
 * @throws ParseException if the phrase query cannot be parsed
 */
public static void main(String[] args) throws IOException, ParseException {
    DicLibrary.insert(DicLibrary.DEFAULT, "上网人");
    DicLibrary.insert(DicLibrary.DEFAULT, "网人");
    try (AnsjAnalyzer ansjAnalyzer = new AnsjAnalyzer(AnsjAnalyzer.TYPE.index_ansj);
            AnsjAnalyzer queryAnalyzer = new AnsjAnalyzer(AnsjAnalyzer.TYPE.index_ansj)) {
        // Analyzer.tokenStream takes (fieldName, text); the original call had the two
        // arguments swapped and therefore analyzed the literal text "test".
        try (TokenStream tokenStream = ansjAnalyzer.tokenStream("test", "上网人员测试")) {
            // TokenStream consumer workflow: reset() -> incrementToken()* -> end() -> close().
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                System.out.println(tokenStream.getAttribute(CharTermAttribute.class));
            }
            tokenStream.end();
        }
        IndexWriterConfig config = new IndexWriterConfig(ansjAnalyzer);
        try (RAMDirectory directory = new RAMDirectory();
                IndexWriter writer = new IndexWriter(directory, config)) {
            Document doc = new Document();
            doc.add(new TextField("test", "上网人员测试", Field.Store.YES));
            writer.addDocument(doc);
            writer.commit();
            // Open the reader from the writer and make sure it is closed before the writer.
            try (DirectoryReader reader = DirectoryReader.open(writer)) {
                IndexSearcher searcher = new IndexSearcher(reader);
                System.out.println(searcher.count(new TermQuery(new Term("test", "网人"))));
                Query q = new QueryParser("test", queryAnalyzer).parse("\"上网人\"");
                System.out.println(q);
                System.out.println(searcher.count(q));
            }
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) TokenStream(org.apache.lucene.analysis.TokenStream) Query(org.apache.lucene.search.Query) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) AnsjAnalyzer(org.ansj.lucene6.AnsjAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) TextField(org.apache.lucene.document.TextField) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

TermQuery (org.apache.lucene.search.TermQuery)673 Term (org.apache.lucene.index.Term)560 BooleanQuery (org.apache.lucene.search.BooleanQuery)343 Query (org.apache.lucene.search.Query)275 IndexSearcher (org.apache.lucene.search.IndexSearcher)252 Document (org.apache.lucene.document.Document)210 TopDocs (org.apache.lucene.search.TopDocs)164 Directory (org.apache.lucene.store.Directory)164 IndexReader (org.apache.lucene.index.IndexReader)125 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)125 PhraseQuery (org.apache.lucene.search.PhraseQuery)122 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)116 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)114 SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)97 BoostQuery (org.apache.lucene.search.BoostQuery)85 Field (org.apache.lucene.document.Field)81 Test (org.junit.Test)75 PrefixQuery (org.apache.lucene.search.PrefixQuery)74 ArrayList (java.util.ArrayList)72 FuzzyQuery (org.apache.lucene.search.FuzzyQuery)62