Search in sources :

Example 21 with IndexReader

use of org.apache.lucene.index.IndexReader in project elasticsearch by elastic.

the class XMoreLikeThisTests method testTopN.

public void testTopN() throws Exception {
    int numDocs = 100;
    int topN = 25;
    // add series of docs with terms of decreasing df
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    for (int i = 0; i < numDocs; i++) {
        addDoc(writer, generateStrSeq(0, i + 1));
    }
    IndexReader reader = writer.getReader();
    writer.close();
    // setup MLT query
    MoreLikeThis mlt = new MoreLikeThis(reader);
    mlt.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false));
    mlt.setMaxQueryTerms(topN);
    mlt.setMinDocFreq(1);
    mlt.setMinTermFreq(1);
    mlt.setMinWordLen(1);
    mlt.setFieldNames(new String[] { "text" });
    // perform MLT query
    String likeText = "";
    for (String text : generateStrSeq(0, numDocs)) {
        likeText += text + " ";
    }
    BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader(likeText));
    // check best terms are topN of highest idf
    List<BooleanClause> clauses = query.clauses();
    assertEquals("Expected" + topN + "clauses only!", topN, clauses.size());
    Term[] expectedTerms = new Term[topN];
    int idx = 0;
    for (String text : generateStrSeq(numDocs - topN, topN)) {
        expectedTerms[idx++] = new Term("text", text);
    }
    for (BooleanClause clause : clauses) {
        Term term = ((TermQuery) clause.getQuery()).getTerm();
        assertTrue(Arrays.asList(expectedTerms).contains(term));
    }
    // clean up
    reader.close();
    dir.close();
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) MoreLikeThis(org.apache.lucene.queries.mlt.MoreLikeThis) Term(org.apache.lucene.index.Term) BooleanClause(org.apache.lucene.search.BooleanClause) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) StringReader(java.io.StringReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 22 with IndexReader

use of org.apache.lucene.index.IndexReader in project elasticsearch by elastic.

the class ShardCoreKeyMapTests method testMissingShard.

public void testMissingShard() throws IOException {
    try (Directory dir = newDirectory();
        RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
        w.addDocument(new Document());
        try (IndexReader reader = w.getReader()) {
            ShardCoreKeyMap map = new ShardCoreKeyMap();
            for (LeafReaderContext ctx : reader.leaves()) {
                try {
                    map.add(ctx.reader());
                    fail();
                } catch (IllegalArgumentException expected) {
                // ok
                }
            }
        }
    }
}
Also used : IndexReader(org.apache.lucene.index.IndexReader) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 23 with IndexReader

use of org.apache.lucene.index.IndexReader in project elasticsearch by elastic.

the class SimpleAllTests method testMultipleTokensAllNoBoost.

public void testMultipleTokensAllNoBoost() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    FieldType allFt = getAllFieldType();
    Document doc = new Document();
    doc.add(new Field("_id", "1", StoredField.TYPE));
    doc.add(new AllField("_all", "something moo", 1.0f, allFt));
    doc.add(new AllField("_all", "else koo", 1.0f, allFt));
    indexWriter.addDocument(doc);
    doc = new Document();
    doc.add(new Field("_id", "2", StoredField.TYPE));
    doc.add(new AllField("_all", "else koo", 1.0f, allFt));
    doc.add(new AllField("_all", "something moo", 1.0f, allFt));
    indexWriter.addDocument(doc);
    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs docs = searcher.search(new AllTermQuery(new Term("_all", "else")), 10);
    assertThat(docs.totalHits, equalTo(2));
    assertThat(docs.scoreDocs[0].doc, equalTo(0));
    assertThat(docs.scoreDocs[1].doc, equalTo(1));
    docs = searcher.search(new AllTermQuery(new Term("_all", "koo")), 10);
    assertThat(docs.totalHits, equalTo(2));
    assertThat(docs.scoreDocs[0].doc, equalTo(0));
    assertThat(docs.scoreDocs[1].doc, equalTo(1));
    docs = searcher.search(new AllTermQuery(new Term("_all", "something")), 10);
    assertThat(docs.totalHits, equalTo(2));
    assertThat(docs.scoreDocs[0].doc, equalTo(0));
    assertThat(docs.scoreDocs[1].doc, equalTo(1));
    docs = searcher.search(new AllTermQuery(new Term("_all", "moo")), 10);
    assertThat(docs.totalHits, equalTo(2));
    assertThat(docs.scoreDocs[0].doc, equalTo(0));
    assertThat(docs.scoreDocs[1].doc, equalTo(1));
    indexWriter.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) StoredField(org.apache.lucene.document.StoredField) Field(org.apache.lucene.document.Field) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 24 with IndexReader

use of org.apache.lucene.index.IndexReader in project elasticsearch by elastic.

the class SimpleAllTests method testMultipleTokensAllWithBoost.

public void testMultipleTokensAllWithBoost() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    FieldType allFt = getAllFieldType();
    Document doc = new Document();
    doc.add(new Field("_id", "1", StoredField.TYPE));
    doc.add(new AllField("_all", "something moo", 1.0f, allFt));
    doc.add(new AllField("_all", "else koo", 1.0f, allFt));
    indexWriter.addDocument(doc);
    doc = new Document();
    doc.add(new Field("_id", "2", StoredField.TYPE));
    doc.add(new AllField("_all", "else koo", 2.0f, allFt));
    doc.add(new AllField("_all", "something moo", 1.0f, allFt));
    indexWriter.addDocument(doc);
    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs docs = searcher.search(new AllTermQuery(new Term("_all", "else")), 10);
    assertThat(docs.totalHits, equalTo(2));
    assertThat(docs.scoreDocs[0].doc, equalTo(1));
    assertThat(docs.scoreDocs[1].doc, equalTo(0));
    docs = searcher.search(new AllTermQuery(new Term("_all", "koo")), 10);
    assertThat(docs.totalHits, equalTo(2));
    assertThat(docs.scoreDocs[0].doc, equalTo(1));
    assertThat(docs.scoreDocs[1].doc, equalTo(0));
    docs = searcher.search(new AllTermQuery(new Term("_all", "something")), 10);
    assertThat(docs.totalHits, equalTo(2));
    assertThat(docs.scoreDocs[0].doc, equalTo(0));
    assertThat(docs.scoreDocs[1].doc, equalTo(1));
    docs = searcher.search(new AllTermQuery(new Term("_all", "moo")), 10);
    assertThat(docs.totalHits, equalTo(2));
    assertThat(docs.scoreDocs[0].doc, equalTo(0));
    assertThat(docs.scoreDocs[1].doc, equalTo(1));
    indexWriter.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) StoredField(org.apache.lucene.document.StoredField) Field(org.apache.lucene.document.Field) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 25 with IndexReader

use of org.apache.lucene.index.IndexReader in project elasticsearch by elastic.

the class SimpleAllTests method testNoTokens.

public void testNoTokens() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.KEYWORD_ANALYZER));
    FieldType allFt = getAllFieldType();
    Document doc = new Document();
    doc.add(new Field("_id", "1", StoredField.TYPE));
    doc.add(new AllField("_all", "", 2.0f, allFt));
    indexWriter.addDocument(doc);
    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs docs = searcher.search(new MatchAllDocsQuery(), 10);
    assertThat(docs.totalHits, equalTo(1));
    assertThat(docs.scoreDocs[0].doc, equalTo(0));
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TopDocs(org.apache.lucene.search.TopDocs) StoredField(org.apache.lucene.document.StoredField) Field(org.apache.lucene.document.Field) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) RAMDirectory(org.apache.lucene.store.RAMDirectory) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) FieldType(org.apache.lucene.document.FieldType)

Aggregations

IndexReader (org.apache.lucene.index.IndexReader)962 Document (org.apache.lucene.document.Document)610 Directory (org.apache.lucene.store.Directory)603 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)549 IndexSearcher (org.apache.lucene.search.IndexSearcher)410 Term (org.apache.lucene.index.Term)332 TopDocs (org.apache.lucene.search.TopDocs)204 TermQuery (org.apache.lucene.search.TermQuery)160 Query (org.apache.lucene.search.Query)158 IndexWriter (org.apache.lucene.index.IndexWriter)150 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)144 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)143 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)142 Field (org.apache.lucene.document.Field)135 BytesRef (org.apache.lucene.util.BytesRef)134 IOException (java.io.IOException)133 BooleanQuery (org.apache.lucene.search.BooleanQuery)122 ArrayList (java.util.ArrayList)108 TextField (org.apache.lucene.document.TextField)81 Test (org.junit.Test)81