Search in sources :

Example 21 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project elasticsearch by elastic.

the class QueryPhaseTests method countTestCase.

/**
 * Builds a randomized index and runs the query-based {@code countTestCase} overload
 * against a fixed set of query shapes (match-all, term, constant-score wrappers and a
 * boolean combination).
 *
 * <p>Each document independently gets {@code foo=bar} and/or {@code foo=baz}. When
 * {@code withDeletions} is set, the first document and a random few others are tagged
 * with {@code delete=yes} and removed before the reader is opened, so the index is
 * guaranteed to contain at least one deletion.
 *
 * <p>All resources are managed with try-with-resources so that the reader, writer and
 * directory are released (in that order) even when one of the delegated checks fails.
 *
 * @param withDeletions whether some documents are deleted before the reader is opened
 * @throws Exception if indexing or one of the delegated checks fails
 */
private void countTestCase(boolean withDeletions) throws Exception {
    try (Directory dir = newDirectory()) {
        // The writer is configured with NoMergePolicy.
        IndexWriterConfig iwc = newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE);
        try (RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc)) {
            final int numDocs = scaledRandomIntBetween(100, 200);
            for (int i = 0; i < numDocs; ++i) {
                Document doc = new Document();
                if (randomBoolean()) {
                    doc.add(new StringField("foo", "bar", Store.NO));
                }
                if (randomBoolean()) {
                    doc.add(new StringField("foo", "baz", Store.NO));
                }
                // i == 0 guarantees at least one document is marked for deletion.
                if (withDeletions && (rarely() || i == 0)) {
                    doc.add(new StringField("delete", "yes", Store.NO));
                }
                w.addDocument(doc);
            }
            if (withDeletions) {
                w.deleteDocuments(new Term("delete", "yes"));
            }
            try (IndexReader reader = w.getReader()) {
                Query matchAll = new MatchAllDocsQuery();
                Query matchAllCsq = new ConstantScoreQuery(matchAll);
                Query tq = new TermQuery(new Term("foo", "bar"));
                Query tCsq = new ConstantScoreQuery(tq);
                BooleanQuery bq = new BooleanQuery.Builder().add(matchAll, Occur.SHOULD).add(tq, Occur.MUST).build();
                // Third argument: false for the match-all variants, withDeletions for the
                // term variants, true for the boolean query.
                // NOTE(review): its meaning is defined by the overload, which is not visible here.
                countTestCase(matchAll, reader, false);
                countTestCase(matchAllCsq, reader, false);
                countTestCase(tq, reader, withDeletions);
                countTestCase(tCsq, reader, withDeletions);
                countTestCase(bq, reader, true);
            }
        }
    }
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) ParsedQuery(org.elasticsearch.index.query.ParsedQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) StringField(org.apache.lucene.document.StringField) IndexReader(org.apache.lucene.index.IndexReader) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 22 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project elasticsearch by elastic.

the class QueryProfilerTests method setup.

/**
 * Populates the static {@code dir}, {@code reader} and {@code searcher} fields once
 * before the tests of this class run.
 *
 * <p>The index holds between 1 and 20 documents with {@code foo=bar}; each of them is
 * preceded by zero to four empty documents.
 *
 * @throws IOException if indexing or opening the reader fails
 */
@BeforeClass
public static void setup() throws IOException {
    dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    int remainingMatches = TestUtil.nextInt(random(), 1, 20);
    while (remainingMatches-- > 0) {
        // Empty documents inserted ahead of every matching document.
        for (int holes = random().nextInt(5); holes > 0; holes--) {
            writer.addDocument(new Document());
        }
        Document matching = new Document();
        matching.add(new StringField("foo", "bar", Store.NO));
        writer.addDocument(matching);
    }
    reader = writer.getReader();
    writer.close();
    IndexSearcher plainSearcher = new IndexSearcher(reader);
    Engine.Searcher wrapped = new Engine.Searcher("test", plainSearcher);
    searcher = new ContextIndexSearcher(wrapped, IndexSearcher.getDefaultQueryCache(), MAYBE_CACHE_POLICY);
}
Also used : ContextIndexSearcher(org.elasticsearch.search.internal.ContextIndexSearcher) IndexSearcher(org.apache.lucene.search.IndexSearcher) StringField(org.apache.lucene.document.StringField) ContextIndexSearcher(org.elasticsearch.search.internal.ContextIndexSearcher) IndexSearcher(org.apache.lucene.search.IndexSearcher) ContextIndexSearcher(org.elasticsearch.search.internal.ContextIndexSearcher) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Engine(org.elasticsearch.index.engine.Engine) BeforeClass(org.junit.BeforeClass)

Example 23 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project elasticsearch by elastic.

the class XMoreLikeThisTests method testTopN.

/**
 * Checks that a {@code MoreLikeThis} query limited to {@code topN} terms contains
 * exactly {@code topN} clauses and that every clause term belongs to the expected
 * set produced by {@code generateStrSeq(numDocs - topN, topN)}.
 *
 * <p>The writer is closed before the reader is used, as in the original flow; the
 * reader and directory are closed even when an assertion fails.
 *
 * @throws Exception if indexing or building the query fails
 */
public void testTopN() throws Exception {
    final int numDocs = 100;
    final int topN = 25;
    try (Directory dir = newDirectory()) {
        final IndexReader reader;
        // add series of docs with terms of decreasing df
        try (RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
            for (int i = 0; i < numDocs; i++) {
                addDoc(writer, generateStrSeq(0, i + 1));
            }
            reader = writer.getReader();
        }
        try (IndexReader openReader = reader) {
            // setup MLT query
            MoreLikeThis mlt = new MoreLikeThis(openReader);
            mlt.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false));
            mlt.setMaxQueryTerms(topN);
            mlt.setMinDocFreq(1);
            mlt.setMinTermFreq(1);
            mlt.setMinWordLen(1);
            mlt.setFieldNames(new String[] { "text" });
            // perform MLT query; every term is followed by a single space
            StringBuilder likeText = new StringBuilder();
            for (String text : generateStrSeq(0, numDocs)) {
                likeText.append(text).append(' ');
            }
            BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader(likeText.toString()));
            // check best terms are topN of highest idf
            List<BooleanClause> clauses = query.clauses();
            assertEquals("Expected " + topN + " clauses only!", topN, clauses.size());
            Term[] expectedTerms = new Term[topN];
            int idx = 0;
            for (String text : generateStrSeq(numDocs - topN, topN)) {
                expectedTerms[idx++] = new Term("text", text);
            }
            // Build the list view once instead of once per clause.
            List<Term> expected = Arrays.asList(expectedTerms);
            for (BooleanClause clause : clauses) {
                Term term = ((TermQuery) clause.getQuery()).getTerm();
                assertTrue("Unexpected term in MLT query: " + term, expected.contains(term));
            }
        }
    }
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) MoreLikeThis(org.apache.lucene.queries.mlt.MoreLikeThis) Term(org.apache.lucene.index.Term) BooleanClause(org.apache.lucene.search.BooleanClause) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) StringReader(java.io.StringReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 24 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project elasticsearch by elastic.

the class ShardCoreKeyMapTests method testMissingShard.

/**
 * Expects {@code ShardCoreKeyMap.add} to throw {@link IllegalArgumentException} for
 * every leaf of a reader taken straight from the writer.
 *
 * <p>NOTE(review): presumably the rejection is because the reader is not wrapped with a
 * shard id (compare {@code testBasics}, which wraps its readers) — confirm against
 * {@code ShardCoreKeyMap}.
 *
 * @throws IOException if indexing or opening the reader fails
 */
public void testMissingShard() throws IOException {
    try (Directory directory = newDirectory()) {
        try (RandomIndexWriter writer = new RandomIndexWriter(random(), directory)) {
            writer.addDocument(new Document());
            try (IndexReader plainReader = writer.getReader()) {
                final ShardCoreKeyMap keyMap = new ShardCoreKeyMap();
                for (LeafReaderContext leaf : plainReader.leaves()) {
                    boolean rejected = false;
                    try {
                        keyMap.add(leaf.reader());
                    } catch (IllegalArgumentException expected) {
                        // ok
                        rejected = true;
                    }
                    if (!rejected) {
                        fail();
                    }
                }
            }
        }
    }
}
Also used : IndexReader(org.apache.lucene.index.IndexReader) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 25 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project elasticsearch by elastic.

the class ShardCoreKeyMapTests method testBasics.

/**
 * Walks a {@code ShardCoreKeyMap} through adding leaf readers, re-adding them,
 * reopening readers after further writes, and finally closing everything.
 *
 * <p>Three separate indices are created, each wrapped with its own {@code ShardId}.
 * The test asserts the map size after the initial adds and after a repeated add, the
 * shard id lookup for reader2's core cache keys, the key set reported for
 * {@code "index1"}, and that the map is empty once all readers and writers are closed.
 *
 * @throws IOException if indexing or opening/closing a reader fails
 */
public void testBasics() throws IOException {
    // Three independent directories/writers, each starting with one empty document.
    Directory dir1 = newDirectory();
    RandomIndexWriter w1 = new RandomIndexWriter(random(), dir1);
    w1.addDocument(new Document());
    Directory dir2 = newDirectory();
    RandomIndexWriter w2 = new RandomIndexWriter(random(), dir2);
    w2.addDocument(new Document());
    Directory dir3 = newDirectory();
    RandomIndexWriter w3 = new RandomIndexWriter(random(), dir3);
    w3.addDocument(new Document());
    // The first two shard ids are built with "index1", the third with "index2".
    ShardId shardId1 = new ShardId("index1", "_na_", 1);
    ShardId shardId2 = new ShardId("index1", "_na_", 3);
    ShardId shardId3 = new ShardId("index2", "_na_", 2);
    ElasticsearchDirectoryReader reader1 = ElasticsearchDirectoryReader.wrap(w1.getReader(), shardId1);
    ElasticsearchDirectoryReader reader2 = ElasticsearchDirectoryReader.wrap(w2.getReader(), shardId2);
    ElasticsearchDirectoryReader reader3 = ElasticsearchDirectoryReader.wrap(w3.getReader(), shardId3);
    ShardCoreKeyMap map = new ShardCoreKeyMap();
    // Register every leaf of every reader.
    for (DirectoryReader reader : Arrays.asList(reader1, reader2, reader3)) {
        for (LeafReaderContext ctx : reader.leaves()) {
            map.add(ctx.reader());
        }
    }
    // The test expects exactly three entries here.
    // NOTE(review): presumably one leaf per single-document index — confirm.
    assertEquals(3, map.size());
    // Adding them back is a no-op
    for (LeafReaderContext ctx : reader1.leaves()) {
        map.add(ctx.reader());
    }
    assertEquals(3, map.size());
    // Every core cache key of reader2 must resolve to shardId2.
    for (LeafReaderContext ctx : reader2.leaves()) {
        assertEquals(shardId2, map.getShardId(ctx.reader().getCoreCacheKey()));
    }
    // Reopen reader1 after another write; the new reader is opened before the old one is closed.
    w1.addDocument(new Document());
    ElasticsearchDirectoryReader newReader1 = ElasticsearchDirectoryReader.wrap(w1.getReader(), shardId1);
    reader1.close();
    reader1 = newReader1;
    // same for reader2, but with a force merge to trigger evictions
    w2.addDocument(new Document());
    w2.forceMerge(1);
    ElasticsearchDirectoryReader newReader2 = ElasticsearchDirectoryReader.wrap(w2.getReader(), shardId2);
    reader2.close();
    reader2 = newReader2;
    // Register the leaves of the current readers (reader3 is unchanged).
    for (DirectoryReader reader : Arrays.asList(reader1, reader2, reader3)) {
        for (LeafReaderContext ctx : reader.leaves()) {
            map.add(ctx.reader());
        }
    }
    // Collect the core cache keys of the two readers whose shard ids use "index1" ...
    final Set<Object> index1Keys = new HashSet<>();
    for (DirectoryReader reader : Arrays.asList(reader1, reader2)) {
        for (LeafReaderContext ctx : reader.leaves()) {
            index1Keys.add(ctx.reader().getCoreCacheKey());
        }
    }
    // ... and check that the map reports all of them for "index1" (nothing is left after removal).
    index1Keys.removeAll(map.getCoreKeysForIndex("index1"));
    assertEquals(Collections.emptySet(), index1Keys);
    // Close each reader before its writer; directories are closed last, after the size check.
    reader1.close();
    w1.close();
    reader2.close();
    w2.close();
    reader3.close();
    w3.close();
    // With every reader and writer closed the map is expected to be empty.
    // NOTE(review): presumably entries are dropped when their core is closed — confirm in ShardCoreKeyMap.
    assertEquals(0, map.size());
    dir1.close();
    dir2.close();
    dir3.close();
}
Also used : ShardId(org.elasticsearch.index.shard.ShardId) ElasticsearchDirectoryReader(org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader) DirectoryReader(org.apache.lucene.index.DirectoryReader) ElasticsearchDirectoryReader(org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet)

Aggregations

RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter) 775, Document (org.apache.lucene.document.Document) 675, Directory (org.apache.lucene.store.Directory) 584, IndexReader (org.apache.lucene.index.IndexReader) 508, Term (org.apache.lucene.index.Term) 324, IndexSearcher (org.apache.lucene.search.IndexSearcher) 294, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer) 220, BytesRef (org.apache.lucene.util.BytesRef) 142, Field (org.apache.lucene.document.Field) 140, MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery) 136, TopDocs (org.apache.lucene.search.TopDocs) 134, TermQuery (org.apache.lucene.search.TermQuery) 121, DirectoryReader (org.apache.lucene.index.DirectoryReader) 119, IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig) 110, ArrayList (java.util.ArrayList) 91, StringField (org.apache.lucene.document.StringField) 89, Analyzer (org.apache.lucene.analysis.Analyzer) 88, BooleanQuery (org.apache.lucene.search.BooleanQuery) 88, NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField) 76, Query (org.apache.lucene.search.Query) 73