Search in sources :

Example 56 with Directory

use of org.apache.lucene.store.Directory in project elasticsearch by elastic.

the class XMoreLikeThisTests method testTopN.

public void testTopN() throws Exception {
    int numDocs = 100;
    int topN = 25;
    // add series of docs with terms of decreasing df
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    for (int i = 0; i < numDocs; i++) {
        addDoc(writer, generateStrSeq(0, i + 1));
    }
    IndexReader reader = writer.getReader();
    writer.close();
    // setup MLT query
    MoreLikeThis mlt = new MoreLikeThis(reader);
    mlt.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false));
    mlt.setMaxQueryTerms(topN);
    mlt.setMinDocFreq(1);
    mlt.setMinTermFreq(1);
    mlt.setMinWordLen(1);
    mlt.setFieldNames(new String[] { "text" });
    // perform MLT query
    String likeText = "";
    for (String text : generateStrSeq(0, numDocs)) {
        likeText += text + " ";
    }
    BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader(likeText));
    // check best terms are topN of highest idf
    List<BooleanClause> clauses = query.clauses();
    assertEquals("Expected" + topN + "clauses only!", topN, clauses.size());
    Term[] expectedTerms = new Term[topN];
    int idx = 0;
    for (String text : generateStrSeq(numDocs - topN, topN)) {
        expectedTerms[idx++] = new Term("text", text);
    }
    for (BooleanClause clause : clauses) {
        Term term = ((TermQuery) clause.getQuery()).getTerm();
        assertTrue(Arrays.asList(expectedTerms).contains(term));
    }
    // clean up
    reader.close();
    dir.close();
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) MoreLikeThis(org.apache.lucene.queries.mlt.MoreLikeThis) Term(org.apache.lucene.index.Term) BooleanClause(org.apache.lucene.search.BooleanClause) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) StringReader(java.io.StringReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 57 with Directory

use of org.apache.lucene.store.Directory in project elasticsearch by elastic.

the class ShardCoreKeyMapTests method testMissingShard.

public void testMissingShard() throws IOException {
    try (Directory dir = newDirectory();
        RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
        w.addDocument(new Document());
        try (IndexReader reader = w.getReader()) {
            ShardCoreKeyMap map = new ShardCoreKeyMap();
            for (LeafReaderContext ctx : reader.leaves()) {
                try {
                    map.add(ctx.reader());
                    fail();
                } catch (IllegalArgumentException expected) {
                // ok
                }
            }
        }
    }
}
Also used : IndexReader(org.apache.lucene.index.IndexReader) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 58 with Directory

use of org.apache.lucene.store.Directory in project elasticsearch by elastic.

the class ShardCoreKeyMapTests method testBasics.

public void testBasics() throws IOException {
    Directory dir1 = newDirectory();
    RandomIndexWriter w1 = new RandomIndexWriter(random(), dir1);
    w1.addDocument(new Document());
    Directory dir2 = newDirectory();
    RandomIndexWriter w2 = new RandomIndexWriter(random(), dir2);
    w2.addDocument(new Document());
    Directory dir3 = newDirectory();
    RandomIndexWriter w3 = new RandomIndexWriter(random(), dir3);
    w3.addDocument(new Document());
    ShardId shardId1 = new ShardId("index1", "_na_", 1);
    ShardId shardId2 = new ShardId("index1", "_na_", 3);
    ShardId shardId3 = new ShardId("index2", "_na_", 2);
    ElasticsearchDirectoryReader reader1 = ElasticsearchDirectoryReader.wrap(w1.getReader(), shardId1);
    ElasticsearchDirectoryReader reader2 = ElasticsearchDirectoryReader.wrap(w2.getReader(), shardId2);
    ElasticsearchDirectoryReader reader3 = ElasticsearchDirectoryReader.wrap(w3.getReader(), shardId3);
    ShardCoreKeyMap map = new ShardCoreKeyMap();
    for (DirectoryReader reader : Arrays.asList(reader1, reader2, reader3)) {
        for (LeafReaderContext ctx : reader.leaves()) {
            map.add(ctx.reader());
        }
    }
    assertEquals(3, map.size());
    // Adding them back is a no-op
    for (LeafReaderContext ctx : reader1.leaves()) {
        map.add(ctx.reader());
    }
    assertEquals(3, map.size());
    for (LeafReaderContext ctx : reader2.leaves()) {
        assertEquals(shardId2, map.getShardId(ctx.reader().getCoreCacheKey()));
    }
    w1.addDocument(new Document());
    ElasticsearchDirectoryReader newReader1 = ElasticsearchDirectoryReader.wrap(w1.getReader(), shardId1);
    reader1.close();
    reader1 = newReader1;
    // same for reader2, but with a force merge to trigger evictions
    w2.addDocument(new Document());
    w2.forceMerge(1);
    ElasticsearchDirectoryReader newReader2 = ElasticsearchDirectoryReader.wrap(w2.getReader(), shardId2);
    reader2.close();
    reader2 = newReader2;
    for (DirectoryReader reader : Arrays.asList(reader1, reader2, reader3)) {
        for (LeafReaderContext ctx : reader.leaves()) {
            map.add(ctx.reader());
        }
    }
    final Set<Object> index1Keys = new HashSet<>();
    for (DirectoryReader reader : Arrays.asList(reader1, reader2)) {
        for (LeafReaderContext ctx : reader.leaves()) {
            index1Keys.add(ctx.reader().getCoreCacheKey());
        }
    }
    index1Keys.removeAll(map.getCoreKeysForIndex("index1"));
    assertEquals(Collections.emptySet(), index1Keys);
    reader1.close();
    w1.close();
    reader2.close();
    w2.close();
    reader3.close();
    w3.close();
    assertEquals(0, map.size());
    dir1.close();
    dir2.close();
    dir3.close();
}
Also used : ShardId(org.elasticsearch.index.shard.ShardId) ElasticsearchDirectoryReader(org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader) DirectoryReader(org.apache.lucene.index.DirectoryReader) ElasticsearchDirectoryReader(org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet)

Example 59 with Directory

use of org.apache.lucene.store.Directory in project elasticsearch by elastic.

the class SimpleAllTests method testMultipleTokensAllNoBoost.

public void testMultipleTokensAllNoBoost() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    FieldType allFt = getAllFieldType();
    Document doc = new Document();
    doc.add(new Field("_id", "1", StoredField.TYPE));
    doc.add(new AllField("_all", "something moo", 1.0f, allFt));
    doc.add(new AllField("_all", "else koo", 1.0f, allFt));
    indexWriter.addDocument(doc);
    doc = new Document();
    doc.add(new Field("_id", "2", StoredField.TYPE));
    doc.add(new AllField("_all", "else koo", 1.0f, allFt));
    doc.add(new AllField("_all", "something moo", 1.0f, allFt));
    indexWriter.addDocument(doc);
    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs docs = searcher.search(new AllTermQuery(new Term("_all", "else")), 10);
    assertThat(docs.totalHits, equalTo(2));
    assertThat(docs.scoreDocs[0].doc, equalTo(0));
    assertThat(docs.scoreDocs[1].doc, equalTo(1));
    docs = searcher.search(new AllTermQuery(new Term("_all", "koo")), 10);
    assertThat(docs.totalHits, equalTo(2));
    assertThat(docs.scoreDocs[0].doc, equalTo(0));
    assertThat(docs.scoreDocs[1].doc, equalTo(1));
    docs = searcher.search(new AllTermQuery(new Term("_all", "something")), 10);
    assertThat(docs.totalHits, equalTo(2));
    assertThat(docs.scoreDocs[0].doc, equalTo(0));
    assertThat(docs.scoreDocs[1].doc, equalTo(1));
    docs = searcher.search(new AllTermQuery(new Term("_all", "moo")), 10);
    assertThat(docs.totalHits, equalTo(2));
    assertThat(docs.scoreDocs[0].doc, equalTo(0));
    assertThat(docs.scoreDocs[1].doc, equalTo(1));
    indexWriter.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) StoredField(org.apache.lucene.document.StoredField) Field(org.apache.lucene.document.Field) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 60 with Directory

use of org.apache.lucene.store.Directory in project elasticsearch by elastic.

the class SimpleAllTests method testMultipleTokensAllWithBoost.

public void testMultipleTokensAllWithBoost() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    FieldType allFt = getAllFieldType();
    Document doc = new Document();
    doc.add(new Field("_id", "1", StoredField.TYPE));
    doc.add(new AllField("_all", "something moo", 1.0f, allFt));
    doc.add(new AllField("_all", "else koo", 1.0f, allFt));
    indexWriter.addDocument(doc);
    doc = new Document();
    doc.add(new Field("_id", "2", StoredField.TYPE));
    doc.add(new AllField("_all", "else koo", 2.0f, allFt));
    doc.add(new AllField("_all", "something moo", 1.0f, allFt));
    indexWriter.addDocument(doc);
    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs docs = searcher.search(new AllTermQuery(new Term("_all", "else")), 10);
    assertThat(docs.totalHits, equalTo(2));
    assertThat(docs.scoreDocs[0].doc, equalTo(1));
    assertThat(docs.scoreDocs[1].doc, equalTo(0));
    docs = searcher.search(new AllTermQuery(new Term("_all", "koo")), 10);
    assertThat(docs.totalHits, equalTo(2));
    assertThat(docs.scoreDocs[0].doc, equalTo(1));
    assertThat(docs.scoreDocs[1].doc, equalTo(0));
    docs = searcher.search(new AllTermQuery(new Term("_all", "something")), 10);
    assertThat(docs.totalHits, equalTo(2));
    assertThat(docs.scoreDocs[0].doc, equalTo(0));
    assertThat(docs.scoreDocs[1].doc, equalTo(1));
    docs = searcher.search(new AllTermQuery(new Term("_all", "moo")), 10);
    assertThat(docs.totalHits, equalTo(2));
    assertThat(docs.scoreDocs[0].doc, equalTo(0));
    assertThat(docs.scoreDocs[1].doc, equalTo(1));
    indexWriter.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) StoredField(org.apache.lucene.document.StoredField) Field(org.apache.lucene.document.Field) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

Directory (org.apache.lucene.store.Directory)2194 Document (org.apache.lucene.document.Document)1374 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)816 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)669 IndexReader (org.apache.lucene.index.IndexReader)590 IndexSearcher (org.apache.lucene.search.IndexSearcher)383 BytesRef (org.apache.lucene.util.BytesRef)376 RAMDirectory (org.apache.lucene.store.RAMDirectory)360 Term (org.apache.lucene.index.Term)325 StringField (org.apache.lucene.document.StringField)313 IndexWriter (org.apache.lucene.index.IndexWriter)312 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)274 TextField (org.apache.lucene.document.TextField)259 Field (org.apache.lucene.document.Field)257 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)257 Test (org.junit.Test)237 FSDirectory (org.apache.lucene.store.FSDirectory)221 Analyzer (org.apache.lucene.analysis.Analyzer)193 DirectoryReader (org.apache.lucene.index.DirectoryReader)193 TopDocs (org.apache.lucene.search.TopDocs)174