Search in sources :

Example 66 with DirectoryReader

use of org.apache.lucene.index.DirectoryReader in project lucene-solr by apache.

the class TestSuggestField method testExtremeDeduplication.

/**
 * Indexes at least 5000 documents that all suggest {@code "abc"} with random weights in
 * [10, 100], plus one document suggesting {@code "abd"} with weight 7, then checks that a
 * top-2 lookup for the prefix {@code "a"} yields exactly {@code "abc"} (carrying the best
 * weight that was indexed) followed by {@code "abd"}.
 *
 * <p>The analyzer, writer and reader are held in try-with-resources blocks so that they are
 * released even when an assertion fails; they are closed in reverse order of creation
 * (reader, then writer, then analyzer).
 *
 * @throws Exception if indexing or searching fails
 */
public void testExtremeDeduplication() throws Exception {
    try (Analyzer analyzer = new MockAnalyzer(random());
            RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"))) {
        final int num = atLeast(5000);
        // track the highest weight handed out: the single surviving "abc" entry must carry it
        int bestWeight = Integer.MIN_VALUE;
        for (int i = 0; i < num; i++) {
            Document document = new Document();
            int weight = TestUtil.nextInt(random(), 10, 100);
            bestWeight = Math.max(weight, bestWeight);
            document.add(new SuggestField("suggest_field", "abc", weight));
            iw.addDocument(document);
            // occasional commits spread the duplicates over several segments
            if (rarely()) {
                iw.commit();
            }
        }
        // one distinct suggestion whose weight (7) is below every "abc" weight (>= 10)
        Document document = new Document();
        document.add(new SuggestField("suggest_field", "abd", 7));
        iw.addDocument(document);
        if (random().nextBoolean()) {
            iw.forceMerge(1);
        }
        try (DirectoryReader reader = iw.getReader()) {
            Entry[] expectedEntries = new Entry[2];
            expectedEntries[0] = new Entry("abc", bestWeight);
            expectedEntries[1] = new Entry("abd", 7);
            SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
            PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "a"));
            // NOTE(review): the boolean is presumably the skip-duplicates flag - confirm against the collector
            TopSuggestDocsCollector collector = new TopSuggestDocsCollector(2, true);
            suggestIndexSearcher.suggest(query, collector);
            TopSuggestDocs lookupDocs = collector.get();
            assertSuggestions(lookupDocs, expectedEntries);
        }
    }
}
Also used : DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) IntPoint(org.apache.lucene.document.IntPoint) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 67 with DirectoryReader

use of org.apache.lucene.index.DirectoryReader in project lucene-solr by apache.

the class TestSuggestField method testSuggestOnAllDeletedDocuments.

/**
 * Indexes documents that each carry a suggestion {@code "abc_" + i} and a shared
 * {@code delete:delete} term, deletes all of them through that term, and verifies that a
 * prefix lookup for {@code "abc_"} reports zero total hits.
 *
 * <p>The analyzer, writer and reader are held in try-with-resources blocks so that they are
 * released even when the assertion fails; they are closed in reverse order of creation
 * (reader, then writer, then analyzer).
 *
 * @throws Exception if indexing or searching fails
 */
@Test
public void testSuggestOnAllDeletedDocuments() throws Exception {
    try (Analyzer analyzer = new MockAnalyzer(random());
            // using IndexWriter instead of RandomIndexWriter
            IndexWriter iw = new IndexWriter(dir, iwcWithSuggestField(analyzer, "suggest_field"))) {
        int num = Math.min(1000, atLeast(10));
        for (int i = 0; i < num; i++) {
            Document document = new Document();
            document.add(new SuggestField("suggest_field", "abc_" + i, i));
            // every document shares this term so one deleteDocuments call removes them all
            document.add(newStringField("delete", "delete", Field.Store.NO));
            iw.addDocument(document);
            if (usually()) {
                iw.commit();
            }
        }
        iw.deleteDocuments(new Term("delete", "delete"));
        try (DirectoryReader reader = DirectoryReader.open(iw)) {
            SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
            PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
            TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
            assertThat(suggest.totalHits, equalTo(0));
        }
    }
}
Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) Analyzer(org.apache.lucene.analysis.Analyzer) Document(org.apache.lucene.document.Document) IntPoint(org.apache.lucene.document.IntPoint) Test(org.junit.Test)

Example 68 with DirectoryReader

use of org.apache.lucene.index.DirectoryReader in project lucene-solr by apache.

the class TestUtil method addIndexesSlowly.

/**
 * Adds the contents of the given readers to {@code writer}, one leaf at a time.
 *
 * <p>Each leaf reader of every supplied {@link DirectoryReader} is passed through
 * {@code SlowCodecReaderWrapper.wrap}, and the resulting {@link CodecReader}s are handed to
 * {@code writer.addIndexes} in a single call.
 *
 * @param writer the writer that receives the wrapped leaves
 * @param readers the readers whose leaves are added, in the order given
 * @throws IOException if wrapping a leaf or adding the indexes fails
 */
public static void addIndexesSlowly(IndexWriter writer, DirectoryReader... readers) throws IOException {
    final List<CodecReader> wrapped = new ArrayList<>();
    for (int i = 0; i < readers.length; i++) {
        final List<LeafReaderContext> segments = readers[i].leaves();
        for (final LeafReaderContext segment : segments) {
            final CodecReader codecReader = SlowCodecReaderWrapper.wrap(segment.reader());
            wrapped.add(codecReader);
        }
    }
    final CodecReader[] asArray = wrapped.toArray(new CodecReader[wrapped.size()]);
    writer.addIndexes(asArray);
}
Also used : CodecReader(org.apache.lucene.index.CodecReader) DirectoryReader(org.apache.lucene.index.DirectoryReader) ArrayList(java.util.ArrayList) LeafReaderContext(org.apache.lucene.index.LeafReaderContext)

Example 69 with DirectoryReader

use of org.apache.lucene.index.DirectoryReader in project lucene-solr by apache.

the class TestCompressingTermVectorsFormat method testChunkCleanup.

/**
 * Writes some tiny segments with incomplete compressed blocks,
 * and ensures merge recompresses them.
 *
 * <p>The directory and writer are managed by try-with-resources and the current reader is
 * closed in a {@code finally} block, so nothing is left open when an assertion fails
 * part-way through. On success the close order is reader, writer, directory.
 *
 * @throws IOException if indexing, reopening or merging fails
 */
public void testChunkCleanup() throws IOException {
    try (Directory dir = newDirectory()) {
        IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
        // merging is switched off while the small segments are being written
        iwConf.setMergePolicy(NoMergePolicy.INSTANCE);
        // we have to enforce certain things like maxDocsPerChunk to cause dirty chunks to be created
        // by this test.
        iwConf.setCodec(CompressingCodec.randomInstance(random(), 4 * 1024, 100, false, 8));
        try (IndexWriter iw = new IndexWriter(dir, iwConf)) {
            // not a try-with-resources variable: ir is swapped for a fresh reader after every reopen
            DirectoryReader ir = DirectoryReader.open(iw);
            try {
                for (int i = 0; i < 5; i++) {
                    Document doc = new Document();
                    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
                    ft.setStoreTermVectors(true);
                    doc.add(new Field("text", "not very long at all", ft));
                    iw.addDocument(doc);
                    // force flush
                    DirectoryReader ir2 = DirectoryReader.openIfChanged(ir);
                    assertNotNull(ir2);
                    ir.close();
                    ir = ir2;
                    // examine dirty counts:
                    for (LeafReaderContext leaf : ir2.leaves()) {
                        CodecReader sr = (CodecReader) leaf.reader();
                        CompressingTermVectorsReader reader = (CompressingTermVectorsReader) sr.getTermVectorsReader();
                        assertEquals(1, reader.getNumChunks());
                        assertEquals(1, reader.getNumDirtyChunks());
                    }
                }
                // re-enable merging and collapse everything into a single segment
                iw.getConfig().setMergePolicy(newLogMergePolicy());
                iw.forceMerge(1);
                DirectoryReader ir2 = DirectoryReader.openIfChanged(ir);
                assertNotNull(ir2);
                ir.close();
                ir = ir2;
                CodecReader sr = (CodecReader) getOnlyLeafReader(ir);
                CompressingTermVectorsReader reader = (CompressingTermVectorsReader) sr.getTermVectorsReader();
                // we could get lucky, and have zero, but typically one.
                assertTrue(reader.getNumDirtyChunks() <= 1);
            } finally {
                ir.close();
            }
        }
    }
}
Also used : Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) CodecReader(org.apache.lucene.index.CodecReader) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) DirectoryReader(org.apache.lucene.index.DirectoryReader) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Document(org.apache.lucene.document.Document) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) FieldType(org.apache.lucene.document.FieldType)

Example 70 with DirectoryReader

use of org.apache.lucene.index.DirectoryReader in project lucene-solr by apache.

the class DocSetUtil method createDocSet.

/**
 * Builds a {@link DocSet} for the documents matching a single term.
 *
 * <p>Every leaf of the searcher's raw reader is probed for {@code term}. Leaves that contain it
 * contribute their {@code docFreq} to a running total and a {@link PostingsEnum} to a per-leaf
 * array indexed by {@code ctx.ord}. The total then selects the representation:
 * {@code DocSet.EMPTY} when it is zero, {@code createSmallSet} when it does not exceed
 * {@code smallSetSize(maxDoc)}, and {@code createBigSet} otherwise. The chosen set is finally
 * passed through {@code DocSetUtil.getDocSet(answer, searcher)}.
 *
 * @param searcher the searcher supplying the raw reader and {@code maxDoc}
 * @param term the field/value pair to look up
 * @return the value returned by {@code DocSetUtil.getDocSet} for the collected set
 * @throws IOException if reading terms or postings fails
 */
public static DocSet createDocSet(SolrIndexSearcher searcher, Term term) throws IOException {
    // raw reader to avoid extra wrapping overhead
    final DirectoryReader rawReader = searcher.getRawReader();
    final int maxDoc = searcher.getIndexReader().maxDoc();
    final int smallLimit = smallSetSize(maxDoc);
    final String fieldName = term.field();
    final BytesRef termBytes = term.bytes();

    final List<LeafReaderContext> leaves = rawReader.leaves();
    // use array for slightly higher scanning cost, but fewer memory allocations
    final PostingsEnum[] postingsPerLeaf = new PostingsEnum[leaves.size()];
    int docFreqTotal = 0;
    int firstLeafWithTerm = -1;

    for (LeafReaderContext ctx : leaves) {
        assert leaves.get(ctx.ord) == ctx;
        final LeafReader leafReader = ctx.reader();
        final Fields leafFields = leafReader.fields();
        final Terms fieldTerms = leafFields.terms(fieldName);
        // a null Terms means the field is missing from this leaf
        if (fieldTerms != null) {
            final TermsEnum termsEnum = fieldTerms.iterator();
            if (termsEnum.seekExact(termBytes)) {
                docFreqTotal += termsEnum.docFreq();
                postingsPerLeaf[ctx.ord] = termsEnum.postings(null, PostingsEnum.NONE);
                if (firstLeafWithTerm < 0) {
                    firstLeafWithTerm = ctx.ord;
                }
            }
        }
    }

    final DocSet collected;
    if (docFreqTotal == 0) {
        collected = DocSet.EMPTY;
    } else if (docFreqTotal <= smallLimit) {
        collected = createSmallSet(leaves, postingsPerLeaf, docFreqTotal, firstLeafWithTerm);
    } else {
        collected = createBigSet(leaves, postingsPerLeaf, maxDoc, firstLeafWithTerm);
    }
    return DocSetUtil.getDocSet(collected, searcher);
}
Also used : LeafReader(org.apache.lucene.index.LeafReader) DirectoryReader(org.apache.lucene.index.DirectoryReader) Terms(org.apache.lucene.index.Terms) TermsEnum(org.apache.lucene.index.TermsEnum) Fields(org.apache.lucene.index.Fields) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) PostingsEnum(org.apache.lucene.index.PostingsEnum) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

DirectoryReader (org.apache.lucene.index.DirectoryReader)362 Document (org.apache.lucene.document.Document)228 Directory (org.apache.lucene.store.Directory)206 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)149 IndexWriter (org.apache.lucene.index.IndexWriter)139 Term (org.apache.lucene.index.Term)134 IndexSearcher (org.apache.lucene.search.IndexSearcher)101 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)98 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)96 Test (org.junit.Test)64 StringField (org.apache.lucene.document.StringField)61 Analyzer (org.apache.lucene.analysis.Analyzer)54 BytesRef (org.apache.lucene.util.BytesRef)51 LeafReader (org.apache.lucene.index.LeafReader)49 ArrayList (java.util.ArrayList)46 Field (org.apache.lucene.document.Field)45 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)44 TermQuery (org.apache.lucene.search.TermQuery)42 IOException (java.io.IOException)37 TextField (org.apache.lucene.document.TextField)36