Search in sources :

Example 56 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.

the class TestNRTCachingDirectory method testNRTAndCommit.

public void testNRTAndCommit() throws Exception {
    Directory dir = newDirectory();
    NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0);
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    IndexWriterConfig conf = newIndexWriterConfig(analyzer);
    RandomIndexWriter w = new RandomIndexWriter(random(), cachedDir, conf);
    final LineFileDocs docs = new LineFileDocs(random());
    final int numDocs = TestUtil.nextInt(random(), 100, 400);
    if (VERBOSE) {
        System.out.println("TEST: numDocs=" + numDocs);
    }
    final List<BytesRef> ids = new ArrayList<>();
    DirectoryReader r = null;
    for (int docCount = 0; docCount < numDocs; docCount++) {
        final Document doc = docs.nextDoc();
        ids.add(new BytesRef(doc.get("docid")));
        w.addDocument(doc);
        if (random().nextInt(20) == 17) {
            if (r == null) {
                r = DirectoryReader.open(w.w);
            } else {
                final DirectoryReader r2 = DirectoryReader.openIfChanged(r);
                if (r2 != null) {
                    r.close();
                    r = r2;
                }
            }
            assertEquals(1 + docCount, r.numDocs());
            final IndexSearcher s = newSearcher(r);
            // Just make sure search can run; we can't assert
            // totHits since it could be 0
            TopDocs hits = s.search(new TermQuery(new Term("body", "the")), 10);
        // System.out.println("tot hits " + hits.totalHits);
        }
    }
    if (r != null) {
        r.close();
    }
    // Close should force cache to clear since all files are sync'd
    w.close();
    final String[] cachedFiles = cachedDir.listCachedFiles();
    for (String file : cachedFiles) {
        System.out.println("FAIL: cached file " + file + " remains after sync");
    }
    assertEquals(0, cachedFiles.length);
    r = DirectoryReader.open(dir);
    for (BytesRef id : ids) {
        assertEquals(1, r.docFreq(new Term("docid", id)));
    }
    r.close();
    cachedDir.close();
    docs.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) DirectoryReader(org.apache.lucene.index.DirectoryReader) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) TopDocs(org.apache.lucene.search.TopDocs) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) LineFileDocs(org.apache.lucene.util.LineFileDocs)

Example 57 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.

the class TestSimilarity2 method testEmptyField.

/** similar to the above, but ORs the query with a real field */
public void testEmptyField() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    doc.add(newTextField("foo", "bar", Field.Store.NO));
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();
    IndexSearcher is = newSearcher(ir);
    for (Similarity sim : sims) {
        is.setSimilarity(sim);
        BooleanQuery.Builder query = new BooleanQuery.Builder();
        query.add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD);
        query.add(new TermQuery(new Term("bar", "baz")), BooleanClause.Occur.SHOULD);
        assertEquals(1, is.search(query.build(), 10).totalHits);
    }
    ir.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) IndexReader(org.apache.lucene.index.IndexReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 58 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.

the class TestSimilarity2 method testEmptyIndex.

/** because of stupid things like querynorm, it's possible we computeStats on a field that doesnt exist at all
   *  test this against a totally empty index, to make sure sims handle it
   */
public void testEmptyIndex() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
    IndexReader ir = iw.getReader();
    iw.close();
    IndexSearcher is = newSearcher(ir);
    for (Similarity sim : sims) {
        is.setSimilarity(sim);
        assertEquals(0, is.search(new TermQuery(new Term("foo", "bar")), 10).totalHits);
    }
    ir.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) IndexReader(org.apache.lucene.index.IndexReader) Term(org.apache.lucene.index.Term) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 59 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.

the class TestSimilarity2 method testEmptyTerm.

/** similar to the above, however the field exists, but we query with a term that doesnt exist too */
public void testEmptyTerm() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    doc.add(newTextField("foo", "bar", Field.Store.NO));
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();
    IndexSearcher is = newSearcher(ir);
    for (Similarity sim : sims) {
        is.setSimilarity(sim);
        BooleanQuery.Builder query = new BooleanQuery.Builder();
        query.add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD);
        query.add(new TermQuery(new Term("foo", "baz")), BooleanClause.Occur.SHOULD);
        assertEquals(1, is.search(query.build(), 10).totalHits);
    }
    ir.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) IndexReader(org.apache.lucene.index.IndexReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 60 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.

the class TestSimilarity2 method testNoFieldSkew.

/** make sure scores are not skewed by docs not containing the field */
public void testNoFieldSkew() throws Exception {
    Directory dir = newDirectory();
    // an evil merge policy could reorder our docs for no reason
    IndexWriterConfig iwConfig = newIndexWriterConfig().setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConfig);
    Document doc = new Document();
    doc.add(newTextField("foo", "bar baz somethingelse", Field.Store.NO));
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    IndexSearcher is = newSearcher(ir);
    BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
    queryBuilder.add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD);
    queryBuilder.add(new TermQuery(new Term("foo", "baz")), BooleanClause.Occur.SHOULD);
    Query query = queryBuilder.build();
    // collect scores
    List<Explanation> scores = new ArrayList<>();
    for (Similarity sim : sims) {
        is.setSimilarity(sim);
        scores.add(is.explain(query, 0));
    }
    ir.close();
    // add some additional docs without the field
    int numExtraDocs = TestUtil.nextInt(random(), 1, 1000);
    for (int i = 0; i < numExtraDocs; i++) {
        iw.addDocument(new Document());
    }
    // check scores are the same
    ir = iw.getReader();
    is = newSearcher(ir);
    for (int i = 0; i < sims.size(); i++) {
        is.setSimilarity(sims.get(i));
        Explanation expected = scores.get(i);
        Explanation actual = is.explain(query, 0);
        assertEquals(sims.get(i).toString() + ": actual=" + actual + ",expected=" + expected, expected.getValue(), actual.getValue(), 0F);
    }
    iw.close();
    ir.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) Explanation(org.apache.lucene.search.Explanation) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)779 Document (org.apache.lucene.document.Document)679 Directory (org.apache.lucene.store.Directory)588 IndexReader (org.apache.lucene.index.IndexReader)510 Term (org.apache.lucene.index.Term)325 IndexSearcher (org.apache.lucene.search.IndexSearcher)294 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)220 BytesRef (org.apache.lucene.util.BytesRef)142 Field (org.apache.lucene.document.Field)141 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)136 TopDocs (org.apache.lucene.search.TopDocs)134 TermQuery (org.apache.lucene.search.TermQuery)121 DirectoryReader (org.apache.lucene.index.DirectoryReader)120 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)110 ArrayList (java.util.ArrayList)95 StringField (org.apache.lucene.document.StringField)93 Analyzer (org.apache.lucene.analysis.Analyzer)88 BooleanQuery (org.apache.lucene.search.BooleanQuery)88 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)77 Test (org.junit.Test)75