Examples with DirectoryReader - org.apache.lucene.index.DirectoryReader

Example 76 with DirectoryReader

Use of org.apache.lucene.index.DirectoryReader in the lucene-solr project by Apache.

In class TestDocValuesStatsCollector, method testOneDoc.

/**
 * Indexes a single document holding one numeric doc value (1) and verifies that
 * the statistics gathered by {@code DocValuesStatsCollector} describe exactly
 * that one value: count 1, nothing missing, min = max = sum = mean = 1, and
 * zero variance / standard deviation.
 *
 * @throws IOException if indexing or searching fails
 */
public void testOneDoc() throws IOException {
    try (Directory directory = newDirectory();
        IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig())) {
        final String statsField = "numeric";

        // The only document in the index: one doc value plus an id term.
        Document onlyDoc = new Document();
        onlyDoc.add(new NumericDocValuesField(statsField, 1));
        onlyDoc.add(new StringField("id", "doc1", Store.NO));
        writer.addDocument(onlyDoc);

        // The reader is opened directly from the writer, so no explicit commit is issued here.
        try (DirectoryReader writerReader = DirectoryReader.open(writer)) {
            IndexSearcher indexSearcher = new IndexSearcher(writerReader);
            LongDocValuesStats collected = new LongDocValuesStats(statsField);
            DocValuesStatsCollector collector = new DocValuesStatsCollector(collected);
            indexSearcher.search(new MatchAllDocsQuery(), collector);

            // Population checks.
            assertEquals(1, collected.count());
            assertEquals(0, collected.missing());

            // With a single value, every aggregate collapses to that value.
            assertEquals(1, collected.max().longValue());
            assertEquals(1, collected.min().longValue());
            assertEquals(1, collected.sum().longValue());
            assertEquals(1, collected.mean(), 0.0001);

            // A single sample has no spread.
            assertEquals(0, collected.variance(), 0.0001);
            assertEquals(0, collected.stdev(), 0.0001);
        }
    }
}

Also used : LongDocValuesStats(org.apache.lucene.search.DocValuesStats.LongDocValuesStats) SortedLongDocValuesStats(org.apache.lucene.search.DocValuesStats.SortedLongDocValuesStats) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) IndexWriter(org.apache.lucene.index.IndexWriter) DirectoryReader(org.apache.lucene.index.DirectoryReader) StringField(org.apache.lucene.document.StringField) Document(org.apache.lucene.document.Document) Directory(org.apache.lucene.store.Directory)

Example 77 with DirectoryReader

Use of org.apache.lucene.index.DirectoryReader in the lucene-solr project by Apache.

In class TestDocValuesStatsCollector, method testDocsWithDoubleValues.

/**
 * Indexes a random number of documents (1..100), giving a randomly chosen subset
 * of them an increasing positive double doc value, optionally deletes some
 * documents by id, and then checks the collected statistics against values
 * computed from the locally tracked expectations.
 *
 * <p>The {@code expected} array has one slot per document; a slot of 0 means the
 * document either never had a value or was selected for deletion.
 *
 * @throws IOException if indexing or searching fails
 */
public void testDocsWithDoubleValues() throws IOException {
    try (Directory directory = newDirectory();
        IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig())) {
        final String statsField = "numeric";
        final int docCount = TestUtil.nextInt(random(), 1, 100);
        final double[] expected = new double[docCount];

        double upcoming = 1.0;
        for (int docIdx = 0; docIdx < docCount; docIdx++) {
            Document document = new Document();
            boolean hasValue = random().nextBoolean();
            if (hasValue) {
                // only some documents receive a value (and, with it, an id term)
                double value = upcoming++;
                document.add(new DoubleDocValuesField(statsField, value));
                document.add(new StringField("id", "doc" + docIdx, Store.NO));
                expected[docIdx] = value;
            }
            writer.addDocument(document);
        }

        // In 20% of the runs, delete a random subset of the documents.
        boolean deleteSome = random().nextDouble() < 0.2;
        if (deleteSome) {
            for (int docIdx = 0; docIdx < docCount; docIdx++) {
                if (!random().nextBoolean()) {
                    continue;
                }
                writer.deleteDocuments(new Term("id", "doc" + docIdx));
                expected[docIdx] = 0;
            }
        }

        try (DirectoryReader writerReader = DirectoryReader.open(writer)) {
            IndexSearcher indexSearcher = new IndexSearcher(writerReader);
            DoubleDocValuesStats collected = new DoubleDocValuesStats(statsField);
            DocValuesStatsCollector collector = new DocValuesStatsCollector(collected);
            indexSearcher.search(new MatchAllDocsQuery(), collector);

            int expCount = (int) Arrays.stream(expected).filter(v -> v > 0).count();
            assertEquals(expCount, collected.count());

            int numDocsWithoutField = (int) getZeroValues(expected).count();
            assertEquals(computeExpMissing(numDocsWithoutField, docCount, writerReader), collected.missing());

            // The aggregates are only compared when at least one value was collected.
            if (collected.count() <= 0) {
                return;
            }

            DoubleSummaryStatistics reference = getPositiveValues(expected).summaryStatistics();
            assertEquals(reference.getMax(), collected.max().doubleValue(), 0.00001);
            assertEquals(reference.getMin(), collected.min().doubleValue(), 0.00001);
            assertEquals(reference.getAverage(), collected.mean(), 0.00001);
            assertEquals(reference.getSum(), collected.sum(), 0.00001);

            double variance = computeVariance(expected, collected.mean, collected.count());
            assertEquals(variance, collected.variance(), 0.00001);
            assertEquals(Math.sqrt(variance), collected.stdev(), 0.00001);
        }
    }
}

Also used : DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) DoubleSummaryStatistics(java.util.DoubleSummaryStatistics) DoubleDocValuesStats(org.apache.lucene.search.DocValuesStats.DoubleDocValuesStats) SortedDoubleDocValuesStats(org.apache.lucene.search.DocValuesStats.SortedDoubleDocValuesStats) IndexWriter(org.apache.lucene.index.IndexWriter) DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField) StringField(org.apache.lucene.document.StringField) Directory(org.apache.lucene.store.Directory)

Example 78 with DirectoryReader

Use of org.apache.lucene.index.DirectoryReader in the lucene-solr project by Apache.

In class TestDocValuesStatsCollector, method testDocsWithLongValues.

/**
 * Indexes a random number of documents (1..100), giving a randomly chosen subset
 * of them an increasing positive long doc value, optionally deletes some
 * documents by id, and then checks the collected statistics against values
 * computed from the locally tracked expectations.
 *
 * <p>The {@code expected} array has one slot per document; a slot of 0 means the
 * document either never had a value or was selected for deletion.
 *
 * @throws IOException if indexing or searching fails
 */
public void testDocsWithLongValues() throws IOException {
    try (Directory directory = newDirectory();
        IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig())) {
        final String statsField = "numeric";
        final int docCount = TestUtil.nextInt(random(), 1, 100);
        final long[] expected = new long[docCount];

        int upcoming = 1;
        for (int docIdx = 0; docIdx < docCount; docIdx++) {
            Document document = new Document();
            boolean hasValue = random().nextBoolean();
            if (hasValue) {
                // only some documents receive a value (and, with it, an id term)
                int value = upcoming++;
                document.add(new NumericDocValuesField(statsField, value));
                document.add(new StringField("id", "doc" + docIdx, Store.NO));
                expected[docIdx] = value;
            }
            writer.addDocument(document);
        }

        // In 20% of the runs, delete a random subset of the documents.
        boolean deleteSome = random().nextDouble() < 0.2;
        if (deleteSome) {
            for (int docIdx = 0; docIdx < docCount; docIdx++) {
                if (!random().nextBoolean()) {
                    continue;
                }
                writer.deleteDocuments(new Term("id", "doc" + docIdx));
                expected[docIdx] = 0;
            }
        }

        try (DirectoryReader writerReader = DirectoryReader.open(writer)) {
            IndexSearcher indexSearcher = new IndexSearcher(writerReader);
            LongDocValuesStats collected = new LongDocValuesStats(statsField);
            DocValuesStatsCollector collector = new DocValuesStatsCollector(collected);
            indexSearcher.search(new MatchAllDocsQuery(), collector);

            int expCount = (int) Arrays.stream(expected).filter(v -> v > 0).count();
            assertEquals(expCount, collected.count());

            int numDocsWithoutField = (int) getZeroValues(expected).count();
            assertEquals(computeExpMissing(numDocsWithoutField, docCount, writerReader), collected.missing());

            // The aggregates are only compared when at least one value was collected.
            if (collected.count() <= 0) {
                return;
            }

            LongSummaryStatistics reference = getPositiveValues(expected).summaryStatistics();
            assertEquals(reference.getMax(), collected.max().longValue());
            assertEquals(reference.getMin(), collected.min().longValue());
            assertEquals(reference.getAverage(), collected.mean(), 0.00001);
            assertEquals(reference.getSum(), collected.sum().longValue());

            double variance = computeVariance(expected, collected.mean, collected.count());
            assertEquals(variance, collected.variance(), 0.00001);
            assertEquals(Math.sqrt(variance), collected.stdev(), 0.00001);
        }
    }
}

Also used : LongDocValuesStats(org.apache.lucene.search.DocValuesStats.LongDocValuesStats) SortedLongDocValuesStats(org.apache.lucene.search.DocValuesStats.SortedLongDocValuesStats) DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) LongSummaryStatistics(java.util.LongSummaryStatistics) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) IndexWriter(org.apache.lucene.index.IndexWriter) StringField(org.apache.lucene.document.StringField) Directory(org.apache.lucene.store.Directory)

Example 79 with DirectoryReader

Use of org.apache.lucene.index.DirectoryReader in the lucene-solr project by Apache.

In class SweetSpotSimilarityTest, method computeNorm.

/**
 * Indexes one document whose {@code field} contains {@code length} space-separated
 * copies of the term "a", explains a term query for "a" against that document
 * (doc id 0) using {@code sim}, and returns the value of the "fieldNorm" node
 * found in the resulting explanation.
 *
 * <p>The directory, writer and reader are managed with try-with-resources so that
 * they are closed even when indexing or explaining throws.
 *
 * @param sim the similarity used both for indexing and for searching
 * @param field the name of the field to index and query
 * @param length the number of "a" terms placed in the field
 * @return the value of the "fieldNorm" explanation node
 * @throws IOException if indexing or searching fails
 */
private static float computeNorm(Similarity sim, String field, int length) throws IOException {
    String value = IntStream.range(0, length).mapToObj(i -> "a").collect(Collectors.joining(" "));
    Explanation expl;
    try (Directory dir = new RAMDirectory();
        IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setSimilarity(sim))) {
        w.addDocument(Collections.singleton(newTextField(field, value, Store.NO)));
        // Resources close in reverse order: reader first, then writer, then directory.
        try (DirectoryReader reader = DirectoryReader.open(w)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            searcher.setSimilarity(sim);
            expl = searcher.explain(new TermQuery(new Term(field, "a")), 0);
        }
    }
    // The explanation is inspected after the index resources have been released.
    Explanation norm = findExplanation(expl, "fieldNorm");
    assertNotNull(norm);
    return norm.getValue();
}

Also used : IntStream(java.util.stream.IntStream) Explanation(org.apache.lucene.search.Explanation) ClassicSimilarity(org.apache.lucene.search.similarities.ClassicSimilarity) TFIDFSimilarity(org.apache.lucene.search.similarities.TFIDFSimilarity) DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) RAMDirectory(org.apache.lucene.store.RAMDirectory) IOException(java.io.IOException) PerFieldSimilarityWrapper(org.apache.lucene.search.similarities.PerFieldSimilarityWrapper) Collectors(java.util.stream.Collectors) IndexWriter(org.apache.lucene.index.IndexWriter) TermQuery(org.apache.lucene.search.TermQuery) Similarity(org.apache.lucene.search.similarities.Similarity) Directory(org.apache.lucene.store.Directory) Store(org.apache.lucene.document.Field.Store) LuceneTestCase(org.apache.lucene.util.LuceneTestCase) Collections(java.util.Collections) IndexSearcher(org.apache.lucene.search.IndexSearcher) IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) IndexWriter(org.apache.lucene.index.IndexWriter) DirectoryReader(org.apache.lucene.index.DirectoryReader) Explanation(org.apache.lucene.search.Explanation) Term(org.apache.lucene.index.Term) RAMDirectory(org.apache.lucene.store.RAMDirectory) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory)

Example 80 with DirectoryReader

Use of org.apache.lucene.index.DirectoryReader in the lucene-solr project by Apache.

In class SimplePrimaryNode, method verifyAtLeastMarkerCount.

/**
 * Verifies that the searcher acquired from {@code mgr} matches at least
 * {@code expectedAtLeastCount} documents for the term {@code marker:marker}.
 *
 * <p>On success, and when {@code out} is non-null, the reader version and the hit
 * count are written to {@code out}. On failure, the marker ids that were found
 * are reported through {@code message} and an {@link IllegalStateException} is
 * thrown. The searcher is released back to {@code mgr} in every case.
 *
 * @param expectedAtLeastCount the minimum number of marker documents expected
 * @param out optional sink for the reader version and hit count; may be null
 * @throws IOException if searching or writing to {@code out} fails
 * @throws IllegalStateException if fewer marker documents than expected are found
 */
private void verifyAtLeastMarkerCount(int expectedAtLeastCount, DataOutput out) throws IOException {
    IndexSearcher acquired = mgr.acquire();
    try {
        DirectoryReader dirReader = (DirectoryReader) acquired.getIndexReader();
        long version = dirReader.getVersion();
        TermQuery markerQuery = new TermQuery(new Term("marker", "marker"));
        int hitCount = acquired.count(markerQuery);

        // Success path: enough markers are visible.
        if (hitCount >= expectedAtLeastCount) {
            if (out != null) {
                out.writeVLong(version);
                out.writeVInt(hitCount);
            }
            return;
        }

        // Failure path: report which markers are visible, then fail.
        message("marker search: expectedAtLeastCount=" + expectedAtLeastCount + " but hitCount=" + hitCount);
        TopDocs found = acquired.search(markerQuery, expectedAtLeastCount);
        List<Integer> markerIds = new ArrayList<>();
        for (ScoreDoc scoreDoc : found.scoreDocs) {
            Document stored = acquired.doc(scoreDoc.doc);
            // The first character of the stored "docid" is dropped before parsing the number.
            String docId = stored.get("docid");
            markerIds.add(Integer.parseInt(docId.substring(1)));
        }
        Collections.sort(markerIds);
        message("saw markers:");
        for (int markerId : markerIds) {
            message("saw m" + markerId);
        }
        throw new IllegalStateException("at flush: marker count " + hitCount + " but expected at least " + expectedAtLeastCount + " version=" + version);
    } finally {
        mgr.release(acquired);
    }
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) DirectoryReader(org.apache.lucene.index.DirectoryReader) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs)

Aggregations

DirectoryReader (org.apache.lucene.index.DirectoryReader): 362; Document (org.apache.lucene.document.Document): 228; Directory (org.apache.lucene.store.Directory): 206; RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 149; IndexWriter (org.apache.lucene.index.IndexWriter): 139; Term (org.apache.lucene.index.Term): 134; IndexSearcher (org.apache.lucene.search.IndexSearcher): 101; MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 98; IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 96; Test (org.junit.Test): 64; StringField (org.apache.lucene.document.StringField): 61; Analyzer (org.apache.lucene.analysis.Analyzer): 54; BytesRef (org.apache.lucene.util.BytesRef): 51; LeafReader (org.apache.lucene.index.LeafReader): 49; ArrayList (java.util.ArrayList): 46; Field (org.apache.lucene.document.Field): 45; LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 44; TermQuery (org.apache.lucene.search.TermQuery): 42; IOException (java.io.IOException): 37; TextField (org.apache.lucene.document.TextField): 36