Search in sources :

Example 1 with DoubleDocValuesStats

use of org.apache.lucene.search.DocValuesStats.DoubleDocValuesStats in project lucene-solr by apache.

the class TestDocValuesStatsCollector method testDocsWithDoubleValues.

public void testDocsWithDoubleValues() throws IOException {
    try (Directory dir = newDirectory();
        IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig())) {
        String field = "numeric";
        int numDocs = TestUtil.nextInt(random(), 1, 100);
        double[] docValues = new double[numDocs];
        double nextVal = 1.0;
        for (int i = 0; i < numDocs; i++) {
            Document doc = new Document();
            if (random().nextBoolean()) {
                // not all documents have a value
                doc.add(new DoubleDocValuesField(field, nextVal));
                doc.add(new StringField("id", "doc" + i, Store.NO));
                docValues[i] = nextVal;
                ++nextVal;
            }
            indexWriter.addDocument(doc);
        }
        // 20% of cases delete some docs
        if (random().nextDouble() < 0.2) {
            for (int i = 0; i < numDocs; i++) {
                if (random().nextBoolean()) {
                    indexWriter.deleteDocuments(new Term("id", "doc" + i));
                    docValues[i] = 0;
                }
            }
        }
        try (DirectoryReader reader = DirectoryReader.open(indexWriter)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            DoubleDocValuesStats stats = new DoubleDocValuesStats(field);
            searcher.search(new MatchAllDocsQuery(), new DocValuesStatsCollector(stats));
            int expCount = (int) Arrays.stream(docValues).filter(v -> v > 0).count();
            assertEquals(expCount, stats.count());
            int numDocsWithoutField = (int) getZeroValues(docValues).count();
            assertEquals(computeExpMissing(numDocsWithoutField, numDocs, reader), stats.missing());
            if (stats.count() > 0) {
                DoubleSummaryStatistics sumStats = getPositiveValues(docValues).summaryStatistics();
                assertEquals(sumStats.getMax(), stats.max().doubleValue(), 0.00001);
                assertEquals(sumStats.getMin(), stats.min().doubleValue(), 0.00001);
                assertEquals(sumStats.getAverage(), stats.mean(), 0.00001);
                assertEquals(sumStats.getSum(), stats.sum(), 0.00001);
                double variance = computeVariance(docValues, stats.mean, stats.count());
                assertEquals(variance, stats.variance(), 0.00001);
                assertEquals(Math.sqrt(variance), stats.stdev(), 0.00001);
            }
        }
    }
}
Also used : DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) DoubleSummaryStatistics(java.util.DoubleSummaryStatistics) DoubleDocValuesStats(org.apache.lucene.search.DocValuesStats.DoubleDocValuesStats) SortedDoubleDocValuesStats(org.apache.lucene.search.DocValuesStats.SortedDoubleDocValuesStats) IndexWriter(org.apache.lucene.index.IndexWriter) DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField) StringField(org.apache.lucene.document.StringField) Directory(org.apache.lucene.store.Directory)

Aggregations

DoubleSummaryStatistics (java.util.DoubleSummaryStatistics)1 Document (org.apache.lucene.document.Document)1 DoubleDocValuesField (org.apache.lucene.document.DoubleDocValuesField)1 StringField (org.apache.lucene.document.StringField)1 DirectoryReader (org.apache.lucene.index.DirectoryReader)1 IndexWriter (org.apache.lucene.index.IndexWriter)1 Term (org.apache.lucene.index.Term)1 DoubleDocValuesStats (org.apache.lucene.search.DocValuesStats.DoubleDocValuesStats)1 SortedDoubleDocValuesStats (org.apache.lucene.search.DocValuesStats.SortedDoubleDocValuesStats)1 Directory (org.apache.lucene.store.Directory)1