Search in sources:

Example 1 with SortedLongDocValuesStats

Use of org.apache.lucene.search.DocValuesStats.SortedLongDocValuesStats in the project lucene-solr by apache.

From the class TestDocValuesStatsCollector, the method testDocsWithMultipleLongValues.

/**
 * Indexes a random number of documents (1 to 100), roughly half of which carry between one and
 * five {@link SortedNumericDocValuesField} values under the same field, optionally deletes some
 * of them, and then verifies that the statistics gathered by a {@link DocValuesStatsCollector}
 * into a {@link SortedLongDocValuesStats} agree with statistics computed directly from the
 * values that were indexed.
 *
 * @throws IOException if the directory, writer or reader fails
 */
public void testDocsWithMultipleLongValues() throws IOException {
    try (Directory directory = newDirectory();
        IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig())) {
        final String fieldName = "numeric";
        final int docCount = TestUtil.nextInt(random(), 1, 100);
        // expected[i] holds the values indexed for document i; it stays (or becomes) null
        // when the document has no values or was deleted.
        final long[][] expected = new long[docCount][];
        // Every indexed value is unique: the counter only ever increases.
        long valueCounter = 1;
        for (int docIdx = 0; docIdx < docCount; docIdx++) {
            Document document = new Document();
            // not all documents have a value
            boolean hasValues = random().nextBoolean();
            if (hasValues) {
                long[] values = new long[TestUtil.nextInt(random(), 1, 5)];
                for (int slot = 0; slot < values.length; slot++) {
                    long value = valueCounter++;
                    values[slot] = value;
                    document.add(new SortedNumericDocValuesField(fieldName, value));
                }
                expected[docIdx] = values;
                // The id field is only added to documents that carry values.
                document.add(new StringField("id", "doc" + docIdx, Store.NO));
            }
            writer.addDocument(document);
        }

        // 20% of cases delete some docs
        boolean deleteSomeDocs = random().nextDouble() < 0.2;
        if (deleteSomeDocs) {
            for (int docIdx = 0; docIdx < docCount; docIdx++) {
                if (!random().nextBoolean()) {
                    continue;
                }
                writer.deleteDocuments(new Term("id", "doc" + docIdx));
                expected[docIdx] = null;
            }
        }

        try (DirectoryReader reader = DirectoryReader.open(writer)) {
            SortedLongDocValuesStats stats = new SortedLongDocValuesStats(fieldName);
            IndexSearcher searcher = new IndexSearcher(reader);
            searcher.search(new MatchAllDocsQuery(), new DocValuesStatsCollector(stats));

            // Document-level counters: docs with the field, and docs missing it.
            long expectedDocsWithField = nonNull(expected).count();
            assertEquals(expectedDocsWithField, stats.count());
            int docsWithoutField = (int) isNull(expected).count();
            assertEquals(computeExpMissing(docsWithoutField, docCount, reader), stats.missing());

            if (stats.count() > 0) {
                final double tolerance = 0.00001;
                // Value-level statistics over the flattened values of all surviving documents.
                LongSummaryStatistics summary =
                    filterAndFlatValues(expected, (values) -> values != null).summaryStatistics();
                assertEquals(summary.getMax(), stats.max().longValue());
                assertEquals(summary.getMin(), stats.min().longValue());
                assertEquals(summary.getAverage(), stats.mean(), tolerance);
                assertEquals(summary.getSum(), stats.sum().longValue());
                assertEquals(summary.getCount(), stats.valuesCount());

                // The expected variance is derived from the collector's own mean and doc count.
                double expectedVariance = computeVariance(
                    filterAndFlatValues(expected, (values) -> values != null), stats.mean, stats.count());
                assertEquals(expectedVariance, stats.variance(), tolerance);
                assertEquals(Math.sqrt(expectedVariance), stats.stdev(), tolerance);
            }
        }
    }
}
Also used : Arrays(java.util.Arrays) DoubleDocValuesStats(org.apache.lucene.search.DocValuesStats.DoubleDocValuesStats) StringField(org.apache.lucene.document.StringField) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) Term(org.apache.lucene.index.Term) TestUtil(org.apache.lucene.util.TestUtil) DoubleSummaryStatistics(java.util.DoubleSummaryStatistics) SortedSetDocValuesStats(org.apache.lucene.search.DocValuesStats.SortedSetDocValuesStats) Document(org.apache.lucene.document.Document) SortedDocValuesStats(org.apache.lucene.search.DocValuesStats.SortedDocValuesStats) Directory(org.apache.lucene.store.Directory) Store(org.apache.lucene.document.Field.Store) LongSummaryStatistics(java.util.LongSummaryStatistics) SortedDoubleDocValuesStats(org.apache.lucene.search.DocValuesStats.SortedDoubleDocValuesStats) DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) LongStream(java.util.stream.LongStream) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) Predicate(java.util.function.Predicate) BytesRef(org.apache.lucene.util.BytesRef) DirectoryReader(org.apache.lucene.index.DirectoryReader) IOException(java.io.IOException) LongDocValuesStats(org.apache.lucene.search.DocValuesStats.LongDocValuesStats) DoubleStream(java.util.stream.DoubleStream) Objects(java.util.Objects) IndexWriter(org.apache.lucene.index.IndexWriter) Stream(java.util.stream.Stream) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) LuceneTestCase(org.apache.lucene.util.LuceneTestCase) SortedLongDocValuesStats(org.apache.lucene.search.DocValuesStats.SortedLongDocValuesStats) IndexReader(org.apache.lucene.index.IndexReader) DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) LongSummaryStatistics(java.util.LongSummaryStatistics) 
SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) IndexWriter(org.apache.lucene.index.IndexWriter) StringField(org.apache.lucene.document.StringField) SortedLongDocValuesStats(org.apache.lucene.search.DocValuesStats.SortedLongDocValuesStats) Directory(org.apache.lucene.store.Directory)

Aggregations

IOException (java.io.IOException)1 Arrays (java.util.Arrays)1 DoubleSummaryStatistics (java.util.DoubleSummaryStatistics)1 LongSummaryStatistics (java.util.LongSummaryStatistics)1 Objects (java.util.Objects)1 Predicate (java.util.function.Predicate)1 DoubleStream (java.util.stream.DoubleStream)1 LongStream (java.util.stream.LongStream)1 Stream (java.util.stream.Stream)1 Document (org.apache.lucene.document.Document)1 DoubleDocValuesField (org.apache.lucene.document.DoubleDocValuesField)1 Store (org.apache.lucene.document.Field.Store)1 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)1 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)1 SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField)1 SortedSetDocValuesField (org.apache.lucene.document.SortedSetDocValuesField)1 StringField (org.apache.lucene.document.StringField)1 DirectoryReader (org.apache.lucene.index.DirectoryReader)1 IndexReader (org.apache.lucene.index.IndexReader)1 IndexWriter (org.apache.lucene.index.IndexWriter)1