Search in sources:

Example 6 with DoubleDocValuesField

use of org.apache.lucene.document.DoubleDocValuesField in project lucene-solr by apache.

the class TestDoubleValuesSource method setUp.

/**
 * Populates the shared test index. The number of documents is drawn from
 * {@code TestUtil.nextInt(random(), 2049, 4000)}; each document gets an "english" and an
 * "oddeven" text field plus random "int", "long", "float" and "double" doc values. Only the
 * document at position 545 additionally carries the "onefield" double doc value.
 */
@Override
public void setUp() throws Exception {
    super.setUp();
    dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    final int docCount = TestUtil.nextInt(random(), 2049, 4000);
    for (int docIndex = 0; docIndex < docCount; docIndex++) {
        Document doc = new Document();
        doc.add(newTextField("english", English.intToEnglish(docIndex), Field.Store.NO));

        // Label the document by the parity of its position.
        String parity;
        if (docIndex % 2 == 0) {
            parity = "even";
        } else {
            parity = "odd";
        }
        doc.add(newTextField("oddeven", parity, Field.Store.NO));

        // Random numeric doc values; the draw order (int, long, float, double) is kept as-is.
        doc.add(new NumericDocValuesField("int", random().nextInt()));
        doc.add(new NumericDocValuesField("long", random().nextLong()));
        doc.add(new FloatDocValuesField("float", random().nextFloat()));
        doc.add(new DoubleDocValuesField("double", random().nextDouble()));

        // Exactly one document has a value for "onefield".
        if (docIndex == 545) {
            doc.add(new DoubleDocValuesField("onefield", 45.72));
        }
        writer.addDocument(doc);
    }
    reader = writer.getReader();
    writer.close();
    searcher = newSearcher(reader);
}
Also used : NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField) FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 7 with DoubleDocValuesField

use of org.apache.lucene.document.DoubleDocValuesField in project lucene-solr by apache.

the class TestIndexSorting method testMissingDoubleFirst.

/**
 * Index-sorts on the double field "foo" with {@code Double.NEGATIVE_INFINITY} as the missing
 * value, then verifies that after merging down to a single segment the document without a
 * value occupies doc 0, followed by the documents holding 7.0 and 18.0.
 */
public void testMissingDoubleFirst() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random()));
    SortField fooSort = new SortField("foo", SortField.Type.DOUBLE);
    fooSort.setMissingValue(Double.NEGATIVE_INFINITY);
    config.setIndexSort(new Sort(fooSort));
    IndexWriter writer = new IndexWriter(dir, config);

    Document eighteen = new Document();
    eighteen.add(new DoubleDocValuesField("foo", 18.0));
    writer.addDocument(eighteen);
    // Commit after each of the first two documents so there is more than one segment;
    // forceMerge then really merges, and a sorted segment is only produced by merging.
    writer.commit();

    // This document has no "foo" value at all.
    writer.addDocument(new Document());
    writer.commit();

    Document seven = new Document();
    seven.add(new DoubleDocValuesField("foo", 7.0));
    writer.addDocument(seven);
    writer.forceMerge(1);

    DirectoryReader reader = DirectoryReader.open(writer);
    LeafReader onlyLeaf = getOnlyLeafReader(reader);
    assertEquals(3, onlyLeaf.maxDoc());

    // The iterator's first document is 1, i.e. doc 0 is the one without a value.
    NumericDocValues fooValues = onlyLeaf.getNumericDocValues("foo");
    assertEquals(1, fooValues.nextDoc());
    double firstValue = Double.longBitsToDouble(fooValues.longValue());
    assertEquals(7.0, firstValue, 0.0);
    assertEquals(2, fooValues.nextDoc());
    double secondValue = Double.longBitsToDouble(fooValues.longValue());
    assertEquals(18.0, secondValue, 0.0);

    reader.close();
    writer.close();
    dir.close();
}
Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField) Sort(org.apache.lucene.search.Sort) SortField(org.apache.lucene.search.SortField) SortedSetSortField(org.apache.lucene.search.SortedSetSortField) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) Document(org.apache.lucene.document.Document) Directory(org.apache.lucene.store.Directory)

Example 8 with DoubleDocValuesField

use of org.apache.lucene.document.DoubleDocValuesField in project lucene-solr by apache.

the class TestIndexSorting method testRandom3.

// pits index time sorting against query time sorting
//
// Indexes the same random documents into two indexes: one without an index sort (w1) and one
// with the index sorted by a random Sort (w2). A random subset of documents is deleted from
// both. The test then runs 100 sorted MatchAllDocsQuery searches and asserts that the top hits
// of the index-sorted, early-terminating search match the query-time-sorted hits, both in
// document "id" and in sort field values.
public void testRandom3() throws Exception {
    // Nightly runs use a much larger document count.
    int numDocs;
    if (TEST_NIGHTLY) {
        numDocs = atLeast(100000);
    } else {
        numDocs = atLeast(1000);
    }
    List<RandomDoc> docs = new ArrayList<>();
    Sort sort = randomSort();
    if (VERBOSE) {
        System.out.println("TEST: numDocs=" + numDocs + " use sort=" + sort);
    }
    // no index sorting, all search-time sorting:
    Directory dir1 = newFSDirectory(createTempDir());
    IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random()));
    IndexWriter w1 = new IndexWriter(dir1, iwc1);
    // use index sorting:
    Directory dir2 = newFSDirectory(createTempDir());
    IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random()));
    iwc2.setIndexSort(sort);
    IndexWriter w2 = new IndexWriter(dir2, iwc2);
    // Ids chosen for deletion; deleteChance is drawn once and applied to every document.
    Set<Integer> toDelete = new HashSet<>();
    double deleteChance = random().nextDouble();
    for (int id = 0; id < numDocs; id++) {
        RandomDoc docValues = new RandomDoc(id);
        docs.add(docValues);
        if (VERBOSE) {
            System.out.println("TEST: doc id=" + id);
            System.out.println("  int=" + docValues.intValue);
            System.out.println("  long=" + docValues.longValue);
            System.out.println("  float=" + docValues.floatValue);
            System.out.println("  double=" + docValues.doubleValue);
            System.out.println("  bytes=" + new BytesRef(docValues.bytesValue));
        }
        Document doc = new Document();
        // "id" is added both as a stored string field (used below for deletes and for comparing
        // hits) and as a numeric doc value.
        doc.add(new StringField("id", Integer.toString(id), Field.Store.YES));
        doc.add(new NumericDocValuesField("id", id));
        // Single-valued doc values fields.
        doc.add(new NumericDocValuesField("int", docValues.intValue));
        doc.add(new NumericDocValuesField("long", docValues.longValue));
        doc.add(new DoubleDocValuesField("double", docValues.doubleValue));
        doc.add(new FloatDocValuesField("float", docValues.floatValue));
        doc.add(new SortedDocValuesField("bytes", new BytesRef(docValues.bytesValue)));
        // Multi-valued doc values fields; float and double values are converted with
        // NumericUtils.floatToSortableInt / doubleToSortableLong before being added.
        for (int value : docValues.intValues) {
            doc.add(new SortedNumericDocValuesField("multi_valued_int", value));
        }
        for (long value : docValues.longValues) {
            doc.add(new SortedNumericDocValuesField("multi_valued_long", value));
        }
        for (float value : docValues.floatValues) {
            doc.add(new SortedNumericDocValuesField("multi_valued_float", NumericUtils.floatToSortableInt(value)));
        }
        for (double value : docValues.doubleValues) {
            doc.add(new SortedNumericDocValuesField("multi_valued_double", NumericUtils.doubleToSortableLong(value)));
        }
        for (byte[] value : docValues.bytesValues) {
            doc.add(new SortedSetDocValuesField("multi_valued_bytes", new BytesRef(value)));
        }
        // The same document goes into both indexes.
        w1.addDocument(doc);
        w2.addDocument(doc);
        if (random().nextDouble() < deleteChance) {
            toDelete.add(id);
        }
    }
    // Apply the same deletions to both indexes.
    for (int id : toDelete) {
        w1.deleteDocuments(new Term("id", Integer.toString(id)));
        w2.deleteDocuments(new Term("id", Integer.toString(id)));
    }
    DirectoryReader r1 = DirectoryReader.open(w1);
    IndexSearcher s1 = newSearcher(r1);
    // Half of the time, merge the index-sorted index down to between 1 and 5 segments
    // before opening its reader.
    if (random().nextBoolean()) {
        int maxSegmentCount = TestUtil.nextInt(random(), 1, 5);
        if (VERBOSE) {
            System.out.println("TEST: now forceMerge(" + maxSegmentCount + ")");
        }
        w2.forceMerge(maxSegmentCount);
    }
    DirectoryReader r2 = DirectoryReader.open(w2);
    IndexSearcher s2 = newSearcher(r2);
    for (int iter = 0; iter < 100; iter++) {
        int numHits = TestUtil.nextInt(random(), 1, numDocs);
        if (VERBOSE) {
            System.out.println("TEST: iter=" + iter + " numHits=" + numHits);
        }
        // Query-time sorting over the unsorted index.
        TopFieldCollector c1 = TopFieldCollector.create(sort, numHits, true, true, true);
        s1.search(new MatchAllDocsQuery(), c1);
        TopDocs hits1 = c1.topDocs();
        // Same collector setup over the index-sorted index, wrapped in an
        // EarlyTerminatingSortingCollector; the results are still read from c2.
        TopFieldCollector c2 = TopFieldCollector.create(sort, numHits, true, true, true);
        EarlyTerminatingSortingCollector c3 = new EarlyTerminatingSortingCollector(c2, sort, numHits);
        s2.search(new MatchAllDocsQuery(), c3);
        TopDocs hits2 = c2.topDocs();
        if (VERBOSE) {
            System.out.println("  topDocs query-time sort: totalHits=" + hits1.totalHits);
            for (ScoreDoc scoreDoc : hits1.scoreDocs) {
                System.out.println("    " + scoreDoc.doc);
            }
            System.out.println("  topDocs index-time sort: totalHits=" + hits2.totalHits);
            for (ScoreDoc scoreDoc : hits2.scoreDocs) {
                System.out.println("    " + scoreDoc.doc);
            }
        }
        // The index-sorted search is only required to report at most as many total hits
        // (presumably because early termination stops collecting — confirm), but it must
        // return the same number of top documents.
        assertTrue(hits2.totalHits <= hits1.totalHits);
        assertEquals(hits2.scoreDocs.length, hits1.scoreDocs.length);
        for (int i = 0; i < hits2.scoreDocs.length; i++) {
            ScoreDoc hit1 = hits1.scoreDocs[i];
            ScoreDoc hit2 = hits2.scoreDocs[i];
            // Internal doc ids differ between the two indexes, so compare the stored "id" field.
            assertEquals(r1.document(hit1.doc).get("id"), r2.document(hit2.doc).get("id"));
            // NOTE(review): if FieldDoc.fields is an Object[], this call resolves to JUnit 4's
            // deprecated array overload of assertEquals; assertArrayEquals would state the
            // intent explicitly — confirm before changing.
            assertEquals(((FieldDoc) hit1).fields, ((FieldDoc) hit2).fields);
        }
    }
    IOUtils.close(r1, r2, w1, w2, dir1, dir2);
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) ArrayList(java.util.ArrayList) FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) Sort(org.apache.lucene.search.Sort) TopFieldCollector(org.apache.lucene.search.TopFieldCollector) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) BinaryPoint(org.apache.lucene.document.BinaryPoint) IntPoint(org.apache.lucene.document.IntPoint) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) EarlyTerminatingSortingCollector(org.apache.lucene.search.EarlyTerminatingSortingCollector) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) StringField(org.apache.lucene.document.StringField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField)

Example 9 with DoubleDocValuesField

use of org.apache.lucene.document.DoubleDocValuesField in project lucene-solr by apache.

the class TestSort method testDoubleMissingLast.

/**
 * Sorts on a double field while mapping documents without a value to {@code Double.MAX_VALUE},
 * and checks that the value-less document comes after all documents that have a value.
 */
public void testDoubleMissingLast() throws IOException {
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);

    // The first document carries no "value" field at all.
    writer.addDocument(new Document());

    // Remaining documents: each numeric doc value is paired with its stored string form.
    double[] numbers = {-1.3, 4.2333333333333, 4.2333333333332};
    String[] storedForms = {"-1.3", "4.2333333333333", "4.2333333333332"};
    for (int n = 0; n < numbers.length; n++) {
        Document doc = new Document();
        doc.add(new DoubleDocValuesField("value", numbers[n]));
        doc.add(newStringField("value", storedForms[n], Field.Store.YES));
        writer.addDocument(doc);
    }

    IndexReader ir = writer.getReader();
    writer.close();
    IndexSearcher searcher = newSearcher(ir);

    SortField byValue = new SortField("value", SortField.Type.DOUBLE);
    byValue.setMissingValue(Double.MAX_VALUE);
    TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, new Sort(byValue));
    assertEquals(4, td.totalHits);

    // Documents with a value come first, in ascending order ...
    String[] expectedOrder = {"-1.3", "4.2333333333332", "4.2333333333333"};
    for (int rank = 0; rank < expectedOrder.length; rank++) {
        assertEquals(expectedOrder[rank], searcher.doc(td.scoreDocs[rank].doc).get("value"));
    }
    // ... and the document without a value (treated as Double.MAX_VALUE) is last.
    assertNull(searcher.doc(td.scoreDocs[3].doc).get("value"));

    ir.close();
    dir.close();
}
Also used : DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField) IndexReader(org.apache.lucene.index.IndexReader) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 10 with DoubleDocValuesField

use of org.apache.lucene.document.DoubleDocValuesField in project lucene-solr by apache.

the class TestDocValuesStatsCollector method testDocsWithDoubleValues.

/**
 * Indexes a random number of documents, only some of which carry a double doc value, optionally
 * deletes some of them, and compares the statistics gathered by a {@code DocValuesStatsCollector}
 * against values computed directly from the expected per-document values.
 */
public void testDocsWithDoubleValues() throws IOException {
    try (Directory dir = newDirectory();
        IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig())) {
        String field = "numeric";
        int numDocs = TestUtil.nextInt(random(), 1, 100);
        // Expected value per document; entries left at 0 mark documents without a live value.
        double[] docValues = new double[numDocs];
        double nextVal = 1.0;
        for (int docIdx = 0; docIdx < numDocs; docIdx++) {
            Document doc = new Document();
            // not all documents have a value
            boolean hasValue = random().nextBoolean();
            if (hasValue) {
                doc.add(new DoubleDocValuesField(field, nextVal));
                doc.add(new StringField("id", "doc" + docIdx, Store.NO));
                docValues[docIdx] = nextVal++;
            }
            indexWriter.addDocument(doc);
        }

        // 20% of cases delete some docs
        boolean deleteSome = random().nextDouble() < 0.2;
        if (deleteSome) {
            for (int docIdx = 0; docIdx < numDocs; docIdx++) {
                if (random().nextBoolean()) {
                    indexWriter.deleteDocuments(new Term("id", "doc" + docIdx));
                    docValues[docIdx] = 0;
                }
            }
        }

        try (DirectoryReader reader = DirectoryReader.open(indexWriter)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            DoubleDocValuesStats stats = new DoubleDocValuesStats(field);
            searcher.search(new MatchAllDocsQuery(), new DocValuesStatsCollector(stats));

            // Count: every strictly positive expected value should have been collected.
            int expCount = (int) Arrays.stream(docValues).filter(value -> value > 0).count();
            assertEquals(expCount, stats.count());

            // Missing: derived from the zero entries via the computeExpMissing helper.
            int numDocsWithoutField = (int) getZeroValues(docValues).count();
            long expMissing = computeExpMissing(numDocsWithoutField, numDocs, reader);
            assertEquals(expMissing, stats.missing());

            // The remaining statistics are only checked when at least one value was seen.
            if (stats.count() > 0) {
                DoubleSummaryStatistics sumStats = getPositiveValues(docValues).summaryStatistics();
                assertEquals(sumStats.getMax(), stats.max().doubleValue(), 0.00001);
                assertEquals(sumStats.getMin(), stats.min().doubleValue(), 0.00001);
                assertEquals(sumStats.getAverage(), stats.mean(), 0.00001);
                assertEquals(sumStats.getSum(), stats.sum(), 0.00001);
                double variance = computeVariance(docValues, stats.mean, stats.count());
                assertEquals(variance, stats.variance(), 0.00001);
                double expStdev = Math.sqrt(variance);
                assertEquals(expStdev, stats.stdev(), 0.00001);
            }
        }
    }
}
Also used : DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) DoubleSummaryStatistics(java.util.DoubleSummaryStatistics) DoubleDocValuesStats(org.apache.lucene.search.DocValuesStats.DoubleDocValuesStats) SortedDoubleDocValuesStats(org.apache.lucene.search.DocValuesStats.SortedDoubleDocValuesStats) IndexWriter(org.apache.lucene.index.IndexWriter) DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField) StringField(org.apache.lucene.document.StringField) Directory(org.apache.lucene.store.Directory)

Aggregations

DoubleDocValuesField (org.apache.lucene.document.DoubleDocValuesField)24 Document (org.apache.lucene.document.Document)18 Directory (org.apache.lucene.store.Directory)13 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)11 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)9 Field (org.apache.lucene.document.Field)8 IndexReader (org.apache.lucene.index.IndexReader)8 DoublePoint (org.apache.lucene.document.DoublePoint)7 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)7 StringField (org.apache.lucene.document.StringField)7 BytesRef (org.apache.lucene.util.BytesRef)7 FloatDocValuesField (org.apache.lucene.document.FloatDocValuesField)6 StoredField (org.apache.lucene.document.StoredField)5 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)4 IndexSearcher (org.apache.lucene.search.IndexSearcher)4 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)4 Sort (org.apache.lucene.search.Sort)4 SortField (org.apache.lucene.search.SortField)4 IntPoint (org.apache.lucene.document.IntPoint)3 SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField)3