Search in sources :

Example 91 with NumericDocValuesField

use of org.apache.lucene.document.NumericDocValuesField in project lucene-solr by apache.

the class TestIndexSorting method testMultiValuedRandom1.

public void testMultiValuedRandom1() throws IOException {
    boolean withDeletes = random().nextBoolean();
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
    Sort indexSort = new Sort(new SortedNumericSortField("foo", SortField.Type.LONG));
    iwc.setIndexSort(indexSort);
    IndexWriter w = new IndexWriter(dir, iwc);
    final int numDocs = atLeast(1000);
    final FixedBitSet deleted = new FixedBitSet(numDocs);
    for (int i = 0; i < numDocs; ++i) {
        Document doc = new Document();
        int num = random().nextInt(10);
        for (int j = 0; j < num; j++) {
            doc.add(new SortedNumericDocValuesField("foo", random().nextInt(2000)));
        }
        doc.add(new StringField("id", Integer.toString(i), Store.YES));
        doc.add(new NumericDocValuesField("id", i));
        w.addDocument(doc);
        if (random().nextInt(5) == 0) {
            w.getReader().close();
        } else if (random().nextInt(30) == 0) {
            w.forceMerge(2);
        } else if (random().nextInt(4) == 0) {
            final int id = TestUtil.nextInt(random(), 0, i);
            deleted.set(id);
            w.deleteDocuments(new Term("id", Integer.toString(id)));
        }
    }
    DirectoryReader reader = w.getReader();
    // Now check that the index is consistent
    IndexSearcher searcher = newSearcher(reader);
    for (int i = 0; i < numDocs; ++i) {
        TermQuery termQuery = new TermQuery(new Term("id", Integer.toString(i)));
        final TopDocs topDocs = searcher.search(termQuery, 1);
        if (deleted.get(i)) {
            assertEquals(0, topDocs.totalHits);
        } else {
            assertEquals(1, topDocs.totalHits);
            NumericDocValues values = MultiDocValues.getNumericValues(reader, "id");
            assertEquals(topDocs.scoreDocs[0].doc, values.advance(topDocs.scoreDocs[0].doc));
            assertEquals(i, values.longValue());
            Document document = reader.document(topDocs.scoreDocs[0].doc);
            assertEquals(Integer.toString(i), document.get("id"));
        }
    }
    reader.close();
    w.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) Document(org.apache.lucene.document.Document) BinaryPoint(org.apache.lucene.document.BinaryPoint) IntPoint(org.apache.lucene.document.IntPoint) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) TopDocs(org.apache.lucene.search.TopDocs) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) FixedBitSet(org.apache.lucene.util.FixedBitSet) StringField(org.apache.lucene.document.StringField) Sort(org.apache.lucene.search.Sort) Directory(org.apache.lucene.store.Directory)

Example 92 with NumericDocValuesField

use of org.apache.lucene.document.NumericDocValuesField in project lucene-solr by apache.

the class TestIndexSorting method testIndexSortWithSparseField.

public void testIndexSortWithSparseField() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
    SortField sortField = new SortField("dense_int", SortField.Type.INT, true);
    Sort indexSort = new Sort(sortField);
    iwc.setIndexSort(indexSort);
    IndexWriter w = new IndexWriter(dir, iwc);
    Field textField = newTextField("sparse_text", "", Field.Store.NO);
    for (int i = 0; i < 128; i++) {
        Document doc = new Document();
        doc.add(new NumericDocValuesField("dense_int", i));
        if (i < 64) {
            doc.add(new NumericDocValuesField("sparse_int", i));
            doc.add(new BinaryDocValuesField("sparse_binary", new BytesRef(Integer.toString(i))));
            textField.setStringValue("foo");
            doc.add(textField);
        }
        w.addDocument(doc);
    }
    w.commit();
    w.forceMerge(1);
    DirectoryReader r = DirectoryReader.open(w);
    assertEquals(1, r.leaves().size());
    LeafReader leafReader = r.leaves().get(0).reader();
    NumericDocValues denseValues = leafReader.getNumericDocValues("dense_int");
    NumericDocValues sparseValues = leafReader.getNumericDocValues("sparse_int");
    BinaryDocValues sparseBinaryValues = leafReader.getBinaryDocValues("sparse_binary");
    NumericDocValues normsValues = leafReader.getNormValues("sparse_text");
    for (int docID = 0; docID < 128; docID++) {
        assertTrue(denseValues.advanceExact(docID));
        assertEquals(127 - docID, (int) denseValues.longValue());
        if (docID >= 64) {
            assertTrue(denseValues.advanceExact(docID));
            assertTrue(sparseValues.advanceExact(docID));
            assertTrue(sparseBinaryValues.advanceExact(docID));
            assertTrue(normsValues.advanceExact(docID));
            assertEquals(1, normsValues.longValue());
            assertEquals(127 - docID, (int) sparseValues.longValue());
            assertEquals(new BytesRef(Integer.toString(127 - docID)), sparseBinaryValues.binaryValue());
        } else {
            assertFalse(sparseBinaryValues.advanceExact(docID));
            assertFalse(sparseValues.advanceExact(docID));
            assertFalse(normsValues.advanceExact(docID));
        }
    }
    IOUtils.close(r, w, dir);
}
Also used : SortField(org.apache.lucene.search.SortField) SortedSetSortField(org.apache.lucene.search.SortedSetSortField) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) Document(org.apache.lucene.document.Document) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) BinaryPoint(org.apache.lucene.document.BinaryPoint) IntPoint(org.apache.lucene.document.IntPoint) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) StoredField(org.apache.lucene.document.StoredField) SortField(org.apache.lucene.search.SortField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) SortedSetSortField(org.apache.lucene.search.SortedSetSortField) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) StringField(org.apache.lucene.document.StringField) DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField) FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) Sort(org.apache.lucene.search.Sort) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 93 with NumericDocValuesField

use of org.apache.lucene.document.NumericDocValuesField in project lucene-solr by apache.

the class BaseTestRangeFilter method build.

private static IndexReader build(Random random, TestIndex index) throws IOException {
    /* build an index */
    Document doc = new Document();
    Field idField = newStringField(random, "id", "", Field.Store.YES);
    Field idDVField = new SortedDocValuesField("id", new BytesRef());
    Field intIdField = new IntPoint("id_int", 0);
    Field intDVField = new NumericDocValuesField("id_int", 0);
    Field floatIdField = new FloatPoint("id_float", 0);
    Field floatDVField = new NumericDocValuesField("id_float", 0);
    Field longIdField = new LongPoint("id_long", 0);
    Field longDVField = new NumericDocValuesField("id_long", 0);
    Field doubleIdField = new DoublePoint("id_double", 0);
    Field doubleDVField = new NumericDocValuesField("id_double", 0);
    Field randField = newStringField(random, "rand", "", Field.Store.YES);
    Field randDVField = new SortedDocValuesField("rand", new BytesRef());
    Field bodyField = newStringField(random, "body", "", Field.Store.NO);
    Field bodyDVField = new SortedDocValuesField("body", new BytesRef());
    doc.add(idField);
    doc.add(idDVField);
    doc.add(intIdField);
    doc.add(intDVField);
    doc.add(floatIdField);
    doc.add(floatDVField);
    doc.add(longIdField);
    doc.add(longDVField);
    doc.add(doubleIdField);
    doc.add(doubleDVField);
    doc.add(randField);
    doc.add(randDVField);
    doc.add(bodyField);
    doc.add(bodyDVField);
    RandomIndexWriter writer = new RandomIndexWriter(random, index.index, newIndexWriterConfig(random, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(TestUtil.nextInt(random, 50, 1000)).setMergePolicy(newLogMergePolicy()));
    TestUtil.reduceOpenFiles(writer.w);
    while (true) {
        int minCount = 0;
        int maxCount = 0;
        for (int d = minId; d <= maxId; d++) {
            idField.setStringValue(pad(d));
            idDVField.setBytesValue(new BytesRef(pad(d)));
            intIdField.setIntValue(d);
            intDVField.setLongValue(d);
            floatIdField.setFloatValue(d);
            floatDVField.setLongValue(Float.floatToRawIntBits(d));
            longIdField.setLongValue(d);
            longDVField.setLongValue(d);
            doubleIdField.setDoubleValue(d);
            doubleDVField.setLongValue(Double.doubleToRawLongBits(d));
            int r = index.allowNegativeRandomInts ? random.nextInt() : random.nextInt(Integer.MAX_VALUE);
            if (index.maxR < r) {
                index.maxR = r;
                maxCount = 1;
            } else if (index.maxR == r) {
                maxCount++;
            }
            if (r < index.minR) {
                index.minR = r;
                minCount = 1;
            } else if (r == index.minR) {
                minCount++;
            }
            randField.setStringValue(pad(r));
            randDVField.setBytesValue(new BytesRef(pad(r)));
            bodyField.setStringValue("body");
            bodyDVField.setBytesValue(new BytesRef("body"));
            writer.addDocument(doc);
        }
        if (minCount == 1 && maxCount == 1) {
            // our subclasses rely on only 1 doc having the min or
            // max, so, we loop until we satisfy that.  it should be
            // exceedingly rare (Yonik calculates 1 in ~429,000)
            // times) that this loop requires more than one try:
            IndexReader ir = writer.getReader();
            writer.close();
            return ir;
        }
        // try again
        writer.deleteAll();
    }
}
Also used : LongPoint(org.apache.lucene.document.LongPoint) Document(org.apache.lucene.document.Document) LongPoint(org.apache.lucene.document.LongPoint) FloatPoint(org.apache.lucene.document.FloatPoint) DoublePoint(org.apache.lucene.document.DoublePoint) IntPoint(org.apache.lucene.document.IntPoint) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) Field(org.apache.lucene.document.Field) IntPoint(org.apache.lucene.document.IntPoint) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) FloatPoint(org.apache.lucene.document.FloatPoint) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) DoublePoint(org.apache.lucene.document.DoublePoint) IndexReader(org.apache.lucene.index.IndexReader) BytesRef(org.apache.lucene.util.BytesRef) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 94 with NumericDocValuesField

use of org.apache.lucene.document.NumericDocValuesField in project lucene-solr by apache.

the class TestSort method testLongMissing.

/** Tests sorting on type long with a missing value */
public void testLongMissing() throws IOException {
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    writer.addDocument(doc);
    doc = new Document();
    doc.add(new NumericDocValuesField("value", -1));
    doc.add(newStringField("value", "-1", Field.Store.YES));
    writer.addDocument(doc);
    doc = new Document();
    doc.add(new NumericDocValuesField("value", 4));
    doc.add(newStringField("value", "4", Field.Store.YES));
    writer.addDocument(doc);
    IndexReader ir = writer.getReader();
    writer.close();
    IndexSearcher searcher = newSearcher(ir);
    Sort sort = new Sort(new SortField("value", SortField.Type.LONG));
    TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
    assertEquals(3, td.totalHits);
    // null is treated as 0
    assertEquals("-1", searcher.doc(td.scoreDocs[0].doc).get("value"));
    assertNull(searcher.doc(td.scoreDocs[1].doc).get("value"));
    assertEquals("4", searcher.doc(td.scoreDocs[2].doc).get("value"));
    ir.close();
    dir.close();
}
Also used : NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) IndexReader(org.apache.lucene.index.IndexReader) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 95 with NumericDocValuesField

use of org.apache.lucene.document.NumericDocValuesField in project lucene-solr by apache.

the class TestSortRandom method testRandomStringSort.

private void testRandomStringSort(SortField.Type type) throws Exception {
    Random random = new Random(random().nextLong());
    final int NUM_DOCS = atLeast(100);
    final Directory dir = newDirectory();
    final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
    final boolean allowDups = random.nextBoolean();
    final Set<String> seen = new HashSet<>();
    final int maxLength = TestUtil.nextInt(random, 5, 100);
    if (VERBOSE) {
        System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " maxLength=" + maxLength + " allowDups=" + allowDups);
    }
    int numDocs = 0;
    final List<BytesRef> docValues = new ArrayList<>();
    // TODO: deletions
    while (numDocs < NUM_DOCS) {
        final Document doc = new Document();
        // 10% of the time, the document is missing the value:
        final BytesRef br;
        if (random().nextInt(10) != 7) {
            final String s;
            if (random.nextBoolean()) {
                s = TestUtil.randomSimpleString(random, maxLength);
            } else {
                s = TestUtil.randomUnicodeString(random, maxLength);
            }
            if (!allowDups) {
                if (seen.contains(s)) {
                    continue;
                }
                seen.add(s);
            }
            if (VERBOSE) {
                System.out.println("  " + numDocs + ": s=" + s);
            }
            br = new BytesRef(s);
            doc.add(new SortedDocValuesField("stringdv", br));
            docValues.add(br);
        } else {
            br = null;
            if (VERBOSE) {
                System.out.println("  " + numDocs + ": <missing>");
            }
            docValues.add(null);
        }
        doc.add(new NumericDocValuesField("id", numDocs));
        doc.add(new StoredField("id", numDocs));
        writer.addDocument(doc);
        numDocs++;
        if (random.nextInt(40) == 17) {
            // force flush
            writer.getReader().close();
        }
    }
    final IndexReader r = writer.getReader();
    writer.close();
    if (VERBOSE) {
        System.out.println("  reader=" + r);
    }
    final IndexSearcher s = newSearcher(r, false);
    final int ITERS = atLeast(100);
    for (int iter = 0; iter < ITERS; iter++) {
        final boolean reverse = random.nextBoolean();
        final TopFieldDocs hits;
        final SortField sf;
        final boolean sortMissingLast;
        sf = new SortField("stringdv", type, reverse);
        sortMissingLast = random().nextBoolean();
        if (sortMissingLast) {
            sf.setMissingValue(SortField.STRING_LAST);
        }
        final Sort sort;
        if (random.nextBoolean()) {
            sort = new Sort(sf);
        } else {
            sort = new Sort(sf, SortField.FIELD_DOC);
        }
        final int hitCount = TestUtil.nextInt(random, 1, r.maxDoc() + 20);
        final RandomQuery f = new RandomQuery(random.nextLong(), random.nextFloat(), docValues);
        hits = s.search(f, hitCount, sort, random.nextBoolean(), random.nextBoolean());
        if (VERBOSE) {
            System.out.println("\nTEST: iter=" + iter + " " + hits.totalHits + " hits; topN=" + hitCount + "; reverse=" + reverse + "; sortMissingLast=" + sortMissingLast + " sort=" + sort);
        }
        // Compute expected results:
        Collections.sort(f.matchValues, new Comparator<BytesRef>() {

            @Override
            public int compare(BytesRef a, BytesRef b) {
                if (a == null) {
                    if (b == null) {
                        return 0;
                    }
                    if (sortMissingLast) {
                        return 1;
                    } else {
                        return -1;
                    }
                } else if (b == null) {
                    if (sortMissingLast) {
                        return -1;
                    } else {
                        return 1;
                    }
                } else {
                    return a.compareTo(b);
                }
            }
        });
        if (reverse) {
            Collections.reverse(f.matchValues);
        }
        final List<BytesRef> expected = f.matchValues;
        if (VERBOSE) {
            System.out.println("  expected:");
            for (int idx = 0; idx < expected.size(); idx++) {
                BytesRef br = expected.get(idx);
                System.out.println("    " + idx + ": " + (br == null ? "<missing>" : br.utf8ToString()));
                if (idx == hitCount - 1) {
                    break;
                }
            }
        }
        if (VERBOSE) {
            System.out.println("  actual:");
            for (int hitIDX = 0; hitIDX < hits.scoreDocs.length; hitIDX++) {
                final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
                BytesRef br = (BytesRef) fd.fields[0];
                System.out.println("    " + hitIDX + ": " + (br == null ? "<missing>" : br.utf8ToString()) + " id=" + s.doc(fd.doc).get("id"));
            }
        }
        for (int hitIDX = 0; hitIDX < hits.scoreDocs.length; hitIDX++) {
            final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
            BytesRef br = expected.get(hitIDX);
            BytesRef br2 = (BytesRef) fd.fields[0];
            assertEquals(br, br2);
        }
    }
    r.close();
    dir.close();
}
Also used : ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) StoredField(org.apache.lucene.document.StoredField) Random(java.util.Random) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet)

Aggregations

NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)296 Document (org.apache.lucene.document.Document)268 Directory (org.apache.lucene.store.Directory)206 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)132 SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField)103 StringField (org.apache.lucene.document.StringField)92 BytesRef (org.apache.lucene.util.BytesRef)85 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)75 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)69 IndexReader (org.apache.lucene.index.IndexReader)54 Field (org.apache.lucene.document.Field)51 IndexSearcher (org.apache.lucene.search.IndexSearcher)49 BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField)44 SortField (org.apache.lucene.search.SortField)42 TextField (org.apache.lucene.document.TextField)41 Sort (org.apache.lucene.search.Sort)41 Term (org.apache.lucene.index.Term)38 IntPoint (org.apache.lucene.document.IntPoint)36 SortedNumericSortField (org.apache.lucene.search.SortedNumericSortField)36 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)34