Search in sources :

Example 91 with SortedDocValuesField

use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache.

the class TestDocValuesQueries method doTestDuelPointRangeSortedRangeQuery.

/**
 * Duels {@code LongPoint} range queries on field "idx" against doc-values range
 * queries on field "dv" and asserts, via {@code assertSameMatches}, that both
 * select the same documents.
 *
 * <p>Each round builds a fresh random index in which every value is written both
 * as a point and as a doc-values term holding the {@code LongPoint} encoding of
 * the same long, then runs 100 random range queries against it.
 *
 * @param sortedSet       when true, values are indexed and queried through
 *                        {@code SortedSetDocValuesField}; otherwise through
 *                        {@code SortedDocValuesField}
 * @param maxValuesPerDoc upper bound handed to {@code TestUtil.nextInt} when picking
 *                        how many values a document receives
 * @throws IOException propagated from the index writer, reader or directory
 */
private void doTestDuelPointRangeSortedRangeQuery(boolean sortedSet, int maxValuesPerDoc) throws IOException {
    final int rounds = atLeast(10);
    for (int round = 0; round < rounds; ++round) {
        Directory directory = newDirectory();
        RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

        // Index phase: every value goes into both the point field and the doc-values field.
        final int docCount = atLeast(100);
        for (int docIdx = 0; docIdx < docCount; ++docIdx) {
            Document document = new Document();
            final int valueCount = TestUtil.nextInt(random(), 0, maxValuesPerDoc);
            for (int valueIdx = 0; valueIdx < valueCount; ++valueIdx) {
                final long value = TestUtil.nextLong(random(), -100, 10000);
                byte[] sortableBytes = new byte[Long.BYTES];
                LongPoint.encodeDimension(value, sortableBytes, 0);
                final BytesRef term = new BytesRef(sortableBytes);
                document.add(sortedSet ? new SortedSetDocValuesField("dv", term) : new SortedDocValuesField("dv", term));
                document.add(new LongPoint("idx", value));
            }
            writer.addDocument(document);
        }

        // Randomly introduce deletions so the duel also covers deleted docs.
        if (random().nextBoolean()) {
            writer.deleteDocuments(LongPoint.newRangeQuery("idx", 0L, 10L));
        }

        final IndexReader reader = writer.getReader();
        final IndexSearcher searcher = newSearcher(reader, false);
        writer.close();

        // Query phase.
        for (int queryIdx = 0; queryIdx < 100; ++queryIdx) {
            long min;
            if (random().nextBoolean()) {
                min = Long.MIN_VALUE;
            } else {
                min = TestUtil.nextLong(random(), -100, 10000);
            }
            long max;
            if (random().nextBoolean()) {
                max = Long.MAX_VALUE;
            } else {
                max = TestUtil.nextLong(random(), -100, 10000);
            }

            // The doc-values bounds are encoded BEFORE any exclusive-bound adjustment below:
            // an exclusive bound on the encoded value corresponds to the shifted inclusive
            // bound used by the point query.
            byte[] encodedMin = new byte[Long.BYTES];
            byte[] encodedMax = new byte[Long.BYTES];
            LongPoint.encodeDimension(min, encodedMin, 0);
            LongPoint.encodeDimension(max, encodedMax, 0);

            final boolean includeMin = !random().nextBoolean();
            if (!includeMin) {
                min++;
            }
            final boolean includeMax = !random().nextBoolean();
            if (!includeMax) {
                max--;
            }

            final Query pointQuery = LongPoint.newRangeQuery("idx", min, max);

            // An open-ended bound is randomly expressed as null instead of the encoded extreme.
            final BytesRef lower = (min == Long.MIN_VALUE && random().nextBoolean()) ? null : new BytesRef(encodedMin);
            final BytesRef upper = (max == Long.MAX_VALUE && random().nextBoolean()) ? null : new BytesRef(encodedMax);
            final Query docValuesQuery = sortedSet
                    ? SortedSetDocValuesField.newRangeQuery("dv", lower, upper, includeMin, includeMax)
                    : SortedDocValuesField.newRangeQuery("dv", lower, upper, includeMin, includeMax);

            assertSameMatches(searcher, pointQuery, docValuesQuery, false);
        }

        reader.close();
        directory.close();
    }
}
Also used : LongPoint(org.apache.lucene.document.LongPoint) Document(org.apache.lucene.document.Document) LongPoint(org.apache.lucene.document.LongPoint) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) IndexReader(org.apache.lucene.index.IndexReader) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 92 with SortedDocValuesField

use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache.

the class TestNumericDocValuesUpdates method testDifferentDVFormatPerField.

/**
 * Indexes the same document twice (one copy committed, one left in memory),
 * updates the numeric doc value "ndv" to 17 for every document matching the
 * term key:doc, and then verifies that each document reports 17 for "ndv" and
 * still carries the sorted value "value".
 *
 * <p>The codec is overridden so that each field lookup asks
 * {@code TestUtil.getDefaultDocValuesFormat()} for its format.
 */
@Test
public void testDifferentDVFormatPerField() throws Exception {
    // test relies on separate instances of the "same thing"
    assert TestUtil.getDefaultDocValuesFormat() != TestUtil.getDefaultDocValuesFormat();

    Directory directory = newDirectory();
    IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random()));
    config.setCodec(new AssertingCodec() {
        @Override
        public DocValuesFormat getDocValuesFormatForField(String field) {
            return TestUtil.getDefaultDocValuesFormat();
        }
    });
    IndexWriter indexWriter = new IndexWriter(directory, config);

    Document document = new Document();
    document.add(new StringField("key", "doc", Store.NO));
    document.add(new NumericDocValuesField("ndv", 5));
    document.add(new SortedDocValuesField("sorted", new BytesRef("value")));

    // first copy: flushed to disk by the commit
    indexWriter.addDocument(document);
    indexWriter.commit();
    // second copy: still in memory when the update is applied
    indexWriter.addDocument(document);
    indexWriter.updateNumericDocValue(new Term("key", "doc"), "ndv", 17L);
    indexWriter.close();

    final DirectoryReader reader = DirectoryReader.open(directory);
    NumericDocValues numericValues = MultiDocValues.getNumericValues(reader, "ndv");
    SortedDocValues sortedValues = MultiDocValues.getSortedValues(reader, "sorted");
    final BytesRef expectedTerm = new BytesRef("value");
    final int maxDoc = reader.maxDoc();
    for (int docId = 0; docId < maxDoc; docId++) {
        // both iterators must visit every doc id in order
        assertEquals(docId, numericValues.nextDoc());
        assertEquals(17, numericValues.longValue());
        assertEquals(docId, sortedValues.nextDoc());
        assertEquals(expectedTerm, sortedValues.binaryValue());
    }

    reader.close();
    directory.close();
}
Also used : AssertingCodec(org.apache.lucene.codecs.asserting.AssertingCodec) Document(org.apache.lucene.document.Document) DocValuesFormat(org.apache.lucene.codecs.DocValuesFormat) AssertingDocValuesFormat(org.apache.lucene.codecs.asserting.AssertingDocValuesFormat) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) StringField(org.apache.lucene.document.StringField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) NRTCachingDirectory(org.apache.lucene.store.NRTCachingDirectory) Test(org.junit.Test)

Example 93 with SortedDocValuesField

use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache.

the class TestMultiDocValues method testSortedWithLotsOfDups.

// Tries to make more dups than testSorted: every document gets a random simple
// string produced by TestUtil.randomSimpleString(random(), 2) in the sorted
// "bytes" field. The multi-segment view taken before forceMerge(1) is then
// compared, doc by doc, with the single merged leaf.
public void testSortedWithLotsOfDups() throws Exception {
    Directory directory = newDirectory();

    // One document / field instance is reused; only the field value changes per doc.
    Document document = new Document();
    Field bytesField = new SortedDocValuesField("bytes", new BytesRef());
    document.add(bytesField);

    IndexWriterConfig config = newIndexWriterConfig(random(), null);
    config.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

    final int numDocs = atLeast(TEST_NIGHTLY ? 500 : 50);
    for (int docIdx = 0; docIdx < numDocs; docIdx++) {
        bytesField.setBytesValue(new BytesRef(TestUtil.randomSimpleString(random(), 2)));
        writer.addDocument(document);
        // occasional commits
        if (random().nextInt(17) == 0) {
            writer.commit();
        }
    }

    // Reader taken before the merge vs. reader taken after merging to one segment.
    DirectoryReader multiReader = writer.getReader();
    writer.forceMerge(1);
    DirectoryReader mergedReader = writer.getReader();
    LeafReader mergedLeaf = getOnlyLeafReader(mergedReader);
    writer.close();

    SortedDocValues multiValues = MultiDocValues.getSortedValues(multiReader, "bytes");
    SortedDocValues singleValues = mergedLeaf.getSortedDocValues("bytes");
    assertEquals(singleValues.getValueCount(), multiValues.getValueCount());
    for (int docId = 0; docId < numDocs; docId++) {
        assertEquals(docId, multiValues.nextDoc());
        assertEquals(docId, singleValues.nextDoc());
        // ordinals must agree
        assertEquals(singleValues.ordValue(), multiValues.ordValue());
        // and so must the bytes behind them; the expected side is deep-copied before
        // the other iterator is consulted
        assertEquals(BytesRef.deepCopyOf(singleValues.binaryValue()), multiValues.binaryValue());
    }

    testRandomAdvance(
            mergedLeaf.getSortedDocValues("bytes"),
            MultiDocValues.getSortedValues(multiReader, "bytes"));
    testRandomAdvanceExact(
            mergedLeaf.getSortedDocValues("bytes"),
            MultiDocValues.getSortedValues(multiReader, "bytes"),
            mergedLeaf.maxDoc());

    multiReader.close();
    mergedReader.close();
    directory.close();
}
Also used : NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) Field(org.apache.lucene.document.Field) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) Document(org.apache.lucene.document.Document) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 94 with SortedDocValuesField

use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache.

the class TestDiversifiedTopDocsCollector method setUp.

/**
 * Builds the test index from {@code hitsOfThe60s}: each tab-separated entry is
 * parsed into a {@code Record}, remembered in {@code parsedRecords} under its id,
 * and indexed through a single reused document. Afterwards the reader, searcher
 * and artist doc values are initialised and the searcher's similarity is wrapped
 * in a {@code DocValueSimilarity} over "weeksAtNumberOne".
 */
@Override
public void setUp() throws Exception {
    super.setUp();
    // populate an index with documents - artist, song and weeksAtNumberOne
    dir = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), dir);

    // Field instances are created once and re-populated for every record.
    Field yearField = newTextField("year", "", Field.Store.NO);
    SortedDocValuesField artistField = new SortedDocValuesField("artist", new BytesRef(""));
    Field weeksAtNumberOneField = new FloatDocValuesField("weeksAtNumberOne", 0.0F);
    Field weeksStoredField = new StoredField("weeks", 0.0F);
    Field idField = newStringField("id", "", Field.Store.YES);
    Field songField = newTextField("song", "", Field.Store.NO);
    Field storedArtistField = newTextField("artistName", "", Field.Store.NO);

    Document template = new Document();
    Field[] fieldsInDocOrder = {
        idField, weeksAtNumberOneField, storedArtistField, songField, weeksStoredField, yearField, artistField
    };
    for (Field field : fieldsInDocOrder) {
        template.add(field);
    }

    parsedRecords.clear();
    for (int i = 0; i < hitsOfThe60s.length; i++) {
        String[] columns = hitsOfThe60s[i].split("\t");
        Record rec = new Record(String.valueOf(i), columns[0], columns[1], columns[2], Float.parseFloat(columns[3]));
        parsedRecords.put(rec.id, rec);

        idField.setStringValue(rec.id);
        yearField.setStringValue(rec.year);
        storedArtistField.setStringValue(rec.artist);
        artistField.setBytesValue(new BytesRef(rec.artist));
        songField.setStringValue(rec.song);
        weeksStoredField.setFloatValue(rec.weeks);
        weeksAtNumberOneField.setFloatValue(rec.weeks);
        indexWriter.addDocument(template);

        if (i % 10 == 0) {
            // Causes the creation of multiple segments for our test
            indexWriter.commit();
        }
    }

    reader = indexWriter.getReader();
    indexWriter.close();
    searcher = newSearcher(reader);
    artistDocValues = MultiDocValues.getSortedValues(reader, "artist");

    // All searches sort by song popularity
    searcher.setSimilarity(new DocValueSimilarity(searcher.getSimilarity(true), "weeksAtNumberOne"));
}
Also used : Similarity(org.apache.lucene.search.similarities.Similarity) FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) Document(org.apache.lucene.document.Document) FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) StoredField(org.apache.lucene.document.StoredField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) Field(org.apache.lucene.document.Field) StoredField(org.apache.lucene.document.StoredField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef)

Example 95 with SortedDocValuesField

use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache.

the class TestDocValuesStatsCollector method testDocsWithSortedValues.

/**
 * Indexes a random number of documents of which only some carry a sorted
 * doc value (plus an "id" term), optionally deletes a random subset of those by
 * id, and checks that {@code SortedDocValuesStats} collected over a match-all
 * search reports the expected count, missing count, min and max.
 *
 * @throws IOException propagated from the index writer, reader or directory
 */
public void testDocsWithSortedValues() throws IOException {
    try (Directory directory = newDirectory();
        IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig())) {
        final String field = "sorted";
        final int numDocs = TestUtil.nextInt(random(), 1, 100);

        // expectedValues[i] stays null for docs without a value (or deleted later)
        final BytesRef[] expectedValues = new BytesRef[numDocs];
        for (int docIdx = 0; docIdx < numDocs; docIdx++) {
            Document document = new Document();
            // not all documents have a value
            if (random().nextBoolean()) {
                BytesRef value = TestUtil.randomBinaryTerm(random());
                document.add(new SortedDocValuesField(field, value));
                document.add(new StringField("id", "doc" + docIdx, Store.NO));
                expectedValues[docIdx] = value;
            }
            writer.addDocument(document);
        }

        // 20% of cases delete some docs
        final boolean deleteSomeDocs = random().nextDouble() < 0.2;
        if (deleteSomeDocs) {
            for (int docIdx = 0; docIdx < numDocs; docIdx++) {
                if (!random().nextBoolean()) {
                    continue;
                }
                writer.deleteDocuments(new Term("id", "doc" + docIdx));
                expectedValues[docIdx] = null;
            }
        }

        try (DirectoryReader reader = DirectoryReader.open(writer)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            SortedDocValuesStats stats = new SortedDocValuesStats(field);
            searcher.search(new MatchAllDocsQuery(), new DocValuesStatsCollector(stats));

            final int expectedCount = (int) nonNull(expectedValues).count();
            assertEquals(expectedCount, stats.count());

            final int docsWithoutField = (int) isNull(expectedValues).count();
            assertEquals(computeExpMissing(docsWithoutField, numDocs, reader), stats.missing());

            // min/max are only checked when at least one value was counted
            if (stats.count() > 0) {
                assertEquals(nonNull(expectedValues).min(BytesRef::compareTo).get(), stats.min());
                assertEquals(nonNull(expectedValues).max(BytesRef::compareTo).get(), stats.max());
            }
        }
    }
}
Also used : SortedDocValuesStats(org.apache.lucene.search.DocValuesStats.SortedDocValuesStats) DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) IndexWriter(org.apache.lucene.index.IndexWriter) StringField(org.apache.lucene.document.StringField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Aggregations

SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)153 BytesRef (org.apache.lucene.util.BytesRef)152 Document (org.apache.lucene.document.Document)137 Directory (org.apache.lucene.store.Directory)109 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)87 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)66 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)53 SortedSetDocValuesField (org.apache.lucene.document.SortedSetDocValuesField)35 StringField (org.apache.lucene.document.StringField)33 TextField (org.apache.lucene.document.TextField)31 BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField)30 Field (org.apache.lucene.document.Field)30 IndexReader (org.apache.lucene.index.IndexReader)30 Term (org.apache.lucene.index.Term)28 ArrayList (java.util.ArrayList)27 SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField)25 IndexSearcher (org.apache.lucene.search.IndexSearcher)25 TermQuery (org.apache.lucene.search.TermQuery)21 IntPoint (org.apache.lucene.document.IntPoint)20 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)18