Search in sources :

Example 36 with NumericDocValuesField

use of org.apache.lucene.document.NumericDocValuesField in project lucene-solr by apache.

the class TestNumericDocValuesUpdates method testTonsOfUpdates.

/**
 * LUCENE-5248: builds a large index with several numeric doc-values fields, applies many
 * doc-values updates while the writer is configured with a very small RAM buffer, and then
 * verifies that every control field {@code cf<i>} holds twice the value of {@code f<i>}.
 *
 * @throws Exception if indexing, updating or reading the index fails
 */
@Test
@Nightly
public void testTonsOfUpdates() throws Exception {
    // LUCENE-5248: make sure that when there are many updates, we don't use too much RAM
    Directory dir = newDirectory();
    final Random random = random();
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random));
    conf.setRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
    // don't flush by doc
    conf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    IndexWriter writer = new IndexWriter(dir, conf);
    // test data: lots of documents (few 10Ks) and lots of update terms (few hundreds)
    final int numDocs = atLeast(20000);
    final int numNumericFields = atLeast(5);
    // terms should affect many docs
    final int numTerms = TestUtil.nextInt(random, 10, 100);
    Set<String> updateTerms = new HashSet<>();
    while (updateTerms.size() < numTerms) {
        updateTerms.add(TestUtil.randomSimpleString(random));
    }
    // build a large index with many NDV fields and update terms
    for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        int numUpdateTerms = TestUtil.nextInt(random, 1, numTerms / 10);
        for (int j = 0; j < numUpdateTerms; j++) {
            doc.add(new StringField("upd", RandomPicks.randomFrom(random, updateTerms), Store.NO));
        }
        for (int j = 0; j < numNumericFields; j++) {
            // the control field "cf<j>" always stores twice the value of "f<j>"
            long val = random.nextInt();
            doc.add(new NumericDocValuesField("f" + j, val));
            doc.add(new NumericDocValuesField("cf" + j, val * 2));
        }
        writer.addDocument(doc);
    }
    // commit so there's something to apply to
    writer.commit();
    // set to flush every 2048 bytes (approximately every 12 updates), so we get
    // many flushes during numeric updates
    writer.getConfig().setRAMBufferSizeMB(2048.0 / 1024 / 1024);
    final int numUpdates = atLeast(100);
    for (int i = 0; i < numUpdates; i++) {
        int field = random.nextInt(numNumericFields);
        Term updateTerm = new Term("upd", RandomPicks.randomFrom(random, updateTerms));
        long value = random.nextInt();
        // update the field and its control field together so the 2x invariant is kept
        writer.updateDocValues(updateTerm, new NumericDocValuesField("f" + field, value), new NumericDocValuesField("cf" + field, value * 2));
    }
    writer.close();
    DirectoryReader reader = DirectoryReader.open(dir);
    for (LeafReaderContext context : reader.leaves()) {
        // the leaf reader does not depend on the field index, so fetch it once per leaf
        LeafReader r = context.reader();
        for (int i = 0; i < numNumericFields; i++) {
            NumericDocValues f = r.getNumericDocValues("f" + i);
            NumericDocValues cf = r.getNumericDocValues("cf" + i);
            for (int j = 0; j < r.maxDoc(); j++) {
                assertEquals(j, f.nextDoc());
                assertEquals(j, cf.nextDoc());
                // expected value (derived from f) first, observed control value second,
                // so a failure message reports "expected" and "but was" the right way round
                assertEquals("reader=" + r + ", field=f" + i + ", doc=" + j, f.longValue() * 2, cf.longValue());
            }
        }
    }
    reader.close();
    dir.close();
}
Also used : Document(org.apache.lucene.document.Document) Random(java.util.Random) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) StringField(org.apache.lucene.document.StringField) Directory(org.apache.lucene.store.Directory) NRTCachingDirectory(org.apache.lucene.store.NRTCachingDirectory) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 37 with NumericDocValuesField

use of org.apache.lucene.document.NumericDocValuesField in project lucene-solr by apache.

the class TestNumericDocValuesUpdates method testChangeCodec.

/**
 * Writes one document using the default doc-values format, reopens the index with a codec
 * that returns {@code AssertingDocValuesFormat}, adds a second document and updates field
 * {@code f1} of the first one, then checks the values of {@code f1} and {@code f2} for both
 * documents.
 *
 * @throws Exception if writing to or reading from the index fails
 */
@Test
public void testChangeCodec() throws Exception {
    Directory directory = newDirectory();

    // first session: codec hands out the default doc-values format for every field;
    // merges are disabled to simplify test assertions
    IndexWriterConfig initialConfig = newIndexWriterConfig(new MockAnalyzer(random()));
    initialConfig.setMergePolicy(NoMergePolicy.INSTANCE);
    initialConfig.setCodec(new AssertingCodec() {

        @Override
        public DocValuesFormat getDocValuesFormatForField(String fieldName) {
            return TestUtil.getDefaultDocValuesFormat();
        }
    });
    IndexWriter initialWriter = new IndexWriter(directory, initialConfig);
    Document firstDoc = new Document();
    firstDoc.add(new StringField("id", "d0", Store.NO));
    firstDoc.add(new NumericDocValuesField("f1", 5L));
    firstDoc.add(new NumericDocValuesField("f2", 13L));
    initialWriter.addDocument(firstDoc);
    initialWriter.close();

    // second session: change the format; merges stay disabled for the same reason
    IndexWriterConfig changedConfig = newIndexWriterConfig(new MockAnalyzer(random()));
    changedConfig.setMergePolicy(NoMergePolicy.INSTANCE);
    changedConfig.setCodec(new AssertingCodec() {

        @Override
        public DocValuesFormat getDocValuesFormatForField(String fieldName) {
            return new AssertingDocValuesFormat();
        }
    });
    IndexWriter changedWriter = new IndexWriter(directory, changedConfig);
    Document secondDoc = new Document();
    secondDoc.add(new StringField("id", "d1", Store.NO));
    secondDoc.add(new NumericDocValuesField("f1", 17L));
    secondDoc.add(new NumericDocValuesField("f2", 2L));
    changedWriter.addDocument(secondDoc);
    // rewrite f1 of the document written in the first session
    changedWriter.updateNumericDocValue(new Term("id", "d0"), "f1", 12L);
    changedWriter.close();

    DirectoryReader reader = DirectoryReader.open(directory);
    NumericDocValues f1Values = MultiDocValues.getNumericValues(reader, "f1");
    NumericDocValues f2Values = MultiDocValues.getNumericValues(reader, "f2");
    // expected values indexed by doc id: doc 0 carries the updated f1, doc 1 is untouched
    final long[] expectedF1 = { 12L, 17L };
    final long[] expectedF2 = { 13L, 2L };
    for (int docId = 0; docId < expectedF1.length; docId++) {
        assertEquals(docId, f1Values.nextDoc());
        assertEquals(expectedF1[docId], f1Values.longValue());
        assertEquals(docId, f2Values.nextDoc());
        assertEquals(expectedF2[docId], f2Values.longValue());
    }
    reader.close();
    directory.close();
}
Also used : AssertingCodec(org.apache.lucene.codecs.asserting.AssertingCodec) Document(org.apache.lucene.document.Document) DocValuesFormat(org.apache.lucene.codecs.DocValuesFormat) AssertingDocValuesFormat(org.apache.lucene.codecs.asserting.AssertingDocValuesFormat) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) StringField(org.apache.lucene.document.StringField) Directory(org.apache.lucene.store.Directory) NRTCachingDirectory(org.apache.lucene.store.NRTCachingDirectory) Test(org.junit.Test)

Example 38 with NumericDocValuesField

use of org.apache.lucene.document.NumericDocValuesField in project lucene-solr by apache.

the class TestNumericDocValuesUpdates method testSegmentMerges.

/**
 * Runs several rounds of: add a batch of documents, update {@code ndv} for all of them,
 * optionally delete / commit / reopen the writer, add one more document, force-merge to a
 * single segment, and verify that every document in that segment carries the round's value.
 *
 * @throws Exception if indexing, merging or reading the index fails
 */
@Test
public void testSegmentMerges() throws Exception {
    Directory directory = newDirectory();
    Random rng = random();
    IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(rng));
    IndexWriter indexWriter = new IndexWriter(directory, config);
    int nextId = 0;
    final int rounds = atLeast(10);
    for (int round = 0; round < rounds; round++) {
        // one Document instance is reused for the whole batch; only its "id" field changes
        Document batchDoc = new Document();
        batchDoc.add(new StringField("key", "doc", Store.NO));
        batchDoc.add(new NumericDocValuesField("ndv", -1));
        final int batchSize = atLeast(30);
        for (int added = 0; added < batchSize; added++) {
            batchDoc.removeField("id");
            final String idText = Integer.toString(nextId);
            nextId++;
            batchDoc.add(new StringField("id", idText, Store.NO));
            indexWriter.addDocument(batchDoc);
        }

        // every document shares key=doc, so this update targets all of them
        final long roundValue = round + 1;
        indexWriter.updateNumericDocValue(new Term("key", "doc"), "ndv", roundValue);

        // randomly delete some docs
        if (rng.nextDouble() < 0.2) {
            final String victimId = Integer.toString(rng.nextInt(nextId));
            indexWriter.deleteDocuments(new Term("id", victimId));
        }

        // randomly commit or reopen-IW (or nothing), before forceMerge
        if (rng.nextDouble() < 0.4) {
            indexWriter.commit();
        } else if (rng.nextDouble() < 0.1) {
            indexWriter.close();
            config = newIndexWriterConfig(new MockAnalyzer(rng));
            indexWriter = new IndexWriter(directory, config);
        }

        // Add another document with the current value, to be sure forceMerge has
        // something to merge. For instance, it could be that CMS finished merging
        // all segments down to 1 before the delete was applied, so when forceMerge
        // is called the index would be one segment with deletes, and some MPs might
        // not merge it, thereby invalidating the test's assumption that the reader
        // has no deletes.
        Document extraDoc = new Document();
        final String extraId = Integer.toString(nextId);
        nextId++;
        extraDoc.add(new StringField("id", extraId, Store.NO));
        extraDoc.add(new StringField("key", "doc", Store.NO));
        extraDoc.add(new NumericDocValuesField("ndv", roundValue));
        indexWriter.addDocument(extraDoc);
        indexWriter.forceMerge(1, true);

        // open either a reader on the committed index or a reader from the writer
        final boolean openFromDirectory = rng.nextBoolean();
        if (openFromDirectory) {
            indexWriter.commit();
        }
        final DirectoryReader reader = openFromDirectory
            ? DirectoryReader.open(directory)
            : DirectoryReader.open(indexWriter);

        assertEquals(1, reader.leaves().size());
        final LeafReader leaf = reader.leaves().get(0).reader();
        assertNull("index should have no deletes after forceMerge", leaf.getLiveDocs());
        NumericDocValues values = leaf.getNumericDocValues("ndv");
        assertNotNull(values);
        int docId = 0;
        while (docId < leaf.maxDoc()) {
            assertEquals(docId, values.nextDoc());
            assertEquals(roundValue, values.longValue());
            docId++;
        }
        reader.close();
    }
    indexWriter.close();
    directory.close();
}
Also used : Document(org.apache.lucene.document.Document) Random(java.util.Random) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) StringField(org.apache.lucene.document.StringField) Directory(org.apache.lucene.store.Directory) NRTCachingDirectory(org.apache.lucene.store.NRTCachingDirectory) Test(org.junit.Test)

Example 39 with NumericDocValuesField

use of org.apache.lucene.document.NumericDocValuesField in project lucene-solr by apache.

the class TestNumericDocValuesUpdates method testAddIndexes.

/**
 * Builds an index whose documents carry {@code ndv} and {@code control} doc values, updates
 * both fields for one randomly chosen id term, copies the index into a second directory
 * (via {@code addIndexes} or {@code TestUtil.addIndexesSlowly}) and verifies in the copy that
 * {@code control} is always twice {@code ndv}.
 *
 * @throws Exception if indexing, copying or reading the index fails
 */
@Test
public void testAddIndexes() throws Exception {
    Directory sourceDir = newDirectory();
    IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random()));
    IndexWriter indexWriter = new IndexWriter(sourceDir, config);
    final int docCount = atLeast(50);
    final int termCount = TestUtil.nextInt(random(), 1, docCount / 5);
    Set<String> idTerms = new HashSet<>();
    while (idTerms.size() < termCount) {
        idTerms.add(TestUtil.randomSimpleString(random()));
    }

    // create first index: every doc starts with ndv=4 and control=8 (i.e. 2 * ndv)
    for (int n = 0; n < docCount; n++) {
        Document document = new Document();
        final String id = RandomPicks.randomFrom(random(), idTerms);
        document.add(new StringField("id", id, Store.NO));
        document.add(new NumericDocValuesField("ndv", 4L));
        document.add(new NumericDocValuesField("control", 8L));
        indexWriter.addDocument(document);
    }
    if (random().nextBoolean()) {
        indexWriter.commit();
    }

    // update some docs to a random value, keeping control at twice that value
    final long newValue = random().nextInt();
    final Term updateTerm = new Term("id", RandomPicks.randomFrom(random(), idTerms));
    NumericDocValuesField ndvUpdate = new NumericDocValuesField("ndv", newValue);
    NumericDocValuesField controlUpdate = new NumericDocValuesField("control", newValue * 2);
    indexWriter.updateDocValues(updateTerm, ndvUpdate, controlUpdate);
    indexWriter.close();

    // copy the first index into a fresh directory using one of two code paths
    Directory targetDir = newDirectory();
    config = newIndexWriterConfig(new MockAnalyzer(random()));
    indexWriter = new IndexWriter(targetDir, config);
    if (random().nextBoolean()) {
        indexWriter.addIndexes(sourceDir);
    } else {
        DirectoryReader sourceReader = DirectoryReader.open(sourceDir);
        TestUtil.addIndexesSlowly(indexWriter, sourceReader);
        sourceReader.close();
    }
    indexWriter.close();

    DirectoryReader copiedReader = DirectoryReader.open(targetDir);
    for (LeafReaderContext leafContext : copiedReader.leaves()) {
        LeafReader leaf = leafContext.reader();
        NumericDocValues ndvValues = leaf.getNumericDocValues("ndv");
        NumericDocValues controlValues = leaf.getNumericDocValues("control");
        int docId = 0;
        while (docId < leaf.maxDoc()) {
            assertEquals(docId, ndvValues.nextDoc());
            assertEquals(docId, controlValues.nextDoc());
            assertEquals(ndvValues.longValue() * 2, controlValues.longValue());
            docId++;
        }
    }
    copiedReader.close();
    IOUtils.close(sourceDir, targetDir);
}
Also used : Document(org.apache.lucene.document.Document) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) StringField(org.apache.lucene.document.StringField) Directory(org.apache.lucene.store.Directory) NRTCachingDirectory(org.apache.lucene.store.NRTCachingDirectory) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 40 with NumericDocValuesField

use of org.apache.lucene.document.NumericDocValuesField in project lucene-solr by apache.

the class TestNumericDocValuesUpdates method testMultipleDocValuesTypes.

/**
 * Indexes four documents that each carry numeric, binary, sorted and sorted-set doc values,
 * updates only the numeric field of all of them to 17, and verifies that the numeric field
 * changed while the other doc-values fields still hold their original per-document values.
 *
 * @throws Exception if indexing, updating or reading the index fails
 */
@Test
public void testMultipleDocValuesTypes() throws Exception {
    Directory directory = newDirectory();
    IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random()));
    // buffer up to 10 docs before flushing (original note: prevent merges)
    config.setMaxBufferedDocs(10);
    IndexWriter indexWriter = new IndexWriter(directory, config);
    for (int n = 0; n < 4; n++) {
        final String single = Integer.toString(n);
        final String doubled = Integer.toString(n * 2);
        Document document = new Document();
        document.add(new StringField("dvUpdateKey", "dv", Store.NO));
        document.add(new NumericDocValuesField("ndv", n));
        document.add(new BinaryDocValuesField("bdv", new BytesRef(single)));
        document.add(new SortedDocValuesField("sdv", new BytesRef(single)));
        document.add(new SortedSetDocValuesField("ssdv", new BytesRef(single)));
        document.add(new SortedSetDocValuesField("ssdv", new BytesRef(doubled)));
        indexWriter.addDocument(document);
    }
    indexWriter.commit();

    // update all docs' ndv field
    indexWriter.updateNumericDocValue(new Term("dvUpdateKey", "dv"), "ndv", 17L);
    indexWriter.close();

    final DirectoryReader reader = DirectoryReader.open(directory);
    // only the first leaf is inspected
    LeafReader leaf = reader.leaves().get(0).reader();
    NumericDocValues numericValues = leaf.getNumericDocValues("ndv");
    BinaryDocValues binaryValues = leaf.getBinaryDocValues("bdv");
    SortedDocValues sortedValues = leaf.getSortedDocValues("sdv");
    SortedSetDocValues sortedSetValues = leaf.getSortedSetDocValues("ssdv");
    for (int docId = 0; docId < leaf.maxDoc(); docId++) {
        // numeric field: every doc was updated to 17
        assertEquals(docId, numericValues.nextDoc());
        assertEquals(17, numericValues.longValue());

        // binary field: still the doc's own index as text
        assertEquals(docId, binaryValues.nextDoc());
        BytesRef binaryTerm = binaryValues.binaryValue();
        assertEquals(new BytesRef(Integer.toString(docId)), binaryTerm);

        // sorted field: same expectation as the binary field
        assertEquals(docId, sortedValues.nextDoc());
        BytesRef sortedTerm = sortedValues.binaryValue();
        assertEquals(new BytesRef(Integer.toString(docId)), sortedTerm);

        // sorted-set field: first value is the index, second value is twice the index
        assertEquals(docId, sortedSetValues.nextDoc());
        BytesRef firstTerm = sortedSetValues.lookupOrd(sortedSetValues.nextOrd());
        assertEquals(docId, Integer.parseInt(firstTerm.utf8ToString()));
        // for doc 0 both indexed values are "0", so only one ord is read
        if (docId != 0) {
            BytesRef secondTerm = sortedSetValues.lookupOrd(sortedSetValues.nextOrd());
            assertEquals(docId * 2, Integer.parseInt(secondTerm.utf8ToString()));
        }
        assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSetValues.nextOrd());
    }
    reader.close();
    directory.close();
}
Also used : Document(org.apache.lucene.document.Document) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) StringField(org.apache.lucene.document.StringField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) NRTCachingDirectory(org.apache.lucene.store.NRTCachingDirectory) Test(org.junit.Test)

Aggregations

NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)296 Document (org.apache.lucene.document.Document)268 Directory (org.apache.lucene.store.Directory)206 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)132 SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField)103 StringField (org.apache.lucene.document.StringField)92 BytesRef (org.apache.lucene.util.BytesRef)85 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)75 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)69 IndexReader (org.apache.lucene.index.IndexReader)54 Field (org.apache.lucene.document.Field)51 IndexSearcher (org.apache.lucene.search.IndexSearcher)49 BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField)44 SortField (org.apache.lucene.search.SortField)42 TextField (org.apache.lucene.document.TextField)41 Sort (org.apache.lucene.search.Sort)41 Term (org.apache.lucene.index.Term)38 IntPoint (org.apache.lucene.document.IntPoint)36 SortedNumericSortField (org.apache.lucene.search.SortedNumericSortField)36 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)34