Use of org.apache.lucene.document.NumericDocValuesField in the lucene-solr project by Apache.
Class TestNumericDocValuesUpdates, method testTonsOfUpdates.
@Test
@Nightly
public void testTonsOfUpdates() throws Exception {
  // LUCENE-5248: applying a large number of doc-values updates must not use too much RAM.
  final Directory directory = newDirectory();
  final Random rng = random();
  final MockAnalyzer analyzer = new MockAnalyzer(rng);
  final IndexWriterConfig config = newIndexWriterConfig(analyzer);
  config.setRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
  // never flush based on the number of buffered documents
  config.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
  final IndexWriter indexWriter = new IndexWriter(directory, config);

  // Test data: lots of documents (few 10Ks), several numeric fields, and a pool of
  // 10..100 distinct update terms so that each term affects many documents.
  final int docCount = atLeast(20000);
  final int fieldCount = atLeast(5);
  final int termCount = TestUtil.nextInt(rng, 10, 100);
  final Set<String> termPool = new HashSet<>();
  int distinctTerms = 0;
  while (distinctTerms < termCount) {
    // only count strings that were not already in the pool
    if (termPool.add(TestUtil.randomSimpleString(rng))) {
      distinctTerms++;
    }
  }

  // Build a large index; every document carries "f<n>" and a control field "cf<n>"
  // holding exactly twice the value of "f<n>".
  for (int docIdx = 0; docIdx < docCount; docIdx++) {
    final Document document = new Document();
    final int termsForDoc = TestUtil.nextInt(rng, 1, termCount / 10);
    int attached = 0;
    while (attached < termsForDoc) {
      final String picked = RandomPicks.randomFrom(rng, termPool);
      document.add(new StringField("upd", picked, Store.NO));
      attached++;
    }
    for (int fieldIdx = 0; fieldIdx < fieldCount; fieldIdx++) {
      final long base = rng.nextInt();
      document.add(new NumericDocValuesField("f" + fieldIdx, base));
      document.add(new NumericDocValuesField("cf" + fieldIdx, base * 2));
    }
    indexWriter.addDocument(document);
  }

  // commit so that the updates below have something to apply to
  indexWriter.commit();

  // Shrink the RAM buffer to 2048 bytes (per the original note: roughly every 12
  // updates) so that many flushes happen while the updates are being applied.
  indexWriter.getConfig().setRAMBufferSizeMB(2048.0 / 1024 / 1024);
  final int updateCount = atLeast(100);
  int applied = 0;
  while (applied < updateCount) {
    final int fieldIdx = rng.nextInt(fieldCount);
    final String picked = RandomPicks.randomFrom(rng, termPool);
    final long newValue = rng.nextInt();
    final Term target = new Term("upd", picked);
    // update the field and its control field together, keeping the 2x relation
    indexWriter.updateDocValues(
        target,
        new NumericDocValuesField("f" + fieldIdx, newValue),
        new NumericDocValuesField("cf" + fieldIdx, newValue * 2));
    applied++;
  }
  indexWriter.close();

  // Verify on every segment that each control field is still twice its field.
  final DirectoryReader indexReader = DirectoryReader.open(directory);
  for (LeafReaderContext leaf : indexReader.leaves()) {
    final LeafReader segment = leaf.reader();
    for (int fieldIdx = 0; fieldIdx < fieldCount; fieldIdx++) {
      final NumericDocValues values = segment.getNumericDocValues("f" + fieldIdx);
      final NumericDocValues control = segment.getNumericDocValues("cf" + fieldIdx);
      final int maxDoc = segment.maxDoc();
      for (int docId = 0; docId < maxDoc; docId++) {
        // both iterators are expected to have a value for every document
        assertEquals(docId, values.nextDoc());
        assertEquals(docId, control.nextDoc());
        assertEquals("reader=" + segment + ", field=f" + fieldIdx + ", doc=" + docId, control.longValue(), values.longValue() * 2);
      }
    }
  }
  indexReader.close();
  directory.close();
}
Use of org.apache.lucene.document.NumericDocValuesField in the lucene-solr project by Apache.
Class TestNumericDocValuesUpdates, method testChangeCodec.
@Test
public void testChangeCodec() throws Exception {
  // Writes one document with one per-field doc-values format, reopens the index with a
  // codec that returns a different format, adds a second document, updates the first
  // document's "f1", and finally checks all four values through a merged view.
  final Directory directory = newDirectory();

  // --- first writer session ---
  final IndexWriterConfig firstConfig = newIndexWriterConfig(new MockAnalyzer(random()));
  // merges are disabled to keep the assertions simple
  firstConfig.setMergePolicy(NoMergePolicy.INSTANCE);
  final AssertingCodec defaultFormatCodec = new AssertingCodec() {
    @Override
    public DocValuesFormat getDocValuesFormatForField(String field) {
      return TestUtil.getDefaultDocValuesFormat();
    }
  };
  firstConfig.setCodec(defaultFormatCodec);
  final IndexWriter firstWriter = new IndexWriter(directory, firstConfig);
  final Document firstDoc = new Document();
  firstDoc.add(new StringField("id", "d0", Store.NO));
  firstDoc.add(new NumericDocValuesField("f1", 5L));
  firstDoc.add(new NumericDocValuesField("f2", 13L));
  firstWriter.addDocument(firstDoc);
  firstWriter.close();

  // --- second writer session: same index, different doc-values format ---
  final IndexWriterConfig secondConfig = newIndexWriterConfig(new MockAnalyzer(random()));
  // merges are disabled to keep the assertions simple
  secondConfig.setMergePolicy(NoMergePolicy.INSTANCE);
  final AssertingCodec assertingFormatCodec = new AssertingCodec() {
    @Override
    public DocValuesFormat getDocValuesFormatForField(String field) {
      return new AssertingDocValuesFormat();
    }
  };
  secondConfig.setCodec(assertingFormatCodec);
  final IndexWriter secondWriter = new IndexWriter(directory, secondConfig);
  final Document secondDoc = new Document();
  secondDoc.add(new StringField("id", "d1", Store.NO));
  secondDoc.add(new NumericDocValuesField("f1", 17L));
  secondDoc.add(new NumericDocValuesField("f2", 2L));
  secondWriter.addDocument(secondDoc);
  // overwrite f1 of the document that was written during the first session
  secondWriter.updateNumericDocValue(new Term("id", "d0"), "f1", 12L);
  secondWriter.close();

  // --- verification ---
  final DirectoryReader indexReader = DirectoryReader.open(directory);
  final NumericDocValues f1Values = MultiDocValues.getNumericValues(indexReader, "f1");
  final NumericDocValues f2Values = MultiDocValues.getNumericValues(indexReader, "f2");
  // expected values indexed by doc id: d0 has the updated f1 and untouched f2, d1 is as added
  final long[] expectedF1 = {12L, 17L};
  final long[] expectedF2 = {13L, 2L};
  for (int docId = 0; docId < expectedF1.length; docId++) {
    assertEquals(docId, f1Values.nextDoc());
    assertEquals(expectedF1[docId], f1Values.longValue());
    assertEquals(docId, f2Values.nextDoc());
    assertEquals(expectedF2[docId], f2Values.longValue());
  }
  indexReader.close();
  directory.close();
}
Use of org.apache.lucene.document.NumericDocValuesField in the lucene-solr project by Apache.
Class TestNumericDocValuesUpdates, method testSegmentMerges.
@Test
public void testSegmentMerges() throws Exception {
Directory dir = newDirectory();
Random random = random();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random));
IndexWriter writer = new IndexWriter(dir, conf);
int docid = 0;
int numRounds = atLeast(10);
for (int rnd = 0; rnd < numRounds; rnd++) {
Document doc = new Document();
doc.add(new StringField("key", "doc", Store.NO));
doc.add(new NumericDocValuesField("ndv", -1));
int numDocs = atLeast(30);
for (int i = 0; i < numDocs; i++) {
doc.removeField("id");
doc.add(new StringField("id", Integer.toString(docid++), Store.NO));
writer.addDocument(doc);
}
long value = rnd + 1;
writer.updateNumericDocValue(new Term("key", "doc"), "ndv", value);
if (random.nextDouble() < 0.2) {
// randomly delete some docs
writer.deleteDocuments(new Term("id", Integer.toString(random.nextInt(docid))));
}
// randomly commit or reopen-IW (or nothing), before forceMerge
if (random.nextDouble() < 0.4) {
writer.commit();
} else if (random.nextDouble() < 0.1) {
writer.close();
conf = newIndexWriterConfig(new MockAnalyzer(random));
writer = new IndexWriter(dir, conf);
}
// add another document with the current value, to be sure forceMerge has
// something to merge (for instance, it could be that CMS finished merging
// all segments down to 1 before the delete was applied, so when
// forceMerge is called, the index will be with one segment and deletes
// and some MPs might now merge it, thereby invalidating test's
// assumption that the reader has no deletes).
doc = new Document();
doc.add(new StringField("id", Integer.toString(docid++), Store.NO));
doc.add(new StringField("key", "doc", Store.NO));
doc.add(new NumericDocValuesField("ndv", value));
writer.addDocument(doc);
writer.forceMerge(1, true);
final DirectoryReader reader;
if (random.nextBoolean()) {
writer.commit();
reader = DirectoryReader.open(dir);
} else {
reader = DirectoryReader.open(writer);
}
assertEquals(1, reader.leaves().size());
final LeafReader r = reader.leaves().get(0).reader();
assertNull("index should have no deletes after forceMerge", r.getLiveDocs());
NumericDocValues ndv = r.getNumericDocValues("ndv");
assertNotNull(ndv);
for (int i = 0; i < r.maxDoc(); i++) {
assertEquals(i, ndv.nextDoc());
assertEquals(value, ndv.longValue());
}
reader.close();
}
writer.close();
dir.close();
}
Use of org.apache.lucene.document.NumericDocValuesField in the lucene-solr project by Apache.
Class TestNumericDocValuesUpdates, method testAddIndexes.
@Test
public void testAddIndexes() throws Exception {
Directory dir1 = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
IndexWriter writer = new IndexWriter(dir1, conf);
final int numDocs = atLeast(50);
final int numTerms = TestUtil.nextInt(random(), 1, numDocs / 5);
Set<String> randomTerms = new HashSet<>();
while (randomTerms.size() < numTerms) {
randomTerms.add(TestUtil.randomSimpleString(random()));
}
// create first index
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
doc.add(new StringField("id", RandomPicks.randomFrom(random(), randomTerms), Store.NO));
doc.add(new NumericDocValuesField("ndv", 4L));
doc.add(new NumericDocValuesField("control", 8L));
writer.addDocument(doc);
}
if (random().nextBoolean()) {
writer.commit();
}
// update some docs to a random value
long value = random().nextInt();
Term term = new Term("id", RandomPicks.randomFrom(random(), randomTerms));
writer.updateDocValues(term, new NumericDocValuesField("ndv", value), new NumericDocValuesField("control", value * 2));
writer.close();
Directory dir2 = newDirectory();
conf = newIndexWriterConfig(new MockAnalyzer(random()));
writer = new IndexWriter(dir2, conf);
if (random().nextBoolean()) {
writer.addIndexes(dir1);
} else {
DirectoryReader reader = DirectoryReader.open(dir1);
TestUtil.addIndexesSlowly(writer, reader);
reader.close();
}
writer.close();
DirectoryReader reader = DirectoryReader.open(dir2);
for (LeafReaderContext context : reader.leaves()) {
LeafReader r = context.reader();
NumericDocValues ndv = r.getNumericDocValues("ndv");
NumericDocValues control = r.getNumericDocValues("control");
for (int i = 0; i < r.maxDoc(); i++) {
assertEquals(i, ndv.nextDoc());
assertEquals(i, control.nextDoc());
assertEquals(ndv.longValue() * 2, control.longValue());
}
}
reader.close();
IOUtils.close(dir1, dir2);
}
Use of org.apache.lucene.document.NumericDocValuesField in the lucene-solr project by Apache.
Class TestNumericDocValuesUpdates, method testMultipleDocValuesTypes.
@Test
public void testMultipleDocValuesTypes() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
// prevent merges
conf.setMaxBufferedDocs(10);
IndexWriter writer = new IndexWriter(dir, conf);
for (int i = 0; i < 4; i++) {
Document doc = new Document();
doc.add(new StringField("dvUpdateKey", "dv", Store.NO));
doc.add(new NumericDocValuesField("ndv", i));
doc.add(new BinaryDocValuesField("bdv", new BytesRef(Integer.toString(i))));
doc.add(new SortedDocValuesField("sdv", new BytesRef(Integer.toString(i))));
doc.add(new SortedSetDocValuesField("ssdv", new BytesRef(Integer.toString(i))));
doc.add(new SortedSetDocValuesField("ssdv", new BytesRef(Integer.toString(i * 2))));
writer.addDocument(doc);
}
writer.commit();
// update all docs' ndv field
writer.updateNumericDocValue(new Term("dvUpdateKey", "dv"), "ndv", 17L);
writer.close();
final DirectoryReader reader = DirectoryReader.open(dir);
LeafReader r = reader.leaves().get(0).reader();
NumericDocValues ndv = r.getNumericDocValues("ndv");
BinaryDocValues bdv = r.getBinaryDocValues("bdv");
SortedDocValues sdv = r.getSortedDocValues("sdv");
SortedSetDocValues ssdv = r.getSortedSetDocValues("ssdv");
for (int i = 0; i < r.maxDoc(); i++) {
assertEquals(i, ndv.nextDoc());
assertEquals(17, ndv.longValue());
assertEquals(i, bdv.nextDoc());
BytesRef term = bdv.binaryValue();
assertEquals(new BytesRef(Integer.toString(i)), term);
assertEquals(i, sdv.nextDoc());
term = sdv.binaryValue();
assertEquals(new BytesRef(Integer.toString(i)), term);
assertEquals(i, ssdv.nextDoc());
long ord = ssdv.nextOrd();
term = ssdv.lookupOrd(ord);
assertEquals(i, Integer.parseInt(term.utf8ToString()));
if (i != 0) {
ord = ssdv.nextOrd();
term = ssdv.lookupOrd(ord);
assertEquals(i * 2, Integer.parseInt(term.utf8ToString()));
}
assertEquals(SortedSetDocValues.NO_MORE_ORDS, ssdv.nextOrd());
}
reader.close();
dir.close();
}
Aggregations