Use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by apache: class TestBinaryDocValuesUpdates, method testMultipleBinaryDocValues.
public void testMultipleBinaryDocValues() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  // prevent merges
  conf.setMaxBufferedDocs(10);
  IndexWriter writer = new IndexWriter(dir, conf);
  for (int i = 0; i < 2; i++) {
    Document doc = new Document();
    doc.add(new StringField("dvUpdateKey", "dv", Store.NO));
    doc.add(new BinaryDocValuesField("bdv1", toBytes(i)));
    doc.add(new BinaryDocValuesField("bdv2", toBytes(i)));
    writer.addDocument(doc);
  }
  writer.commit();
  // update all docs' bdv1 field
  writer.updateBinaryDocValue(new Term("dvUpdateKey", "dv"), "bdv1", toBytes(17L));
  writer.close();
  final DirectoryReader reader = DirectoryReader.open(dir);
  LeafReader r = reader.leaves().get(0).reader();
  BinaryDocValues bdv1 = r.getBinaryDocValues("bdv1");
  BinaryDocValues bdv2 = r.getBinaryDocValues("bdv2");
  for (int i = 0; i < r.maxDoc(); i++) {
    assertEquals(i, bdv1.nextDoc());
    assertEquals(17, getValue(bdv1));
    assertEquals(i, bdv2.nextDoc());
    assertEquals(i, getValue(bdv2));
  }
  reader.close();
  dir.close();
}
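The assertions above go through helpers that TestBinaryDocValuesUpdates defines elsewhere: toBytes(long) packs a long into a BytesRef, and getValue(BinaryDocValues) decodes the value of the document the iterator is positioned on. A minimal self-contained sketch of such helpers, using a plain big-endian encoding (an assumption; the project's real helpers may encode differently):

import java.io.IOException;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;

// Hypothetical stand-ins for the test class's toBytes/getValue helpers.
static BytesRef toBytes(long value) {
  byte[] b = new byte[8];
  for (int i = 7; i >= 0; i--) {
    b[i] = (byte) value;  // low byte written last: big-endian layout
    value >>>= 8;
  }
  return new BytesRef(b);
}

static long getValue(BinaryDocValues bdv) throws IOException {
  // reads the binary value of the doc the iterator currently points at
  BytesRef term = bdv.binaryValue();
  long value = 0;
  for (int i = 0; i < 8; i++) {
    value = (value << 8) | (term.bytes[term.offset + i] & 0xFF);
  }
  return value;
}

Any consistent encode/decode pair works here, since the tests only ever compare values written and read back through these same two helpers.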
Use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by apache: class Test2BBinaryDocValues, method testVariableBinary.
// indexes IndexWriter.MAX_DOCS docs with a variable binary field
public void testVariableBinary() throws Exception {
  BaseDirectoryWrapper dir = newFSDirectory(createTempDir("2BVariableBinary"));
  if (dir instanceof MockDirectoryWrapper) {
    ((MockDirectoryWrapper) dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
  }
  IndexWriter w = new IndexWriter(dir,
      new IndexWriterConfig(new MockAnalyzer(random()))
          .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
          .setRAMBufferSizeMB(256.0)
          .setMergeScheduler(new ConcurrentMergeScheduler())
          .setMergePolicy(newLogMergePolicy(false, 10))
          .setOpenMode(IndexWriterConfig.OpenMode.CREATE)
          .setCodec(TestUtil.getDefaultCodec()));
  Document doc = new Document();
  byte[] bytes = new byte[4];
  ByteArrayDataOutput encoder = new ByteArrayDataOutput(bytes);
  BytesRef data = new BytesRef(bytes);
  BinaryDocValuesField dvField = new BinaryDocValuesField("dv", data);
  doc.add(dvField);
  for (int i = 0; i < IndexWriter.MAX_DOCS; i++) {
    encoder.reset(bytes);
    // 1, 2, or 3 bytes
    encoder.writeVInt(i % 65535);
    data.length = encoder.getPosition();
    w.addDocument(doc);
    if (i % 100000 == 0) {
      System.out.println("indexed: " + i);
      System.out.flush();
    }
  }
  w.forceMerge(1);
  w.close();
  System.out.println("verifying...");
  System.out.flush();
  DirectoryReader r = DirectoryReader.open(dir);
  int expectedValue = 0;
  ByteArrayDataInput input = new ByteArrayDataInput();
  for (LeafReaderContext context : r.leaves()) {
    LeafReader reader = context.reader();
    BinaryDocValues dv = reader.getBinaryDocValues("dv");
    for (int i = 0; i < reader.maxDoc(); i++) {
      assertEquals(i, dv.nextDoc());
      final BytesRef term = dv.binaryValue();
      input.reset(term.bytes, term.offset, term.length);
      assertEquals(expectedValue % 65535, input.readVInt());
      assertTrue(input.eof());
      expectedValue++;
    }
  }
  r.close();
  dir.close();
}
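Two details are easy to miss here: the single Document and BinaryDocValuesField instances are reused for all IndexWriter.MAX_DOCS additions (only the backing bytes and data.length change between addDocument calls), and each value round-trips through Lucene's vInt encoding. The round-trip in isolation, as a sketch (the literal value is arbitrary):

import java.io.IOException;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;

static void vIntRoundTrip() throws IOException {
  byte[] bytes = new byte[4];
  ByteArrayDataOutput out = new ByteArrayDataOutput(bytes);
  out.writeVInt(65534);  // values in [0, 65534] occupy 1-3 bytes as a vInt
  ByteArrayDataInput in = new ByteArrayDataInput();
  in.reset(bytes, 0, out.getPosition());
  assert in.readVInt() == 65534;  // the same value comes back
  assert in.eof();                // and nothing is left over
}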
Use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by apache: class TestBinaryDocValuesUpdates, method testSegmentMerges.
public void testSegmentMerges() throws Exception {
  Directory dir = newDirectory();
  Random random = random();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random));
  IndexWriter writer = new IndexWriter(dir, conf);
  int docid = 0;
  int numRounds = atLeast(10);
  for (int rnd = 0; rnd < numRounds; rnd++) {
    Document doc = new Document();
    doc.add(new StringField("key", "doc", Store.NO));
    doc.add(new BinaryDocValuesField("bdv", toBytes(-1)));
    int numDocs = atLeast(30);
    for (int i = 0; i < numDocs; i++) {
      doc.removeField("id");
      doc.add(new StringField("id", Integer.toString(docid++), Store.NO));
      writer.addDocument(doc);
    }
    long value = rnd + 1;
    writer.updateBinaryDocValue(new Term("key", "doc"), "bdv", toBytes(value));
    if (random.nextDouble() < 0.2) {
      // randomly delete some docs
      writer.deleteDocuments(new Term("id", Integer.toString(random.nextInt(docid))));
    }
    // randomly commit or reopen-IW (or nothing), before forceMerge
    if (random.nextDouble() < 0.4) {
      writer.commit();
    } else if (random.nextDouble() < 0.1) {
      writer.close();
      conf = newIndexWriterConfig(new MockAnalyzer(random));
      writer = new IndexWriter(dir, conf);
    }
    // add another document with the current value, to be sure forceMerge has
    // something to merge (for instance, it could be that CMS finished merging
    // all segments down to 1 before the delete was applied, so when
    // forceMerge is called, the index would have a single segment with
    // deletes that some MPs might not merge, thereby invalidating the test's
    // assumption that the reader has no deletes).
    doc = new Document();
    doc.add(new StringField("id", Integer.toString(docid++), Store.NO));
    doc.add(new StringField("key", "doc", Store.NO));
    doc.add(new BinaryDocValuesField("bdv", toBytes(value)));
    writer.addDocument(doc);
    writer.forceMerge(1, true);
    final DirectoryReader reader;
    if (random.nextBoolean()) {
      writer.commit();
      reader = DirectoryReader.open(dir);
    } else {
      reader = DirectoryReader.open(writer);
    }
    assertEquals(1, reader.leaves().size());
    final LeafReader r = reader.leaves().get(0).reader();
    assertNull("index should have no deletes after forceMerge", r.getLiveDocs());
    BinaryDocValues bdv = r.getBinaryDocValues("bdv");
    assertNotNull(bdv);
    for (int i = 0; i < r.maxDoc(); i++) {
      assertEquals(i, bdv.nextDoc());
      assertEquals(value, getValue(bdv));
    }
    reader.close();
  }
  writer.close();
  dir.close();
}
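The test randomly exercises both ways of obtaining a reader, and the doc-values update must be visible through either one. The distinction, sketched on its own:

// Near-real-time reader: opened from the writer, sees uncommitted
// changes, including pending doc-values updates.
DirectoryReader nrtReader = DirectoryReader.open(writer);

// Directory reader: sees only what a commit() has made durable.
writer.commit();
DirectoryReader committedReader = DirectoryReader.open(dir);

// Both must be closed by the caller, as the test does after its checks.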
Use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by apache: class TestBinaryDocValuesUpdates, method testTonsOfUpdates.
@Nightly
public void testTonsOfUpdates() throws Exception {
  // LUCENE-5248: make sure that when there are many updates, we don't use too much RAM
  Directory dir = newDirectory();
  final Random random = random();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random));
  conf.setRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
  // don't flush by doc
  conf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
  IndexWriter writer = new IndexWriter(dir, conf);
  // test data: lots of documents (few 10Ks) and lots of update terms (few hundreds)
  final int numDocs = atLeast(20000);
  final int numBinaryFields = atLeast(5);
  // terms should affect many docs
  final int numTerms = TestUtil.nextInt(random, 10, 100);
  Set<String> updateTerms = new HashSet<>();
  while (updateTerms.size() < numTerms) {
    updateTerms.add(TestUtil.randomSimpleString(random));
  }
  // build a large index with many BDV fields and update terms
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    int numUpdateTerms = TestUtil.nextInt(random, 1, numTerms / 10);
    for (int j = 0; j < numUpdateTerms; j++) {
      doc.add(new StringField("upd", RandomPicks.randomFrom(random, updateTerms), Store.NO));
    }
    for (int j = 0; j < numBinaryFields; j++) {
      long val = random.nextInt();
      doc.add(new BinaryDocValuesField("f" + j, toBytes(val)));
      doc.add(new BinaryDocValuesField("cf" + j, toBytes(val * 2)));
    }
    writer.addDocument(doc);
  }
  // commit so there's something to apply to
  writer.commit();
  // set to flush every 2048 bytes (approximately every 12 updates), so we get
  // many flushes during binary updates
  writer.getConfig().setRAMBufferSizeMB(2048.0 / 1024 / 1024);
  final int numUpdates = atLeast(100);
  // System.out.println("numUpdates=" + numUpdates);
  for (int i = 0; i < numUpdates; i++) {
    int field = random.nextInt(numBinaryFields);
    Term updateTerm = new Term("upd", RandomPicks.randomFrom(random, updateTerms));
    long value = random.nextInt();
    writer.updateDocValues(updateTerm,
        new BinaryDocValuesField("f" + field, toBytes(value)),
        new BinaryDocValuesField("cf" + field, toBytes(value * 2)));
  }
  writer.close();
  DirectoryReader reader = DirectoryReader.open(dir);
  for (LeafReaderContext context : reader.leaves()) {
    for (int i = 0; i < numBinaryFields; i++) {
      LeafReader r = context.reader();
      BinaryDocValues f = r.getBinaryDocValues("f" + i);
      BinaryDocValues cf = r.getBinaryDocValues("cf" + i);
      for (int j = 0; j < r.maxDoc(); j++) {
        assertEquals(j, f.nextDoc());
        assertEquals(j, cf.nextDoc());
        assertEquals("reader=" + r + ", field=f" + i + ", doc=" + j, getValue(cf), getValue(f) * 2);
      }
    }
  }
  reader.close();
  dir.close();
}
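The paired-field pattern is what makes this flood of random updates verifiable: every value field "f" + i has a control field "cf" + i holding exactly twice its value, and each updateDocValues call rewrites both fields for all documents matching the update term, so the invariant survives any interleaving of flushes. One such call in isolation (term and values are illustrative):

// Rewrites f0 and cf0 together for every doc whose "upd" field contains
// "someTerm", preserving the cf == 2 * f invariant the verification loop checks.
writer.updateDocValues(new Term("upd", "someTerm"),
    new BinaryDocValuesField("f0", toBytes(7L)),
    new BinaryDocValuesField("cf0", toBytes(14L)));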
Use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by apache: class TestBinaryDocValuesUpdates, method testUpdateDifferentDocsInDifferentGens.
public void testUpdateDifferentDocsInDifferentGens() throws Exception {
  // update same document multiple times across generations
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  conf.setMaxBufferedDocs(4);
  IndexWriter writer = new IndexWriter(dir, conf);
  final int numDocs = atLeast(10);
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    doc.add(new StringField("id", "doc" + i, Store.NO));
    long value = random().nextInt();
    doc.add(new BinaryDocValuesField("f", toBytes(value)));
    doc.add(new BinaryDocValuesField("cf", toBytes(value * 2)));
    writer.addDocument(doc);
  }
  int numGens = atLeast(5);
  for (int i = 0; i < numGens; i++) {
    int doc = random().nextInt(numDocs);
    Term t = new Term("id", "doc" + doc);
    long value = random().nextLong();
    writer.updateDocValues(t,
        new BinaryDocValuesField("f", toBytes(value)),
        new BinaryDocValuesField("cf", toBytes(value * 2)));
    DirectoryReader reader = DirectoryReader.open(writer);
    for (LeafReaderContext context : reader.leaves()) {
      LeafReader r = context.reader();
      BinaryDocValues fbdv = r.getBinaryDocValues("f");
      BinaryDocValues cfbdv = r.getBinaryDocValues("cf");
      for (int j = 0; j < r.maxDoc(); j++) {
        assertEquals(j, fbdv.nextDoc());
        assertEquals(j, cfbdv.nextDoc());
        assertEquals(getValue(cfbdv), getValue(fbdv) * 2);
      }
    }
    reader.close();
  }
  writer.close();
  dir.close();
}
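All five tests run inside LuceneTestCase's randomized harness, which provides newDirectory(), newIndexWriterConfig(), random(), and atLeast(). Outside that harness, the setup maps onto plain Lucene calls roughly as follows; a sketch only, with the directory path and analyzer chosen arbitrarily:

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

Directory dir = FSDirectory.open(Paths.get("/tmp/bdv-demo"));           // ~ newDirectory()
IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer()); // ~ newIndexWriterConfig(...)
IndexWriter writer = new IndexWriter(dir, conf);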