Use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by apache.
The class TestIndexWriterExceptions, method testNoLostDeletesOrUpdates.
// Make sure that if we hit a transient IOException (e.g., disk
// full), and the exception then stops (e.g., disk frees
// up) so we can successfully close IW or open an NRT
// reader, we don't lose any deletes or updates:
public void testNoLostDeletesOrUpdates() throws Throwable {
int deleteCount = 0;
int docBase = 0;
int docCount = 0;
MockDirectoryWrapper dir = newMockDirectory();
final AtomicBoolean shouldFail = new AtomicBoolean();
dir.failOn(new MockDirectoryWrapper.Failure() {
@Override
public void eval(MockDirectoryWrapper dir) throws IOException {
if (shouldFail.get() == false) {
return;
}
// Only sometimes throw the exc, so we get it sometimes on creating
// the file, on flushing the buffer, or on closing the file:
if (random().nextInt(3) != 2) {
return;
}
StackTraceElement[] trace = Thread.currentThread().getStackTrace();
boolean sawSeal = false;
boolean sawWrite = false;
for (int i = 0; i < trace.length; i++) {
if ("sealFlushedSegment".equals(trace[i].getMethodName())) {
sawSeal = true;
break;
}
if ("writeLiveDocs".equals(trace[i].getMethodName()) || "writeFieldUpdates".equals(trace[i].getMethodName())) {
sawWrite = true;
}
}
// Don't throw the exc while we are "flushing", else
// the segment is aborted and docs are lost:
if (sawWrite && sawSeal == false) {
if (VERBOSE) {
System.out.println("TEST: now fail; thread=" + Thread.currentThread().getName() + " exc:");
new Throwable().printStackTrace(System.out);
}
shouldFail.set(false);
throw new FakeIOException();
}
}
});
RandomIndexWriter w = null;
boolean tragic = false;
for (int iter = 0; iter < 10 * RANDOM_MULTIPLIER; iter++) {
int numDocs = atLeast(100);
if (VERBOSE) {
System.out.println("\nTEST: iter=" + iter + " numDocs=" + numDocs + " docBase=" + docBase + " delCount=" + deleteCount);
}
if (w == null) {
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
w = new RandomIndexWriter(random(), dir, iwc);
// Since we hit exc during merging, a partial
// forceMerge can easily return when there are still
// too many segments in the index:
w.setDoRandomForceMergeAssert(false);
}
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
doc.add(new StringField("id", "" + (docBase + i), Field.Store.NO));
doc.add(new NumericDocValuesField("f", 1L));
doc.add(new NumericDocValuesField("cf", 2L));
doc.add(new BinaryDocValuesField("bf", TestBinaryDocValuesUpdates.toBytes(1L)));
doc.add(new BinaryDocValuesField("bcf", TestBinaryDocValuesUpdates.toBytes(2L)));
w.addDocument(doc);
}
docCount += numDocs;
// TODO: we could make the test more evil, by letting
// it throw more than one exc, randomly, before "recovering"
// TODO: we could also install an infoStream and try
// to fail in "more evil" places inside BDS
shouldFail.set(true);
boolean doClose = false;
try {
for (int i = 0; i < numDocs; i++) {
if (random().nextInt(10) == 7) {
boolean fieldUpdate = random().nextBoolean();
int docid = docBase + i;
if (fieldUpdate) {
long value = iter;
if (VERBOSE) {
System.out.println(" update id=" + docid + " to value " + value);
}
Term idTerm = new Term("id", Integer.toString(docid));
if (random().nextBoolean()) {
// update only numeric field
w.updateDocValues(idTerm, new NumericDocValuesField("f", value), new NumericDocValuesField("cf", value * 2));
} else if (random().nextBoolean()) {
w.updateDocValues(idTerm, new BinaryDocValuesField("bf", TestBinaryDocValuesUpdates.toBytes(value)), new BinaryDocValuesField("bcf", TestBinaryDocValuesUpdates.toBytes(value * 2)));
} else {
w.updateDocValues(idTerm, new NumericDocValuesField("f", value), new NumericDocValuesField("cf", value * 2), new BinaryDocValuesField("bf", TestBinaryDocValuesUpdates.toBytes(value)), new BinaryDocValuesField("bcf", TestBinaryDocValuesUpdates.toBytes(value * 2)));
}
}
// sometimes do both deletes and updates
if (!fieldUpdate || random().nextBoolean()) {
if (VERBOSE) {
System.out.println(" delete id=" + docid);
}
deleteCount++;
w.deleteDocuments(new Term("id", "" + docid));
}
}
}
// Trigger writeLiveDocs + writeFieldUpdates so we hit fake exc:
IndexReader r = w.getReader();
// Sometimes we will make it here (we only randomly
// throw the exc):
assertEquals(docCount - deleteCount, r.numDocs());
r.close();
// Sometimes close, so the disk full happens on close:
if (random().nextBoolean()) {
if (VERBOSE) {
System.out.println(" now close writer");
}
doClose = true;
w.commit();
w.close();
w = null;
}
} catch (Throwable t) {
// The exc may strike during a merge, in which case the writer
// throws it as a wrapped IOE, so don't fail in this case.
if (t instanceof FakeIOException || (t.getCause() instanceof FakeIOException)) {
// expected
if (VERBOSE) {
System.out.println("TEST: hit expected IOE");
}
if (t instanceof AlreadyClosedException) {
// FakeIOExc struck during merge and writer is now closed:
w = null;
tragic = true;
}
} else {
throw t;
}
}
shouldFail.set(false);
if (w != null) {
MergeScheduler ms = w.w.getConfig().getMergeScheduler();
if (ms instanceof ConcurrentMergeScheduler) {
((ConcurrentMergeScheduler) ms).sync();
}
if (w.w.getTragicException() != null) {
// Tragic exc in CMS closed the writer
w = null;
}
}
IndexReader r;
if (doClose && w != null) {
if (VERBOSE) {
System.out.println(" now 2nd close writer");
}
w.close();
w = null;
}
if (w == null || random().nextBoolean()) {
// disk" bits are good:
if (VERBOSE) {
System.out.println("TEST: verify against non-NRT reader");
}
if (w != null) {
w.commit();
}
r = DirectoryReader.open(dir);
} else {
if (VERBOSE) {
System.out.println("TEST: verify against NRT reader");
}
r = w.getReader();
}
if (tragic == false) {
assertEquals(docCount - deleteCount, r.numDocs());
}
BytesRef scratch = new BytesRef();
for (LeafReaderContext context : r.leaves()) {
LeafReader reader = context.reader();
Bits liveDocs = reader.getLiveDocs();
NumericDocValues f = reader.getNumericDocValues("f");
NumericDocValues cf = reader.getNumericDocValues("cf");
BinaryDocValues bf = reader.getBinaryDocValues("bf");
BinaryDocValues bcf = reader.getBinaryDocValues("bcf");
for (int i = 0; i < reader.maxDoc(); i++) {
if (liveDocs == null || liveDocs.get(i)) {
assertEquals(i, f.advance(i));
assertEquals(i, cf.advance(i));
assertEquals(i, bf.advance(i));
assertEquals(i, bcf.advance(i));
assertEquals("doc=" + (docBase + i), cf.longValue(), f.longValue() * 2);
assertEquals("doc=" + (docBase + i), TestBinaryDocValuesUpdates.getValue(bcf), TestBinaryDocValuesUpdates.getValue(bf) * 2);
}
}
}
r.close();
// Sometimes re-use RIW, other times open new one:
if (w != null && random().nextBoolean()) {
if (VERBOSE) {
System.out.println("TEST: close writer");
}
w.close();
w = null;
}
docBase += numDocs;
}
if (w != null) {
w.close();
}
// Final verify:
if (tragic == false) {
IndexReader r = DirectoryReader.open(dir);
assertEquals(docCount - deleteCount, r.numDocs());
r.close();
}
dir.close();
}
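Note: the tests in this section round-trip long values through the TestBinaryDocValuesUpdates.toBytes and getValue helpers, which live in that test class. A minimal sketch of an equivalent pair, assuming a simple fixed-width big-endian encoding (the real helpers may use a different, variable-length scheme):

// Hypothetical stand-ins for TestBinaryDocValuesUpdates.toBytes/getValue.
// Uses org.apache.lucene.util.BytesRef and org.apache.lucene.index.BinaryDocValues.
static BytesRef toBytes(long value) {
  byte[] bytes = new byte[8];
  for (int i = 7; i >= 0; i--) {   // big-endian: most significant byte first
    bytes[i] = (byte) value;
    value >>>= 8;
  }
  return new BytesRef(bytes);
}

static long getValue(BinaryDocValues bdv) throws IOException {
  BytesRef term = bdv.binaryValue();  // the iterator must already be positioned on a doc
  long value = 0;
  for (int i = 0; i < 8; i++) {
    value = (value << 8) | (term.bytes[term.offset + i] & 0xFFL);
  }
  return value;
}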
Use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by apache.
The class TestPerFieldDocValuesFormat, method testTwoFieldsTwoFormats.
// just a simple trivial test
// TODO: we should come up with a test that somehow checks that segment suffix
// is respected by all codec apis (not just docvalues and postings)
public void testTwoFieldsTwoFormats() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
final DocValuesFormat fast = TestUtil.getDefaultDocValuesFormat();
final DocValuesFormat slow = DocValuesFormat.forName("Memory");
iwc.setCodec(new AssertingCodec() {
@Override
public DocValuesFormat getDocValuesFormatForField(String field) {
if ("dv1".equals(field)) {
return fast;
} else {
return slow;
}
}
});
IndexWriter iwriter = new IndexWriter(directory, iwc);
Document doc = new Document();
String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
String text = "This is the text to be indexed. " + longTerm;
doc.add(newTextField("fieldname", text, Field.Store.YES));
doc.add(new NumericDocValuesField("dv1", 5));
doc.add(new BinaryDocValuesField("dv2", new BytesRef("hello world")));
iwriter.addDocument(doc);
iwriter.close();
// Now search the index:
// read-only=true
IndexReader ireader = DirectoryReader.open(directory);
IndexSearcher isearcher = newSearcher(ireader);
assertEquals(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits);
Query query = new TermQuery(new Term("fieldname", "text"));
TopDocs hits = isearcher.search(query, 1);
assertEquals(1, hits.totalHits);
// Iterate through the results:
for (int i = 0; i < hits.scoreDocs.length; i++) {
int hitDocID = hits.scoreDocs[i].doc;
Document hitDoc = isearcher.doc(hitDocID);
assertEquals(text, hitDoc.get("fieldname"));
assert ireader.leaves().size() == 1;
NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv1");
assertEquals(hitDocID, dv.advance(hitDocID));
assertEquals(5, dv.longValue());
BinaryDocValues dv2 = ireader.leaves().get(0).reader().getBinaryDocValues("dv2");
assertEquals(hitDocID, dv2.advance(hitDocID));
final BytesRef term = dv2.binaryValue();
assertEquals(new BytesRef("hello world"), term);
}
ireader.close();
directory.close();
}
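A detail worth calling out: since Lucene 7 the doc-values classes are iterators, so advance(docID) (or nextDoc()) must position the iterator before longValue()/binaryValue() may be called, which is why the loop above advances dv and dv2 first. A hedged convenience wrapper around that contract (readBinaryDV is not a Lucene API, just an illustration):

// Hypothetical helper: fetch the binary doc value for one document, or null
// if the segment lacks the field or the document has no value.
static BytesRef readBinaryDV(LeafReader reader, String field, int docID) throws IOException {
  BinaryDocValues dv = reader.getBinaryDocValues(field);
  if (dv == null || dv.advance(docID) != docID) {
    return null;
  }
  return dv.binaryValue();
}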
Use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by apache.
The class Test2BBinaryDocValues, method testFixedBinary.
// indexes IndexWriter.MAX_DOCS docs with a fixed binary field
public void testFixedBinary() throws Exception {
BaseDirectoryWrapper dir = newFSDirectory(createTempDir("2BFixedBinary"));
if (dir instanceof MockDirectoryWrapper) {
((MockDirectoryWrapper) dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
}
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).setRAMBufferSizeMB(256.0).setMergeScheduler(new ConcurrentMergeScheduler()).setMergePolicy(newLogMergePolicy(false, 10)).setOpenMode(IndexWriterConfig.OpenMode.CREATE).setCodec(TestUtil.getDefaultCodec()));
Document doc = new Document();
byte[] bytes = new byte[4];
BytesRef data = new BytesRef(bytes);
BinaryDocValuesField dvField = new BinaryDocValuesField("dv", data);
doc.add(dvField);
for (int i = 0; i < IndexWriter.MAX_DOCS; i++) {
bytes[0] = (byte) (i >> 24);
bytes[1] = (byte) (i >> 16);
bytes[2] = (byte) (i >> 8);
bytes[3] = (byte) i;
w.addDocument(doc);
if (i % 100000 == 0) {
System.out.println("indexed: " + i);
System.out.flush();
}
}
w.forceMerge(1);
w.close();
System.out.println("verifying...");
System.out.flush();
DirectoryReader r = DirectoryReader.open(dir);
int expectedValue = 0;
for (LeafReaderContext context : r.leaves()) {
LeafReader reader = context.reader();
BinaryDocValues dv = reader.getBinaryDocValues("dv");
for (int i = 0; i < reader.maxDoc(); i++) {
bytes[0] = (byte) (expectedValue >> 24);
bytes[1] = (byte) (expectedValue >> 16);
bytes[2] = (byte) (expectedValue >> 8);
bytes[3] = (byte) expectedValue;
assertEquals(i, dv.nextDoc());
final BytesRef term = dv.binaryValue();
assertEquals(data, term);
expectedValue++;
}
}
r.close();
dir.close();
}
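Two details make this test work: dvField holds a reference to the shared bytes array, so mutating it before each addDocument stores a distinct value per doc, and the shift-and-mask lines are plain big-endian int packing. An equivalent packing via java.nio.ByteBuffer (a sketch, not what the test itself uses):

// Sketch: big-endian packing of an int, equivalent to the manual shifts above.
// ByteBuffer's default byte order is big-endian.
static void packBigEndian(int value, byte[] dest) {
  java.nio.ByteBuffer.wrap(dest).putInt(value);
}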
Use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by apache.
The class TestMixedDocValuesUpdates, method testUpdateDifferentDocsInDifferentGens.
public void testUpdateDifferentDocsInDifferentGens() throws Exception {
// update different documents in different gens
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
conf.setMaxBufferedDocs(4);
IndexWriter writer = new IndexWriter(dir, conf);
final int numDocs = atLeast(10);
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
doc.add(new StringField("id", "doc" + i, Store.NO));
long value = random().nextInt();
doc.add(new BinaryDocValuesField("f", TestBinaryDocValuesUpdates.toBytes(value)));
doc.add(new NumericDocValuesField("cf", value * 2));
writer.addDocument(doc);
}
int numGens = atLeast(5);
for (int i = 0; i < numGens; i++) {
int doc = random().nextInt(numDocs);
Term t = new Term("id", "doc" + doc);
long value = random().nextLong();
writer.updateDocValues(t, new BinaryDocValuesField("f", TestBinaryDocValuesUpdates.toBytes(value)), new NumericDocValuesField("cf", value * 2));
DirectoryReader reader = DirectoryReader.open(writer);
for (LeafReaderContext context : reader.leaves()) {
LeafReader r = context.reader();
BinaryDocValues fbdv = r.getBinaryDocValues("f");
NumericDocValues cfndv = r.getNumericDocValues("cf");
for (int j = 0; j < r.maxDoc(); j++) {
assertEquals(j, cfndv.nextDoc());
assertEquals(j, fbdv.nextDoc());
assertEquals(cfndv.longValue(), TestBinaryDocValuesUpdates.getValue(fbdv) * 2);
}
}
reader.close();
}
writer.close();
dir.close();
}
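The key API here is IndexWriter.updateDocValues, which rewrites the doc-values entries of every document matching the term without reindexing the documents themselves; passing the binary field and its numeric control field in one call keeps the cf == 2 * f invariant consistent for each update. A minimal hedged sketch of the same pattern (the id term is illustrative):

// Sketch: in-place update of paired doc-values fields for all docs matching a term.
// "f"/"cf" mirror the test fields; "doc7" is a hypothetical id value.
long value = 42L;
writer.updateDocValues(new Term("id", "doc7"),
    new BinaryDocValuesField("f", TestBinaryDocValuesUpdates.toBytes(value)),
    new NumericDocValuesField("cf", value * 2));
// The update becomes visible once a new reader is opened (NRT or after commit).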
Use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by apache.
The class TestMixedDocValuesUpdates, method testTonsOfUpdates.
@Nightly
public void testTonsOfUpdates() throws Exception {
// LUCENE-5248: make sure that when there are many updates, we don't use too much RAM
Directory dir = newDirectory();
final Random random = random();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random));
conf.setRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
// don't flush by doc
conf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
IndexWriter writer = new IndexWriter(dir, conf);
// test data: lots of documents (tens of thousands) and many update terms
final int numDocs = atLeast(20000);
final int numBinaryFields = atLeast(5);
// terms should affect many docs
final int numTerms = TestUtil.nextInt(random, 10, 100);
Set<String> updateTerms = new HashSet<>();
while (updateTerms.size() < numTerms) {
updateTerms.add(TestUtil.randomSimpleString(random));
}
// build a large index with many BDV fields and update terms
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
int numUpdateTerms = TestUtil.nextInt(random, 1, numTerms / 10);
for (int j = 0; j < numUpdateTerms; j++) {
doc.add(new StringField("upd", RandomPicks.randomFrom(random, updateTerms), Store.NO));
}
for (int j = 0; j < numBinaryFields; j++) {
long val = random.nextInt();
doc.add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.toBytes(val)));
doc.add(new NumericDocValuesField("cf" + j, val * 2));
}
writer.addDocument(doc);
}
// commit so there's something to apply to
writer.commit();
// set to flush every 2048 bytes (approximately every 12 updates), so we get
// many flushes during binary updates
writer.getConfig().setRAMBufferSizeMB(2048.0 / 1024 / 1024);
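// (setRAMBufferSizeMB takes megabytes: 2048.0 / 1024 / 1024 MB is exactly
// 2048 bytes, so at ~12 updates per flush each buffered update accounts
// for roughly 170 bytes.)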
final int numUpdates = atLeast(100);
// System.out.println("numUpdates=" + numUpdates);
for (int i = 0; i < numUpdates; i++) {
int field = random.nextInt(numBinaryFields);
Term updateTerm = new Term("upd", RandomPicks.randomFrom(random, updateTerms));
long value = random.nextInt();
writer.updateDocValues(updateTerm, new BinaryDocValuesField("f" + field, TestBinaryDocValuesUpdates.toBytes(value)), new NumericDocValuesField("cf" + field, value * 2));
}
writer.close();
DirectoryReader reader = DirectoryReader.open(dir);
for (LeafReaderContext context : reader.leaves()) {
for (int i = 0; i < numBinaryFields; i++) {
LeafReader r = context.reader();
BinaryDocValues f = r.getBinaryDocValues("f" + i);
NumericDocValues cf = r.getNumericDocValues("cf" + i);
for (int j = 0; j < r.maxDoc(); j++) {
assertEquals(j, cf.nextDoc());
assertEquals(j, f.nextDoc());
assertEquals("reader=" + r + ", field=f" + i + ", doc=" + j, cf.longValue(), TestBinaryDocValuesUpdates.getValue(f) * 2);
}
}
}
reader.close();
dir.close();
}