use of org.apache.lucene.document.StoredField in project lucene-solr by apache.
the class BaseDocValuesFormatTestCase method testThreads2.
/** Tests dv against stored fields with threads (all types + missing) */
@Slow
public void testThreads2() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
Field idField = new StringField("id", "", Field.Store.NO);
Field storedBinField = new StoredField("storedBin", new byte[0]);
Field dvBinField = new BinaryDocValuesField("dvBin", new BytesRef());
Field dvSortedField = new SortedDocValuesField("dvSorted", new BytesRef());
Field storedNumericField = new StoredField("storedNum", "");
Field dvNumericField = new NumericDocValuesField("dvNum", 0);
// index some docs
int numDocs = TestUtil.nextInt(random(), 1025, 2047);
for (int i = 0; i < numDocs; i++) {
idField.setStringValue(Integer.toString(i));
int length = TestUtil.nextInt(random(), 0, 8);
byte[] buffer = new byte[length];
random().nextBytes(buffer);
storedBinField.setBytesValue(buffer);
dvBinField.setBytesValue(buffer);
dvSortedField.setBytesValue(buffer);
long numericValue = random().nextLong();
storedNumericField.setStringValue(Long.toString(numericValue));
dvNumericField.setLongValue(numericValue);
Document doc = new Document();
doc.add(idField);
if (random().nextInt(4) > 0) {
doc.add(storedBinField);
doc.add(dvBinField);
doc.add(dvSortedField);
}
if (random().nextInt(4) > 0) {
doc.add(storedNumericField);
doc.add(dvNumericField);
}
int numSortedSetFields = random().nextInt(3);
Set<String> values = new TreeSet<>();
for (int j = 0; j < numSortedSetFields; j++) {
values.add(TestUtil.randomSimpleString(random()));
}
for (String v : values) {
doc.add(new SortedSetDocValuesField("dvSortedSet", new BytesRef(v)));
doc.add(new StoredField("storedSortedSet", v));
}
int numSortedNumericFields = random().nextInt(3);
Set<Long> numValues = new TreeSet<>();
for (int j = 0; j < numSortedNumericFields; j++) {
numValues.add(TestUtil.nextLong(random(), Long.MIN_VALUE, Long.MAX_VALUE));
}
for (Long l : numValues) {
doc.add(new SortedNumericDocValuesField("dvSortedNumeric", l));
doc.add(new StoredField("storedSortedNumeric", Long.toString(l)));
}
writer.addDocument(doc);
if (random().nextInt(31) == 0) {
writer.commit();
}
}
// delete some docs
int numDeletions = random().nextInt(numDocs / 10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
writer.close();
// compare
final DirectoryReader ir = DirectoryReader.open(dir);
int numThreads = TestUtil.nextInt(random(), 2, 7);
Thread[] threads = new Thread[numThreads];
final CountDownLatch startingGun = new CountDownLatch(1);
for (int i = 0; i < threads.length; i++) {
threads[i] = new Thread() {
@Override
public void run() {
try {
startingGun.await();
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
BinaryDocValues binaries = r.getBinaryDocValues("dvBin");
SortedDocValues sorted = r.getSortedDocValues("dvSorted");
NumericDocValues numerics = r.getNumericDocValues("dvNum");
SortedSetDocValues sortedSet = r.getSortedSetDocValues("dvSortedSet");
SortedNumericDocValues sortedNumeric = r.getSortedNumericDocValues("dvSortedNumeric");
for (int j = 0; j < r.maxDoc(); j++) {
BytesRef binaryValue = r.document(j).getBinaryValue("storedBin");
if (binaryValue != null) {
if (binaries != null) {
assertEquals(j, binaries.nextDoc());
BytesRef scratch = binaries.binaryValue();
assertEquals(binaryValue, scratch);
assertEquals(j, sorted.nextDoc());
scratch = sorted.binaryValue();
assertEquals(binaryValue, scratch);
}
}
String number = r.document(j).get("storedNum");
if (number != null) {
if (numerics != null) {
assertEquals(j, numerics.advance(j));
assertEquals(Long.parseLong(number), numerics.longValue());
}
}
String[] values = r.document(j).getValues("storedSortedSet");
if (values.length > 0) {
assertNotNull(sortedSet);
assertEquals(j, sortedSet.nextDoc());
for (int k = 0; k < values.length; k++) {
long ord = sortedSet.nextOrd();
assertTrue(ord != SortedSetDocValues.NO_MORE_ORDS);
BytesRef value = sortedSet.lookupOrd(ord);
assertEquals(values[k], value.utf8ToString());
}
assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
}
String[] numValues = r.document(j).getValues("storedSortedNumeric");
if (numValues.length > 0) {
assertNotNull(sortedNumeric);
assertEquals(j, sortedNumeric.nextDoc());
assertEquals(numValues.length, sortedNumeric.docValueCount());
for (int k = 0; k < numValues.length; k++) {
long v = sortedNumeric.nextValue();
assertEquals(numValues[k], Long.toString(v));
}
}
}
}
TestUtil.checkReader(ir);
} catch (Exception e) {
throw new RuntimeException(e);
}
}
};
threads[i].start();
}
startingGun.countDown();
for (Thread t : threads) {
t.join();
}
ir.close();
dir.close();
}
use of org.apache.lucene.document.StoredField in project lucene-solr by apache.
the class BaseDocValuesFormatTestCase method testThreads.
/** Tests dv against stored fields with threads (binary/numeric/sorted, no missing) */
public void testThreads() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
Document doc = new Document();
Field idField = new StringField("id", "", Field.Store.NO);
Field storedBinField = new StoredField("storedBin", new byte[0]);
Field dvBinField = new BinaryDocValuesField("dvBin", new BytesRef());
Field dvSortedField = new SortedDocValuesField("dvSorted", new BytesRef());
Field storedNumericField = new StoredField("storedNum", "");
Field dvNumericField = new NumericDocValuesField("dvNum", 0);
doc.add(idField);
doc.add(storedBinField);
doc.add(dvBinField);
doc.add(dvSortedField);
doc.add(storedNumericField);
doc.add(dvNumericField);
// index some docs
int numDocs = atLeast(300);
for (int i = 0; i < numDocs; i++) {
idField.setStringValue(Integer.toString(i));
int length = TestUtil.nextInt(random(), 0, 8);
byte[] buffer = new byte[length];
random().nextBytes(buffer);
storedBinField.setBytesValue(buffer);
dvBinField.setBytesValue(buffer);
dvSortedField.setBytesValue(buffer);
long numericValue = random().nextLong();
storedNumericField.setStringValue(Long.toString(numericValue));
dvNumericField.setLongValue(numericValue);
writer.addDocument(doc);
if (random().nextInt(31) == 0) {
writer.commit();
}
}
// delete some docs
int numDeletions = random().nextInt(numDocs / 10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
writer.close();
// compare
final DirectoryReader ir = DirectoryReader.open(dir);
int numThreads = TestUtil.nextInt(random(), 2, 7);
Thread[] threads = new Thread[numThreads];
final CountDownLatch startingGun = new CountDownLatch(1);
for (int i = 0; i < threads.length; i++) {
threads[i] = new Thread() {
@Override
public void run() {
try {
startingGun.await();
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
BinaryDocValues binaries = r.getBinaryDocValues("dvBin");
SortedDocValues sorted = r.getSortedDocValues("dvSorted");
NumericDocValues numerics = r.getNumericDocValues("dvNum");
for (int j = 0; j < r.maxDoc(); j++) {
BytesRef binaryValue = r.document(j).getBinaryValue("storedBin");
assertEquals(j, binaries.nextDoc());
BytesRef scratch = binaries.binaryValue();
assertEquals(binaryValue, scratch);
assertEquals(j, sorted.nextDoc());
scratch = sorted.binaryValue();
assertEquals(binaryValue, scratch);
String expected = r.document(j).get("storedNum");
assertEquals(j, numerics.nextDoc());
assertEquals(Long.parseLong(expected), numerics.longValue());
}
}
TestUtil.checkReader(ir);
} catch (Exception e) {
throw new RuntimeException(e);
}
}
};
threads[i].start();
}
startingGun.countDown();
for (Thread t : threads) {
t.join();
}
ir.close();
dir.close();
}
use of org.apache.lucene.document.StoredField in project lucene-solr by apache.
the class BaseDocValuesFormatTestCase method doTestSortedVsStoredFields.
protected void doTestSortedVsStoredFields(int numDocs, double density, Supplier<byte[]> bytes) throws Exception {
Directory dir = newFSDirectory(createTempDir("dvduel"));
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
Document doc = new Document();
Field idField = new StringField("id", "", Field.Store.NO);
Field storedField = new StoredField("stored", new byte[0]);
Field dvField = new SortedDocValuesField("dv", new BytesRef());
doc.add(idField);
doc.add(storedField);
doc.add(dvField);
// index some docs
for (int i = 0; i < numDocs; i++) {
if (random().nextDouble() > density) {
writer.addDocument(new Document());
continue;
}
idField.setStringValue(Integer.toString(i));
byte[] buffer = bytes.get();
storedField.setBytesValue(buffer);
dvField.setBytesValue(buffer);
writer.addDocument(doc);
if (random().nextInt(31) == 0) {
writer.commit();
}
}
// delete some docs
int numDeletions = random().nextInt(numDocs / 10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
// compare
DirectoryReader ir = writer.getReader();
TestUtil.checkReader(ir);
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
BinaryDocValues docValues = DocValues.getBinary(r, "dv");
docValues.nextDoc();
for (int i = 0; i < r.maxDoc(); i++) {
BytesRef binaryValue = r.document(i).getBinaryValue("stored");
if (binaryValue == null) {
assertTrue(docValues.docID() > i);
} else {
assertEquals(i, docValues.docID());
assertEquals(binaryValue, docValues.binaryValue());
docValues.nextDoc();
}
}
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docValues.docID());
}
ir.close();
writer.forceMerge(1);
// compare again
ir = writer.getReader();
TestUtil.checkReader(ir);
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
BinaryDocValues docValues = DocValues.getBinary(r, "dv");
docValues.nextDoc();
for (int i = 0; i < r.maxDoc(); i++) {
BytesRef binaryValue = r.document(i).getBinaryValue("stored");
if (binaryValue == null) {
assertTrue(docValues.docID() > i);
} else {
assertEquals(i, docValues.docID());
assertEquals(binaryValue, docValues.binaryValue());
docValues.nextDoc();
}
}
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docValues.docID());
}
ir.close();
writer.close();
dir.close();
}
use of org.apache.lucene.document.StoredField in project lucene-solr by apache.
the class BaseGeoPointTestCase method doRandomDistanceTest.
private void doRandomDistanceTest(int numDocs, int numQueries) throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
// Else seeds may not reproduce:
iwc.setMergeScheduler(new SerialMergeScheduler());
int pointsInLeaf = 2 + random().nextInt(4);
iwc.setCodec(new FilterCodec("Lucene70", TestUtil.getDefaultCodec()) {
@Override
public PointsFormat pointsFormat() {
return new PointsFormat() {
@Override
public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
return new Lucene60PointsWriter(writeState, pointsInLeaf, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
}
@Override
public PointsReader fieldsReader(SegmentReadState readState) throws IOException {
return new Lucene60PointsReader(readState);
}
};
}
});
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
for (int i = 0; i < numDocs; i++) {
double latRaw = nextLatitude();
double lonRaw = nextLongitude();
// pre-normalize up front, so we can just use quantized value for testing and do simple exact comparisons
double lat = quantizeLat(latRaw);
double lon = quantizeLon(lonRaw);
Document doc = new Document();
addPointToDoc("field", doc, lat, lon);
doc.add(new StoredField("lat", lat));
doc.add(new StoredField("lon", lon));
writer.addDocument(doc);
}
IndexReader reader = writer.getReader();
IndexSearcher searcher = newSearcher(reader);
for (int i = 0; i < numQueries; i++) {
double lat = nextLatitude();
double lon = nextLongitude();
double radius = 50000000D * random().nextDouble();
BitSet expected = new BitSet();
for (int doc = 0; doc < reader.maxDoc(); doc++) {
double docLatitude = reader.document(doc).getField("lat").numericValue().doubleValue();
double docLongitude = reader.document(doc).getField("lon").numericValue().doubleValue();
double distance = SloppyMath.haversinMeters(lat, lon, docLatitude, docLongitude);
if (distance <= radius) {
expected.set(doc);
}
}
TopDocs topDocs = searcher.search(newDistanceQuery("field", lat, lon, radius), reader.maxDoc(), Sort.INDEXORDER);
BitSet actual = new BitSet();
for (ScoreDoc doc : topDocs.scoreDocs) {
actual.set(doc.doc);
}
try {
assertEquals(expected, actual);
} catch (AssertionError e) {
System.out.println("center: (" + lat + "," + lon + "), radius=" + radius);
for (int doc = 0; doc < reader.maxDoc(); doc++) {
double docLatitude = reader.document(doc).getField("lat").numericValue().doubleValue();
double docLongitude = reader.document(doc).getField("lon").numericValue().doubleValue();
double distance = SloppyMath.haversinMeters(lat, lon, docLatitude, docLongitude);
System.out.println("" + doc + ": (" + docLatitude + "," + docLongitude + "), distance=" + distance);
}
throw e;
}
}
reader.close();
writer.close();
dir.close();
}
use of org.apache.lucene.document.StoredField in project lucene-solr by apache.
the class BaseDocValuesFormatTestCase method doTestNumericsVsStoredFields.
private void doTestNumericsVsStoredFields(double density, LongSupplier longs) throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
Document doc = new Document();
Field idField = new StringField("id", "", Field.Store.NO);
Field storedField = newStringField("stored", "", Field.Store.YES);
Field dvField = new NumericDocValuesField("dv", 0);
doc.add(idField);
doc.add(storedField);
doc.add(dvField);
// index some docs
int numDocs = atLeast(300);
// for numbers of values <= 256, all storage layouts are tested
assert numDocs > 256;
for (int i = 0; i < numDocs; i++) {
if (random().nextDouble() > density) {
writer.addDocument(new Document());
continue;
}
idField.setStringValue(Integer.toString(i));
long value = longs.getAsLong();
storedField.setStringValue(Long.toString(value));
dvField.setLongValue(value);
writer.addDocument(doc);
if (random().nextInt(31) == 0) {
writer.commit();
}
}
// delete some docs
int numDeletions = random().nextInt(numDocs / 10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
// merge some segments and ensure that at least one of them has more than
// 256 values
writer.forceMerge(numDocs / 256);
writer.close();
// compare
DirectoryReader ir = DirectoryReader.open(dir);
TestUtil.checkReader(ir);
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
NumericDocValues docValues = DocValues.getNumeric(r, "dv");
docValues.nextDoc();
for (int i = 0; i < r.maxDoc(); i++) {
String storedValue = r.document(i).get("stored");
if (storedValue == null) {
assertTrue(docValues.docID() > i);
} else {
assertEquals(i, docValues.docID());
assertEquals(Long.parseLong(storedValue), docValues.longValue());
docValues.nextDoc();
}
}
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docValues.docID());
}
ir.close();
dir.close();
}
Aggregations