use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache.
the class TestDocValuesQueries method doTestDuelPointRangeSortedRangeQuery.
/**
 * Duels {@code LongPoint} range queries on the "idx" field against doc-values range queries on
 * the "dv" field and asserts, via {@code assertSameMatches}, that both match the same documents.
 *
 * <p>Each indexed value is written twice: as a {@code LongPoint} and as the bytes produced by
 * {@code LongPoint.encodeDimension}, stored in a sorted or sorted-set doc-values field.
 *
 * @param sortedSet if true, index and query with {@code SortedSetDocValuesField}; otherwise use
 *     {@code SortedDocValuesField}
 * @param maxValuesPerDoc inclusive upper bound on the number of values added to each document
 * @throws IOException propagated from the index writer, reader or directory
 */
private void doTestDuelPointRangeSortedRangeQuery(boolean sortedSet, int maxValuesPerDoc) throws IOException {
  final int rounds = atLeast(10);
  for (int round = 0; round < rounds; round++) {
    Directory directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

    // Index phase: every value goes into both the point field and the doc-values field.
    final int docCount = atLeast(100);
    for (int docIdx = 0; docIdx < docCount; docIdx++) {
      Document document = new Document();
      final int valueCount = TestUtil.nextInt(random(), 0, maxValuesPerDoc);
      for (int valueIdx = 0; valueIdx < valueCount; valueIdx++) {
        final long value = TestUtil.nextLong(random(), -100, 10000);
        byte[] sortableBytes = new byte[Long.BYTES];
        LongPoint.encodeDimension(value, sortableBytes, 0);
        final BytesRef term = new BytesRef(sortableBytes);
        if (sortedSet) {
          document.add(new SortedSetDocValuesField("dv", term));
        } else {
          document.add(new SortedDocValuesField("dv", term));
        }
        document.add(new LongPoint("idx", value));
      }
      writer.addDocument(document);
    }

    // Half of the time, also exercise deletions.
    if (random().nextBoolean()) {
      writer.deleteDocuments(LongPoint.newRangeQuery("idx", 0L, 10L));
    }

    final IndexReader indexReader = writer.getReader();
    final IndexSearcher indexSearcher = newSearcher(indexReader, false);
    writer.close();

    // Query phase: 100 random ranges per index.
    for (int queryIdx = 0; queryIdx < 100; queryIdx++) {
      long min = random().nextBoolean() ? Long.MIN_VALUE : TestUtil.nextLong(random(), -100, 10000);
      long max = random().nextBoolean() ? Long.MAX_VALUE : TestUtil.nextLong(random(), -100, 10000);

      // The encoded bounds are captured BEFORE min/max are adjusted below: the doc-values query
      // receives the original bound flagged as exclusive, while the point query (always
      // inclusive) receives the bound shifted by one.
      byte[] encodedMin = new byte[Long.BYTES];
      byte[] encodedMax = new byte[Long.BYTES];
      LongPoint.encodeDimension(min, encodedMin, 0);
      LongPoint.encodeDimension(max, encodedMax, 0);

      final boolean includeMin = !random().nextBoolean();
      if (!includeMin) {
        min++;
      }
      final boolean includeMax = !random().nextBoolean();
      if (!includeMax) {
        max--;
      }

      final Query pointQuery = LongPoint.newRangeQuery("idx", min, max);

      // When a bound is still at the extreme long value, randomly pass null instead of the
      // encoded bytes.
      final BytesRef lowerTerm =
          (min == Long.MIN_VALUE && random().nextBoolean()) ? null : new BytesRef(encodedMin);
      final BytesRef upperTerm =
          (max == Long.MAX_VALUE && random().nextBoolean()) ? null : new BytesRef(encodedMax);

      final Query docValuesQuery = sortedSet
          ? SortedSetDocValuesField.newRangeQuery("dv", lowerTerm, upperTerm, includeMin, includeMax)
          : SortedDocValuesField.newRangeQuery("dv", lowerTerm, upperTerm, includeMin, includeMax);

      assertSameMatches(indexSearcher, pointQuery, docValuesQuery, false);
    }

    indexReader.close();
    directory.close();
  }
}
use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache.
the class TestNumericDocValuesUpdates method testDifferentDVFormatPerField.
/**
 * Indexes the same document twice (once committed, once still buffered), updates the numeric
 * doc value of both through their shared "key" term, and checks that every document then
 * reports the updated numeric value while the sorted doc value is unchanged. The codec hands
 * out a new {@code DocValuesFormat} instance on every per-field lookup.
 */
@Test
public void testDifferentDVFormatPerField() throws Exception {
  // test relies on separate instances of the "same thing"
  assert TestUtil.getDefaultDocValuesFormat() != TestUtil.getDefaultDocValuesFormat();

  Directory directory = newDirectory();
  IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random()));
  AssertingCodec freshFormatPerLookup = new AssertingCodec() {
    @Override
    public DocValuesFormat getDocValuesFormatForField(String field) {
      return TestUtil.getDefaultDocValuesFormat();
    }
  };
  config.setCodec(freshFormatPerLookup);
  IndexWriter indexWriter = new IndexWriter(directory, config);

  Document document = new Document();
  document.add(new StringField("key", "doc", Store.NO));
  document.add(new NumericDocValuesField("ndv", 5));
  document.add(new SortedDocValuesField("sorted", new BytesRef("value")));

  // flushed document
  indexWriter.addDocument(document);
  indexWriter.commit();

  // in-memory document
  indexWriter.addDocument(document);

  // Both copies carry the same key term, so the update applies to each of them.
  final Term keyTerm = new Term("key", "doc");
  indexWriter.updateNumericDocValue(keyTerm, "ndv", 17L);
  indexWriter.close();

  final DirectoryReader directoryReader = DirectoryReader.open(directory);
  NumericDocValues numericValues = MultiDocValues.getNumericValues(directoryReader, "ndv");
  SortedDocValues sortedValues = MultiDocValues.getSortedValues(directoryReader, "sorted");
  final BytesRef expectedTerm = new BytesRef("value");

  int docId = 0;
  while (docId < directoryReader.maxDoc()) {
    assertEquals(docId, numericValues.nextDoc());
    assertEquals(17, numericValues.longValue());
    assertEquals(docId, sortedValues.nextDoc());
    assertEquals(expectedTerm, sortedValues.binaryValue());
    docId++;
  }

  directoryReader.close();
  directory.close();
}
use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache.
the class TestMultiDocValues method testSortedWithLotsOfDups.
// tries to make more dups than testSorted
/**
 * Indexes many documents whose sorted doc value is a random simple string generated with a
 * length argument of 2, committing at random points, then compares the multi-segment view from
 * {@code MultiDocValues} with the single force-merged leaf: value count, per-document ordinal
 * and bytes, plus the random-advance helpers.
 */
public void testSortedWithLotsOfDups() throws Exception {
  Directory directory = newDirectory();

  // A single Document/Field pair is reused; only the field's bytes change per iteration.
  Document reusedDoc = new Document();
  Field bytesField = new SortedDocValuesField("bytes", new BytesRef());
  reusedDoc.add(bytesField);

  IndexWriterConfig config = newIndexWriterConfig(random(), null);
  config.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

  final int docCount = atLeast(TEST_NIGHTLY ? 500 : 50);
  for (int docIdx = 0; docIdx < docCount; docIdx++) {
    bytesField.setBytesValue(new BytesRef(TestUtil.randomSimpleString(random(), 2)));
    writer.addDocument(reusedDoc);
    // Occasional commits, on average one in 17 documents.
    if (random().nextInt(17) == 0) {
      writer.commit();
    }
  }

  // Reader obtained before the force-merge versus reader obtained after it.
  DirectoryReader multiReader = writer.getReader();
  writer.forceMerge(1);
  DirectoryReader mergedReader = writer.getReader();
  LeafReader mergedLeaf = getOnlyLeafReader(mergedReader);
  writer.close();

  SortedDocValues multiValues = MultiDocValues.getSortedValues(multiReader, "bytes");
  SortedDocValues singleValues = mergedLeaf.getSortedDocValues("bytes");
  assertEquals(singleValues.getValueCount(), multiValues.getValueCount());

  for (int docId = 0; docId < docCount; docId++) {
    assertEquals(docId, multiValues.nextDoc());
    assertEquals(docId, singleValues.nextDoc());
    // check ord
    assertEquals(singleValues.ordValue(), multiValues.ordValue());
    // check ord value
    assertEquals(BytesRef.deepCopyOf(singleValues.binaryValue()), multiValues.binaryValue());
  }

  // Fresh iterators for each helper, since the ones above have already been consumed.
  SortedDocValues advanceExpected = mergedLeaf.getSortedDocValues("bytes");
  SortedDocValues advanceActual = MultiDocValues.getSortedValues(multiReader, "bytes");
  testRandomAdvance(advanceExpected, advanceActual);

  SortedDocValues exactExpected = mergedLeaf.getSortedDocValues("bytes");
  SortedDocValues exactActual = MultiDocValues.getSortedValues(multiReader, "bytes");
  testRandomAdvanceExact(exactExpected, exactActual, mergedLeaf.maxDoc());

  multiReader.close();
  mergedReader.close();
  directory.close();
}
use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache.
the class TestDiversifiedTopDocsCollector method setUp.
/**
 * Builds the test index from {@code hitsOfThe60s}: each tab-separated entry becomes a
 * {@code Record} (remembered in {@code parsedRecords} by id) and one indexed document. Then
 * opens the reader and searcher, loads the "artist" sorted doc values, and wraps the searcher's
 * similarity in a {@code DocValueSimilarity} over "weeksAtNumberOne".
 */
@Override
public void setUp() throws Exception {
  super.setUp();

  // populate an index with documents - artist, song and weeksAtNumberOne
  dir = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), dir);

  // One Document and one set of Field instances are reused for every record; only the field
  // values are swapped inside the loop.
  Document reusedDoc = new Document();
  Field year = newTextField("year", "", Field.Store.NO);
  SortedDocValuesField artistSorted = new SortedDocValuesField("artist", new BytesRef(""));
  Field weeksDocValue = new FloatDocValuesField("weeksAtNumberOne", 0.0F);
  Field weeksStored = new StoredField("weeks", 0.0F);
  Field id = newStringField("id", "", Field.Store.YES);
  Field song = newTextField("song", "", Field.Store.NO);
  Field artistName = newTextField("artistName", "", Field.Store.NO);

  reusedDoc.add(id);
  reusedDoc.add(weeksDocValue);
  reusedDoc.add(artistName);
  reusedDoc.add(song);
  reusedDoc.add(weeksStored);
  reusedDoc.add(year);
  reusedDoc.add(artistSorted);

  parsedRecords.clear();
  for (int row = 0; row < hitsOfThe60s.length; row++) {
    String[] columns = hitsOfThe60s[row].split("\t");
    Record parsed =
        new Record(String.valueOf(row), columns[0], columns[1], columns[2], Float.parseFloat(columns[3]));
    parsedRecords.put(parsed.id, parsed);

    id.setStringValue(parsed.id);
    year.setStringValue(parsed.year);
    artistName.setStringValue(parsed.artist);
    artistSorted.setBytesValue(new BytesRef(parsed.artist));
    song.setStringValue(parsed.song);
    weeksStored.setFloatValue(parsed.weeks);
    weeksDocValue.setFloatValue(parsed.weeks);
    indexWriter.addDocument(reusedDoc);

    if (row % 10 == 0) {
      // Causes the creation of multiple segments for our test
      indexWriter.commit();
    }
  }

  reader = indexWriter.getReader();
  indexWriter.close();
  searcher = newSearcher(reader);
  artistDocValues = MultiDocValues.getSortedValues(reader, "artist");

  // All searches sort by song popularity
  searcher.setSimilarity(new DocValueSimilarity(searcher.getSimilarity(true), "weeksAtNumberOne"));
}
use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache.
the class TestDocValuesStatsCollector method testDocsWithSortedValues.
/**
 * Indexes a random number of documents, only some of which carry a sorted doc value, optionally
 * deletes a random subset of them by id, and checks that {@code SortedDocValuesStats} collected
 * over all documents reports the expected count, missing count, minimum and maximum.
 *
 * @throws IOException propagated from the directory, writer, reader or search
 */
public void testDocsWithSortedValues() throws IOException {
  try (Directory directory = newDirectory();
      IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig())) {
    String field = "sorted";
    int docCount = TestUtil.nextInt(random(), 1, 100);

    // expectedValues[i] stays null for documents indexed without a value, and is reset to null
    // for documents a delete was issued against.
    BytesRef[] expectedValues = new BytesRef[docCount];
    for (int docIdx = 0; docIdx < docCount; docIdx++) {
      Document document = new Document();
      if (random().nextBoolean()) {
        // not all documents have a value
        BytesRef value = TestUtil.randomBinaryTerm(random());
        document.add(new SortedDocValuesField(field, value));
        document.add(new StringField("id", "doc" + docIdx, Store.NO));
        expectedValues[docIdx] = value;
      }
      writer.addDocument(document);
    }

    // 20% of cases delete some docs
    final boolean deleteSome = random().nextDouble() < 0.2;
    if (deleteSome) {
      for (int docIdx = 0; docIdx < docCount; docIdx++) {
        if (random().nextBoolean()) {
          writer.deleteDocuments(new Term("id", "doc" + docIdx));
          expectedValues[docIdx] = null;
        }
      }
    }

    try (DirectoryReader directoryReader = DirectoryReader.open(writer)) {
      IndexSearcher indexSearcher = new IndexSearcher(directoryReader);
      SortedDocValuesStats collected = new SortedDocValuesStats(field);
      indexSearcher.search(new MatchAllDocsQuery(), new DocValuesStatsCollector(collected));

      assertEquals((int) nonNull(expectedValues).count(), collected.count());

      int withoutValue = (int) isNull(expectedValues).count();
      assertEquals(computeExpMissing(withoutValue, docCount, directoryReader), collected.missing());

      // min/max are only compared when at least one value was collected
      if (collected.count() > 0) {
        assertEquals(nonNull(expectedValues).min(BytesRef::compareTo).get(), collected.min());
        assertEquals(nonNull(expectedValues).max(BytesRef::compareTo).get(), collected.max());
      }
    }
  }
}
Aggregations