Use of org.apache.lucene.document.NumericDocValuesField in project lucene-solr by apache.
Source: class TestIndexSorting, method testMultiValuedRandom1.
public void testMultiValuedRandom1() throws IOException {
  boolean withDeletes = random().nextBoolean();
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  Sort indexSort = new Sort(new SortedNumericSortField("foo", SortField.Type.LONG));
  iwc.setIndexSort(indexSort);
  IndexWriter w = new IndexWriter(dir, iwc);
  final int numDocs = atLeast(1000);
  final FixedBitSet deleted = new FixedBitSet(numDocs);
  for (int i = 0; i < numDocs; ++i) {
    Document doc = new Document();
    int num = random().nextInt(10);
    for (int j = 0; j < num; j++) {
      doc.add(new SortedNumericDocValuesField("foo", random().nextInt(2000)));
    }
    doc.add(new StringField("id", Integer.toString(i), Store.YES));
    doc.add(new NumericDocValuesField("id", i));
    w.addDocument(doc);
    if (random().nextInt(5) == 0) {
      w.getReader().close();
    } else if (random().nextInt(30) == 0) {
      w.forceMerge(2);
    } else if (random().nextInt(4) == 0) {
      final int id = TestUtil.nextInt(random(), 0, i);
      deleted.set(id);
      w.deleteDocuments(new Term("id", Integer.toString(id)));
    }
  }
  DirectoryReader reader = w.getReader();
  // Now check that the index is consistent
  IndexSearcher searcher = newSearcher(reader);
  for (int i = 0; i < numDocs; ++i) {
    TermQuery termQuery = new TermQuery(new Term("id", Integer.toString(i)));
    final TopDocs topDocs = searcher.search(termQuery, 1);
    if (deleted.get(i)) {
      assertEquals(0, topDocs.totalHits);
    } else {
      assertEquals(1, topDocs.totalHits);
      NumericDocValues values = MultiDocValues.getNumericValues(reader, "id");
      assertEquals(topDocs.scoreDocs[0].doc, values.advance(topDocs.scoreDocs[0].doc));
      assertEquals(i, values.longValue());
      Document document = reader.document(topDocs.scoreDocs[0].doc);
      assertEquals(Integer.toString(i), document.get("id"));
    }
  }
  reader.close();
  w.close();
  dir.close();
}
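The test above depends on Lucene's test framework (newDirectory, MockAnalyzer, randomized counts and assertions). A minimal standalone sketch of the same pattern, an index sort on a multi-valued SortedNumericDocValuesField plus a single-valued NumericDocValuesField used as a lookup id, might look as follows; the class name, the RAMDirectory/StandardAnalyzer choices, and the fixed document count are illustrative assumptions, not part of the test.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class SortedIndexSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory(); // assumption: in-memory directory for the sketch
    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
    // Sort the index by the smallest value of the multi-valued "foo" field.
    iwc.setIndexSort(new Sort(new SortedNumericSortField("foo", SortField.Type.LONG)));
    try (IndexWriter w = new IndexWriter(dir, iwc)) {
      for (int i = 0; i < 10; i++) {
        Document doc = new Document();
        doc.add(new SortedNumericDocValuesField("foo", 100 - i));
        doc.add(new SortedNumericDocValuesField("foo", i));          // multiple values per doc are allowed
        doc.add(new StringField("id", Integer.toString(i), Store.YES));
        doc.add(new NumericDocValuesField("id", i));                 // single-valued doc values for lookups
        w.addDocument(doc);
      }
      try (DirectoryReader reader = DirectoryReader.open(w)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        TopDocs hits = searcher.search(new TermQuery(new Term("id", "3")), 1);
        NumericDocValues ids = MultiDocValues.getNumericValues(reader, "id");
        int docID = hits.scoreDocs[0].doc;
        if (ids.advanceExact(docID)) {
          System.out.println("doc " + docID + " has id " + ids.longValue());
        }
      }
    }
    dir.close();
  }
}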
Use of org.apache.lucene.document.NumericDocValuesField in project lucene-solr by apache.
Source: class TestIndexSorting, method testIndexSortWithSparseField.
public void testIndexSortWithSparseField() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  SortField sortField = new SortField("dense_int", SortField.Type.INT, true);
  Sort indexSort = new Sort(sortField);
  iwc.setIndexSort(indexSort);
  IndexWriter w = new IndexWriter(dir, iwc);
  Field textField = newTextField("sparse_text", "", Field.Store.NO);
  for (int i = 0; i < 128; i++) {
    Document doc = new Document();
    doc.add(new NumericDocValuesField("dense_int", i));
    if (i < 64) {
      doc.add(new NumericDocValuesField("sparse_int", i));
      doc.add(new BinaryDocValuesField("sparse_binary", new BytesRef(Integer.toString(i))));
      textField.setStringValue("foo");
      doc.add(textField);
    }
    w.addDocument(doc);
  }
  w.commit();
  w.forceMerge(1);
  DirectoryReader r = DirectoryReader.open(w);
  assertEquals(1, r.leaves().size());
  LeafReader leafReader = r.leaves().get(0).reader();
  NumericDocValues denseValues = leafReader.getNumericDocValues("dense_int");
  NumericDocValues sparseValues = leafReader.getNumericDocValues("sparse_int");
  BinaryDocValues sparseBinaryValues = leafReader.getBinaryDocValues("sparse_binary");
  NumericDocValues normsValues = leafReader.getNormValues("sparse_text");
  for (int docID = 0; docID < 128; docID++) {
    assertTrue(denseValues.advanceExact(docID));
    assertEquals(127 - docID, (int) denseValues.longValue());
    if (docID >= 64) {
      assertTrue(denseValues.advanceExact(docID));
      assertTrue(sparseValues.advanceExact(docID));
      assertTrue(sparseBinaryValues.advanceExact(docID));
      assertTrue(normsValues.advanceExact(docID));
      assertEquals(1, normsValues.longValue());
      assertEquals(127 - docID, (int) sparseValues.longValue());
      assertEquals(new BytesRef(Integer.toString(127 - docID)), sparseBinaryValues.binaryValue());
    } else {
      assertFalse(sparseBinaryValues.advanceExact(docID));
      assertFalse(sparseValues.advanceExact(docID));
      assertFalse(normsValues.advanceExact(docID));
    }
  }
  IOUtils.close(r, w, dir);
}
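The reader-side detail this test exercises is that advanceExact returns false for documents that carry no value in a sparse doc-values field. A condensed, hedged sketch of that iteration pattern outside the test framework could look like this; RAMDirectory, StandardAnalyzer, the field names, and the small eight-document loop are assumptions made for illustration only.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class SparseDocValuesSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory(); // assumption: in-memory directory for the sketch
    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
    iwc.setIndexSort(new Sort(new SortField("dense_int", SortField.Type.INT, true))); // descending index sort
    try (IndexWriter w = new IndexWriter(dir, iwc)) {
      for (int i = 0; i < 8; i++) {
        Document doc = new Document();
        doc.add(new NumericDocValuesField("dense_int", i));
        if (i % 2 == 0) {
          doc.add(new NumericDocValuesField("sparse_int", i)); // only half the docs carry this field
        }
        w.addDocument(doc);
      }
      w.forceMerge(1);
      try (DirectoryReader r = DirectoryReader.open(w)) {
        for (LeafReaderContext ctx : r.leaves()) {
          LeafReader leaf = ctx.reader();
          NumericDocValues dense = leaf.getNumericDocValues("dense_int");
          NumericDocValues sparse = leaf.getNumericDocValues("sparse_int");
          for (int docID = 0; docID < leaf.maxDoc(); docID++) {
            dense.advanceExact(docID); // every document has a dense value
            String sparseVal = sparse.advanceExact(docID)
                ? Long.toString(sparse.longValue())
                : "<missing>"; // advanceExact returns false when the doc has no value
            System.out.println("doc=" + docID + " dense=" + dense.longValue() + " sparse=" + sparseVal);
          }
        }
      }
    }
    dir.close();
  }
}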
Use of org.apache.lucene.document.NumericDocValuesField in project lucene-solr by apache.
Source: class BaseTestRangeFilter, method build.
private static IndexReader build(Random random, TestIndex index) throws IOException {
  /* build an index */
  Document doc = new Document();
  Field idField = newStringField(random, "id", "", Field.Store.YES);
  Field idDVField = new SortedDocValuesField("id", new BytesRef());
  Field intIdField = new IntPoint("id_int", 0);
  Field intDVField = new NumericDocValuesField("id_int", 0);
  Field floatIdField = new FloatPoint("id_float", 0);
  Field floatDVField = new NumericDocValuesField("id_float", 0);
  Field longIdField = new LongPoint("id_long", 0);
  Field longDVField = new NumericDocValuesField("id_long", 0);
  Field doubleIdField = new DoublePoint("id_double", 0);
  Field doubleDVField = new NumericDocValuesField("id_double", 0);
  Field randField = newStringField(random, "rand", "", Field.Store.YES);
  Field randDVField = new SortedDocValuesField("rand", new BytesRef());
  Field bodyField = newStringField(random, "body", "", Field.Store.NO);
  Field bodyDVField = new SortedDocValuesField("body", new BytesRef());
  doc.add(idField);
  doc.add(idDVField);
  doc.add(intIdField);
  doc.add(intDVField);
  doc.add(floatIdField);
  doc.add(floatDVField);
  doc.add(longIdField);
  doc.add(longDVField);
  doc.add(doubleIdField);
  doc.add(doubleDVField);
  doc.add(randField);
  doc.add(randDVField);
  doc.add(bodyField);
  doc.add(bodyDVField);
  RandomIndexWriter writer = new RandomIndexWriter(random, index.index,
      newIndexWriterConfig(random, new MockAnalyzer(random))
          .setOpenMode(OpenMode.CREATE)
          .setMaxBufferedDocs(TestUtil.nextInt(random, 50, 1000))
          .setMergePolicy(newLogMergePolicy()));
  TestUtil.reduceOpenFiles(writer.w);
  while (true) {
    int minCount = 0;
    int maxCount = 0;
    for (int d = minId; d <= maxId; d++) {
      idField.setStringValue(pad(d));
      idDVField.setBytesValue(new BytesRef(pad(d)));
      intIdField.setIntValue(d);
      intDVField.setLongValue(d);
      floatIdField.setFloatValue(d);
      floatDVField.setLongValue(Float.floatToRawIntBits(d));
      longIdField.setLongValue(d);
      longDVField.setLongValue(d);
      doubleIdField.setDoubleValue(d);
      doubleDVField.setLongValue(Double.doubleToRawLongBits(d));
      int r = index.allowNegativeRandomInts ? random.nextInt() : random.nextInt(Integer.MAX_VALUE);
      if (index.maxR < r) {
        index.maxR = r;
        maxCount = 1;
      } else if (index.maxR == r) {
        maxCount++;
      }
      if (r < index.minR) {
        index.minR = r;
        minCount = 1;
      } else if (r == index.minR) {
        minCount++;
      }
      randField.setStringValue(pad(r));
      randDVField.setBytesValue(new BytesRef(pad(r)));
      bodyField.setStringValue("body");
      bodyDVField.setBytesValue(new BytesRef("body"));
      writer.addDocument(doc);
    }
    if (minCount == 1 && maxCount == 1) {
      // our subclasses rely on only 1 doc having the min or
      // max, so we loop until we satisfy that. it should be
      // exceedingly rare (Yonik calculates 1 in ~429,000 times)
      // that this loop requires more than one try:
      IndexReader ir = writer.getReader();
      writer.close();
      return ir;
    }
    // try again
    writer.deleteAll();
  }
}
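The builder above pairs each point field (IntPoint, LongPoint, FloatPoint, DoublePoint) with a NumericDocValuesField of the same name, so the same logical column can be range-filtered through the points index and sorted or compared through doc values, and it reuses the Field instances across documents to reduce garbage. A minimal sketch of that pairing with plain Lucene classes might look like the following; RAMDirectory, StandardAnalyzer, the twenty-document loop, and the class name are illustrative assumptions.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class PointPlusDocValuesSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory(); // assumption: in-memory directory for the sketch
    try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      // Reuse the Field instances across documents, as the test above does.
      LongPoint pointField = new LongPoint("id_long", 0L);                        // indexed for range queries
      NumericDocValuesField dvField = new NumericDocValuesField("id_long", 0L);   // columnar value for sorting
      StoredField storedField = new StoredField("id_long", 0L);                   // stored copy for display
      Document doc = new Document();
      doc.add(pointField);
      doc.add(dvField);
      doc.add(storedField);
      for (long i = 0; i < 20; i++) {
        pointField.setLongValue(i);
        dvField.setLongValue(i);
        storedField.setLongValue(i);
        w.addDocument(doc);
      }
      try (DirectoryReader r = DirectoryReader.open(w)) {
        IndexSearcher searcher = new IndexSearcher(r);
        // Filter with the point index, order with the doc values, both under the same field name.
        TopDocs hits = searcher.search(LongPoint.newRangeQuery("id_long", 5L, 15L), 10,
            new Sort(new SortField("id_long", SortField.Type.LONG, true)));
        for (ScoreDoc sd : hits.scoreDocs) {
          System.out.println(searcher.doc(sd.doc).getField("id_long").numericValue());
        }
      }
    }
    dir.close();
  }
}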
Use of org.apache.lucene.document.NumericDocValuesField in project lucene-solr by apache.
Source: class TestSort, method testLongMissing.
/** Tests sorting on type long with a missing value */
public void testLongMissing() throws IOException {
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  writer.addDocument(doc);
  doc = new Document();
  doc.add(new NumericDocValuesField("value", -1));
  doc.add(newStringField("value", "-1", Field.Store.YES));
  writer.addDocument(doc);
  doc = new Document();
  doc.add(new NumericDocValuesField("value", 4));
  doc.add(newStringField("value", "4", Field.Store.YES));
  writer.addDocument(doc);
  IndexReader ir = writer.getReader();
  writer.close();
  IndexSearcher searcher = newSearcher(ir);
  Sort sort = new Sort(new SortField("value", SortField.Type.LONG));
  TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
  assertEquals(3, td.totalHits);
  // null is treated as 0
  assertEquals("-1", searcher.doc(td.scoreDocs[0].doc).get("value"));
  assertNull(searcher.doc(td.scoreDocs[1].doc).get("value"));
  assertEquals("4", searcher.doc(td.scoreDocs[2].doc).get("value"));
  ir.close();
  dir.close();
}
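The test documents the default behavior: a document without the NumericDocValuesField sorts as if its value were 0. If documents missing the field should instead sort last, SortField.setMissingValue can be used. A hedged standalone sketch of that variation follows; RAMDirectory, StandardAnalyzer, the class name, and the sample values are assumptions, not taken from the test.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class MissingValueSortSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory(); // assumption: in-memory directory for the sketch
    try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      w.addDocument(new Document()); // this document has no "value" field at all
      for (long v : new long[] {-1, 4}) {
        Document doc = new Document();
        doc.add(new NumericDocValuesField("value", v));
        doc.add(new StringField("value", Long.toString(v), Store.YES));
        w.addDocument(doc);
      }
      try (DirectoryReader r = DirectoryReader.open(w)) {
        IndexSearcher searcher = new IndexSearcher(r);
        SortField sf = new SortField("value", SortField.Type.LONG);
        sf.setMissingValue(Long.MAX_VALUE); // without this, a missing value sorts as 0
        for (ScoreDoc sd : searcher.search(new MatchAllDocsQuery(), 10, new Sort(sf)).scoreDocs) {
          System.out.println(searcher.doc(sd.doc).get("value")); // prints -1, 4, then null for the empty doc
        }
      }
    }
    dir.close();
  }
}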
Use of org.apache.lucene.document.NumericDocValuesField in project lucene-solr by apache.
Source: class TestSortRandom, method testRandomStringSort.
private void testRandomStringSort(SortField.Type type) throws Exception {
  Random random = new Random(random().nextLong());
  final int NUM_DOCS = atLeast(100);
  final Directory dir = newDirectory();
  final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
  final boolean allowDups = random.nextBoolean();
  final Set<String> seen = new HashSet<>();
  final int maxLength = TestUtil.nextInt(random, 5, 100);
  if (VERBOSE) {
    System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " maxLength=" + maxLength + " allowDups=" + allowDups);
  }
  int numDocs = 0;
  final List<BytesRef> docValues = new ArrayList<>();
  // TODO: deletions
  while (numDocs < NUM_DOCS) {
    final Document doc = new Document();
    // 10% of the time, the document is missing the value:
    final BytesRef br;
    if (random().nextInt(10) != 7) {
      final String s;
      if (random.nextBoolean()) {
        s = TestUtil.randomSimpleString(random, maxLength);
      } else {
        s = TestUtil.randomUnicodeString(random, maxLength);
      }
      if (!allowDups) {
        if (seen.contains(s)) {
          continue;
        }
        seen.add(s);
      }
      if (VERBOSE) {
        System.out.println(" " + numDocs + ": s=" + s);
      }
      br = new BytesRef(s);
      doc.add(new SortedDocValuesField("stringdv", br));
      docValues.add(br);
    } else {
      br = null;
      if (VERBOSE) {
        System.out.println(" " + numDocs + ": <missing>");
      }
      docValues.add(null);
    }
    doc.add(new NumericDocValuesField("id", numDocs));
    doc.add(new StoredField("id", numDocs));
    writer.addDocument(doc);
    numDocs++;
    if (random.nextInt(40) == 17) {
      // force flush
      writer.getReader().close();
    }
  }
  final IndexReader r = writer.getReader();
  writer.close();
  if (VERBOSE) {
    System.out.println(" reader=" + r);
  }
  final IndexSearcher s = newSearcher(r, false);
  final int ITERS = atLeast(100);
  for (int iter = 0; iter < ITERS; iter++) {
    final boolean reverse = random.nextBoolean();
    final TopFieldDocs hits;
    final SortField sf;
    final boolean sortMissingLast;
    sf = new SortField("stringdv", type, reverse);
    sortMissingLast = random().nextBoolean();
    if (sortMissingLast) {
      sf.setMissingValue(SortField.STRING_LAST);
    }
    final Sort sort;
    if (random.nextBoolean()) {
      sort = new Sort(sf);
    } else {
      sort = new Sort(sf, SortField.FIELD_DOC);
    }
    final int hitCount = TestUtil.nextInt(random, 1, r.maxDoc() + 20);
    final RandomQuery f = new RandomQuery(random.nextLong(), random.nextFloat(), docValues);
    hits = s.search(f, hitCount, sort, random.nextBoolean(), random.nextBoolean());
    if (VERBOSE) {
      System.out.println("\nTEST: iter=" + iter + " " + hits.totalHits + " hits; topN=" + hitCount + "; reverse=" + reverse + "; sortMissingLast=" + sortMissingLast + " sort=" + sort);
    }
    // Compute expected results:
    Collections.sort(f.matchValues, new Comparator<BytesRef>() {
      @Override
      public int compare(BytesRef a, BytesRef b) {
        if (a == null) {
          if (b == null) {
            return 0;
          }
          if (sortMissingLast) {
            return 1;
          } else {
            return -1;
          }
        } else if (b == null) {
          if (sortMissingLast) {
            return -1;
          } else {
            return 1;
          }
        } else {
          return a.compareTo(b);
        }
      }
    });
    if (reverse) {
      Collections.reverse(f.matchValues);
    }
    final List<BytesRef> expected = f.matchValues;
    if (VERBOSE) {
      System.out.println(" expected:");
      for (int idx = 0; idx < expected.size(); idx++) {
        BytesRef br = expected.get(idx);
        System.out.println(" " + idx + ": " + (br == null ? "<missing>" : br.utf8ToString()));
        if (idx == hitCount - 1) {
          break;
        }
      }
    }
    if (VERBOSE) {
      System.out.println(" actual:");
      for (int hitIDX = 0; hitIDX < hits.scoreDocs.length; hitIDX++) {
        final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
        BytesRef br = (BytesRef) fd.fields[0];
        System.out.println(" " + hitIDX + ": " + (br == null ? "<missing>" : br.utf8ToString()) + " id=" + s.doc(fd.doc).get("id"));
      }
    }
    for (int hitIDX = 0; hitIDX < hits.scoreDocs.length; hitIDX++) {
      final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
      BytesRef br = expected.get(hitIDX);
      BytesRef br2 = (BytesRef) fd.fields[0];
      assertEquals(br, br2);
    }
  }
  r.close();
  dir.close();
}
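In this last test the NumericDocValuesField/StoredField pair named "id" exists only to identify documents when checking the sort order, while SortedDocValuesField carries the sort key and SortField.STRING_LAST decides where missing values land. A small deterministic sketch of that setup, with illustrative values and the same RAMDirectory/StandardAnalyzer assumptions as above, could be:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;

public class StringSortSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory(); // assumption: in-memory directory for the sketch
    try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      String[] values = {"banana", null, "apple"}; // null means the doc has no sort value
      for (int id = 0; id < values.length; id++) {
        Document doc = new Document();
        if (values[id] != null) {
          doc.add(new SortedDocValuesField("stringdv", new BytesRef(values[id])));
        }
        doc.add(new NumericDocValuesField("id", id)); // doc-values id, usable for sorting or tiebreaks
        doc.add(new StoredField("id", id));           // stored id, retrievable for verification
        w.addDocument(doc);
      }
      try (DirectoryReader r = DirectoryReader.open(w)) {
        IndexSearcher searcher = new IndexSearcher(r);
        SortField sf = new SortField("stringdv", SortField.Type.STRING);
        sf.setMissingValue(SortField.STRING_LAST); // docs without the field sort after all others
        TopFieldDocs hits = searcher.search(new MatchAllDocsQuery(), 10, new Sort(sf, SortField.FIELD_DOC));
        for (ScoreDoc sd : hits.scoreDocs) {
          BytesRef sortValue = (BytesRef) ((FieldDoc) sd).fields[0];
          System.out.println((sortValue == null ? "<missing>" : sortValue.utf8ToString())
              + " id=" + searcher.doc(sd.doc).getField("id").numericValue());
        }
      }
    }
    dir.close();
  }
}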