Use of org.apache.lucene.document.SortedDocValuesField in the Apache lucene-solr project.
Class TestSort, method testStringReverse.
/** Checks that a reversed (descending) sort on a string field returns the larger term first. */
public void testStringReverse() throws IOException {
  Directory directory = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);

  // Each document carries its term twice under "value": as a sorted doc value
  // (what the sort reads) and as a stored string field (what the assertions read back).
  Document barDoc = new Document();
  barDoc.add(new SortedDocValuesField("value", new BytesRef("bar")));
  barDoc.add(newStringField("value", "bar", Field.Store.YES));
  indexWriter.addDocument(barDoc);

  Document fooDoc = new Document();
  fooDoc.add(new SortedDocValuesField("value", new BytesRef("foo")));
  fooDoc.add(newStringField("value", "foo", Field.Store.YES));
  indexWriter.addDocument(fooDoc);

  IndexReader reader = indexWriter.getReader();
  indexWriter.close();

  IndexSearcher searcher = newSearcher(reader);
  SortField descendingByValue = new SortField("value", SortField.Type.STRING, true);
  TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 10, new Sort(descendingByValue));
  assertEquals(2, topDocs.totalHits);

  // Reversed order: "foo" is expected at rank 0 and "bar" at rank 1.
  String firstValue = searcher.doc(topDocs.scoreDocs[0].doc).get("value");
  assertEquals("foo", firstValue);
  String secondValue = searcher.doc(topDocs.scoreDocs[1].doc).get("value");
  assertEquals("bar", secondValue);

  reader.close();
  directory.close();
}
Use of org.apache.lucene.document.SortedDocValuesField in the Apache lucene-solr project.
Class TestSort, method testStringValSorted.
/** Checks an ascending STRING_VAL sort when the indexed values are SortedDocValuesField instances. */
public void testStringValSorted() throws IOException {
  Directory directory = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);

  // Documents are added in the opposite of the expected result order ("foo" first, then "bar").
  // Each one holds its term as a sorted doc value (sorted on) and a stored field (read back).
  Document fooDoc = new Document();
  fooDoc.add(new SortedDocValuesField("value", new BytesRef("foo")));
  fooDoc.add(newStringField("value", "foo", Field.Store.YES));
  indexWriter.addDocument(fooDoc);

  Document barDoc = new Document();
  barDoc.add(new SortedDocValuesField("value", new BytesRef("bar")));
  barDoc.add(newStringField("value", "bar", Field.Store.YES));
  indexWriter.addDocument(barDoc);

  IndexReader reader = indexWriter.getReader();
  indexWriter.close();

  IndexSearcher searcher = newSearcher(reader);
  SortField byValue = new SortField("value", SortField.Type.STRING_VAL);
  TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 10, new Sort(byValue));
  assertEquals(2, topDocs.totalHits);

  // Ascending order: "bar" is expected at rank 0 and "foo" at rank 1.
  String firstValue = searcher.doc(topDocs.scoreDocs[0].doc).get("value");
  assertEquals("bar", firstValue);
  String secondValue = searcher.doc(topDocs.scoreDocs[1].doc).get("value");
  assertEquals("foo", secondValue);

  reader.close();
  directory.close();
}
Use of org.apache.lucene.document.SortedDocValuesField in the Apache lucene-solr project.
Class TestSortRandom, method testRandomStringSort.
/**
 * Randomized check of sorting on the "stringdv" {@link SortedDocValuesField}.
 *
 * <p>Phase 1 indexes {@code atLeast(100)} documents. Most get a random string in "stringdv";
 * roughly one in ten get no value at all. Every document also gets its insertion index as
 * "id" (both as a numeric doc value and as a stored field). The value of each document
 * ({@code null} when missing) is recorded in {@code docValues} in insertion order.
 *
 * <p>Phase 2 runs {@code atLeast(100)} iterations. Each iteration picks a random sort direction,
 * random missing-value placement, a random hit count and a {@code RandomQuery}, executes the
 * search, computes the expected order from {@code f.matchValues} in plain Java, and asserts that
 * the sort value of every returned hit equals the expected value at the same rank.
 *
 * @param type the {@link SortField.Type} used to build the sort on "stringdv"
 * @throws Exception if indexing or searching fails
 */
private void testRandomStringSort(SortField.Type type) throws Exception {
// Private Random seeded from a single draw of the framework's random().
Random random = new Random(random().nextLong());
final int NUM_DOCS = atLeast(100);
final Directory dir = newDirectory();
final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
// When duplicates are disallowed, 'seen' holds every string already indexed.
final boolean allowDups = random.nextBoolean();
final Set<String> seen = new HashSet<>();
final int maxLength = TestUtil.nextInt(random, 5, 100);
if (VERBOSE) {
System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " maxLength=" + maxLength + " allowDups=" + allowDups);
}
int numDocs = 0;
// docValues.get(i) is the value indexed for the i-th added document, or null if it has none.
final List<BytesRef> docValues = new ArrayList<>();
// TODO: deletions
while (numDocs < NUM_DOCS) {
final Document doc = new Document();
// 10% of the time, the document is missing the value:
// NOTE(review): this draw uses the framework random() rather than the local 'random'
// used by the rest of the loop — confirm that is intentional.
final BytesRef br;
if (random().nextInt(10) != 7) {
final String s;
// Pick between the two TestUtil string generators (simple vs. unicode).
if (random.nextBoolean()) {
s = TestUtil.randomSimpleString(random, maxLength);
} else {
s = TestUtil.randomUnicodeString(random, maxLength);
}
if (!allowDups) {
if (seen.contains(s)) {
// Duplicate string: retry. numDocs is not incremented, so no document is added
// for this pass and the loop draws again.
continue;
}
seen.add(s);
}
if (VERBOSE) {
System.out.println(" " + numDocs + ": s=" + s);
}
br = new BytesRef(s);
doc.add(new SortedDocValuesField("stringdv", br));
docValues.add(br);
} else {
// Missing value: no "stringdv" field is added; a null placeholder keeps docValues aligned.
br = null;
if (VERBOSE) {
System.out.println(" " + numDocs + ": <missing>");
}
docValues.add(null);
}
// "id" is the document's insertion index, as a numeric doc value and as a stored field
// (the stored copy is read back in the VERBOSE "actual" dump below).
doc.add(new NumericDocValuesField("id", numDocs));
doc.add(new StoredField("id", numDocs));
writer.addDocument(doc);
numDocs++;
if (random.nextInt(40) == 17) {
// force flush
writer.getReader().close();
}
}
final IndexReader r = writer.getReader();
writer.close();
if (VERBOSE) {
System.out.println(" reader=" + r);
}
// NOTE(review): the 'false' argument presumably disables random reader wrapping in
// LuceneTestCase.newSearcher — confirm against the test framework.
final IndexSearcher s = newSearcher(r, false);
final int ITERS = atLeast(100);
for (int iter = 0; iter < ITERS; iter++) {
final boolean reverse = random.nextBoolean();
final TopFieldDocs hits;
final SortField sf;
final boolean sortMissingLast;
sf = new SortField("stringdv", type, reverse);
// NOTE(review): drawn from the framework random(), not the local 'random' — confirm intentional.
sortMissingLast = random().nextBoolean();
if (sortMissingLast) {
sf.setMissingValue(SortField.STRING_LAST);
}
// Randomly sort on sf alone or on sf followed by SortField.FIELD_DOC.
final Sort sort;
if (random.nextBoolean()) {
sort = new Sort(sf);
} else {
sort = new Sort(sf, SortField.FIELD_DOC);
}
// The requested hit count can exceed the number of documents (up to maxDoc + 20).
final int hitCount = TestUtil.nextInt(random, 1, r.maxDoc() + 20);
// RandomQuery is defined elsewhere; it is given a seed, a random float and the per-document
// values, and exposes matchValues after the search.
// NOTE(review): presumably matchValues holds the values of the matched documents — verify against RandomQuery.
final RandomQuery f = new RandomQuery(random.nextLong(), random.nextFloat(), docValues);
// The two trailing boolean arguments are randomized.
// NOTE(review): presumably doDocScores / doMaxScore — confirm against IndexSearcher.search.
hits = s.search(f, hitCount, sort, random.nextBoolean(), random.nextBoolean());
if (VERBOSE) {
System.out.println("\nTEST: iter=" + iter + " " + hits.totalHits + " hits; topN=" + hitCount + "; reverse=" + reverse + "; sortMissingLast=" + sortMissingLast + " sort=" + sort);
}
// Compute expected results:
// Sort ascending with nulls (missing values) placed last when sortMissingLast is set and
// first otherwise; non-null values use BytesRef.compareTo.
Collections.sort(f.matchValues, new Comparator<BytesRef>() {
@Override
public int compare(BytesRef a, BytesRef b) {
if (a == null) {
if (b == null) {
return 0;
}
if (sortMissingLast) {
return 1;
} else {
return -1;
}
} else if (b == null) {
if (sortMissingLast) {
return -1;
} else {
return 1;
}
} else {
return a.compareTo(b);
}
}
});
// A reversed sort is modelled by reversing the whole list, which also moves the null
// (missing) entries to the opposite end.
if (reverse) {
Collections.reverse(f.matchValues);
}
// 'expected' aliases f.matchValues, which was sorted in place above.
final List<BytesRef> expected = f.matchValues;
if (VERBOSE) {
System.out.println(" expected:");
for (int idx = 0; idx < expected.size(); idx++) {
BytesRef br = expected.get(idx);
System.out.println(" " + idx + ": " + (br == null ? "<missing>" : br.utf8ToString()));
// Stop after the first hitCount entries.
if (idx == hitCount - 1) {
break;
}
}
}
if (VERBOSE) {
System.out.println(" actual:");
for (int hitIDX = 0; hitIDX < hits.scoreDocs.length; hitIDX++) {
final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
BytesRef br = (BytesRef) fd.fields[0];
System.out.println(" " + hitIDX + ": " + (br == null ? "<missing>" : br.utf8ToString()) + " id=" + s.doc(fd.doc).get("id"));
}
}
// Compare rank by rank. Only fields[0] of each hit is checked, and only as many ranks as
// the search returned; the number of returned hits itself is not asserted.
for (int hitIDX = 0; hitIDX < hits.scoreDocs.length; hitIDX++) {
final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
BytesRef br = expected.get(hitIDX);
BytesRef br2 = (BytesRef) fd.fields[0];
assertEquals(br, br2);
}
}
r.close();
dir.close();
}
Use of org.apache.lucene.document.SortedDocValuesField in the Apache lucene-solr project.
Class TestOrdValues, method addDoc.
/**
 * Builds one test document for index position {@code i} and adds it to {@code iw}.
 *
 * <p>The document's id/score value is {@code i + 1}. It is written to the id field (as an
 * untokenized stored field plus a sorted doc value), embedded in the text field, and written to
 * the int and float fields (each as a legacy stored field plus a numeric doc value).
 *
 * @param iw writer that receives the document
 * @param i zero-based position of the document; also passed to {@code textLine}
 * @throws Exception if adding the document fails
 */
private static void addDoc(RandomIndexWriter iw, int i) throws Exception {
  final int scoreAndID = i + 1;
  Document document = new Document();

  // for debug purposes: stored, untokenized, norms omitted
  FieldType storedUntokenized = new FieldType(TextField.TYPE_STORED);
  storedUntokenized.setTokenized(false);
  storedUntokenized.setOmitNorms(true);
  document.add(newField(ID_FIELD, id2String(scoreAndID), storedUntokenized));
  document.add(new SortedDocValuesField(ID_FIELD, new BytesRef(id2String(scoreAndID))));

  // for regular search: not stored, norms omitted
  FieldType unstoredNoNorms = new FieldType(TextField.TYPE_NOT_STORED);
  unstoredNoNorms.setOmitNorms(true);
  document.add(newField(TEXT_FIELD, "text of doc" + scoreAndID + textLine(i), unstoredNoNorms));

  // for function scoring (int)
  document.add(new LegacyIntField(INT_FIELD, scoreAndID, Store.YES));
  document.add(new NumericDocValuesField(INT_FIELD, scoreAndID));

  // for function scoring (float); the doc value stores the raw int bits of the float
  document.add(new LegacyFloatField(FLOAT_FIELD, scoreAndID, Store.YES));
  document.add(new NumericDocValuesField(FLOAT_FIELD, Float.floatToRawIntBits(scoreAndID)));

  iw.addDocument(document);
  log("added: " + document);
}
Use of org.apache.lucene.document.SortedDocValuesField in the Apache lucene-solr project.
Class TestFieldCache, method testDocValuesIntegration.
/**
 * Indexes a single document holding one field of each doc-values type (binary, sorted, numeric,
 * sorted-set) and checks, per type, which {@code FieldCache.DEFAULT} accessors return the value
 * and which ones throw {@link IllegalStateException}.
 *
 * @throws Exception if indexing or reading fails
 */
public void testDocValuesIntegration() throws Exception {
  Directory directory = newDirectory();
  IndexWriterConfig config = newIndexWriterConfig(null);
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, config);

  Document document = new Document();
  document.add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
  document.add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
  document.add(new NumericDocValuesField("numeric", 42));
  document.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
  document.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
  indexWriter.addDocument(document);

  DirectoryReader reader = indexWriter.getReader();
  indexWriter.close();
  LeafReader leaf = getOnlyLeafReader(reader);

  // Binary type: can be retrieved via getTerms()
  expectThrows(IllegalStateException.class,
      () -> FieldCache.DEFAULT.getNumerics(leaf, "binary", FieldCache.INT_POINT_PARSER));
  BinaryDocValues binaryTerms = FieldCache.DEFAULT.getTerms(leaf, "binary");
  assertEquals(0, binaryTerms.nextDoc());
  final BytesRef binaryTerm = binaryTerms.binaryValue();
  assertEquals("binary value", binaryTerm.utf8ToString());
  expectThrows(IllegalStateException.class,
      () -> FieldCache.DEFAULT.getTermsIndex(leaf, "binary"));
  expectThrows(IllegalStateException.class,
      () -> FieldCache.DEFAULT.getDocTermOrds(leaf, "binary", null));
  expectThrows(IllegalStateException.class,
      () -> new DocTermOrds(leaf, null, "binary"));
  Bits binaryDocsWithField = FieldCache.DEFAULT.getDocsWithField(leaf, "binary", null);
  assertTrue(binaryDocsWithField.get(0));

  // Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds()
  expectThrows(IllegalStateException.class,
      () -> FieldCache.DEFAULT.getNumerics(leaf, "sorted", FieldCache.INT_POINT_PARSER));
  expectThrows(IllegalStateException.class,
      () -> new DocTermOrds(leaf, null, "sorted"));
  BinaryDocValues sortedTerms = FieldCache.DEFAULT.getTerms(leaf, "sorted");
  assertEquals(0, sortedTerms.nextDoc());
  BytesRef sortedTerm = sortedTerms.binaryValue();
  assertEquals("sorted value", sortedTerm.utf8ToString());
  SortedDocValues sortedIndex = FieldCache.DEFAULT.getTermsIndex(leaf, "sorted");
  assertEquals(0, sortedIndex.nextDoc());
  assertEquals(0, sortedIndex.ordValue());
  assertEquals(1, sortedIndex.getValueCount());
  BytesRef sortedIndexTerm = sortedIndex.binaryValue();
  assertEquals("sorted value", sortedIndexTerm.utf8ToString());
  SortedSetDocValues sortedAsSet = FieldCache.DEFAULT.getDocTermOrds(leaf, "sorted", null);
  assertEquals(0, sortedAsSet.nextDoc());
  assertEquals(0, sortedAsSet.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedAsSet.nextOrd());
  assertEquals(1, sortedAsSet.getValueCount());
  Bits sortedDocsWithField = FieldCache.DEFAULT.getDocsWithField(leaf, "sorted", null);
  assertTrue(sortedDocsWithField.get(0));

  // Numeric type: can be retrieved via getInts() and so on
  NumericDocValues numericValues =
      FieldCache.DEFAULT.getNumerics(leaf, "numeric", FieldCache.INT_POINT_PARSER);
  assertEquals(0, numericValues.nextDoc());
  assertEquals(42, numericValues.longValue());
  expectThrows(IllegalStateException.class,
      () -> FieldCache.DEFAULT.getTerms(leaf, "numeric"));
  expectThrows(IllegalStateException.class,
      () -> FieldCache.DEFAULT.getTermsIndex(leaf, "numeric"));
  expectThrows(IllegalStateException.class,
      () -> FieldCache.DEFAULT.getDocTermOrds(leaf, "numeric", null));
  expectThrows(IllegalStateException.class,
      () -> new DocTermOrds(leaf, null, "numeric"));
  Bits numericDocsWithField = FieldCache.DEFAULT.getDocsWithField(leaf, "numeric", null);
  assertTrue(numericDocsWithField.get(0));

  // SortedSet type: can be retrieved via getDocTermOrds()
  expectThrows(IllegalStateException.class,
      () -> FieldCache.DEFAULT.getNumerics(leaf, "sortedset", FieldCache.INT_POINT_PARSER));
  expectThrows(IllegalStateException.class,
      () -> FieldCache.DEFAULT.getTerms(leaf, "sortedset"));
  expectThrows(IllegalStateException.class,
      () -> FieldCache.DEFAULT.getTermsIndex(leaf, "sortedset"));
  expectThrows(IllegalStateException.class,
      () -> new DocTermOrds(leaf, null, "sortedset"));
  SortedSetDocValues sortedSetOrds = FieldCache.DEFAULT.getDocTermOrds(leaf, "sortedset", null);
  assertEquals(0, sortedSetOrds.nextDoc());
  // Two values were indexed for this field, so two ords precede NO_MORE_ORDS.
  assertEquals(0, sortedSetOrds.nextOrd());
  assertEquals(1, sortedSetOrds.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSetOrds.nextOrd());
  assertEquals(2, sortedSetOrds.getValueCount());
  Bits sortedSetDocsWithField = FieldCache.DEFAULT.getDocsWithField(leaf, "sortedset", null);
  assertTrue(sortedSetDocsWithField.get(0));

  reader.close();
  directory.close();
}
Aggregations