use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.
the class TestUninvertingReader method testSortedSetIntegerManyValues.
/** Tests {@link Type#SORTED_SET_INTEGER} using Integer based fields, with and w/o precision steps */
public void testSortedSetIntegerManyValues() throws IOException {
final Directory dir = newDirectory();
final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
final LegacyFieldType NO_TRIE_TYPE = new LegacyFieldType(LegacyIntField.TYPE_NOT_STORED);
NO_TRIE_TYPE.setNumericPrecisionStep(Integer.MAX_VALUE);
final Map<String, Type> UNINVERT_MAP = new LinkedHashMap<String, Type>();
UNINVERT_MAP.put("notrie_single", Type.SORTED_SET_INTEGER);
UNINVERT_MAP.put("notrie_multi", Type.SORTED_SET_INTEGER);
UNINVERT_MAP.put("trie_single", Type.SORTED_SET_INTEGER);
UNINVERT_MAP.put("trie_multi", Type.SORTED_SET_INTEGER);
final Set<String> MULTI_VALUES = new LinkedHashSet<String>();
MULTI_VALUES.add("trie_multi");
MULTI_VALUES.add("notrie_multi");
final int NUM_DOCS = TestUtil.nextInt(random(), 200, 1500);
final int MIN = TestUtil.nextInt(random(), 10, 100);
final int MAX = MIN + TestUtil.nextInt(random(), 10, 100);
final long EXPECTED_VALSET_SIZE = 1 + MAX - MIN;
{
// (at least) one doc should have every value, so that at least one segment has every value
final Document doc = new Document();
for (int i = MIN; i <= MAX; i++) {
doc.add(new LegacyIntField("trie_multi", i, Field.Store.NO));
doc.add(new LegacyIntField("notrie_multi", i, NO_TRIE_TYPE));
}
iw.addDocument(doc);
}
// now add some more random docs (note: starting at i=1 because of previously added doc)
for (int i = 1; i < NUM_DOCS; i++) {
final Document doc = new Document();
if (0 != TestUtil.nextInt(random(), 0, 9)) {
int val = TestUtil.nextInt(random(), MIN, MAX);
doc.add(new LegacyIntField("trie_single", val, Field.Store.NO));
doc.add(new LegacyIntField("notrie_single", val, NO_TRIE_TYPE));
}
if (0 != TestUtil.nextInt(random(), 0, 9)) {
int numMulti = atLeast(1);
while (0 < numMulti--) {
int val = TestUtil.nextInt(random(), MIN, MAX);
doc.add(new LegacyIntField("trie_multi", val, Field.Store.NO));
doc.add(new LegacyIntField("notrie_multi", val, NO_TRIE_TYPE));
}
}
iw.addDocument(doc);
}
iw.close();
final DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), UNINVERT_MAP);
TestUtil.checkReader(ir);
final int NUM_LEAVES = ir.leaves().size();
// check the leaves: no more then total set size
for (LeafReaderContext rc : ir.leaves()) {
final LeafReader ar = rc.reader();
for (String f : UNINVERT_MAP.keySet()) {
final SortedSetDocValues v = DocValues.getSortedSet(ar, f);
final long valSetSize = v.getValueCount();
assertTrue(f + ": Expected no more then " + EXPECTED_VALSET_SIZE + " values per segment, got " + valSetSize + " from: " + ar.toString(), valSetSize <= EXPECTED_VALSET_SIZE);
if (1 == NUM_LEAVES && MULTI_VALUES.contains(f)) {
// tighter check on multi fields in single segment index since we know one doc has all of them
assertEquals(f + ": Single segment LeafReader's value set should have had exactly expected size", EXPECTED_VALSET_SIZE, valSetSize);
}
}
}
// check the composite of all leaves: exact expectation of set size
final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir);
TestUtil.checkReader(composite);
for (String f : MULTI_VALUES) {
final SortedSetDocValues v = composite.getSortedSetDocValues(f);
final long valSetSize = v.getValueCount();
assertEquals(f + ": Composite reader value set should have had exactly expected size", EXPECTED_VALSET_SIZE, valSetSize);
}
ir.close();
dir.close();
}
use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.
the class TestDocTermOrds method verify.
private void verify(LeafReader r, int[][] idToOrds, BytesRef[] termsArray, BytesRef prefixRef) throws Exception {
final DocTermOrds dto = new DocTermOrds(r, r.getLiveDocs(), "field", prefixRef, Integer.MAX_VALUE, TestUtil.nextInt(random(), 2, 10));
final NumericDocValues docIDToID = FieldCache.DEFAULT.getNumerics(r, "id", FieldCache.LEGACY_INT_PARSER);
if (VERBOSE) {
System.out.println("TEST: verify prefix=" + (prefixRef == null ? "null" : prefixRef.utf8ToString()));
System.out.println("TEST: all TERMS:");
TermsEnum allTE = MultiFields.getTerms(r, "field").iterator();
int ord = 0;
while (allTE.next() != null) {
System.out.println(" ord=" + (ord++) + " term=" + allTE.term().utf8ToString());
}
}
//final TermsEnum te = subR.fields().terms("field").iterator();
final TermsEnum te = dto.getOrdTermsEnum(r);
if (dto.numTerms() == 0) {
if (prefixRef == null) {
assertNull(MultiFields.getTerms(r, "field"));
} else {
Terms terms = MultiFields.getTerms(r, "field");
if (terms != null) {
TermsEnum termsEnum = terms.iterator();
TermsEnum.SeekStatus result = termsEnum.seekCeil(prefixRef);
if (result != TermsEnum.SeekStatus.END) {
assertFalse("term=" + termsEnum.term().utf8ToString() + " matches prefix=" + prefixRef.utf8ToString(), StringHelper.startsWith(termsEnum.term(), prefixRef));
} else {
// ok
}
} else {
// ok
}
}
return;
}
if (VERBOSE) {
System.out.println("TEST: TERMS:");
te.seekExact(0);
while (true) {
System.out.println(" ord=" + te.ord() + " term=" + te.term().utf8ToString());
if (te.next() == null) {
break;
}
}
}
SortedSetDocValues iter = dto.iterator(r);
for (int docID = 0; docID < r.maxDoc(); docID++) {
assertEquals(docID, docIDToID.nextDoc());
if (docID > iter.docID()) {
iter.nextDoc();
}
if (docID < iter.docID()) {
int[] answers = idToOrds[(int) docIDToID.longValue()];
assertEquals(0, answers.length);
continue;
}
if (VERBOSE) {
System.out.println("TEST: docID=" + docID + " of " + r.maxDoc() + " (id=" + docIDToID.longValue() + ")");
}
final int[] answers = idToOrds[(int) docIDToID.longValue()];
int upto = 0;
long ord;
while ((ord = iter.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
te.seekExact(ord);
final BytesRef expected = termsArray[answers[upto++]];
if (VERBOSE) {
System.out.println(" exp=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString());
}
assertEquals("expected=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString() + " ord=" + ord, expected, te.term());
}
assertEquals(answers.length, upto);
}
}
use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.
the class TestDocTermOrds method testNumericEncoded32.
public void testNumericEncoded32() throws IOException {
Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
Document doc = new Document();
doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
iw.addDocument(doc);
doc = new Document();
doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
doc.add(new LegacyIntField("foo", -3, Field.Store.NO));
iw.addDocument(doc);
iw.forceMerge(1);
iw.close();
DirectoryReader ir = DirectoryReader.open(dir);
LeafReader ar = getOnlyLeafReader(ir);
SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", FieldCache.INT32_TERM_PREFIX);
assertEquals(2, v.getValueCount());
assertEquals(0, v.nextDoc());
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
assertEquals(1, v.nextDoc());
assertEquals(0, v.nextOrd());
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
BytesRef value = v.lookupOrd(0);
assertEquals(-3, LegacyNumericUtils.prefixCodedToInt(value));
value = v.lookupOrd(1);
assertEquals(5, LegacyNumericUtils.prefixCodedToInt(value));
ir.close();
dir.close();
}
use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.
the class TestDocTermOrds method testBackToTheFuture.
public void testBackToTheFuture() throws Exception {
Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
Document doc = new Document();
doc.add(newStringField("foo", "bar", Field.Store.NO));
iw.addDocument(doc);
doc = new Document();
doc.add(newStringField("foo", "baz", Field.Store.NO));
// we need a second value for a doc, or we don't actually test DocTermOrds!
doc.add(newStringField("foo", "car", Field.Store.NO));
iw.addDocument(doc);
DirectoryReader r1 = DirectoryReader.open(iw);
iw.deleteDocuments(new Term("foo", "baz"));
DirectoryReader r2 = DirectoryReader.open(iw);
FieldCache.DEFAULT.getDocTermOrds(getOnlyLeafReader(r2), "foo", null);
SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(getOnlyLeafReader(r1), "foo", null);
assertEquals(3, v.getValueCount());
assertEquals(1, v.advance(1));
assertEquals(1, v.nextOrd());
iw.close();
r1.close();
r2.close();
dir.close();
}
use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.
the class TestDocTermOrds method testNumericEncoded64.
public void testNumericEncoded64() throws IOException {
Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
Document doc = new Document();
doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
iw.addDocument(doc);
doc = new Document();
doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
doc.add(new LegacyLongField("foo", -3, Field.Store.NO));
iw.addDocument(doc);
iw.forceMerge(1);
iw.close();
DirectoryReader ir = DirectoryReader.open(dir);
LeafReader ar = getOnlyLeafReader(ir);
SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", FieldCache.INT64_TERM_PREFIX);
assertEquals(2, v.getValueCount());
assertEquals(0, v.nextDoc());
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
assertEquals(1, v.nextDoc());
assertEquals(0, v.nextOrd());
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
BytesRef value = v.lookupOrd(0);
assertEquals(-3, LegacyNumericUtils.prefixCodedToLong(value));
value = v.lookupOrd(1);
assertEquals(5, LegacyNumericUtils.prefixCodedToLong(value));
ir.close();
dir.close();
}
Aggregations