Use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.
From class TestDocTermOrds, method testActuallySingleValued:
public void testActuallySingleValued() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwconfig = newIndexWriterConfig(null);
  iwconfig.setMergePolicy(newLogMergePolicy());
  IndexWriter iw = new IndexWriter(dir, iwconfig);

  Document doc = new Document();
  doc.add(new StringField("foo", "bar", Field.Store.NO));
  iw.addDocument(doc);

  doc = new Document();
  doc.add(new StringField("foo", "baz", Field.Store.NO));
  iw.addDocument(doc);

  // doc 2 has no value for "foo"
  doc = new Document();
  iw.addDocument(doc);

  // doc 3 repeats the same value twice; uninversion yields a single ord
  doc = new Document();
  doc.add(new StringField("foo", "baz", Field.Store.NO));
  doc.add(new StringField("foo", "baz", Field.Store.NO));
  iw.addDocument(doc);

  iw.forceMerge(1);
  iw.close();

  DirectoryReader ir = DirectoryReader.open(dir);
  LeafReader ar = getOnlyLeafReader(ir);
  SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", null);

  // actually a single-valued field
  assertNotNull(DocValues.unwrapSingleton(v));
  assertEquals(2, v.getValueCount());

  assertEquals(0, v.nextDoc());
  assertEquals(0, v.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

  assertEquals(1, v.nextDoc());
  assertEquals(1, v.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

  // doc 2 is skipped because it has no value
  assertEquals(3, v.nextDoc());
  assertEquals(1, v.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

  BytesRef value = v.lookupOrd(0);
  assertEquals("bar", value.utf8ToString());
  value = v.lookupOrd(1);
  assertEquals("baz", value.utf8ToString());

  ir.close();
  dir.close();
}
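For reference, the assertions above exercise the iterator-style docvalues API: nextDoc() advances to the next document that has a value (doc 2 is skipped), and nextOrd() drains that document's ords until NO_MORE_ORDS. Below is a minimal sketch of draining an arbitrary SortedSetDocValues the same way; the helper name dumpSortedSet and the use of DocValues.getSortedSet (which reads stored docvalues rather than uninverting an indexed field as the test does) are illustrative assumptions, not part of the test.

import java.io.IOException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;

// Illustrative sketch: print every (doc, term) pair of a sorted-set field.
static void dumpSortedSet(LeafReader leaf, String field) throws IOException {
  SortedSetDocValues dv = DocValues.getSortedSet(leaf, field);
  for (int doc = dv.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = dv.nextDoc()) {
    for (long ord = dv.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = dv.nextOrd()) {
      BytesRef term = dv.lookupOrd(ord); // resolve the ord back to the term bytes
      System.out.println(doc + " -> " + term.utf8ToString());
    }
  }
}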
Use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.
From class TestFieldCache, method test:
public void test() throws IOException {
  FieldCache cache = FieldCache.DEFAULT;

  NumericDocValues doubles = cache.getNumerics(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER);
  for (int i = 0; i < NUM_DOCS; i++) {
    assertEquals(i, doubles.nextDoc());
    assertEquals(Double.doubleToLongBits(Double.MAX_VALUE - i), doubles.longValue());
  }

  NumericDocValues longs = cache.getNumerics(reader, "theLong", FieldCache.LONG_POINT_PARSER);
  for (int i = 0; i < NUM_DOCS; i++) {
    assertEquals(i, longs.nextDoc());
    assertEquals(Long.MAX_VALUE - i, longs.longValue());
  }

  NumericDocValues ints = cache.getNumerics(reader, "theInt", FieldCache.INT_POINT_PARSER);
  for (int i = 0; i < NUM_DOCS; i++) {
    assertEquals(i, ints.nextDoc());
    assertEquals(Integer.MAX_VALUE - i, ints.longValue());
  }

  NumericDocValues floats = cache.getNumerics(reader, "theFloat", FieldCache.FLOAT_POINT_PARSER);
  for (int i = 0; i < NUM_DOCS; i++) {
    assertEquals(i, floats.nextDoc());
    assertEquals(Float.floatToIntBits(Float.MAX_VALUE - i), floats.longValue());
  }

  Bits docsWithField = cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER);
  assertSame("Second request to cache returns same array", docsWithField, cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER));
  assertTrue("docsWithField(theLong) must be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
  assertTrue("docsWithField(theLong) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
  for (int i = 0; i < docsWithField.length(); i++) {
    assertTrue(docsWithField.get(i));
  }

  docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
  assertSame("Second request to cache returns same array", docsWithField, cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER));
  assertFalse("docsWithField(sparse) must not be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
  assertTrue("docsWithField(sparse) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
  for (int i = 0; i < docsWithField.length(); i++) {
    assertEquals(i % 2 == 0, docsWithField.get(i));
  }

  // getTermsIndex
  SortedDocValues termsIndex = cache.getTermsIndex(reader, "theRandomUnicodeString");
  for (int i = 0; i < NUM_DOCS; i++) {
    final String s;
    if (i > termsIndex.docID()) {
      termsIndex.advance(i);
    }
    if (i == termsIndex.docID()) {
      s = termsIndex.binaryValue().utf8ToString();
    } else {
      s = null;
    }
    assertTrue("for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i], unicodeStrings[i] == null || unicodeStrings[i].equals(s));
  }

  int nTerms = termsIndex.getValueCount();
  TermsEnum tenum = termsIndex.termsEnum();
  for (int i = 0; i < nTerms; i++) {
    BytesRef val1 = BytesRef.deepCopyOf(tenum.next());
    final BytesRef val = termsIndex.lookupOrd(i);
    assertEquals(val, val1);
  }

  // seek the enum around (note this isn't a great test here)
  int num = atLeast(100);
  for (int i = 0; i < num; i++) {
    int k = random().nextInt(nTerms);
    final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(k));
    assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
    assertEquals(val, tenum.term());
  }

  for (int i = 0; i < nTerms; i++) {
    final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(i));
    assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
    assertEquals(val, tenum.term());
  }

  // test bad field
  termsIndex = cache.getTermsIndex(reader, "bogusfield");

  // getTerms
  BinaryDocValues terms = cache.getTerms(reader, "theRandomUnicodeString");
  for (int i = 0; i < NUM_DOCS; i++) {
    if (terms.docID() < i) {
      terms.nextDoc();
    }
    if (terms.docID() == i) {
      assertEquals(unicodeStrings[i], terms.binaryValue().utf8ToString());
    } else {
      assertNull(unicodeStrings[i]);
    }
  }

  // test bad field
  terms = cache.getTerms(reader, "bogusfield");

  // getDocTermOrds
  SortedSetDocValues termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
  int numEntries = cache.getCacheEntries().length;
  // ask for it again, and check that we didn't create any additional entries:
  termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
  assertEquals(numEntries, cache.getCacheEntries().length);

  for (int i = 0; i < NUM_DOCS; i++) {
    // This will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
    List<BytesRef> values = new ArrayList<>(new LinkedHashSet<>(Arrays.asList(multiValued[i])));
    for (BytesRef v : values) {
      if (v == null) {
        // why does this test use null values... instead of an empty list: confusing
        break;
      }
      if (i > termOrds.docID()) {
        assertEquals(i, termOrds.nextDoc());
      }
      long ord = termOrds.nextOrd();
      assert ord != SortedSetDocValues.NO_MORE_ORDS;
      BytesRef scratch = termOrds.lookupOrd(ord);
      assertEquals(v, scratch);
    }
    if (i == termOrds.docID()) {
      assertEquals(SortedSetDocValues.NO_MORE_ORDS, termOrds.nextOrd());
    }
  }

  // test bad field
  termOrds = cache.getDocTermOrds(reader, "bogusfield", null);
  assertTrue(termOrds.getValueCount() == 0);

  FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheHelper().getKey());
}
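The parsers passed to getNumerics (FieldCache.LONG_POINT_PARSER and friends) tell the cache how to decode point-indexed values during uninversion. As a hedged sketch of the same API outside a test harness (the method name sumLongs and the field name "price" are placeholders, not from the test):

import java.io.IOException;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.uninverting.FieldCache;

// Illustrative sketch: sum a long point field via the uninverting FieldCache.
static long sumLongs(LeafReader leaf) throws IOException {
  NumericDocValues longs = FieldCache.DEFAULT.getNumerics(leaf, "price", FieldCache.LONG_POINT_PARSER);
  long sum = 0;
  for (int doc = longs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = longs.nextDoc()) {
    sum += longs.longValue(); // value for the current doc
  }
  return sum;
}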
Use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.
From class TestFieldCache, method testNonexistantFields:
public void testNonexistantFields() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  iw.addDocument(doc);
  DirectoryReader ir = iw.getReader();
  iw.close();

  LeafReader ar = getOnlyLeafReader(ir);
  final FieldCache cache = FieldCache.DEFAULT;
  cache.purgeAllCaches();
  assertEquals(0, cache.getCacheEntries().length);

  NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.INT_POINT_PARSER);
  assertEquals(NO_MORE_DOCS, ints.nextDoc());

  NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LONG_POINT_PARSER);
  assertEquals(NO_MORE_DOCS, longs.nextDoc());

  NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.FLOAT_POINT_PARSER);
  assertEquals(NO_MORE_DOCS, floats.nextDoc());

  NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER);
  assertEquals(NO_MORE_DOCS, doubles.nextDoc());

  BinaryDocValues binaries = cache.getTerms(ar, "bogusterms");
  assertEquals(NO_MORE_DOCS, binaries.nextDoc());

  SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
  assertEquals(NO_MORE_DOCS, sorted.nextDoc());

  SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued", null);
  assertEquals(NO_MORE_DOCS, sortedSet.nextDoc());

  Bits bits = cache.getDocsWithField(ar, "bogusbits", null);
  assertFalse(bits.get(0));

  // check that we cached nothing
  assertEquals(0, cache.getCacheEntries().length);
  ir.close();
  dir.close();
}
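The takeaway is that FieldCache never returns null for a missing field; it hands back an empty iterator and records no cache entry. A caller can therefore probe defensively, as in this short sketch (the field name is a placeholder and ar is the leaf reader from above):

// Sketch: a nonexistent field yields an empty (never null) iterator.
NumericDocValues dv = FieldCache.DEFAULT.getNumerics(ar, "noSuchField", FieldCache.INT_POINT_PARSER);
if (dv.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
  // no document has a value, and no cache entry was created for the field
}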
Use of org.apache.lucene.index.SortedSetDocValues in project lucene-solr by apache.
From class TestFieldCache, method testNonIndexedFields:
public void testNonIndexedFields() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  doc.add(new StoredField("bogusbytes", "bogus"));
  doc.add(new StoredField("bogusshorts", "bogus"));
  doc.add(new StoredField("bogusints", "bogus"));
  doc.add(new StoredField("boguslongs", "bogus"));
  doc.add(new StoredField("bogusfloats", "bogus"));
  doc.add(new StoredField("bogusdoubles", "bogus"));
  doc.add(new StoredField("bogusterms", "bogus"));
  doc.add(new StoredField("bogustermsindex", "bogus"));
  doc.add(new StoredField("bogusmultivalued", "bogus"));
  doc.add(new StoredField("bogusbits", "bogus"));
  iw.addDocument(doc);
  DirectoryReader ir = iw.getReader();
  iw.close();

  LeafReader ar = getOnlyLeafReader(ir);
  final FieldCache cache = FieldCache.DEFAULT;
  cache.purgeAllCaches();
  assertEquals(0, cache.getCacheEntries().length);

  NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.INT_POINT_PARSER);
  assertEquals(NO_MORE_DOCS, ints.nextDoc());

  NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LONG_POINT_PARSER);
  assertEquals(NO_MORE_DOCS, longs.nextDoc());

  NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.FLOAT_POINT_PARSER);
  assertEquals(NO_MORE_DOCS, floats.nextDoc());

  NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER);
  assertEquals(NO_MORE_DOCS, doubles.nextDoc());

  BinaryDocValues binaries = cache.getTerms(ar, "bogusterms");
  assertEquals(NO_MORE_DOCS, binaries.nextDoc());

  SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
  assertEquals(NO_MORE_DOCS, sorted.nextDoc());

  SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued", null);
  assertEquals(NO_MORE_DOCS, sortedSet.nextDoc());

  Bits bits = cache.getDocsWithField(ar, "bogusbits", null);
  assertFalse(bits.get(0));

  // check that we cached nothing
  assertEquals(0, cache.getCacheEntries().length);
  ir.close();
  dir.close();
}
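The stored-only fields above carry no inverted index and no points, so FieldCache has nothing to uninvert and treats them exactly like missing fields. To make such a value visible to the cache, one would index it as a point alongside the stored copy, roughly like this (field name and value are placeholders):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.StoredField;

// Sketch: index the value as a point (uninvertible) and store it separately.
Document doc = new Document();
doc.add(new LongPoint("boguslongs", 42L));   // indexed: FieldCache.LONG_POINT_PARSER can uninvert this
doc.add(new StoredField("boguslongs", 42L)); // stored only: invisible to FieldCache on its own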
Use of org.apache.lucene.index.SortedSetDocValues in project jackrabbit-oak by apache.
From class FilteredSortedSetDocValuesFacetCounts, method filterFacet:
private LabelAndValue[] filterFacet(int docId, String dimension, LabelAndValue[] labelAndValues) throws IOException {
  boolean filtered = false;
  Map<String, Long> newValues = new HashMap<String, Long>();
  Document document = reader.document(docId);
  SortedSetDocValues docValues = state.getDocValues();
  docValues.setDocument(docId);

  // filter using doc values (avoiding requiring stored values)
  if (!filter.isAccessible(document.getField(FieldNames.PATH).stringValue() + "/" + dimension)) {
    filtered = true;
    for (LabelAndValue lv : labelAndValues) {
      long existingCount = lv.value.longValue();
      BytesRef key = new BytesRef(FacetsConfig.pathToString(dimension, new String[] { lv.label }));
      long l = docValues.lookupTerm(key);
      if (l >= 0) {
        if (existingCount > 0) {
          // decrement the count to hide the inaccessible document
          newValues.put(lv.label, existingCount - 1);
        } else {
          if (newValues.containsKey(lv.label)) {
            newValues.remove(lv.label);
          }
        }
      }
    }
  }

  LabelAndValue[] filteredLVs;
  if (filtered) {
    filteredLVs = new LabelAndValue[newValues.size()];
    int i = 0;
    for (Map.Entry<String, Long> entry : newValues.entrySet()) {
      filteredLVs[i] = new LabelAndValue(entry.getKey(), entry.getValue());
      i++;
    }
  } else {
    // document was accessible: return the original counts untouched
    filteredLVs = labelAndValues;
  }
  return filteredLVs;
}
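Note that lookupTerm is a binary search over the segment's term dictionary: it returns the term's ordinal when the encoded facet value exists in the segment, or -insertionPoint-1 when it does not, which is what the l >= 0 test above checks. A sketch of the lookup in isolation, using the same pre-Lucene-7 random-access API as the method above (the dimension "tags" and label "lucene" are placeholders):

// Sketch: look up a FacetsConfig-encoded dimension/label in the term dictionary.
BytesRef key = new BytesRef(FacetsConfig.pathToString("tags", new String[] { "lucene" }));
long ord = docValues.lookupTerm(key);
if (ord >= 0) {
  BytesRef sameBytes = docValues.lookupOrd(ord); // the ord round-trips to the term
} else {
  long insertionPoint = -ord - 1; // where the key would sort among existing terms
}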