Example usage of org.apache.lucene.index.SortedDocValues from the Elasticsearch project (by Elastic):
class BytesRefFieldComparatorSource, method newComparator.
@Override
public FieldComparator<?> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) {
assert indexFieldData == null || fieldname.equals(indexFieldData.getFieldName());
// XOR with `reversed` so that the missing-value position refers to the final sort order,
// not the pre-reversal order the Lucene comparator works in internally.
final boolean sortMissingLast = sortMissingLast(missingValue) ^ reversed;
final BytesRef missingBytes = (BytesRef) missingObject(missingValue, reversed);
// Fast path: ordinal-backed fields can be compared by ordinal instead of by term bytes.
if (indexFieldData instanceof IndexOrdinalsFieldData) {
return new FieldComparator.TermOrdValComparator(numHits, null, sortMissingLast) {
@Override
protected SortedDocValues getSortedDocValues(LeafReaderContext context, String field) throws IOException {
final RandomAccessOrds values = ((IndexOrdinalsFieldData) indexFieldData).load(context).getOrdinalsValues();
final SortedDocValues selectedValues;
if (nested == null) {
// Collapse multi-valued ordinals to a single ordinal per doc according to the sort mode.
selectedValues = sortMode.select(values);
} else {
// For nested documents, pick one ordinal per root doc from its inner (child) docs.
final BitSet rootDocs = nested.rootDocs(context);
final DocIdSetIterator innerDocs = nested.innerDocs(context);
selectedValues = sortMode.select(values, rootDocs, innerDocs);
}
if (sortMissingFirst(missingValue) || sortMissingLast(missingValue)) {
// _first/_last missing handling is done by the comparator itself.
return selectedValues;
} else {
// Custom missing value: substitute it for docs that have no value.
return new ReplaceMissing(selectedValues, missingBytes);
}
}
@Override
public void setScorer(Scorer scorer) {
BytesRefFieldComparatorSource.this.setScorer(scorer);
}
};
}
// Slow path: compare raw term bytes. A fresh sentinel instance marks missing docs so
// isNull() below can detect them by identity (==), never by content.
final BytesRef nullPlaceHolder = new BytesRef();
final BytesRef nonNullMissingBytes = missingBytes == null ? nullPlaceHolder : missingBytes;
return new FieldComparator.TermValComparator(numHits, null, sortMissingLast) {
@Override
protected BinaryDocValues getBinaryDocValues(LeafReaderContext context, String field) throws IOException {
final SortedBinaryDocValues values = getValues(context);
final BinaryDocValues selectedValues;
if (nested == null) {
selectedValues = sortMode.select(values, nonNullMissingBytes);
} else {
final BitSet rootDocs = nested.rootDocs(context);
final DocIdSetIterator innerDocs = nested.innerDocs(context);
selectedValues = sortMode.select(values, nonNullMissingBytes, rootDocs, innerDocs, context.reader().maxDoc());
}
return selectedValues;
}
@Override
protected Bits getDocsWithField(LeafReaderContext context, String field) throws IOException {
// Every doc reports a value because missing docs were filled with the placeholder above.
return new Bits.MatchAllBits(context.reader().maxDoc());
}
@Override
protected boolean isNull(int doc, BytesRef term) {
// Identity comparison: only the sentinel instance itself means "missing".
return term == nullPlaceHolder;
}
@Override
public void setScorer(Scorer scorer) {
BytesRefFieldComparatorSource.this.setScorer(scorer);
}
};
}
Example usage of org.apache.lucene.index.SortedDocValues from the Elasticsearch project (by Elastic):
class AbstractAtomicParentChildFieldData, method getBytesValues.
@Override
public final SortedBinaryDocValues getBytesValues() {
    return new SortedBinaryDocValues() {
        // Scratch space for the current doc: at most a parent id and a child id.
        private final BytesRef[] ids = new BytesRef[2];
        private int idCount;

        @Override
        public void setDocument(int docId) {
            idCount = 0;
            // Gather the id (if any) that this doc carries under each type.
            for (String type : types()) {
                final SortedDocValues ordinals = getOrdinalsValues(type);
                final int ord = ordinals.getOrd(docId);
                if (ord >= 0) {
                    ids[idCount++] = ordinals.lookupOrd(ord);
                }
            }
            assert idCount <= 2 : "A single doc can potentially be both parent and child, so the maximum allowed values is 2";
            if (idCount == 2) {
                // Keep the two ids in sorted order, and collapse duplicates: the same id can
                // appear under both types (e.g. parent#1 in _uid and grand_parent#1 in _parent).
                final int cmp = ids[0].compareTo(ids[1]);
                if (cmp > 0) {
                    ArrayUtil.swap(ids, 0, 1);
                } else if (cmp == 0) {
                    idCount = 1;
                }
            }
        }

        @Override
        public int count() {
            return idCount;
        }

        @Override
        public BytesRef valueAt(int index) {
            return ids[index];
        }
    };
}
Example usage of org.apache.lucene.index.SortedDocValues from the Elasticsearch project (by Elastic):
class MultiValueModeTests, method testSingleValuedOrds.
public void testSingleValuedOrds() throws Exception {
    final int numDocs = scaledRandomIntBetween(1, 100);
    // Per-doc ordinal; -1 marks a doc that has no value.
    final int[] ords = new int[numDocs];
    for (int doc = 0; doc < ords.length; ++doc) {
        ords[doc] = randomBoolean() ? randomInt(1000) : -1;
    }
    final SortedDocValues singleValues = new SortedDocValues() {
        @Override
        public int getOrd(int docID) {
            return ords[docID];
        }

        @Override
        public BytesRef lookupOrd(int ord) {
            // Ordinal-to-term lookup is never exercised by this test.
            throw new UnsupportedOperationException();
        }

        @Override
        public int getValueCount() {
            return 1 << 20;
        }
    };
    // Wrap the single-valued view as multi-valued and verify selection both with and
    // without a nested (root/inner) document structure.
    final RandomAccessOrds multiValues = (RandomAccessOrds) DocValues.singleton(singleValues);
    verify(multiValues, numDocs);
    final FixedBitSet rootDocs = randomRootDocs(numDocs);
    final FixedBitSet innerDocs = randomInnerDocs(rootDocs);
    verify(multiValues, numDocs, rootDocs, innerDocs);
}
Example usage of org.apache.lucene.index.SortedDocValues from the Elasticsearch project (by Elastic):
class MultiValueModeTests, method verify.
// Checks MIN/MAX ordinal selection over nested docs: for each root doc, the ordinal the
// mode selected must equal the min/max ordinal across all of that root's inner docs
// (or -1 when none of the inner docs has a value).
private void verify(RandomAccessOrds values, int maxDoc, FixedBitSet rootDocs, FixedBitSet innerDocs) throws IOException {
for (MultiValueMode mode : new MultiValueMode[] { MultiValueMode.MIN, MultiValueMode.MAX }) {
final SortedDocValues selected = mode.select(values, rootDocs, new BitSetIterator(innerDocs, 0L));
int prevRoot = -1;
// Walk root docs in order; a root's inner docs are exactly the inner docs strictly
// between the previous root and this root.
for (int root = rootDocs.nextSetBit(0); root != -1; root = root + 1 < maxDoc ? rootDocs.nextSetBit(root + 1) : -1) {
final int actual = selected.getOrd(root);
int expected = -1;
for (int child = innerDocs.nextSetBit(prevRoot + 1); child != -1 && child < root; child = innerDocs.nextSetBit(child + 1)) {
values.setDocument(child);
// Fold every ordinal of this child into the running min/max.
for (int j = 0; j < values.cardinality(); ++j) {
if (expected == -1) {
expected = (int) values.ordAt(j);
} else {
if (mode == MultiValueMode.MIN) {
expected = Math.min(expected, (int) values.ordAt(j));
} else if (mode == MultiValueMode.MAX) {
expected = Math.max(expected, (int) values.ordAt(j));
}
}
}
}
assertEquals(mode.toString() + " docId=" + root, expected, actual);
prevRoot = root;
}
}
}
Example usage of org.apache.lucene.index.SortedDocValues from the Lucene-Solr project (by Apache):
class TestFieldCache, method test.
// Smoke test for the FieldCache entry points: numeric caches, docs-with-field bits,
// the terms index, binary terms, and doc term ords — including cache-hit identity
// and graceful behavior for fields that do not exist.
public void test() throws IOException {
FieldCache cache = FieldCache.DEFAULT;
// Numeric caches: each assertion pair advances the iterator to doc i and checks the
// value the fixture indexed there (MAX_VALUE - i for each numeric type).
NumericDocValues doubles = cache.getNumerics(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER);
for (int i = 0; i < NUM_DOCS; i++) {
assertEquals(i, doubles.nextDoc());
assertEquals(Double.doubleToLongBits(Double.MAX_VALUE - i), doubles.longValue());
}
NumericDocValues longs = cache.getNumerics(reader, "theLong", FieldCache.LONG_POINT_PARSER);
for (int i = 0; i < NUM_DOCS; i++) {
assertEquals(i, longs.nextDoc());
assertEquals(Long.MAX_VALUE - i, longs.longValue());
}
NumericDocValues ints = cache.getNumerics(reader, "theInt", FieldCache.INT_POINT_PARSER);
for (int i = 0; i < NUM_DOCS; i++) {
assertEquals(i, ints.nextDoc());
assertEquals(Integer.MAX_VALUE - i, ints.longValue());
}
NumericDocValues floats = cache.getNumerics(reader, "theFloat", FieldCache.FLOAT_POINT_PARSER);
for (int i = 0; i < NUM_DOCS; i++) {
assertEquals(i, floats.nextDoc());
assertEquals(Float.floatToIntBits(Float.MAX_VALUE - i), floats.longValue());
}
// getDocsWithField for "theLong": a repeated request must return the same cached
// instance, and (every doc having a value) the bits must all be set.
Bits docsWithField = cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER);
assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER));
assertTrue("docsWithField(theLong) must be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
assertTrue("docsWithField(theLong) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
for (int i = 0; i < docsWithField.length(); i++) {
assertTrue(docsWithField.get(i));
}
// "sparse" only has values on even docs (per the assertions below), so the bits must
// not be the MatchAllBits shortcut.
docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER));
assertFalse("docsWithField(sparse) must not be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
assertTrue("docsWithField(sparse) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
for (int i = 0; i < docsWithField.length(); i++) {
assertEquals(i % 2 == 0, docsWithField.get(i));
}
// getTermsIndex
SortedDocValues termsIndex = cache.getTermsIndex(reader, "theRandomUnicodeString");
for (int i = 0; i < NUM_DOCS; i++) {
final String s;
// Advance only when behind doc i; docs the iterator skips have no value (s == null).
if (i > termsIndex.docID()) {
termsIndex.advance(i);
}
if (i == termsIndex.docID()) {
s = termsIndex.binaryValue().utf8ToString();
} else {
s = null;
}
assertTrue("for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i], unicodeStrings[i] == null || unicodeStrings[i].equals(s));
}
// The terms enum must walk the terms in the same order as ordinal lookup.
int nTerms = termsIndex.getValueCount();
TermsEnum tenum = termsIndex.termsEnum();
for (int i = 0; i < nTerms; i++) {
BytesRef val1 = BytesRef.deepCopyOf(tenum.next());
final BytesRef val = termsIndex.lookupOrd(i);
// System.out.println("i="+i);
assertEquals(val, val1);
}
// seek the enum around (note this isn't a great test here)
int num = atLeast(100);
for (int i = 0; i < num; i++) {
int k = random().nextInt(nTerms);
final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(k));
assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
assertEquals(val, tenum.term());
}
// Every indexed term must also be seekable when visited in ordinal order.
for (int i = 0; i < nTerms; i++) {
final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(i));
assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
assertEquals(val, tenum.term());
}
// test bad field
termsIndex = cache.getTermsIndex(reader, "bogusfield");
// getTerms
BinaryDocValues terms = cache.getTerms(reader, "theRandomUnicodeString");
for (int i = 0; i < NUM_DOCS; i++) {
if (terms.docID() < i) {
terms.nextDoc();
}
if (terms.docID() == i) {
assertEquals(unicodeStrings[i], terms.binaryValue().utf8ToString());
} else {
// A doc the iterator skipped must have been indexed with a null string.
assertNull(unicodeStrings[i]);
}
}
// test bad field
terms = cache.getTerms(reader, "bogusfield");
// getDocTermOrds
SortedSetDocValues termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
int numEntries = cache.getCacheEntries().length;
// ask for it again, and check that we didnt create any additional entries:
termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
assertEquals(numEntries, cache.getCacheEntries().length);
for (int i = 0; i < NUM_DOCS; i++) {
// This will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
List<BytesRef> values = new ArrayList<>(new LinkedHashSet<>(Arrays.asList(multiValued[i])));
for (BytesRef v : values) {
if (v == null) {
// why does this test use null values... instead of an empty list: confusing
break;
}
if (i > termOrds.docID()) {
assertEquals(i, termOrds.nextDoc());
}
// NOTE(review): bare `assert` only fires when assertions are enabled (-ea) —
// confirm the test runner enables them, or this check is silently skipped.
long ord = termOrds.nextOrd();
assert ord != SortedSetDocValues.NO_MORE_ORDS;
BytesRef scratch = termOrds.lookupOrd(ord);
assertEquals(v, scratch);
}
if (i == termOrds.docID()) {
// All ords for this doc must have been consumed by the loop above.
assertEquals(SortedSetDocValues.NO_MORE_ORDS, termOrds.nextOrd());
}
}
// test bad field
termOrds = cache.getDocTermOrds(reader, "bogusfield", null);
assertTrue(termOrds.getValueCount() == 0);
// Evict everything cached for this reader so later tests start cold.
FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheHelper().getKey());
}
Aggregations