Usage example of org.apache.lucene.index.BinaryDocValues from the elastic/elasticsearch project: class BytesRefFieldComparatorSource, method newComparator.
// Builds a FieldComparator for sorting documents by a BytesRef-valued field.
// Two strategies: an ordinal-based comparator when the field data exposes
// term ordinals (cheaper per-hit comparisons), otherwise a term-value
// comparator that compares raw bytes.
@Override
public FieldComparator<?> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) {
// This comparator source must have been built for the same field.
assert indexFieldData == null || fieldname.equals(indexFieldData.getFieldName());
// NOTE(review): the XOR with `reversed` appears to pre-compensate for the
// surrounding sort infrastructure un-reversing the comparator — confirm.
final boolean sortMissingLast = sortMissingLast(missingValue) ^ reversed;
final BytesRef missingBytes = (BytesRef) missingObject(missingValue, reversed);
if (indexFieldData instanceof IndexOrdinalsFieldData) {
// Ordinal path: documents compare by term ordinal instead of raw bytes.
return new FieldComparator.TermOrdValComparator(numHits, null, sortMissingLast) {
@Override
protected SortedDocValues getSortedDocValues(LeafReaderContext context, String field) throws IOException {
final RandomAccessOrds values = ((IndexOrdinalsFieldData) indexFieldData).load(context).getOrdinalsValues();
final SortedDocValues selectedValues;
if (nested == null) {
// Flat documents: collapse multi-valued ords per the configured sort mode.
selectedValues = sortMode.select(values);
} else {
// Nested documents: pick one value per root doc from its inner (child) docs.
final BitSet rootDocs = nested.rootDocs(context);
final DocIdSetIterator innerDocs = nested.innerDocs(context);
selectedValues = sortMode.select(values, rootDocs, innerDocs);
}
if (sortMissingFirst(missingValue) || sortMissingLast(missingValue)) {
// _first/_last missing handling is done by the comparator itself.
return selectedValues;
} else {
// Custom missing term: substitute it for documents without a value.
return new ReplaceMissing(selectedValues, missingBytes);
}
}
@Override
public void setScorer(Scorer scorer) {
BytesRefFieldComparatorSource.this.setScorer(scorer);
}
};
}
// Bytes path. A fresh placeholder instance stands in for "no value"; it is
// recognized by object identity in isNull() below, so it can never collide
// with a real term.
final BytesRef nullPlaceHolder = new BytesRef();
final BytesRef nonNullMissingBytes = missingBytes == null ? nullPlaceHolder : missingBytes;
return new FieldComparator.TermValComparator(numHits, null, sortMissingLast) {
@Override
protected BinaryDocValues getBinaryDocValues(LeafReaderContext context, String field) throws IOException {
final SortedBinaryDocValues values = getValues(context);
final BinaryDocValues selectedValues;
if (nested == null) {
// Flat documents: missing docs yield nonNullMissingBytes.
selectedValues = sortMode.select(values, nonNullMissingBytes);
} else {
// Nested documents: select per root doc over its children.
final BitSet rootDocs = nested.rootDocs(context);
final DocIdSetIterator innerDocs = nested.innerDocs(context);
selectedValues = sortMode.select(values, nonNullMissingBytes, rootDocs, innerDocs, context.reader().maxDoc());
}
return selectedValues;
}
@Override
protected Bits getDocsWithField(LeafReaderContext context, String field) throws IOException {
// Every doc "has" a value here because missing docs were replaced above;
// true missingness is detected via isNull() instead.
return new Bits.MatchAllBits(context.reader().maxDoc());
}
@Override
protected boolean isNull(int doc, BytesRef term) {
// Identity comparison on purpose: only the placeholder marks a missing doc.
return term == nullPlaceHolder;
}
@Override
public void setScorer(Scorer scorer) {
BytesRefFieldComparatorSource.this.setScorer(scorer);
}
};
}
Usage example of org.apache.lucene.index.BinaryDocValues from the apache/lucene-solr project: class TestFieldCache, method test.
// End-to-end exercise of FieldCache.DEFAULT: numeric views, docs-with-field
// bits, the terms index (plus its TermsEnum), raw terms, and doc term ords,
// including cache-identity checks and missing-field behavior.
public void test() throws IOException {
FieldCache cache = FieldCache.DEFAULT;
// Numeric views: doc i was indexed with MAX_VALUE - i for each type; the
// floating-point fields are compared via their raw bit patterns.
NumericDocValues doubles = cache.getNumerics(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER);
for (int i = 0; i < NUM_DOCS; i++) {
assertEquals(i, doubles.nextDoc());
assertEquals(Double.doubleToLongBits(Double.MAX_VALUE - i), doubles.longValue());
}
NumericDocValues longs = cache.getNumerics(reader, "theLong", FieldCache.LONG_POINT_PARSER);
for (int i = 0; i < NUM_DOCS; i++) {
assertEquals(i, longs.nextDoc());
assertEquals(Long.MAX_VALUE - i, longs.longValue());
}
NumericDocValues ints = cache.getNumerics(reader, "theInt", FieldCache.INT_POINT_PARSER);
for (int i = 0; i < NUM_DOCS; i++) {
assertEquals(i, ints.nextDoc());
assertEquals(Integer.MAX_VALUE - i, ints.longValue());
}
NumericDocValues floats = cache.getNumerics(reader, "theFloat", FieldCache.FLOAT_POINT_PARSER);
for (int i = 0; i < NUM_DOCS; i++) {
assertEquals(i, floats.nextDoc());
assertEquals(Float.floatToIntBits(Float.MAX_VALUE - i), floats.longValue());
}
// getDocsWithField: a repeated lookup must return the cached instance.
// "theLong" is set on every doc; "sparse" only on even-numbered docs.
Bits docsWithField = cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER);
assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER));
assertTrue("docsWithField(theLong) must be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
assertTrue("docsWithField(theLong) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
for (int i = 0; i < docsWithField.length(); i++) {
assertTrue(docsWithField.get(i));
}
docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER));
assertFalse("docsWithField(sparse) must not be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
assertTrue("docsWithField(sparse) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
for (int i = 0; i < docsWithField.length(); i++) {
assertEquals(i % 2 == 0, docsWithField.get(i));
}
// getTermsIndex
// Each doc's sorted value must round-trip to the string it was indexed with
// (unicodeStrings[i] may be null for docs indexed without a value).
SortedDocValues termsIndex = cache.getTermsIndex(reader, "theRandomUnicodeString");
for (int i = 0; i < NUM_DOCS; i++) {
final String s;
if (i > termsIndex.docID()) {
termsIndex.advance(i);
}
if (i == termsIndex.docID()) {
s = termsIndex.binaryValue().utf8ToString();
} else {
s = null;
}
assertTrue("for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i], unicodeStrings[i] == null || unicodeStrings[i].equals(s));
}
// The terms enum must walk the value space in ord order, matching lookupOrd.
int nTerms = termsIndex.getValueCount();
TermsEnum tenum = termsIndex.termsEnum();
for (int i = 0; i < nTerms; i++) {
BytesRef val1 = BytesRef.deepCopyOf(tenum.next());
final BytesRef val = termsIndex.lookupOrd(i);
// System.out.println("i="+i);
assertEquals(val, val1);
}
// seek the enum around (note this isn't a great test here)
int num = atLeast(100);
for (int i = 0; i < num; i++) {
int k = random().nextInt(nTerms);
final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(k));
assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
assertEquals(val, tenum.term());
}
// Sequential seek over every term must find each one exactly.
for (int i = 0; i < nTerms; i++) {
final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(i));
assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
assertEquals(val, tenum.term());
}
// test bad field
termsIndex = cache.getTermsIndex(reader, "bogusfield");
// getTerms
BinaryDocValues terms = cache.getTerms(reader, "theRandomUnicodeString");
for (int i = 0; i < NUM_DOCS; i++) {
if (terms.docID() < i) {
terms.nextDoc();
}
if (terms.docID() == i) {
assertEquals(unicodeStrings[i], terms.binaryValue().utf8ToString());
} else {
// Docs the iterator skipped must be exactly those indexed with null.
assertNull(unicodeStrings[i]);
}
}
// test bad field
terms = cache.getTerms(reader, "bogusfield");
// getDocTermOrds
SortedSetDocValues termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
int numEntries = cache.getCacheEntries().length;
// ask for it again, and check that we didnt create any additional entries:
termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
assertEquals(numEntries, cache.getCacheEntries().length);
for (int i = 0; i < NUM_DOCS; i++) {
// This will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
List<BytesRef> values = new ArrayList<>(new LinkedHashSet<>(Arrays.asList(multiValued[i])));
for (BytesRef v : values) {
if (v == null) {
// why does this test use null values... instead of an empty list: confusing
break;
}
if (i > termOrds.docID()) {
assertEquals(i, termOrds.nextDoc());
}
long ord = termOrds.nextOrd();
// NOTE(review): bare `assert` only fires with -ea (LuceneTestCase enables
// assertions); assertTrue would be the conventional choice in a test.
assert ord != SortedSetDocValues.NO_MORE_ORDS;
BytesRef scratch = termOrds.lookupOrd(ord);
assertEquals(v, scratch);
}
if (i == termOrds.docID()) {
// The loop above must have consumed all of this doc's ords.
assertEquals(SortedSetDocValues.NO_MORE_ORDS, termOrds.nextOrd());
}
}
// test bad field
termOrds = cache.getDocTermOrds(reader, "bogusfield", null);
assertTrue(termOrds.getValueCount() == 0);
// Drop everything cached against this reader's core key.
FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheHelper().getKey());
}
Usage example of org.apache.lucene.index.BinaryDocValues from the apache/lucene-solr project: class TestFieldCache, method testNonexistantFields.
public void testNonexistantFields() throws Exception {
    // Index a single empty document: the reader is non-empty, but none of the
    // requested fields exist, so every cache lookup must come back empty.
    Directory directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
    writer.addDocument(new Document());
    DirectoryReader topReader = writer.getReader();
    writer.close();
    LeafReader leaf = getOnlyLeafReader(topReader);

    final FieldCache fieldCache = FieldCache.DEFAULT;
    fieldCache.purgeAllCaches();
    assertEquals(0, fieldCache.getCacheEntries().length);

    // Numeric views of absent fields are exhausted immediately.
    assertEquals(NO_MORE_DOCS, fieldCache.getNumerics(leaf, "bogusints", FieldCache.INT_POINT_PARSER).nextDoc());
    assertEquals(NO_MORE_DOCS, fieldCache.getNumerics(leaf, "boguslongs", FieldCache.LONG_POINT_PARSER).nextDoc());
    assertEquals(NO_MORE_DOCS, fieldCache.getNumerics(leaf, "bogusfloats", FieldCache.FLOAT_POINT_PARSER).nextDoc());
    assertEquals(NO_MORE_DOCS, fieldCache.getNumerics(leaf, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER).nextDoc());

    // Likewise for every term-based view, and the docs-with-field bits.
    assertEquals(NO_MORE_DOCS, fieldCache.getTerms(leaf, "bogusterms").nextDoc());
    assertEquals(NO_MORE_DOCS, fieldCache.getTermsIndex(leaf, "bogustermsindex").nextDoc());
    assertEquals(NO_MORE_DOCS, fieldCache.getDocTermOrds(leaf, "bogusmultivalued", null).nextDoc());
    assertFalse(fieldCache.getDocsWithField(leaf, "bogusbits", null).get(0));

    // None of the misses above may have populated the cache.
    assertEquals(0, fieldCache.getCacheEntries().length);
    topReader.close();
    directory.close();
}
Usage example of org.apache.lucene.index.BinaryDocValues from the apache/lucene-solr project: class TestFieldCache, method testNonIndexedFields.
public void testNonIndexedFields() throws Exception {
    // Index one document whose fields are stored-only: stored fields are
    // neither indexed nor doc-valued, so every cache lookup must come back
    // empty, exactly as for fields that do not exist at all.
    Directory directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
    Document doc = new Document();
    for (String fieldName : new String[] {
        "bogusbytes", "bogusshorts", "bogusints", "boguslongs", "bogusfloats",
        "bogusdoubles", "bogusterms", "bogustermsindex", "bogusmultivalued", "bogusbits"}) {
      doc.add(new StoredField(fieldName, "bogus"));
    }
    writer.addDocument(doc);
    DirectoryReader topReader = writer.getReader();
    writer.close();
    LeafReader leaf = getOnlyLeafReader(topReader);

    final FieldCache fieldCache = FieldCache.DEFAULT;
    fieldCache.purgeAllCaches();
    assertEquals(0, fieldCache.getCacheEntries().length);

    // Numeric views of stored-only fields are exhausted immediately.
    assertEquals(NO_MORE_DOCS, fieldCache.getNumerics(leaf, "bogusints", FieldCache.INT_POINT_PARSER).nextDoc());
    assertEquals(NO_MORE_DOCS, fieldCache.getNumerics(leaf, "boguslongs", FieldCache.LONG_POINT_PARSER).nextDoc());
    assertEquals(NO_MORE_DOCS, fieldCache.getNumerics(leaf, "bogusfloats", FieldCache.FLOAT_POINT_PARSER).nextDoc());
    assertEquals(NO_MORE_DOCS, fieldCache.getNumerics(leaf, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER).nextDoc());

    // Likewise for every term-based view, and the docs-with-field bits.
    assertEquals(NO_MORE_DOCS, fieldCache.getTerms(leaf, "bogusterms").nextDoc());
    assertEquals(NO_MORE_DOCS, fieldCache.getTermsIndex(leaf, "bogustermsindex").nextDoc());
    assertEquals(NO_MORE_DOCS, fieldCache.getDocTermOrds(leaf, "bogusmultivalued", null).nextDoc());
    assertFalse(fieldCache.getDocsWithField(leaf, "bogusbits", null).get(0));

    // None of the misses above may have populated the cache.
    assertEquals(0, fieldCache.getCacheEntries().length);
    topReader.close();
    directory.close();
}
Usage example of org.apache.lucene.index.BinaryDocValues from the apache/lucene-solr project: class SimpleTextDocValuesWriter, method doAddBinaryField.
// Writes the BINARY doc values for one field in the SimpleText (plain-text,
// debugging) format: a header carrying the maximum value length and a
// fixed-width length pattern, followed by one fixed-width record per document.
private void doAddBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
// First pass over the values: find the longest one so every record can be
// padded to a fixed width.
int maxLength = 0;
BinaryDocValues values = valuesProducer.getBinary(field);
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
maxLength = Math.max(maxLength, values.binaryValue().length);
}
writeFieldEntry(field, DocValuesType.BINARY);
// write maxLength
SimpleTextUtil.write(data, MAXLENGTH);
SimpleTextUtil.write(data, Integer.toString(maxLength), scratch);
SimpleTextUtil.writeNewline(data);
// Build an all-zeros DecimalFormat pattern ("00...0") as wide as maxLength's
// decimal representation, so every encoded length has the same width.
int maxBytesLength = Long.toString(maxLength).length();
StringBuilder sb = new StringBuilder();
for (int i = 0; i < maxBytesLength; i++) {
sb.append('0');
}
// write our pattern for encoding lengths
SimpleTextUtil.write(data, PATTERN);
SimpleTextUtil.write(data, sb.toString(), scratch);
SimpleTextUtil.writeNewline(data);
// Locale.ROOT keeps digit formatting platform-independent.
final DecimalFormat encoder = new DecimalFormat(sb.toString(), new DecimalFormatSymbols(Locale.ROOT));
// Second pass: the first iterator is exhausted, so pull a fresh one.
values = valuesProducer.getBinary(field);
int numDocsWritten = 0;
for (int i = 0; i < numDocs; ++i) {
// The iterator only visits docs that have a value; advance it lazily so
// values.docID() can be compared against the dense doc counter i.
if (values.docID() < i) {
values.nextDoc();
assert values.docID() >= i;
}
// write length
// Docs without a value are recorded as length 0.
final int length = values.docID() != i ? 0 : values.binaryValue().length;
SimpleTextUtil.write(data, LENGTH);
SimpleTextUtil.write(data, encoder.format(length), scratch);
SimpleTextUtil.writeNewline(data);
// because it escapes:
if (values.docID() == i) {
BytesRef value = values.binaryValue();
data.writeBytes(value.bytes, value.offset, value.length);
}
// pad to fit
// Space-pad so every record occupies exactly maxLength bytes.
for (int j = length; j < maxLength; j++) {
data.writeByte((byte) ' ');
}
SimpleTextUtil.writeNewline(data);
// Presence flag: "T" when the doc had a value, "F" when it was padded empty.
if (values.docID() != i) {
SimpleTextUtil.write(data, "F", scratch);
} else {
SimpleTextUtil.write(data, "T", scratch);
}
SimpleTextUtil.writeNewline(data);
numDocsWritten++;
}
assert numDocs == numDocsWritten;
}
Aggregations