Use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.
From the class TestFieldCache, method testNonexistantFields:
public void testNonexistantFields() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  iw.addDocument(doc);
  DirectoryReader ir = iw.getReader();
  iw.close();
  LeafReader ar = getOnlyLeafReader(ir);
  final FieldCache cache = FieldCache.DEFAULT;
  cache.purgeAllCaches();
  assertEquals(0, cache.getCacheEntries().length);

  NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.INT_POINT_PARSER);
  assertEquals(NO_MORE_DOCS, ints.nextDoc());

  NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LONG_POINT_PARSER);
  assertEquals(NO_MORE_DOCS, longs.nextDoc());

  NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.FLOAT_POINT_PARSER);
  assertEquals(NO_MORE_DOCS, floats.nextDoc());

  NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER);
  assertEquals(NO_MORE_DOCS, doubles.nextDoc());

  BinaryDocValues binaries = cache.getTerms(ar, "bogusterms");
  assertEquals(NO_MORE_DOCS, binaries.nextDoc());

  SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
  assertEquals(NO_MORE_DOCS, sorted.nextDoc());

  SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued", null);
  assertEquals(NO_MORE_DOCS, sortedSet.nextDoc());

  Bits bits = cache.getDocsWithField(ar, "bogusbits", null);
  assertFalse(bits.get(0));

  // check that we cached nothing
  assertEquals(0, cache.getCacheEntries().length);
  ir.close();
  dir.close();
}
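In this iterator-based doc values API, a nonexistent field behaves like an already-exhausted iterator: the first nextDoc() call returns NO_MORE_DOCS, which is exactly what the assertions above check. For contrast, a minimal hedged sketch of draining a SortedDocValues that does have data; leafReader and the field name "myfield" are hypothetical, not part of the test above:

// Hedged sketch; assumes a LeafReader named leafReader and a field "myfield"
// indexed with sorted doc values. DocValues.getSorted never returns null;
// it falls back to an empty instance when the field is missing.
SortedDocValues dv = DocValues.getSorted(leafReader, "myfield");
for (int doc = dv.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = dv.nextDoc()) {
  BytesRef term = dv.lookupOrd(dv.ordValue()); // resolve this doc's ord to term bytes
  // ... use doc and term ...
}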
Use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.
From the class TestFieldCache, method testNonIndexedFields:
public void testNonIndexedFields() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  doc.add(new StoredField("bogusbytes", "bogus"));
  doc.add(new StoredField("bogusshorts", "bogus"));
  doc.add(new StoredField("bogusints", "bogus"));
  doc.add(new StoredField("boguslongs", "bogus"));
  doc.add(new StoredField("bogusfloats", "bogus"));
  doc.add(new StoredField("bogusdoubles", "bogus"));
  doc.add(new StoredField("bogusterms", "bogus"));
  doc.add(new StoredField("bogustermsindex", "bogus"));
  doc.add(new StoredField("bogusmultivalued", "bogus"));
  doc.add(new StoredField("bogusbits", "bogus"));
  iw.addDocument(doc);
  DirectoryReader ir = iw.getReader();
  iw.close();
  LeafReader ar = getOnlyLeafReader(ir);
  final FieldCache cache = FieldCache.DEFAULT;
  cache.purgeAllCaches();
  assertEquals(0, cache.getCacheEntries().length);

  NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.INT_POINT_PARSER);
  assertEquals(NO_MORE_DOCS, ints.nextDoc());

  NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LONG_POINT_PARSER);
  assertEquals(NO_MORE_DOCS, longs.nextDoc());

  NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.FLOAT_POINT_PARSER);
  assertEquals(NO_MORE_DOCS, floats.nextDoc());

  NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER);
  assertEquals(NO_MORE_DOCS, doubles.nextDoc());

  BinaryDocValues binaries = cache.getTerms(ar, "bogusterms");
  assertEquals(NO_MORE_DOCS, binaries.nextDoc());

  SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
  assertEquals(NO_MORE_DOCS, sorted.nextDoc());

  SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued", null);
  assertEquals(NO_MORE_DOCS, sortedSet.nextDoc());

  Bits bits = cache.getDocsWithField(ar, "bogusbits", null);
  assertFalse(bits.get(0));

  // check that we cached nothing
  assertEquals(0, cache.getCacheEntries().length);
  ir.close();
  dir.close();
}
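The fields above are stored-only: StoredField contributes nothing to the inverted index or the doc values files, so every cache lookup comes back empty. To actually get sorted doc values for a field, it must be indexed with a doc-values field type. A minimal hedged sketch; the field name "city" and the IndexWriter named writer are assumptions, not part of the test:

// Hedged sketch; "writer" is an IndexWriter assumed to already exist.
Document doc = new Document();
doc.add(new StoredField("city", "amsterdam"));                        // stored only: retrievable, not sortable
doc.add(new SortedDocValuesField("city", new BytesRef("amsterdam"))); // column-stride value for sorting/faceting
writer.addDocument(doc);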
Use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.
From the class UninvertDocValuesMergePolicyTest, method testIndexAndAddDocValues:
public void testIndexAndAddDocValues() throws Exception {
  Random rand = random();
  for (int i = 0; i < 100; i++) {
    assertU(adoc(ID_FIELD, String.valueOf(i), TEST_FIELD, String.valueOf(i)));
    if (rand.nextBoolean()) {
      assertU(commit());
    }
  }
  assertU(commit());

  // Assert everything has been indexed and there are no docvalues
  withNewRawReader(h, topReader -> {
    assertEquals(100, topReader.numDocs());
    final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
    assertEquals(DocValuesType.NONE, infos.fieldInfo(TEST_FIELD).getDocValuesType());
  });

  addDocValuesTo(h, TEST_FIELD);

  // Add some more documents with doc values turned on, including updates of existing ids
  for (int i = 90; i < 110; i++) {
    assertU(adoc(ID_FIELD, String.valueOf(i), TEST_FIELD, String.valueOf(i)));
    if (rand.nextBoolean()) {
      assertU(commit());
    }
  }
  assertU(commit());

  withNewRawReader(h, topReader -> {
    assertEquals(110, topReader.numDocs());
    final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
    assertEquals(DocValuesType.SORTED, infos.fieldInfo(TEST_FIELD).getDocValuesType());
  });

  int optimizeSegments = 1;
  assertU(optimize("maxSegments", String.valueOf(optimizeSegments)));

  // Assert all docs have the right docvalues
  withNewRawReader(h, topReader -> {
    assertEquals(110, topReader.numDocs());
    assertEquals(optimizeSegments, topReader.leaves().size());
    final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
    assertEquals(DocValuesType.SORTED, infos.fieldInfo(TEST_FIELD).getDocValuesType());
    for (LeafReaderContext ctx : topReader.leaves()) {
      LeafReader r = ctx.reader();
      SortedDocValues docvalues = r.getSortedDocValues(TEST_FIELD);
      for (int i = 0; i < r.numDocs(); ++i) {
        Document doc = r.document(i);
        String v = doc.getField(TEST_FIELD).stringValue();
        String id = doc.getField(ID_FIELD).stringValue();
        assertEquals(DocValuesType.SORTED, r.getFieldInfos().fieldInfo(TEST_FIELD).getDocValuesType());
        assertEquals(DocValuesType.NONE, r.getFieldInfos().fieldInfo(ID_FIELD).getDocValuesType());
        assertEquals(v, id);
        docvalues.nextDoc();
        assertEquals(v, docvalues.binaryValue().utf8ToString());
      }
    }
  });
}
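The per-leaf loop above advances docvalues in lockstep with the stored documents, which works here only because every document in the merged segment has a value for TEST_FIELD. A more defensive per-document read, common on the Lucene 7.x iterator API, uses advanceExact; leafReader, docId, and the field name are hypothetical in this hedged sketch:

// Hedged sketch of a random-access style lookup on the iterator API.
SortedDocValues dv = leafReader.getSortedDocValues("test_field"); // may be null if the field has no doc values
if (dv != null && dv.advanceExact(docId)) {                       // positions on docId if it has a value
  BytesRef value = dv.lookupOrd(dv.ordValue());                   // resolve the ord to term bytes
}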
Use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.
From the class LegacyDocValuesIterables, method sortedOrdIterable:
/** Converts {@link SortedDocValues} into the ord for each document as an {@code Iterable<Number>}.
 *
 * @deprecated Consume {@link SortedDocValues} instead. */
@Deprecated
public static Iterable<Number> sortedOrdIterable(final DocValuesProducer valuesProducer, FieldInfo fieldInfo, int maxDoc) {
  return new Iterable<Number>() {
    @Override
    public Iterator<Number> iterator() {
      final SortedDocValues values;
      try {
        values = valuesProducer.getSorted(fieldInfo);
      } catch (IOException ioe) {
        throw new RuntimeException(ioe);
      }

      return new Iterator<Number>() {
        private int nextDocID;

        @Override
        public boolean hasNext() {
          return nextDocID < maxDoc;
        }

        @Override
        public Number next() {
          try {
            if (nextDocID > values.docID()) {
              values.nextDoc();
            }
            int result;
            if (nextDocID == values.docID()) {
              result = values.ordValue();
            } else {
              result = -1;
            }
            nextDocID++;
            return result;
          } catch (IOException ioe) {
            throw new RuntimeException(ioe);
          }
        }
      };
    }
  };
}
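A legacy codec consumer would drain this iterable exactly once, reading one ord per document in docID order, with -1 standing in for documents that have no value. A hedged usage sketch; producer, fieldInfo, and maxDoc are assumed to be in scope:

for (Number ord : LegacyDocValuesIterables.sortedOrdIterable(producer, fieldInfo, maxDoc)) {
  int o = ord.intValue(); // the document's ord, or -1 if the document has no value
  // ... write o to the legacy on-disk format ...
}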
Use of org.apache.lucene.index.SortedDocValues in project lucene-solr by apache.
From the class DocValuesConsumer, method mergeSortedField:
/**
 * Merges the sorted docvalues from <code>toMerge</code>.
 * <p>
 * The default implementation calls {@link #addSortedField}, passing
 * an Iterable that merges ordinals and values and filters deleted documents.
 */
public void mergeSortedField(FieldInfo fieldInfo, final MergeState mergeState) throws IOException {
  List<SortedDocValues> toMerge = new ArrayList<>();
  for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
    SortedDocValues values = null;
    DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
    if (docValuesProducer != null) {
      FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(fieldInfo.name);
      if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.SORTED) {
        values = docValuesProducer.getSorted(fieldInfo);
      }
    }
    if (values == null) {
      values = DocValues.emptySorted();
    }
    toMerge.add(values);
  }

  final int numReaders = toMerge.size();
  final SortedDocValues[] dvs = toMerge.toArray(new SortedDocValues[numReaders]);

  // step 1: iterate thru each sub and mark terms still in use
  TermsEnum[] liveTerms = new TermsEnum[dvs.length];
  long[] weights = new long[liveTerms.length];
  for (int sub = 0; sub < numReaders; sub++) {
    SortedDocValues dv = dvs[sub];
    Bits liveDocs = mergeState.liveDocs[sub];
    if (liveDocs == null) {
      liveTerms[sub] = dv.termsEnum();
      weights[sub] = dv.getValueCount();
    } else {
      LongBitSet bitset = new LongBitSet(dv.getValueCount());
      int docID;
      while ((docID = dv.nextDoc()) != NO_MORE_DOCS) {
        if (liveDocs.get(docID)) {
          int ord = dv.ordValue();
          if (ord >= 0) {
            bitset.set(ord);
          }
        }
      }
      liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset);
      weights[sub] = bitset.cardinality();
    }
  }

  // step 2: create ordinal map (this conceptually does the "merging")
  final OrdinalMap map = OrdinalMap.build(null, liveTerms, weights, PackedInts.COMPACT);

  // step 3: add field
  addSortedField(fieldInfo, new EmptyDocValuesProducer() {
    @Override
    public SortedDocValues getSorted(FieldInfo fieldInfoIn) throws IOException {
      if (fieldInfoIn != fieldInfo) {
        throw new IllegalArgumentException("wrong FieldInfo");
      }
      // We must make new iterators + DocIDMerger for each iterator:
      List<SortedDocValuesSub> subs = new ArrayList<>();
      long cost = 0;
      for (int i = 0; i < mergeState.docValuesProducers.length; i++) {
        SortedDocValues values = null;
        DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
        if (docValuesProducer != null) {
          FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(fieldInfo.name);
          if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.SORTED) {
            values = docValuesProducer.getSorted(readerFieldInfo);
          }
        }
        if (values == null) {
          values = DocValues.emptySorted();
        }
        cost += values.cost();
        subs.add(new SortedDocValuesSub(mergeState.docMaps[i], values, map.getGlobalOrds(i)));
      }

      final long finalCost = cost;
      final DocIDMerger<SortedDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);

      return new SortedDocValues() {
        private int docID = -1;
        private int ord;

        @Override
        public int docID() {
          return docID;
        }

        @Override
        public int nextDoc() throws IOException {
          SortedDocValuesSub sub = docIDMerger.next();
          if (sub == null) {
            return docID = NO_MORE_DOCS;
          }
          int subOrd = sub.values.ordValue();
          assert subOrd != -1;
          ord = (int) sub.map.get(subOrd);
          docID = sub.mappedDocID;
          return docID;
        }

        @Override
        public int ordValue() {
          return ord;
        }

        @Override
        public int advance(int target) {
          throw new UnsupportedOperationException();
        }

        @Override
        public boolean advanceExact(int target) throws IOException {
          throw new UnsupportedOperationException();
        }

        @Override
        public long cost() {
          return finalCost;
        }

        @Override
        public int getValueCount() {
          return (int) map.getValueCount();
        }

        @Override
        public BytesRef lookupOrd(int ord) throws IOException {
          int segmentNumber = map.getFirstSegmentNumber(ord);
          int segmentOrd = (int) map.getFirstSegmentOrd(ord);
          return dvs[segmentNumber].lookupOrd(segmentOrd);
        }
      };
    }
  });
}
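The OrdinalMap built in step 2 is what ties the merged view together: each sub's segment-local ords are remapped to global ords on the way in (via map.getGlobalOrds(i)), and lookupOrd reverses the mapping, fetching the term bytes from the first segment that contains the term. A hedged sketch of that round trip, using map and dvs as defined in the method above (globalOrd is a hypothetical input):

// global ord -> first segment containing the term -> that segment's local ord -> term bytes
int segment = map.getFirstSegmentNumber(globalOrd);
long segmentOrd = map.getFirstSegmentOrd(globalOrd);
BytesRef term = dvs[segment].lookupOrd((int) segmentOrd);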