use of org.apache.lucene.util.LongValues in project lucene-solr by apache.
the class Lucene54DocValuesProducer method getNumeric.
/**
 * Returns an iterator over the numeric doc values stored for {@code field}.
 *
 * <p>The implementation is selected from the field's {@code NumericEntry}:
 * <ul>
 *   <li>{@code SPARSE_COMPRESSED} format: delegates to {@code getSparseNumericDocValues}.</li>
 *   <li>{@code missingOffset == ALL_MISSING}: returns {@code DocValues.emptyNumeric()}.</li>
 *   <li>{@code missingOffset == ALL_LIVE}: returns a dense iterator that visits every
 *       document id in {@code [0, maxDoc)} without consulting any bitset.</li>
 *   <li>otherwise: returns an iterator that consults the live bits obtained from
 *       {@code getLiveBits} to decide which documents have a value.</li>
 * </ul>
 *
 * @param field the field whose values are requested
 * @return an iterator positioned before the first document ({@code docID() == -1})
 *         for the two iterators built in this method
 * @throws IOException if reading the underlying data fails
 */
@Override
public NumericDocValues getNumeric(FieldInfo field) throws IOException {
  NumericEntry entry = numerics.get(field.name);

  if (entry.format == SPARSE_COMPRESSED) {
    return getSparseNumericDocValues(entry);
  }
  if (entry.missingOffset == ALL_MISSING) {
    return DocValues.emptyNumeric();
  }

  if (entry.missingOffset == ALL_LIVE) {
    final LongValues denseValues = getNumeric(entry);
    return new NumericDocValues() {
      private int docID = -1;

      @Override
      public int docID() {
        return docID;
      }

      @Override
      public int nextDoc() {
        docID++;
        if (docID == maxDoc) {
          docID = NO_MORE_DOCS;
        }
        return docID;
      }

      @Override
      public int advance(int target) {
        if (target >= maxDoc) {
          docID = NO_MORE_DOCS;
        } else {
          docID = target;
        }
        return docID;
      }

      @Override
      public boolean advanceExact(int target) throws IOException {
        // Every document carries a value in this branch, so the answer is always true.
        docID = target;
        return true;
      }

      @Override
      public long cost() {
        // This iterator steps through every id below maxDoc, so maxDoc is the number of
        // documents it can return; this also matches the bitset-backed iterator below.
        return maxDoc;
      }

      @Override
      public long longValue() {
        return denseValues.get(docID);
      }
    };
  }

  // General case: some documents are missing a value. The live bits are fetched before
  // the values, in the same order as the original implementation.
  final Bits docsWithField = getLiveBits(entry.missingOffset, maxDoc);
  final LongValues values = getNumeric(entry);
  return new NumericDocValues() {
    int doc = -1;
    long value;

    @Override
    public long longValue() throws IOException {
      return value;
    }

    @Override
    public int docID() {
      return doc;
    }

    @Override
    public int nextDoc() throws IOException {
      return advance(doc + 1);
    }

    @Override
    public int advance(int target) throws IOException {
      for (int candidate = target; candidate < maxDoc; ++candidate) {
        value = values.get(candidate);
        // A non-zero value short-circuits the bitset lookup; the bitset is only consulted
        // when the stored value is 0 (presumably because missing documents are encoded as
        // 0 -- confirm against the writer).
        if (value != 0 || docsWithField.get(candidate)) {
          doc = candidate;
          return doc;
        }
      }
      doc = NO_MORE_DOCS;
      return doc;
    }

    @Override
    public boolean advanceExact(int target) throws IOException {
      doc = target;
      value = values.get(doc);
      return value != 0 || docsWithField.get(doc);
    }

    @Override
    public long cost() {
      return maxDoc;
    }
  };
}
use of org.apache.lucene.util.LongValues in project lucene-solr by apache.
the class Lucene54DocValuesProducer method getSortedSetTable.
/**
 * Builds the sorted-set doc values for a field whose per-document ordinal sets are stored
 * in table form: each document maps (through the field's ordinals) to an index into
 * {@code ss.tableOffsets}, and the slice of {@code ss.table} between two consecutive
 * offsets holds that document's ordinals.
 *
 * @param field the field to read
 * @param ss    the sorted-set entry supplying {@code table} and {@code tableOffsets}
 * @return the table-backed values wrapped in a {@code LegacySortedSetDocValuesWrapper}
 * @throws IOException if reading the underlying data fails
 */
private SortedSetDocValues getSortedSetTable(FieldInfo field, SortedSetEntry ss) throws IOException {
  final long termCount = binaries.get(field.name).count;
  final LongBinaryDocValues terms = (LongBinaryDocValues) getLegacyBinary(field);
  final LongValues docToTableOrd = getNumeric(ords.get(field.name));
  final long[] ordTable = ss.table;
  final int[] sliceStarts = ss.tableOffsets;
  // Resolved once: non-null only when the term dictionary is the compressed implementation.
  final CompressedBinaryDocValues compressedTerms =
      terms instanceof CompressedBinaryDocValues ? (CompressedBinaryDocValues) terms : null;

  final LegacySortedSetDocValues tableBacked = new LegacySortedSetDocValues() {
    // startOffset is recorded on every setDocument call but is not read within this class.
    int offset, startOffset, endOffset;

    @Override
    public void setDocument(int docID) {
      final int tableOrd = (int) docToTableOrd.get(docID);
      startOffset = sliceStarts[tableOrd];
      offset = startOffset;
      endOffset = sliceStarts[tableOrd + 1];
    }

    @Override
    public long nextOrd() {
      if (offset != endOffset) {
        return ordTable[offset++];
      }
      return NO_MORE_ORDS;
    }

    @Override
    public BytesRef lookupOrd(long ord) {
      return terms.get(ord);
    }

    @Override
    public long getValueCount() {
      return termCount;
    }

    @Override
    public long lookupTerm(BytesRef key) {
      // Use the compressed dictionary's own lookup when available, else the inherited one.
      if (compressedTerms == null) {
        return super.lookupTerm(key);
      }
      return compressedTerms.lookupTerm(key);
    }

    @Override
    public TermsEnum termsEnum() throws IOException {
      if (compressedTerms == null) {
        return super.termsEnum();
      }
      return compressedTerms.getTermsEnum();
    }
  };

  return new LegacySortedSetDocValuesWrapper(tableBacked, maxDoc);
}
use of org.apache.lucene.util.LongValues in project lucene-solr by apache.
the class Lucene54DocValuesProducer method getSortedNumeric.
/**
 * Returns the sorted-numeric doc values for {@code field}, choosing an iterator from the
 * format recorded in the field's {@code SortedSetEntry}:
 * <ul>
 *   <li>{@code SORTED_SINGLE_VALUED}: exactly one value per document that has the field.
 *       If the numeric entry is {@code SPARSE_COMPRESSED}, every call is delegated to the
 *       underlying {@code SparseNumericDocValues}; otherwise the live bits decide which
 *       documents have a value.</li>
 *   <li>{@code SORTED_WITH_ADDRESSES}: the ord index gives, for document {@code d}, the
 *       half-open range {@code [ordIndex(d), ordIndex(d + 1))} into the value stream.</li>
 *   <li>{@code SORTED_SET_TABLE}: each document maps to a table ordinal, and the slice of
 *       {@code ss.table} between two consecutive {@code ss.tableOffsets} holds its values.</li>
 * </ul>
 *
 * <p>For the two multi-valued formats, both {@code nextDoc()}/{@code advance()} and
 * {@code advanceExact()} reset the value cursor to the start of the document's range, so
 * {@code nextValue()} always begins at the document's first value.
 *
 * @param field the field whose values are requested
 * @return an iterator over the documents that have at least one value
 * @throws IOException if reading the underlying data fails
 * @throws AssertionError if the entry carries a format not handled here
 */
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
  SortedSetEntry ss = sortedNumerics.get(field.name);
  if (ss.format == SORTED_SINGLE_VALUED) {
    NumericEntry numericEntry = numerics.get(field.name);
    final LongValues values = getNumeric(numericEntry);
    if (numericEntry.format == SPARSE_COMPRESSED) {
      // For the sparse format the random-access view wraps the real iterator; unwrap it
      // and delegate everything to it.
      SparseNumericDocValues sparseValues = ((SparseNumericDocValuesRandomAccessWrapper) values).values;
      return new SortedNumericDocValues() {
        @Override
        public long nextValue() throws IOException {
          return sparseValues.longValue();
        }

        @Override
        public int docValueCount() {
          return 1;
        }

        @Override
        public int docID() {
          return sparseValues.docID();
        }

        @Override
        public int nextDoc() throws IOException {
          return sparseValues.nextDoc();
        }

        @Override
        public int advance(int target) throws IOException {
          return sparseValues.advance(target);
        }

        @Override
        public boolean advanceExact(int target) throws IOException {
          return sparseValues.advanceExact(target);
        }

        @Override
        public long cost() {
          return sparseValues.cost();
        }
      };
    }
    final Bits docsWithField = getLiveBits(numericEntry.missingOffset, maxDoc);
    return new SortedNumericDocValues() {
      int docID = -1;

      @Override
      public int docID() {
        return docID;
      }

      @Override
      public int nextDoc() {
        while (true) {
          docID++;
          if (docID == maxDoc) {
            docID = NO_MORE_DOCS;
            break;
          }
          if (docsWithField.get(docID)) {
            // TODO: use .nextSetBit here, at least!!
            break;
          }
        }
        return docID;
      }

      @Override
      public int advance(int target) {
        if (target >= maxDoc) {
          docID = NO_MORE_DOCS;
          return docID;
        } else {
          // Step back one so that nextDoc() examines target first.
          docID = target - 1;
          return nextDoc();
        }
      }

      @Override
      public boolean advanceExact(int target) throws IOException {
        docID = target;
        return docsWithField.get(docID);
      }

      @Override
      public long cost() {
        // The scan in nextDoc() can return at most maxDoc documents.
        return maxDoc;
      }

      @Override
      public int docValueCount() {
        return 1;
      }

      @Override
      public long nextValue() {
        return values.get(docID);
      }
    };
  } else if (ss.format == SORTED_WITH_ADDRESSES) {
    NumericEntry numericEntry = numerics.get(field.name);
    final LongValues values = getNumeric(numericEntry);
    final LongValues ordIndex = getOrdIndexInstance(field, ordIndexes.get(field.name));
    return new SortedNumericDocValues() {
      long startOffset;
      long endOffset;
      int docID = -1;
      long upto;

      @Override
      public int docID() {
        return docID;
      }

      @Override
      public int nextDoc() {
        while (true) {
          docID++;
          if (docID == maxDoc) {
            docID = NO_MORE_DOCS;
            return docID;
          }
          startOffset = ordIndex.get(docID);
          endOffset = ordIndex.get(docID + 1L);
          // An empty range means the document has no values; keep scanning.
          if (endOffset > startOffset) {
            break;
          }
        }
        upto = startOffset;
        return docID;
      }

      @Override
      public int advance(int target) {
        if (target >= maxDoc) {
          docID = NO_MORE_DOCS;
          return docID;
        } else {
          docID = target - 1;
          return nextDoc();
        }
      }

      @Override
      public boolean advanceExact(int target) throws IOException {
        docID = target;
        startOffset = ordIndex.get(docID);
        endOffset = ordIndex.get(docID + 1L);
        // Reset the value cursor; without this nextValue() would continue from wherever
        // the previously positioned document left it.
        upto = startOffset;
        return endOffset > startOffset;
      }

      @Override
      public long cost() {
        // The scan in nextDoc() can return at most maxDoc documents.
        return maxDoc;
      }

      @Override
      public int docValueCount() {
        return (int) (endOffset - startOffset);
      }

      @Override
      public long nextValue() {
        return values.get(upto++);
      }
    };
  } else if (ss.format == SORTED_SET_TABLE) {
    NumericEntry entry = ords.get(field.name);
    final LongValues ordinals = getNumeric(entry);
    final long[] table = ss.table;
    final int[] offsets = ss.tableOffsets;
    return new SortedNumericDocValues() {
      int startOffset;
      int endOffset;
      int docID = -1;
      int upto;

      @Override
      public int docID() {
        return docID;
      }

      @Override
      public int nextDoc() {
        while (true) {
          docID++;
          if (docID == maxDoc) {
            docID = NO_MORE_DOCS;
            return docID;
          }
          int ord = (int) ordinals.get(docID);
          startOffset = offsets[ord];
          endOffset = offsets[ord + 1];
          // An empty slice means the document has no values; keep scanning.
          if (endOffset > startOffset) {
            break;
          }
        }
        upto = startOffset;
        return docID;
      }

      @Override
      public int advance(int target) {
        if (target >= maxDoc) {
          docID = NO_MORE_DOCS;
          return docID;
        } else {
          docID = target - 1;
          return nextDoc();
        }
      }

      @Override
      public boolean advanceExact(int target) throws IOException {
        docID = target;
        int ord = (int) ordinals.get(docID);
        startOffset = offsets[ord];
        endOffset = offsets[ord + 1];
        // Reset the value cursor; without this nextValue() would continue from wherever
        // the previously positioned document left it.
        upto = startOffset;
        return endOffset > startOffset;
      }

      @Override
      public long cost() {
        // The scan in nextDoc() can return at most maxDoc documents.
        return maxDoc;
      }

      @Override
      public int docValueCount() {
        return endOffset - startOffset;
      }

      @Override
      public long nextValue() {
        return table[upto++];
      }
    };
  } else {
    throw new AssertionError();
  }
}
use of org.apache.lucene.util.LongValues in project lucene-solr by apache.
the class TestLucene54DocValuesFormat method testSparseLongValues.
/**
 * Randomized check of {@code SparseNumericDocValues} and its random-access wrapper.
 *
 * <p>Each round builds a strictly increasing array of doc ids with random values, then
 * verifies: sequential {@code nextDoc()}, {@code advance()} to arbitrary targets,
 * {@code advanceExact()} (including a repeated call on the same target followed by
 * {@code nextDoc()}), and finally lookups through
 * {@code SparseNumericDocValuesRandomAccessWrapper}, which must return the configured
 * missing value for doc ids that are not present.
 */
public void testSparseLongValues() throws IOException {
  final int rounds = atLeast(5);
  for (int round = 0; round < rounds; ++round) {
    final int count = TestUtil.nextInt(random(), 0, 100);
    final int[] docs = new int[count];
    final long[] vals = new long[count];
    final int maxDoc;
    if (count != 0) {
      // Strictly increasing doc ids with random gaps.
      docs[0] = random().nextInt(10);
      for (int k = 1; k < docs.length; ++k) {
        docs[k] = docs[k - 1] + 1 + random().nextInt(100);
      }
      maxDoc = docs[count - 1] + 1 + random().nextInt(10);
    } else {
      maxDoc = 1 + random().nextInt(10);
    }
    for (int k = 0; k < vals.length; ++k) {
      vals[k] = random().nextLong();
    }
    final long missingValue = random().nextLong();

    final LongValues docsAsLongValues = new LongValues() {
      @Override
      public long get(long index) {
        return docs[Math.toIntExact(index)];
      }
    };
    final LongValues valsAsLongValues = new LongValues() {
      @Override
      public long get(long index) {
        return vals[Math.toIntExact(index)];
      }
    };
    final SparseNumericDocValues sparse = new SparseNumericDocValues(count, docsAsLongValues, valsAsLongValues);

    // sequential access
    assertEquals(-1, sparse.docID());
    for (int expectedDoc : docs) {
      assertEquals(expectedDoc, sparse.nextDoc());
    }
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, sparse.nextDoc());

    // advance
    for (int attempt = 0; attempt < 2000; ++attempt) {
      final int target = TestUtil.nextInt(random(), 0, maxDoc);
      final int found = Arrays.binarySearch(docs, target);
      // Index of the first doc id >= target (the insertion point when target is absent).
      final int slot = found >= 0 ? found : -1 - found;
      sparse.reset();
      if (slot > 0) {
        // First position on the doc just before the target, then advance from there.
        final int previous = docs[slot - 1];
        assertEquals(previous, sparse.advance(previous));
      }
      final int expected = slot == docs.length ? DocIdSetIterator.NO_MORE_DOCS : docs[slot];
      assertEquals(expected, sparse.advance(target));
    }

    // advanceExact
    for (int attempt = 0; attempt < 2000; ++attempt) {
      sparse.reset();
      if (random().nextBoolean() && docs.length > 0) {
        sparse.advance(docs[TestUtil.nextInt(random(), 0, docs.length - 1)]);
      }
      final int target = TestUtil.nextInt(random(), Math.max(0, sparse.docID()), maxDoc - 1);
      final boolean firstAnswer = sparse.advanceExact(target);
      final int found = Arrays.binarySearch(docs, target);
      assertEquals(found >= 0, firstAnswer);
      assertEquals(target, sparse.docID());
      // Asking again for the same target must give the same answer and position.
      final boolean secondAnswer = sparse.advanceExact(target);
      assertEquals(found >= 0, secondAnswer);
      assertEquals(target, sparse.docID());
      final int following = found >= 0 ? found + 1 : -1 - found;
      final int expectedNext = following >= docs.length ? DocIdSetIterator.NO_MORE_DOCS : docs[following];
      assertEquals(expectedNext, sparse.nextDoc());
    }

    final SparseNumericDocValuesRandomAccessWrapper randomAccess =
        new SparseNumericDocValuesRandomAccessWrapper(sparse, missingValue);

    // random-access
    for (int attempt = 0; attempt < 2000; ++attempt) {
      final int docId = TestUtil.nextInt(random(), 0, maxDoc - 1);
      final int found = Arrays.binarySearch(docs, docId);
      final long actual = randomAccess.get(docId);
      final long expected = found >= 0 ? vals[found] : missingValue;
      assertEquals(expected, actual);
    }

    // sequential access (the step may be 0, which re-reads the same doc id)
    for (int docId = 0; docId < maxDoc; docId += random().nextInt(3)) {
      final int found = Arrays.binarySearch(docs, docId);
      final long actual = randomAccess.get(docId);
      final long expected = found >= 0 ? vals[found] : missingValue;
      assertEquals(expected, actual);
    }
  }
}
use of org.apache.lucene.util.LongValues in project lucene-solr by apache.
the class FacetFieldProcessorByArrayDV method collectDocs.
/**
 * Collects facet data for every index segment (leaf) of the searcher, restricted to the
 * documents in {@code fcontext.base}.
 *
 * <p>For each leaf the method obtains the doc values of the faceted field (sorted for
 * single-valued fields, sorted-set for multi-valued ones, optionally unwrapped to a
 * singleton), then dispatches to one of three strategies:
 * <ul>
 *   <li>{@code collectPerSeg}: when only counts are needed over the full term range and
 *       many hits per bucket are expected (or {@code freq.perSeg} forces it);</li>
 *   <li>{@code collectCounts}: when only counts are needed over the full term range and a
 *       segment-to-global ordinal mapping is available;</li>
 *   <li>{@code collectDocs}: in all other cases.</li>
 * </ul>
 *
 * <p>A segment for which the filter yields no {@code DocIdSet}, or whose set yields no
 * iterator, is processed with an empty iterator instead of failing with a
 * {@code NullPointerException}.
 *
 * @throws IOException if reading doc values or iterating documents fails
 */
@Override
protected void collectDocs() throws IOException {
  int domainSize = fcontext.base.size();
  if (nTerms <= 0 || domainSize < effectiveMincount) {
    // TODO: what about allBuckets? missing bucket?
    return;
  }

  // TODO: refactor some of this logic into a base class
  boolean countOnly = collectAcc == null && allBucketsAcc == null;
  boolean fullRange = startTermIndex == 0 && endTermIndex == si.getValueCount();

  // Are we expecting many hits per bucket?
  // FUTURE: pro-rate for nTerms?
  // FUTURE: better take into account number of values in multi-valued fields. This info is available for indexed fields.
  // FUTURE: take into account that bigger ord maps are more expensive than smaller ones
  // One test: 5M doc index, faceting on a single-valued field with almost 1M unique values, crossover point where global counting was slower
  // than per-segment counting was a domain of 658k docs. At that point, top 10 buckets had 6-7 matches each.
  // this was for heap docvalues produced by UninvertingReader
  // Since these values were randomly distributed, let's round our domain multiplier up to account for less random real world data.
  long domainMultiplier = multiValuedField ? 4L : 2L;
  // +3 to increase test coverage with small tests
  boolean manyHitsPerBucket = domainSize * domainMultiplier > (si.getValueCount() + 3);

  // If we're only calculating counts, we're not prefixing, and we expect to collect many documents per unique value,
  // then collect per-segment before mapping to global ords at the end. This will save redundant seg->global ord mappings.
  // FUTURE: there are probably some other non "countOnly" cases where we can use this as well (i.e. those where
  // the docid is not used)
  boolean canDoPerSeg = countOnly && fullRange;
  boolean accumSeg = manyHitsPerBucket && canDoPerSeg;

  // internal - override perSeg heuristic
  if (freq.perSeg != null) {
    accumSeg = canDoPerSeg && freq.perSeg;
  }

  final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
  Filter filter = fcontext.base.getTopFilter();

  for (int subIdx = 0; subIdx < leaves.size(); subIdx++) {
    LeafReaderContext subCtx = leaves.get(subIdx);

    setNextReaderFirstPhase(subCtx);

    // solr docsets already exclude any deleted docs
    DocIdSet dis = filter.getDocIdSet(subCtx, null);
    // Both the set and its iterator may be null when nothing matches in this segment;
    // fall back to an empty iterator so the collectors below run as usual.
    DocIdSetIterator disi = dis == null ? null : dis.iterator();
    if (disi == null) {
      disi = DocIdSetIterator.empty();
    }

    SortedDocValues singleDv = null;
    SortedSetDocValues multiDv = null;
    if (multiValuedField) {
      // TODO: get sub from multi?
      multiDv = subCtx.reader().getSortedSetDocValues(sf.getName());
      if (multiDv == null) {
        multiDv = DocValues.emptySortedSet();
      }
      // this will be null if this is not a wrapped single valued docvalues.
      if (unwrap_singleValued_multiDv) {
        singleDv = DocValues.unwrapSingleton(multiDv);
      }
    } else {
      singleDv = subCtx.reader().getSortedDocValues(sf.getName());
      if (singleDv == null) {
        singleDv = DocValues.emptySorted();
      }
    }

    // No ordinal map means segment ordinals are used as-is (toGlobal stays null).
    LongValues toGlobal = ordinalMap == null ? null : ordinalMap.getGlobalOrds(subIdx);

    if (singleDv != null) {
      if (accumSeg) {
        collectPerSeg(singleDv, disi, toGlobal);
      } else if (canDoPerSeg && toGlobal != null) {
        collectCounts(singleDv, disi, toGlobal);
      } else {
        collectDocs(singleDv, disi, toGlobal);
      }
    } else {
      if (accumSeg) {
        collectPerSeg(multiDv, disi, toGlobal);
      } else if (canDoPerSeg && toGlobal != null) {
        collectCounts(multiDv, disi, toGlobal);
      } else {
        collectDocs(multiDv, disi, toGlobal);
      }
    }
  }

  // better GC
  reuse = null;
}
Aggregations