Use of org.apache.lucene.index.LeafReaderContext in project lucene-solr by apache.
The class IndexSortedFacetCollector, method getFacetCounts:
NamedList<Integer> getFacetCounts(Executor executor) throws IOException {
  CompletionService<SegFacet> completionService = new ExecutorCompletionService<>(executor);
  // reuse the translation logic to go from top level set to per-segment set
  baseSet = docs.getTopFilter();
  final List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
  // The list of pending tasks that aren't immediately submitted
  // TODO: Is there a completion service, or a delegating executor that can
  // limit the number of concurrent tasks submitted to a bigger executor?
  LinkedList<Callable<SegFacet>> pending = new LinkedList<>();
  int threads = nThreads <= 0 ? Integer.MAX_VALUE : nThreads;

  for (final LeafReaderContext leave : leaves) {
    final SegFacet segFacet = new SegFacet(leave);
    Callable<SegFacet> task = () -> {
      segFacet.countTerms();
      return segFacet;
    };
    if (--threads >= 0) {
      completionService.submit(task);
    } else {
      pending.add(task);
    }
  }

  // now merge the per-segment results
  PriorityQueue<SegFacet> queue = new PriorityQueue<SegFacet>(leaves.size()) {
    @Override
    protected boolean lessThan(SegFacet a, SegFacet b) {
      return a.tempBR.compareTo(b.tempBR) < 0;
    }
  };

  boolean hasMissingCount = false;
  int missingCount = 0;
  for (int i = 0, c = leaves.size(); i < c; i++) {
    SegFacet seg = null;
    try {
      Future<SegFacet> future = completionService.take();
      seg = future.get();
      if (!pending.isEmpty()) {
        completionService.submit(pending.removeFirst());
      }
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
    } catch (ExecutionException e) {
      Throwable cause = e.getCause();
      if (cause instanceof RuntimeException) {
        throw (RuntimeException) cause;
      } else {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error in per-segment faceting on field: " + fieldName, cause);
      }
    }

    if (seg.startTermIndex < seg.endTermIndex) {
      if (seg.startTermIndex == -1) {
        hasMissingCount = true;
        missingCount += seg.counts[0];
        seg.pos = 0;
      } else {
        seg.pos = seg.startTermIndex;
      }
      if (seg.pos < seg.endTermIndex && (mincount < 1 || seg.hasAnyCount)) {
        seg.tenum = seg.si.termsEnum();
        seg.tenum.seekExact(seg.pos);
        seg.tempBR = seg.tenum.term();
        queue.add(seg);
      }
    }
  }

  FacetCollector collector;
  if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
    collector = new CountSortedFacetCollector(offset, limit, mincount);
  } else {
    collector = new IndexSortedFacetCollector(offset, limit, mincount);
  }

  BytesRefBuilder val = new BytesRefBuilder();
  while (queue.size() > 0) {
    SegFacet seg = queue.top();
    boolean collect = termFilter == null || termFilter.test(seg.tempBR);
    if (collect) {
      // copy the term bytes because tempBR may be shared across calls
      val.copyBytes(seg.tempBR);
    }
    int count = 0;
    do {
      if (collect) {
        count += seg.counts[seg.pos - seg.startTermIndex];
      }
      // if mincount>0 then seg.pos++ can skip ahead to the next non-zero entry.
      do {
        ++seg.pos;
      } while (
          // stop incrementing before we run off the end
          (seg.pos < seg.endTermIndex) &&
          // move term enum forward with position -- don't care about value
          (seg.tenum.next() != null || true) &&
          // only skip ahead if mincount > 0
          (mincount > 0) &&
          // check zero count
          (seg.counts[seg.pos - seg.startTermIndex] == 0));
      if (seg.pos >= seg.endTermIndex) {
        queue.pop();
        seg = queue.top();
      } else {
        seg.tempBR = seg.tenum.term();
        seg = queue.updateTop();
      }
    } while (seg != null && val.get().compareTo(seg.tempBR) == 0);

    if (collect) {
      boolean stop = collector.collect(val.get(), count);
      if (stop)
        break;
    }
  }

  NamedList<Integer> res = collector.getFacetCounts();
  // convert labels to readable form
  FieldType ft = searcher.getSchema().getFieldType(fieldName);
  int sz = res.size();
  for (int i = 0; i < sz; i++) {
    res.setName(i, ft.indexedToReadable(res.getName(i)));
  }
  if (missing) {
    if (!hasMissingCount) {
      missingCount = SimpleFacets.getFieldMissingCount(searcher, docs, fieldName);
    }
    res.add(null, missingCount);
  }
  return res;
}
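
The TODO at the top of this method asks whether a delegating executor exists that caps how many tasks reach the underlying executor; the code instead parks the overflow in the pending list and resubmits one task per completion. As a sketch of what such a delegate could look like, here is a minimal Semaphore-based wrapper built only on java.util.concurrent; BoundedExecutor and its cap parameter are hypothetical, not part of lucene-solr:

import java.util.concurrent.Executor;
import java.util.concurrent.Semaphore;

/** Hypothetical wrapper that blocks submitters once 'cap' tasks are in flight. */
class BoundedExecutor implements Executor {
  private final Executor delegate;
  private final Semaphore permits;

  BoundedExecutor(Executor delegate, int cap) {
    this.delegate = delegate;
    this.permits = new Semaphore(cap);
  }

  @Override
  public void execute(Runnable task) {
    permits.acquireUninterruptibly(); // wait until a slot frees up
    try {
      delegate.execute(() -> {
        try {
          task.run();
        } finally {
          permits.release(); // free the slot when the task finishes
        }
      });
    } catch (RuntimeException e) {
      permits.release(); // rejected execution: don't leak the permit
      throw e;
    }
  }
}

With such a wrapper the pending list and the manual resubmission would become unnecessary; the trade-off is that submitting threads block inside execute() rather than queueing work.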
Use of org.apache.lucene.index.LeafReaderContext in project lucene-solr by apache.
The class ReverseOrdFieldSource, method getValues:
@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
  final int off = readerContext.docBase;
  final LeafReader r;
  Object o = context.get("searcher");
  if (o instanceof SolrIndexSearcher) {
    SolrIndexSearcher is = (SolrIndexSearcher) o;
    SchemaField sf = is.getSchema().getFieldOrNull(field);
    if (sf != null && sf.hasDocValues() == false && sf.multiValued() == false && sf.getType().getNumberType() != null) {
      // it's a single-valued numeric field: we must currently create insanity :(
      List<LeafReaderContext> leaves = is.getIndexReader().leaves();
      LeafReader[] insaneLeaves = new LeafReader[leaves.size()];
      int upto = 0;
      for (LeafReaderContext raw : leaves) {
        insaneLeaves[upto++] = Insanity.wrapInsanity(raw.reader(), field);
      }
      r = SlowCompositeReaderWrapper.wrap(new MultiReader(insaneLeaves));
    } else {
      // reuse the ordinal map
      r = ((SolrIndexSearcher) o).getSlowAtomicReader();
    }
  } else {
    IndexReader topReader = ReaderUtil.getTopLevelContext(readerContext).reader();
    r = SlowCompositeReaderWrapper.wrap(topReader);
  }
  // if it's e.g. tokenized/multivalued, emulate old behavior of single-valued fc
  final SortedDocValues sindex = SortedSetSelector.wrap(DocValues.getSortedSet(r, field), SortedSetSelector.Type.MIN);
  final int end = sindex.getValueCount();
  return new IntDocValues(this) {
    @Override
    public int intVal(int doc) throws IOException {
      if (doc + off > sindex.docID()) {
        sindex.advance(doc + off);
      }
      if (doc + off == sindex.docID()) {
        return (end - sindex.ordValue() - 1);
      } else {
        return end;
      }
    }
  };
}
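
intVal above follows the iterator contract of modern doc values: advance() may only move forward, and a document with no value is detected when docID() overshoots the target (here end, the value count, doubles as the "missing" sentinel, since a reversed ordinal end - ord - 1 can never reach it). The same advance-then-check pattern in isolation, as a minimal sketch; leafReader and the field name "price" are placeholders:

// Minimal sketch, assuming 'leafReader' is a LeafReader whose "price" field has numeric doc values.
NumericDocValues dv = DocValues.getNumeric(leafReader, "price");
for (int doc = 0; doc < leafReader.maxDoc(); doc++) {
  if (dv.docID() < doc) {
    dv.advance(doc); // forward-only: the iterator can never be rewound
  }
  if (dv.docID() == doc) {
    long value = dv.longValue(); // document has a value for this field
  } else {
    // document has no value; dv now sits on some later document
  }
}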
Use of org.apache.lucene.index.LeafReaderContext in project lucene-solr by apache.
The class FacetProcessor, method collect:
int collect(DocSet docs, int slot) throws IOException {
  int count = 0;
  SolrIndexSearcher searcher = fcontext.searcher;
  final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
  final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
  LeafReaderContext ctx = null;
  int segBase = 0;
  int segMax;
  int adjustedMax = 0;
  for (DocIterator docsIt = docs.iterator(); docsIt.hasNext(); ) {
    final int doc = docsIt.nextDoc();
    if (doc >= adjustedMax) {
      do {
        ctx = ctxIt.next();
        if (ctx == null) {
          // should be impossible
          throw new RuntimeException("INTERNAL FACET ERROR");
        }
        segBase = ctx.docBase;
        segMax = ctx.reader().maxDoc();
        adjustedMax = segBase + segMax;
      } while (doc >= adjustedMax);
      assert doc >= ctx.docBase;
      setNextReader(ctx);
    }
    count++;
    // per-seg collectors
    collect(doc - segBase, slot);
  }
  return count;
}
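
This leaf-walking works because a DocSet iterates in increasing docID order, so the loop only ever moves forward through the leaves and pays the segment switch once per leaf. When top-level docIDs arrive in arbitrary order, the same global-to-segment translation is usually done per lookup with a binary search over the leaves' docBase values. A minimal sketch using Lucene's ReaderUtil; globalDoc and topContext are placeholders:

// Translate one top-level docID into (leaf, segment-local docID) for random access,
// assuming 'topContext' is the IndexReaderContext of the top-level reader.
List<LeafReaderContext> leaves = topContext.leaves();
int leafIdx = ReaderUtil.subIndex(globalDoc, leaves); // binary search on docBase
LeafReaderContext leaf = leaves.get(leafIdx);
int localDoc = globalDoc - leaf.docBase; // segment-relative docID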
Use of org.apache.lucene.index.LeafReaderContext in project lucene-solr by apache.
The class FacetFieldProcessorByHashDV, method collectDocs:
private void collectDocs() throws IOException {
  if (calc instanceof TermOrdCalc) {
    // Strings
    // TODO support SortedSetDocValues
    SortedDocValues globalDocValues = FieldUtil.getSortedDocValues(fcontext.qcontext, sf, null);
    ((TermOrdCalc) calc).lookupOrdFunction = ord -> {
      try {
        return globalDocValues.lookupOrd(ord);
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    };
    DocSetUtil.collectSortedDocSet(fcontext.base, fcontext.searcher.getIndexReader(), new SimpleCollector() {
      // doc values for this segment/leaf. NN
      SortedDocValues docValues = globalDocValues;
      // mapping from this segment's ordinals to global ordinals. NN
      LongValues toGlobal = LongValues.IDENTITY;

      @Override
      public boolean needsScores() {
        return false;
      }

      @Override
      protected void doSetNextReader(LeafReaderContext ctx) throws IOException {
        setNextReaderFirstPhase(ctx);
        if (globalDocValues instanceof MultiDocValues.MultiSortedDocValues) {
          MultiDocValues.MultiSortedDocValues multiDocValues = (MultiDocValues.MultiSortedDocValues) globalDocValues;
          docValues = multiDocValues.values[ctx.ord];
          toGlobal = multiDocValues.mapping.getGlobalOrds(ctx.ord);
        }
      }

      @Override
      public void collect(int segDoc) throws IOException {
        if (segDoc > docValues.docID()) {
          docValues.advance(segDoc);
        }
        if (segDoc == docValues.docID()) {
          long val = toGlobal.get(docValues.ordValue());
          collectValFirstPhase(segDoc, val);
        }
      }
    });
  } else {
    // Numeric:
    // TODO support SortedNumericDocValues
    DocSetUtil.collectSortedDocSet(fcontext.base, fcontext.searcher.getIndexReader(), new SimpleCollector() {
      // NN
      NumericDocValues values = null;

      @Override
      public boolean needsScores() {
        return false;
      }

      @Override
      protected void doSetNextReader(LeafReaderContext ctx) throws IOException {
        setNextReaderFirstPhase(ctx);
        values = DocValues.getNumeric(ctx.reader(), sf.getName());
      }

      @Override
      public void collect(int segDoc) throws IOException {
        if (segDoc > values.docID()) {
          values.advance(segDoc);
        }
        if (segDoc == values.docID()) {
          collectValFirstPhase(segDoc, values.longValue());
        }
      }
    });
  }
}
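
Both anonymous collectors resolve their doc values lazily per segment in doSetNextReader; the string branch additionally remaps each segment-local ordinal to a global ordinal through the OrdinalMap carried by MultiDocValues.MultiSortedDocValues, so one accumulator indexed by global ord can be shared across all segments. The same idea in isolation, as a sketch; reader and the field name "category" are placeholders:

// Minimal sketch, assuming 'reader' is an IndexReader whose "category" field
// has SortedDocValues in every segment.
SortedDocValues global = MultiDocValues.getSortedValues(reader, "category");
if (global instanceof MultiDocValues.MultiSortedDocValues) {
  MultiDocValues.MultiSortedDocValues multi = (MultiDocValues.MultiSortedDocValues) global;
  for (LeafReaderContext leaf : reader.leaves()) {
    SortedDocValues leafValues = multi.values[leaf.ord];             // segment-local ords
    LongValues toGlobal = multi.mapping.getGlobalOrds(leaf.ord);     // local ord -> global ord
    // a counts array indexed by global ord can now be shared across segments, e.g.:
    // counts[(int) toGlobal.get(leafValues.ordValue())]++;
  }
}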
Use of org.apache.lucene.index.LeafReaderContext in project lucene-solr by apache.
The class UnInvertedField, method collectDocsGeneric:
// called from FieldFacetProcessor
// TODO: do a callback version that can be specialized!
public void collectDocsGeneric(FacetFieldProcessorByArrayUIF processor) throws IOException {
  use.incrementAndGet();
  int startTermIndex = processor.startTermIndex;
  int endTermIndex = processor.endTermIndex;
  int nTerms = processor.nTerms;
  DocSet docs = processor.fcontext.base;
  int uniqueTerms = 0;
  final CountSlotAcc countAcc = processor.countAcc;

  for (TopTerm tt : bigTerms.values()) {
    if (tt.termNum >= startTermIndex && tt.termNum < endTermIndex) {
      // handle the biggest terms
      try (DocSet intersection = searcher.getDocSet(tt.termQuery, docs)) {
        int collected = processor.collectFirstPhase(intersection, tt.termNum - startTermIndex);
        countAcc.incrementCount(tt.termNum - startTermIndex, collected);
        if (collected > 0) {
          uniqueTerms++;
        }
      }
    }
  }

  if (termInstances > 0) {
    final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
    final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
    LeafReaderContext ctx = null;
    int segBase = 0;
    int segMax;
    int adjustedMax = 0;
    // TODO: handle facet.prefix here!!!
    DocIterator iter = docs.iterator();
    while (iter.hasNext()) {
      int doc = iter.nextDoc();
      if (doc >= adjustedMax) {
        do {
          ctx = ctxIt.next();
          if (ctx == null) {
            // should be impossible
            throw new RuntimeException("INTERNAL FACET ERROR");
          }
          segBase = ctx.docBase;
          segMax = ctx.reader().maxDoc();
          adjustedMax = segBase + segMax;
        } while (doc >= adjustedMax);
        assert doc >= ctx.docBase;
        processor.setNextReaderFirstPhase(ctx);
      }
      int segDoc = doc - segBase;

      int code = index[doc];
      if ((code & 0xff) == 1) {
        // low byte == 1: 'code' is a pointer into the packed byte arrays
        int pos = code >>> 8;
        int whichArray = (doc >>> 16) & 0xff;
        byte[] arr = tnums[whichArray];
        int tnum = 0;
        for (;;) {
          // decode one variable-length delta: 7 bits per byte, high bit = continuation
          int delta = 0;
          for (;;) {
            byte b = arr[pos++];
            delta = (delta << 7) | (b & 0x7f);
            if ((b & 0x80) == 0)
              break;
          }
          if (delta == 0)
            break;
          tnum += delta - TNUM_OFFSET;
          int arrIdx = tnum - startTermIndex;
          if (arrIdx < 0)
            continue;
          if (arrIdx >= nTerms)
            break;
          countAcc.incrementCount(arrIdx, 1);
          processor.collectFirstPhase(segDoc, arrIdx);
        }
      } else {
        // otherwise the deltas are packed inline in the upper bytes of 'code' itself
        int tnum = 0;
        int delta = 0;
        for (;;) {
          delta = (delta << 7) | (code & 0x7f);
          if ((code & 0x80) == 0) {
            if (delta == 0)
              break;
            tnum += delta - TNUM_OFFSET;
            int arrIdx = tnum - startTermIndex;
            if (arrIdx >= 0) {
              if (arrIdx >= nTerms)
                break;
              countAcc.incrementCount(arrIdx, 1);
              processor.collectFirstPhase(segDoc, arrIdx);
            }
            delta = 0;
          }
          code >>>= 8;
        }
      }
    }
  }
}
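
Both branches decode the same variable-length encoding used by UnInvertedField (inherited from Lucene's DocTermOrds): each document's term numbers are stored as ascending deltas, shifted by TNUM_OFFSET so that an encoded delta of zero can serve as the list terminator, with each delta occupying 7 bits per byte and the high bit flagging continuation. A standalone decoder for the byte-array form, as a sketch; the method name and signature are illustrative, not part of lucene-solr, and the offset is passed in rather than hardcoded (TNUM_OFFSET is defined as 2 in DocTermOrds):

import java.util.ArrayList;
import java.util.List;

// Hypothetical standalone decoder mirroring the inner loop above.
static List<Integer> decodeTermNums(byte[] arr, int pos, int tnumOffset) {
  List<Integer> termNums = new ArrayList<>();
  int tnum = 0;
  for (;;) {
    int delta = 0;
    for (;;) {
      byte b = arr[pos++];
      delta = (delta << 7) | (b & 0x7f); // accumulate 7 payload bits per byte
      if ((b & 0x80) == 0)
        break; // high bit clear: last byte of this delta
    }
    if (delta == 0)
      break; // a zero delta terminates the document's term list
    tnum += delta - tnumOffset; // deltas are offset so a real delta is never zero
    termNums.add(tnum);
  }
  return termNums;
}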