Search in sources :

Example 76 with LeafReaderContext

use of org.apache.lucene.index.LeafReaderContext in project lucene-solr by apache.

the class IndexSortedFacetCollector method getFacetCounts.

/**
 * Computes facet counts for {@code fieldName} by counting terms per index segment (leaf)
 * on the given executor and then merging the per-segment results in term order.
 *
 * <p>One task is created per leaf; each task calls {@code SegFacet.countTerms()}. At most
 * {@code nThreads} tasks are submitted up front (no limit when {@code nThreads <= 0}); the
 * rest wait in a local list and one of them is submitted every time a finished task is taken.
 *
 * @param executor executor the per-segment counting tasks are submitted to
 * @return the collected counts with names converted through
 *         {@code FieldType.indexedToReadable}; when {@code missing} is set, a final entry
 *         with a {@code null} name carries the missing count
 * @throws IOException declared for the index access performed while merging
 * @throws SolrException (SERVER_ERROR) if the calling thread is interrupted while waiting, or
 *         if a task fails with a cause that is not a {@link RuntimeException}; a
 *         {@link RuntimeException} cause is rethrown as-is
 */
NamedList<Integer> getFacetCounts(Executor executor) throws IOException {
    CompletionService<SegFacet> completionService = new ExecutorCompletionService<>(executor);
    // reuse the translation logic to go from top level set to per-segment set
    baseSet = docs.getTopFilter();
    final List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
    // The list of pending tasks that aren't immediately submitted
    // TODO: Is there a completion service, or a delegating executor that can
    // limit the number of concurrent tasks submitted to a bigger executor?
    LinkedList<Callable<SegFacet>> pending = new LinkedList<>();
    // nThreads <= 0 means "no limit": every task is submitted immediately
    int threads = nThreads <= 0 ? Integer.MAX_VALUE : nThreads;
    for (final LeafReaderContext leave : leaves) {
        final SegFacet segFacet = new SegFacet(leave);
        Callable<SegFacet> task = () -> {
            segFacet.countTerms();
            return segFacet;
        };
        // submit the first `threads` tasks right away, park the others in `pending`
        if (--threads >= 0) {
            completionService.submit(task);
        } else {
            pending.add(task);
        }
    }
    // now merge the per-segment results
    // queue is ordered by each segment's current term (tempBR), smallest on top
    PriorityQueue<SegFacet> queue = new PriorityQueue<SegFacet>(leaves.size()) {

        @Override
        protected boolean lessThan(SegFacet a, SegFacet b) {
            return a.tempBR.compareTo(b.tempBR) < 0;
        }
    };
    boolean hasMissingCount = false;
    int missingCount = 0;
    // exactly one result is expected per leaf, in completion order
    for (int i = 0, c = leaves.size(); i < c; i++) {
        SegFacet seg = null;
        try {
            Future<SegFacet> future = completionService.take();
            seg = future.get();
            // a task just finished, so one parked task can be handed to the executor
            if (!pending.isEmpty()) {
                completionService.submit(pending.removeFirst());
            }
        } catch (InterruptedException e) {
            // restore the interrupt flag before converting to a SolrException
            Thread.currentThread().interrupt();
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
        } catch (ExecutionException e) {
            Throwable cause = e.getCause();
            if (cause instanceof RuntimeException) {
                throw (RuntimeException) cause;
            } else {
                throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error in per-segment faceting on field: " + fieldName, cause);
            }
        }
        // only segments with a non-empty [startTermIndex, endTermIndex) range contribute
        if (seg.startTermIndex < seg.endTermIndex) {
            if (seg.startTermIndex == -1) {
                // a start index of -1 means counts[0] holds this segment's "missing" count;
                // accumulate it and start the term walk at position 0
                hasMissingCount = true;
                missingCount += seg.counts[0];
                seg.pos = 0;
            } else {
                seg.pos = seg.startTermIndex;
            }
            // position the segment's terms enum on its first term and enqueue it for the merge
            if (seg.pos < seg.endTermIndex && (mincount < 1 || seg.hasAnyCount)) {
                seg.tenum = seg.si.termsEnum();
                seg.tenum.seekExact(seg.pos);
                seg.tempBR = seg.tenum.term();
                queue.add(seg);
            }
        }
    }
    // count-sorted output for the count / legacy-count sort values, index-sorted otherwise
    FacetCollector collector;
    if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
        collector = new CountSortedFacetCollector(offset, limit, mincount);
    } else {
        collector = new IndexSortedFacetCollector(offset, limit, mincount);
    }
    BytesRefBuilder val = new BytesRefBuilder();
    // k-way merge: each outer iteration handles one distinct term (the smallest one queued)
    while (queue.size() > 0) {
        SegFacet seg = queue.top();
        boolean collect = termFilter == null || termFilter.test(seg.tempBR);
        // Copy the term into val: seg.tempBR is reassigned from seg.tenum.term() while the
        // enum is advanced below, and (per the original note) the BytesRef may be shared
        // across calls.
        if (collect) {
            val.copyBytes(seg.tempBR);
        }
        int count = 0;
        // sum this term's count over every segment whose current term equals val
        do {
            if (collect) {
                count += seg.counts[seg.pos - seg.startTermIndex];
            }
            // if mincount>0 then seg.pos++ can skip ahead to the next non-zero entry.
            // NOTE: `seg.tenum.next() != null || true` is always true; it is there only for
            // the side effect of advancing the terms enum in step with seg.pos.
            do {
                ++seg.pos;
            } while (//stop incrementing before we run off the end
            (seg.pos < seg.endTermIndex) && //move term enum forward with position -- dont care about value 
            (seg.tenum.next() != null || true) && //only skip ahead if mincount > 0
            (mincount > 0) && //check zero count
            (seg.counts[seg.pos - seg.startTermIndex] == 0));
            if (seg.pos >= seg.endTermIndex) {
                // this segment is exhausted: drop it and look at the next smallest
                queue.pop();
                seg = queue.top();
            } else {
                // segment moved to a new term: re-sort it within the queue
                seg.tempBR = seg.tenum.term();
                seg = queue.updateTop();
            }
        } while (seg != null && val.get().compareTo(seg.tempBR) == 0);
        if (collect) {
            // the collector returns true when no further terms are needed
            boolean stop = collector.collect(val.get(), count);
            if (stop)
                break;
        }
    }
    NamedList<Integer> res = collector.getFacetCounts();
    // convert labels to readable form    
    FieldType ft = searcher.getSchema().getFieldType(fieldName);
    int sz = res.size();
    for (int i = 0; i < sz; i++) {
        res.setName(i, ft.indexedToReadable(res.getName(i)));
    }
    if (missing) {
        // no segment reported a missing count during the merge, so compute it separately
        if (!hasMissingCount) {
            missingCount = SimpleFacets.getFieldMissingCount(searcher, docs, fieldName);
        }
        res.add(null, missingCount);
    }
    return res;
}
Also used : ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) Callable(java.util.concurrent.Callable) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) ExecutionException(java.util.concurrent.ExecutionException) SolrException(org.apache.solr.common.SolrException) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) PriorityQueue(org.apache.lucene.util.PriorityQueue) LinkedList(java.util.LinkedList) FieldType(org.apache.solr.schema.FieldType)

Example 77 with LeafReaderContext

use of org.apache.lucene.index.LeafReaderContext in project lucene-solr by apache.

the class ReverseOrdFieldSource method getValues.

/**
 * Builds per-document values that return the reversed ordinal of {@code field}:
 * {@code valueCount - ord - 1} for a document that has a value, and {@code valueCount}
 * for one that does not.
 *
 * <p>Ordinals are read from a top-level (composite) view of the index, so segment-local
 * doc ids are shifted by the leaf's {@code docBase} before lookup.
 *
 * @param context       function context; a {@code SolrIndexSearcher} stored under the key
 *                      {@code "searcher"} is used when present
 * @param readerContext the leaf whose documents will be looked up
 * @return values backed by the top-level sorted doc values of {@code field}
 * @throws IOException if the underlying doc values cannot be read
 */
@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
    final int docBase = readerContext.docBase;
    final Object searcherObj = context.get("searcher");

    final LeafReader topLevel;
    if (!(searcherObj instanceof SolrIndexSearcher)) {
        // no Solr searcher available: wrap whatever top-level reader owns this leaf
        IndexReader root = ReaderUtil.getTopLevelContext(readerContext).reader();
        topLevel = SlowCompositeReaderWrapper.wrap(root);
    } else {
        SolrIndexSearcher solrSearcher = (SolrIndexSearcher) searcherObj;
        SchemaField schemaField = solrSearcher.getSchema().getFieldOrNull(field);
        boolean singleValuedNumericWithoutDocValues = schemaField != null
            && !schemaField.hasDocValues()
            && !schemaField.multiValued()
            && schemaField.getType().getNumberType() != null;
        if (singleValuedNumericWithoutDocValues) {
            // it's a single-valued numeric field: we must currently create insanity :(
            List<LeafReaderContext> leafContexts = solrSearcher.getIndexReader().leaves();
            LeafReader[] wrapped = new LeafReader[leafContexts.size()];
            for (int i = 0; i < wrapped.length; i++) {
                wrapped[i] = Insanity.wrapInsanity(leafContexts.get(i).reader(), field);
            }
            topLevel = SlowCompositeReaderWrapper.wrap(new MultiReader(wrapped));
        } else {
            // reuse ordinalmap
            topLevel = solrSearcher.getSlowAtomicReader();
        }
    }

    // if it's e.g. tokenized/multivalued, emulate old behavior of single-valued fc
    final SortedDocValues ords = SortedSetSelector.wrap(DocValues.getSortedSet(topLevel, field), SortedSetSelector.Type.MIN);
    final int valueCount = ords.getValueCount();

    return new IntDocValues(this) {

        @Override
        public int intVal(int doc) throws IOException {
            // translate the segment-local id into the top-level doc id space
            final int globalDoc = doc + docBase;
            if (globalDoc > ords.docID()) {
                ords.advance(globalDoc);
            }
            return globalDoc == ords.docID() ? (valueCount - ords.ordValue() - 1) : valueCount;
        }
    };
}
Also used : SchemaField(org.apache.solr.schema.SchemaField) LeafReader(org.apache.lucene.index.LeafReader) MultiReader(org.apache.lucene.index.MultiReader) IndexReader(org.apache.lucene.index.IndexReader) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) IntDocValues(org.apache.lucene.queries.function.docvalues.IntDocValues) SortedDocValues(org.apache.lucene.index.SortedDocValues)

Example 78 with LeafReaderContext

use of org.apache.lucene.index.LeafReaderContext in project lucene-solr by apache.

the class FacetProcessor method collect.

/**
 * Feeds every document of {@code docs} to the per-segment {@code collect(int, int)} overload,
 * switching segments via {@code setNextReader} whenever a doc id falls beyond the current leaf.
 *
 * <p>The leaf iterator only moves forward, so this relies on {@code docs} yielding doc ids in
 * non-decreasing order.
 *
 * @param docs the top-level documents to collect
 * @param slot the slot passed through to the per-document collect call
 * @return the number of documents visited
 * @throws IOException if switching readers or collecting a document fails
 * @throws RuntimeException ("INTERNAL FACET ERROR") if a doc id lies beyond the last leaf
 */
int collect(DocSet docs, int slot) throws IOException {
    int count = 0;
    SolrIndexSearcher searcher = fcontext.searcher;
    final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
    final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
    LeafReaderContext ctx = null;
    int segBase = 0;
    int segMax;
    // exclusive upper bound (top-level doc id) of the current leaf; 0 forces the first advance
    int adjustedMax = 0;
    for (DocIterator docsIt = docs.iterator(); docsIt.hasNext(); ) {
        final int doc = docsIt.nextDoc();
        if (doc >= adjustedMax) {
            // skip forward to the leaf that contains this doc
            do {
                // Iterator.next() never returns null; it throws NoSuchElementException when
                // exhausted, so exhaustion has to be detected with hasNext().
                if (!ctxIt.hasNext()) {
                    // should be impossible
                    throw new RuntimeException("INTERNAL FACET ERROR");
                }
                ctx = ctxIt.next();
                segBase = ctx.docBase;
                segMax = ctx.reader().maxDoc();
                adjustedMax = segBase + segMax;
            } while (doc >= adjustedMax);
            assert doc >= ctx.docBase;
            setNextReader(ctx);
        }
        count++;
        // per-seg collectors
        collect(doc - segBase, slot);
    }
    return count;
}
Also used : DocIterator(org.apache.solr.search.DocIterator) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher)

Example 79 with LeafReaderContext

use of org.apache.lucene.index.LeafReaderContext in project lucene-solr by apache.

the class FacetFieldProcessorByHashDV method collectDocs.

/**
 * Walks the base doc set segment by segment and hands each document's value to
 * {@code collectValFirstPhase}.
 *
 * <p>With a {@code TermOrdCalc} the value is the document's global ordinal from sorted doc
 * values; otherwise it is the document's numeric doc value. Documents without a value are
 * skipped.
 *
 * @throws IOException if doc values cannot be read
 */
private void collectDocs() throws IOException {
    if (!(calc instanceof TermOrdCalc)) {
        // Numeric:
        // TODO support SortedNumericDocValues
        DocSetUtil.collectSortedDocSet(fcontext.base, fcontext.searcher.getIndexReader(), new SimpleCollector() {

            // numeric values of the current segment; assigned in doSetNextReader
            NumericDocValues segNumbers = null;

            @Override
            public boolean needsScores() {
                return false;
            }

            @Override
            protected void doSetNextReader(LeafReaderContext ctx) throws IOException {
                setNextReaderFirstPhase(ctx);
                segNumbers = DocValues.getNumeric(ctx.reader(), sf.getName());
            }

            @Override
            public void collect(int segDoc) throws IOException {
                if (segNumbers.docID() < segDoc) {
                    segNumbers.advance(segDoc);
                }
                if (segNumbers.docID() != segDoc) {
                    // no value for this document
                    return;
                }
                collectValFirstPhase(segDoc, segNumbers.longValue());
            }
        });
        return;
    }

    // Strings
    // TODO support SortedSetDocValues
    SortedDocValues globalDocValues = FieldUtil.getSortedDocValues(fcontext.qcontext, sf, null);
    final TermOrdCalc ordCalc = (TermOrdCalc) calc;
    ordCalc.lookupOrdFunction = ord -> {
        try {
            return globalDocValues.lookupOrd(ord);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    };
    DocSetUtil.collectSortedDocSet(fcontext.base, fcontext.searcher.getIndexReader(), new SimpleCollector() {

        // doc values of the current segment; starts out as the global instance
        SortedDocValues segOrds = globalDocValues;

        // maps a segment ordinal to its global ordinal; identity until a mapping is installed
        LongValues segToGlobal = LongValues.IDENTITY;

        @Override
        public boolean needsScores() {
            return false;
        }

        @Override
        protected void doSetNextReader(LeafReaderContext ctx) throws IOException {
            setNextReaderFirstPhase(ctx);
            if (!(globalDocValues instanceof MultiDocValues.MultiSortedDocValues)) {
                return;
            }
            // multi-segment case: switch to this leaf's values and ordinal mapping
            MultiDocValues.MultiSortedDocValues multi = (MultiDocValues.MultiSortedDocValues) globalDocValues;
            segOrds = multi.values[ctx.ord];
            segToGlobal = multi.mapping.getGlobalOrds(ctx.ord);
        }

        @Override
        public void collect(int segDoc) throws IOException {
            if (segOrds.docID() < segDoc) {
                segOrds.advance(segDoc);
            }
            if (segOrds.docID() != segDoc) {
                // no value for this document
                return;
            }
            long globalOrd = segToGlobal.get(segOrds.ordValue());
            collectValFirstPhase(segDoc, globalOrd);
        }
    });
}
Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) IOException(java.io.IOException) MultiDocValues(org.apache.lucene.index.MultiDocValues) SortedDocValues(org.apache.lucene.index.SortedDocValues) SimpleCollector(org.apache.lucene.search.SimpleCollector) LongValues(org.apache.lucene.util.LongValues) LeafReaderContext(org.apache.lucene.index.LeafReaderContext)

Example 80 with LeafReaderContext

use of org.apache.lucene.index.LeafReaderContext in project lucene-solr by apache.

the class UnInvertedField method collectDocsGeneric.

// called from FieldFacetProcessor
// TODO: do a callback version that can be specialized!
/**
 * Counts, for every document in the processor's base set, the terms whose number falls in
 * {@code [startTermIndex, startTermIndex + nTerms)}, and forwards each (document, term slot)
 * pair to the processor's first-phase collection.
 *
 * <p>Two passes are made:
 * <ol>
 *   <li>Each entry of {@code bigTerms} inside the term range is intersected with the base set
 *       through the searcher and collected as a whole doc set.</li>
 *   <li>If {@code termInstances > 0}, the per-document term list is decoded from
 *       {@code index} / {@code tnums}. Term numbers are stored as variable-length deltas in
 *       7-bit groups (bit {@code 0x80} marks continuation), each offset by
 *       {@code TNUM_OFFSET}, and a zero delta ends the list.</li>
 * </ol>
 *
 * <p>The leaf iterator only moves forward, so the second pass relies on the base set yielding
 * doc ids in non-decreasing order.
 *
 * @param processor supplies the term range, base doc set and accumulators, and receives the
 *                  first-phase collect callbacks
 * @throws IOException if the searcher or the processor fails while reading the index
 * @throws RuntimeException ("INTERNAL FACET ERROR") if a doc id lies beyond the last leaf
 */
public void collectDocsGeneric(FacetFieldProcessorByArrayUIF processor) throws IOException {
    use.incrementAndGet();
    int startTermIndex = processor.startTermIndex;
    int endTermIndex = processor.endTermIndex;
    int nTerms = processor.nTerms;
    DocSet docs = processor.fcontext.base;
    final CountSlotAcc countAcc = processor.countAcc;
    for (TopTerm tt : bigTerms.values()) {
        if (tt.termNum >= startTermIndex && tt.termNum < endTermIndex) {
            // handle the biggest terms
            try (DocSet intersection = searcher.getDocSet(tt.termQuery, docs)) {
                int collected = processor.collectFirstPhase(intersection, tt.termNum - startTermIndex);
                countAcc.incrementCount(tt.termNum - startTermIndex, collected);
            }
        }
    }
    if (termInstances > 0) {
        final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
        final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
        LeafReaderContext ctx = null;
        int segBase = 0;
        int segMax;
        // exclusive upper bound (top-level doc id) of the current leaf; 0 forces the first advance
        int adjustedMax = 0;
        // TODO: handle facet.prefix here!!!
        DocIterator iter = docs.iterator();
        while (iter.hasNext()) {
            int doc = iter.nextDoc();
            if (doc >= adjustedMax) {
                // skip forward to the leaf that contains this doc
                do {
                    // Iterator.next() never returns null; it throws NoSuchElementException
                    // when exhausted, so exhaustion has to be detected with hasNext().
                    if (!ctxIt.hasNext()) {
                        // should be impossible
                        throw new RuntimeException("INTERNAL FACET ERROR");
                    }
                    ctx = ctxIt.next();
                    segBase = ctx.docBase;
                    segMax = ctx.reader().maxDoc();
                    adjustedMax = segBase + segMax;
                } while (doc >= adjustedMax);
                assert doc >= ctx.docBase;
                processor.setNextReaderFirstPhase(ctx);
            }
            int segDoc = doc - segBase;
            int code = index[doc];
            if ((code & 0xff) == 1) {
                // low byte == 1: the term list lives in a byte array; the upper 24 bits of
                // code are the start offset and bits 16..23 of doc select the array
                int pos = code >>> 8;
                int whichArray = (doc >>> 16) & 0xff;
                byte[] arr = tnums[whichArray];
                int tnum = 0;
                for (; ; ) {
                    // decode one variable-length delta
                    int delta = 0;
                    for (; ; ) {
                        byte b = arr[pos++];
                        delta = (delta << 7) | (b & 0x7f);
                        if ((b & 0x80) == 0)
                            break;
                    }
                    // a zero delta terminates the list
                    if (delta == 0)
                        break;
                    tnum += delta - TNUM_OFFSET;
                    int arrIdx = tnum - startTermIndex;
                    // term numbers only grow: skip those below the range, stop once past it
                    if (arrIdx < 0)
                        continue;
                    if (arrIdx >= nTerms)
                        break;
                    countAcc.incrementCount(arrIdx, 1);
                    processor.collectFirstPhase(segDoc, arrIdx);
                }
            } else {
                // otherwise the encoded bytes are packed into code itself, consumed from the
                // lowest byte upwards
                int tnum = 0;
                int delta = 0;
                for (; ; ) {
                    delta = (delta << 7) | (code & 0x7f);
                    if ((code & 0x80) == 0) {
                        // end of one delta; zero means end of list
                        if (delta == 0)
                            break;
                        tnum += delta - TNUM_OFFSET;
                        int arrIdx = tnum - startTermIndex;
                        if (arrIdx >= 0) {
                            if (arrIdx >= nTerms)
                                break;
                            countAcc.incrementCount(arrIdx, 1);
                            processor.collectFirstPhase(segDoc, arrIdx);
                        }
                        delta = 0;
                    }
                    code >>>= 8;
                }
            }
        }
    }
}
Also used : DocIterator(org.apache.solr.search.DocIterator) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) BitDocSet(org.apache.solr.search.BitDocSet) DocSet(org.apache.solr.search.DocSet)

Aggregations

LeafReaderContext (org.apache.lucene.index.LeafReaderContext)326 LeafReader (org.apache.lucene.index.LeafReader)70 Document (org.apache.lucene.document.Document)68 BytesRef (org.apache.lucene.util.BytesRef)66 IOException (java.io.IOException)65 Directory (org.apache.lucene.store.Directory)57 Term (org.apache.lucene.index.Term)50 IndexSearcher (org.apache.lucene.search.IndexSearcher)47 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)45 DirectoryReader (org.apache.lucene.index.DirectoryReader)44 Bits (org.apache.lucene.util.Bits)44 IndexReader (org.apache.lucene.index.IndexReader)43 NumericDocValues (org.apache.lucene.index.NumericDocValues)40 ArrayList (java.util.ArrayList)39 Terms (org.apache.lucene.index.Terms)35 Weight (org.apache.lucene.search.Weight)35 DocIdSetIterator (org.apache.lucene.search.DocIdSetIterator)34 Scorer (org.apache.lucene.search.Scorer)34 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)33 TermsEnum (org.apache.lucene.index.TermsEnum)31