Search in sources :

Example 1 with PriorityQueue

use of org.apache.lucene.util.PriorityQueue in project elasticsearch by elastic.

the class InternalDateHistogram method reduceBuckets.

/**
 * Merges the buckets of several histograms into one list, combining buckets that share a key.
 *
 * <p>Each non-empty input histogram contributes an iterator over its buckets. A priority queue ordered
 * by the key of each iterator's current bucket yields buckets in ascending key order; consecutive
 * buckets with the same key are buffered and reduced together.
 *
 * @param aggregations  the histograms to merge; each element is cast to {@code InternalDateHistogram}
 * @param reduceContext context handed to {@code Bucket.reduce}; its {@code isFinalReduce()} flag decides
 *                      whether {@code minDocCount} filtering applies
 * @return the reduced buckets, in the order their keys were taken from the queue
 */
private List<Bucket> reduceBuckets(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
    final PriorityQueue<IteratorAndCurrent> queue = new PriorityQueue<IteratorAndCurrent>(aggregations.size()) {

        @Override
        protected boolean lessThan(IteratorAndCurrent left, IteratorAndCurrent right) {
            return left.current.key < right.current.key;
        }
    };
    // Only histograms that actually hold buckets take part in the merge.
    for (InternalAggregation aggregation : aggregations) {
        final InternalDateHistogram shardHistogram = (InternalDateHistogram) aggregation;
        if (shardHistogram.buckets.isEmpty()) {
            continue;
        }
        queue.add(new IteratorAndCurrent(shardHistogram.buckets.iterator()));
    }

    final List<Bucket> merged = new ArrayList<>();
    if (queue.size() == 0) {
        return merged;
    }

    // Buckets from different inputs that share the key currently being processed.
    final List<Bucket> sameKeyBuckets = new ArrayList<>();
    // NOTE(review): the running key is held in a double; if Bucket.key is a long this comparison goes
    // through a long-to-double conversion -- confirm that is intended.
    double activeKey = queue.top().current.key;
    while (queue.size() > 0) {
        final IteratorAndCurrent head = queue.top();
        if (head.current.key != activeKey) {
            // A new key begins: reduce the buffered group and start buffering afresh.
            final Bucket combined = sameKeyBuckets.get(0).reduce(sameKeyBuckets, reduceContext);
            if (combined.getDocCount() >= minDocCount || reduceContext.isFinalReduce() == false) {
                merged.add(combined);
            }
            sameKeyBuckets.clear();
            activeKey = head.current.key;
        }
        sameKeyBuckets.add(head.current);

        if (head.iterator.hasNext() == false) {
            // This input is exhausted; drop it from the merge.
            queue.pop();
        } else {
            final Bucket following = head.iterator.next();
            assert following.key > head.current.key : "shards must return data sorted by key";
            head.current = following;
            // The head's key changed, so let the queue restore its ordering.
            queue.updateTop();
        }
    }

    // Flush the group that was still buffered when the queue ran dry.
    if (sameKeyBuckets.isEmpty() == false) {
        final Bucket combined = sameKeyBuckets.get(0).reduce(sameKeyBuckets, reduceContext);
        if (combined.getDocCount() >= minDocCount || reduceContext.isFinalReduce() == false) {
            merged.add(combined);
        }
    }
    return merged;
}
Also used : InternalAggregation(org.elasticsearch.search.aggregations.InternalAggregation) ArrayList(java.util.ArrayList) PriorityQueue(org.apache.lucene.util.PriorityQueue)

Example 2 with PriorityQueue

use of org.apache.lucene.util.PriorityQueue in project lucene-solr by apache.

the class IndexSortedFacetCollector method getFacetCounts.

/**
 * Counts facet terms per index segment on the given executor and merges the per-segment counts
 * into a single result list.
 *
 * <p>One counting task is created per leaf reader. At most {@code nThreads} tasks are submitted up
 * front (all of them when {@code nThreads <= 0}); each remaining task is submitted as an earlier one
 * completes. Segments that have terms to contribute are then merged through a priority queue ordered
 * by each segment's current term, summing counts for equal terms and handing the totals to a
 * collector chosen by the {@code sort} setting.
 *
 * @param executor runs the per-segment counting tasks
 * @return the collected facet counts with labels converted to readable form, followed by an entry
 *         with a {@code null} name holding the missing count when {@code missing} is set
 * @throws IOException declared by this method; presumably raised by the term-enum operations -- confirm
 * @throws SolrException with {@code SERVER_ERROR} if the thread is interrupted while waiting, or if a
 *         task fails with a cause that is not a {@code RuntimeException} (runtime causes are rethrown as is)
 */
NamedList<Integer> getFacetCounts(Executor executor) throws IOException {
    CompletionService<SegFacet> completionService = new ExecutorCompletionService<>(executor);
    // reuse the translation logic to go from top level set to per-segment set
    baseSet = docs.getTopFilter();
    final List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
    // The list of pending tasks that aren't immediately submitted
    // TODO: Is there a completion service, or a delegating executor that can
    // limit the number of concurrent tasks submitted to a bigger executor?
    LinkedList<Callable<SegFacet>> pending = new LinkedList<>();
    // Budget of tasks that may be submitted immediately; a non-positive nThreads means "no limit".
    int threads = nThreads <= 0 ? Integer.MAX_VALUE : nThreads;
    for (final LeafReaderContext leave : leaves) {
        final SegFacet segFacet = new SegFacet(leave);
        Callable<SegFacet> task = () -> {
            segFacet.countTerms();
            return segFacet;
        };
        if (--threads >= 0) {
            completionService.submit(task);
        } else {
            pending.add(task);
        }
    }
    // now merge the per-segment results
    // The queue is ordered by each segment's current term (tempBR), smallest first.
    PriorityQueue<SegFacet> queue = new PriorityQueue<SegFacet>(leaves.size()) {

        @Override
        protected boolean lessThan(SegFacet a, SegFacet b) {
            return a.tempBR.compareTo(b.tempBR) < 0;
        }
    };
    boolean hasMissingCount = false;
    int missingCount = 0;
    // Exactly one task exists per leaf, so take() is called once per leaf.
    for (int i = 0, c = leaves.size(); i < c; i++) {
        SegFacet seg = null;
        try {
            Future<SegFacet> future = completionService.take();
            seg = future.get();
            // A task just finished, so hand the next deferred task (if any) to the executor.
            if (!pending.isEmpty()) {
                completionService.submit(pending.removeFirst());
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag before reporting the failure.
            Thread.currentThread().interrupt();
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
        } catch (ExecutionException e) {
            Throwable cause = e.getCause();
            if (cause instanceof RuntimeException) {
                throw (RuntimeException) cause;
            } else {
                throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error in per-segment faceting on field: " + fieldName, cause);
            }
        }
        if (seg.startTermIndex < seg.endTermIndex) {
            if (seg.startTermIndex == -1) {
                // NOTE(review): a start index of -1 appears to mean counts[0] holds the
                // "missing value" count for this segment -- confirm against SegFacet.
                hasMissingCount = true;
                missingCount += seg.counts[0];
                seg.pos = 0;
            } else {
                seg.pos = seg.startTermIndex;
            }
            // Only segments that still have a term to offer (and, when mincount >= 1, some
            // non-zero count) take part in the merge.
            if (seg.pos < seg.endTermIndex && (mincount < 1 || seg.hasAnyCount)) {
                seg.tenum = seg.si.termsEnum();
                seg.tenum.seekExact(seg.pos);
                seg.tempBR = seg.tenum.term();
                queue.add(seg);
            }
        }
    }
    FacetCollector collector;
    if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
        collector = new CountSortedFacetCollector(offset, limit, mincount);
    } else {
        collector = new IndexSortedFacetCollector(offset, limit, mincount);
    }
    BytesRefBuilder val = new BytesRefBuilder();
    // Each outer iteration handles one distinct term: the smallest term currently in the queue.
    while (queue.size() > 0) {
        SegFacet seg = queue.top();
        boolean collect = termFilter == null || termFilter.test(seg.tempBR);
        // Copy the term into val: the segment's term enum is advanced below while val is still
        // needed for comparison and collection, and the BytesRef may be shared across calls.
        if (collect) {
            val.copyBytes(seg.tempBR);
        }
        int count = 0;
        // Sum the counts of every segment whose current term equals val, advancing each one.
        do {
            if (collect) {
                count += seg.counts[seg.pos - seg.startTermIndex];
            }
            // if mincount>0 then seg.pos++ can skip ahead to the next non-zero entry.
            // Note: "tenum.next() != null || true" is always true; it is evaluated only for its
            // side effect of moving the term enum forward in step with seg.pos.
            do {
                ++seg.pos;
            } while (//stop incrementing before we run off the end
            (seg.pos < seg.endTermIndex) && //move term enum forward with position -- dont care about value 
            (seg.tenum.next() != null || true) && //only skip ahead if mincount > 0
            (mincount > 0) && //check zero count
            (seg.counts[seg.pos - seg.startTermIndex] == 0));
            if (seg.pos >= seg.endTermIndex) {
                // Segment exhausted: remove it and continue with the new top (null if the queue is empty).
                queue.pop();
                seg = queue.top();
            } else {
                // Segment moved to a new term: refresh its key and re-order the queue.
                seg.tempBR = seg.tenum.term();
                seg = queue.updateTop();
            }
        } while (seg != null && val.get().compareTo(seg.tempBR) == 0);
        if (collect) {
            // The collector returns true once it needs no further terms.
            boolean stop = collector.collect(val.get(), count);
            if (stop)
                break;
        }
    }
    NamedList<Integer> res = collector.getFacetCounts();
    // convert labels to readable form    
    FieldType ft = searcher.getSchema().getFieldType(fieldName);
    int sz = res.size();
    for (int i = 0; i < sz; i++) {
        res.setName(i, ft.indexedToReadable(res.getName(i)));
    }
    if (missing) {
        // If no segment supplied a missing count during the merge, compute it directly.
        if (!hasMissingCount) {
            missingCount = SimpleFacets.getFieldMissingCount(searcher, docs, fieldName);
        }
        res.add(null, missingCount);
    }
    return res;
}
Also used : ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) Callable(java.util.concurrent.Callable) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) ExecutionException(java.util.concurrent.ExecutionException) SolrException(org.apache.solr.common.SolrException) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) PriorityQueue(org.apache.lucene.util.PriorityQueue) LinkedList(java.util.LinkedList) FieldType(org.apache.solr.schema.FieldType)

Example 3 with PriorityQueue

use of org.apache.lucene.util.PriorityQueue in project lucene-solr by apache.

the class SizeLimitedDistributedMap method put.

/**
 * Stores {@code data} under {@code trackingId}, first evicting old entries when the map has
 * reached {@code maxSize}.
 *
 * <p>Eviction targets about a tenth of {@code maxSize} (at least one entry): the children with the
 * smallest modification zxids ({@code Stat.getMzxid()}) are deleted before the new entry is written.
 *
 * @param trackingId key of the entry to store
 * @param data       payload to store
 * @throws KeeperException      if a ZooKeeper operation fails
 * @throws InterruptedException if the calling thread is interrupted during a ZooKeeper operation
 */
@Override
public void put(String trackingId, byte[] data) throws KeeperException, InterruptedException {
    if (this.size() >= maxSize) {
        // Bring down the size
        List<String> children = zookeeper.getChildren(dir, null, true);
        // Always make room for at least one entry: with maxSize < 10 the integer division yields 0,
        // which would leave the queue empty and nothing to evict.
        int cleanupSize = Math.max(1, maxSize / 10);
        // The comparison is reversed on purpose: the queue then retains the cleanupSize smallest
        // mzxids and top() is the largest of them, i.e. the eviction threshold.
        final PriorityQueue<Long> priorityQueue = new PriorityQueue<Long>(cleanupSize) {

            @Override
            protected boolean lessThan(Long a, Long b) {
                return a > b;
            }
        };
        for (String child : children) {
            Stat stat = zookeeper.exists(dir + "/" + child, null, true);
            // A child listed above may have been removed in the meantime; skip it.
            if (stat != null) {
                priorityQueue.insertWithOverflow(stat.getMzxid());
            }
        }
        // top() is null when nothing was inserted (no children, or all of them vanished).
        final Long topElementMzxId = priorityQueue.top();
        if (topElementMzxId != null) {
            for (String child : children) {
                Stat stat = zookeeper.exists(dir + "/" + child, null, true);
                if (stat != null && stat.getMzxid() <= topElementMzxId) {
                    zookeeper.delete(dir + "/" + child, -1, true);
                }
            }
        }
    }
    super.put(trackingId, data);
}
Also used : Stat(org.apache.zookeeper.data.Stat) PriorityQueue(org.apache.lucene.util.PriorityQueue)

Example 4 with PriorityQueue

use of org.apache.lucene.util.PriorityQueue in project jackrabbit by apache.

the class MoreLikeThis method retrieveInterestingTerms.

/**
 * Returns the most interesting words of the document with the given number, at most
 * {@code maxQueryTerms} of them, in the order the term queue yields them.
 *
 * @param docNum number of the document to analyse
 * @return the most interesting words in the document
 * @see #retrieveInterestingTerms(java.io.Reader)
 */
public String[] retrieveInterestingTerms(int docNum) throws IOException {
    List<String> words = new ArrayList<String>(maxQueryTerms);
    PriorityQueue termQueue = retrieveTerms(docNum);
    // retrieveTerms hands back every word, so cap the result at maxQueryTerms.
    int remaining = maxQueryTerms;
    for (Object entry = termQueue.pop(); entry != null && remaining > 0; entry = termQueue.pop()) {
        remaining--;
        // Each queue entry is an array whose first slot is the word itself.
        Object[] fields = (Object[]) entry;
        words.add((String) fields[0]);
    }
    return words.toArray(new String[words.size()]);
}
Also used : ArrayList(java.util.ArrayList) PriorityQueue(org.apache.lucene.util.PriorityQueue)

Example 5 with PriorityQueue

use of org.apache.lucene.util.PriorityQueue in project jackrabbit by apache.

the class MoreLikeThis method retrieveInterestingTerms.

/**
 * Convenience routine that returns the most interesting words of a document, at most
 * {@code maxQueryTerms} of them, in the order the term queue yields them.
 * Callers needing more control can use {@link #retrieveTerms(java.io.Reader) retrieveTerms()} directly.
 *
 * @param r the source document
 * @return the most interesting words in the document
 *
 * @see #retrieveTerms(java.io.Reader)
 * @see #setMaxQueryTerms
 */
public String[] retrieveInterestingTerms(Reader r) throws IOException {
    List<String> words = new ArrayList<String>(maxQueryTerms);
    PriorityQueue termQueue = retrieveTerms(r);
    // retrieveTerms hands back every word, so cap the result at maxQueryTerms.
    int remaining = maxQueryTerms;
    for (Object entry = termQueue.pop(); entry != null && remaining > 0; entry = termQueue.pop()) {
        remaining--;
        // Each queue entry is an array whose first slot is the word itself.
        Object[] fields = (Object[]) entry;
        words.add((String) fields[0]);
    }
    return words.toArray(new String[words.size()]);
}
Also used : ArrayList(java.util.ArrayList) PriorityQueue(org.apache.lucene.util.PriorityQueue)

Aggregations

PriorityQueue (org.apache.lucene.util.PriorityQueue)10 ArrayList (java.util.ArrayList)6 InternalAggregation (org.elasticsearch.search.aggregations.InternalAggregation)2 Date (java.util.Date)1 HashSet (java.util.HashSet)1 LinkedList (java.util.LinkedList)1 Callable (java.util.concurrent.Callable)1 ExecutionException (java.util.concurrent.ExecutionException)1 ExecutorCompletionService (java.util.concurrent.ExecutorCompletionService)1 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)1 DirectoryReader (org.apache.lucene.index.DirectoryReader)1 LeafReader (org.apache.lucene.index.LeafReader)1 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)1 DocMap (org.apache.lucene.index.MergeState.DocMap)1 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)1 Term (org.apache.lucene.index.Term)1 Terms (org.apache.lucene.index.Terms)1 TermsEnum (org.apache.lucene.index.TermsEnum)1 BooleanClause (org.apache.lucene.search.BooleanClause)1 Occur (org.apache.lucene.search.BooleanClause.Occur)1