Use of org.apache.lucene.util.PriorityQueue in project elasticsearch by elastic.
The class InternalDateHistogram, method reduceBuckets.
private List<Bucket> reduceBuckets(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
    final PriorityQueue<IteratorAndCurrent> pq = new PriorityQueue<IteratorAndCurrent>(aggregations.size()) {
        @Override
        protected boolean lessThan(IteratorAndCurrent a, IteratorAndCurrent b) {
            return a.current.key < b.current.key;
        }
    };
    for (InternalAggregation aggregation : aggregations) {
        InternalDateHistogram histogram = (InternalDateHistogram) aggregation;
        if (histogram.buckets.isEmpty() == false) {
            pq.add(new IteratorAndCurrent(histogram.buckets.iterator()));
        }
    }
    List<Bucket> reducedBuckets = new ArrayList<>();
    if (pq.size() > 0) {
        // list of buckets coming from different shards that have the same key
        List<Bucket> currentBuckets = new ArrayList<>();
        double key = pq.top().current.key;
        do {
            final IteratorAndCurrent top = pq.top();
            if (top.current.key != key) {
                // the key changes, reduce what we already buffered and reset the buffer for current buckets
                final Bucket reduced = currentBuckets.get(0).reduce(currentBuckets, reduceContext);
                if (reduced.getDocCount() >= minDocCount || reduceContext.isFinalReduce() == false) {
                    reducedBuckets.add(reduced);
                }
                currentBuckets.clear();
                key = top.current.key;
            }
            currentBuckets.add(top.current);
            if (top.iterator.hasNext()) {
                final Bucket next = top.iterator.next();
                assert next.key > top.current.key : "shards must return data sorted by key";
                top.current = next;
                pq.updateTop();
            } else {
                pq.pop();
            }
        } while (pq.size() > 0);
        if (currentBuckets.isEmpty() == false) {
            final Bucket reduced = currentBuckets.get(0).reduce(currentBuckets, reduceContext);
            if (reduced.getDocCount() >= minDocCount || reduceContext.isFinalReduce() == false) {
                reducedBuckets.add(reduced);
            }
        }
    }
    return reducedBuckets;
}
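The pattern above is a classic k-way merge: one heap entry per shard, ordered by the entry's current bucket key, advanced in place with updateTop() rather than a pop() followed by an add(). Here is a minimal, self-contained sketch of the same idiom, merging sorted arrays instead of shard buckets; Cursor is a hypothetical stand-in for IteratorAndCurrent, and only org.apache.lucene.util.PriorityQueue is assumed on the classpath:

import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.util.PriorityQueue;

// Hypothetical holder pairing a sorted array with a read position,
// playing the role of IteratorAndCurrent above.
final class Cursor {
    final long[] values;
    int pos;
    Cursor(long[] values) { this.values = values; }
    long current() { return values[pos]; }
}

public final class SortedStreamMerge {
    // Merge several individually sorted arrays into one sorted list.
    public static List<Long> merge(long[]... streams) {
        PriorityQueue<Cursor> pq = new PriorityQueue<Cursor>(streams.length) {
            @Override
            protected boolean lessThan(Cursor a, Cursor b) {
                return a.current() < b.current();
            }
        };
        for (long[] s : streams) {
            if (s.length > 0) pq.add(new Cursor(s));
        }
        List<Long> out = new ArrayList<>();
        while (pq.size() > 0) {
            Cursor top = pq.top();
            out.add(top.current());
            if (++top.pos < top.values.length) {
                pq.updateTop(); // top was advanced in place; restore heap order
            } else {
                pq.pop(); // this stream is exhausted
            }
        }
        return out;
    }

    public static void main(String[] args) {
        System.out.println(merge(new long[] {1, 4, 7}, new long[] {2, 3, 9}, new long[] {5}));
        // prints [1, 2, 3, 4, 5, 7, 9]
    }
}

updateTop() is the detail that makes this efficient: after mutating the top element in place it sifts only that one entry down, which is cheaper than removing and re-inserting it.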
Use of org.apache.lucene.util.PriorityQueue in project lucene-solr by apache.
The class IndexSortedFacetCollector, method getFacetCounts.
NamedList<Integer> getFacetCounts(Executor executor) throws IOException {
    CompletionService<SegFacet> completionService = new ExecutorCompletionService<>(executor);
    // reuse the translation logic to go from top-level set to per-segment set
    baseSet = docs.getTopFilter();
    final List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
    // The list of pending tasks that aren't immediately submitted
    // TODO: Is there a completion service, or a delegating executor that can
    // limit the number of concurrent tasks submitted to a bigger executor?
    LinkedList<Callable<SegFacet>> pending = new LinkedList<>();
    int threads = nThreads <= 0 ? Integer.MAX_VALUE : nThreads;
    for (final LeafReaderContext leave : leaves) {
        final SegFacet segFacet = new SegFacet(leave);
        Callable<SegFacet> task = () -> {
            segFacet.countTerms();
            return segFacet;
        };
        if (--threads >= 0) {
            completionService.submit(task);
        } else {
            pending.add(task);
        }
    }
    // now merge the per-segment results
    PriorityQueue<SegFacet> queue = new PriorityQueue<SegFacet>(leaves.size()) {
        @Override
        protected boolean lessThan(SegFacet a, SegFacet b) {
            return a.tempBR.compareTo(b.tempBR) < 0;
        }
    };
    boolean hasMissingCount = false;
    int missingCount = 0;
    for (int i = 0, c = leaves.size(); i < c; i++) {
        SegFacet seg = null;
        try {
            Future<SegFacet> future = completionService.take();
            seg = future.get();
            if (!pending.isEmpty()) {
                completionService.submit(pending.removeFirst());
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
        } catch (ExecutionException e) {
            Throwable cause = e.getCause();
            if (cause instanceof RuntimeException) {
                throw (RuntimeException) cause;
            } else {
                throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error in per-segment faceting on field: " + fieldName, cause);
            }
        }
        if (seg.startTermIndex < seg.endTermIndex) {
            if (seg.startTermIndex == -1) {
                hasMissingCount = true;
                missingCount += seg.counts[0];
                seg.pos = 0;
            } else {
                seg.pos = seg.startTermIndex;
            }
            if (seg.pos < seg.endTermIndex && (mincount < 1 || seg.hasAnyCount)) {
                seg.tenum = seg.si.termsEnum();
                seg.tenum.seekExact(seg.pos);
                seg.tempBR = seg.tenum.term();
                queue.add(seg);
            }
        }
    }
    FacetCollector collector;
    if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
        collector = new CountSortedFacetCollector(offset, limit, mincount);
    } else {
        collector = new IndexSortedFacetCollector(offset, limit, mincount);
    }
    BytesRefBuilder val = new BytesRefBuilder();
    while (queue.size() > 0) {
        SegFacet seg = queue.top();
        boolean collect = termFilter == null || termFilter.test(seg.tempBR);
        if (collect) {
            // make a copy: seg.tempBR may be shared across calls.
            val.copyBytes(seg.tempBR);
        }
        int count = 0;
        do {
            if (collect) {
                count += seg.counts[seg.pos - seg.startTermIndex];
            }
            // if mincount > 0 then seg.pos++ can skip ahead to the next non-zero entry.
            do {
                ++seg.pos;
            } while (// stop incrementing before we run off the end
                    (seg.pos < seg.endTermIndex)
                    // move term enum forward with position -- don't care about value
                    && (seg.tenum.next() != null || true)
                    // only skip ahead if mincount > 0
                    && (mincount > 0)
                    // check zero count
                    && (seg.counts[seg.pos - seg.startTermIndex] == 0));
            if (seg.pos >= seg.endTermIndex) {
                queue.pop();
                seg = queue.top();
            } else {
                seg.tempBR = seg.tenum.term();
                seg = queue.updateTop();
            }
        } while (seg != null && val.get().compareTo(seg.tempBR) == 0);
        if (collect) {
            boolean stop = collector.collect(val.get(), count);
            if (stop) {
                break;
            }
        }
    }
    NamedList<Integer> res = collector.getFacetCounts();
    // convert labels to readable form
    FieldType ft = searcher.getSchema().getFieldType(fieldName);
    int sz = res.size();
    for (int i = 0; i < sz; i++) {
        res.setName(i, ft.indexedToReadable(res.getName(i)));
    }
    if (missing) {
        if (!hasMissingCount) {
            missingCount = SimpleFacets.getFieldMissingCount(searcher, docs, fieldName);
        }
        res.add(null, missingCount);
    }
    return res;
}
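Two ideas are worth separating out here. The first is the throttling idiom the TODO comment asks about: submit up to nThreads tasks, park the rest in a pending list, and release one pending task for each completion taken off the service. A minimal, Solr-free sketch of that idiom follows; ThrottledCompletion and runAll are hypothetical names, not Solr API:

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.List;
import java.util.Queue;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executor;
import java.util.concurrent.ExecutorCompletionService;

public final class ThrottledCompletion {
    // Run all tasks on `executor`, keeping at most `maxConcurrent` in flight:
    // each completed task releases one task from the pending queue, mirroring
    // the submit/take/submit dance in getFacetCounts() above.
    public static <T> List<T> runAll(Executor executor, List<Callable<T>> tasks, int maxConcurrent)
            throws InterruptedException, ExecutionException {
        CompletionService<T> cs = new ExecutorCompletionService<>(executor);
        Queue<Callable<T>> pending = new ArrayDeque<>();
        int budget = maxConcurrent <= 0 ? Integer.MAX_VALUE : maxConcurrent;
        for (Callable<T> task : tasks) {
            if (--budget >= 0) {
                cs.submit(task);
            } else {
                pending.add(task);
            }
        }
        List<T> results = new ArrayList<>(tasks.size());
        for (int i = 0; i < tasks.size(); i++) {
            results.add(cs.take().get()); // block until any submitted task finishes
            Callable<T> next = pending.poll();
            if (next != null) {
                cs.submit(next); // keep the pipeline full
            }
        }
        return results;
    }
}

The second idea is the same k-way merge as in the Elasticsearch snippet, applied to term enums: each SegFacet sits in the heap keyed by its current term bytes, and the updateTop()/pop() loop visits every term across all segments in global sort order, summing per-segment counts for equal terms.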
Use of org.apache.lucene.util.PriorityQueue in project lucene-solr by apache.
The class SizeLimitedDistributedMap, method put.
@Override
public void put(String trackingId, byte[] data) throws KeeperException, InterruptedException {
    if (this.size() >= maxSize) {
        // Bring down the size
        List<String> children = zookeeper.getChildren(dir, null, true);
        int cleanupSize = maxSize / 10;
        final PriorityQueue<Long> priorityQueue = new PriorityQueue<Long>(cleanupSize) {
            @Override
            protected boolean lessThan(Long a, Long b) {
                return (a > b);
            }
        };
        for (String child : children) {
            Stat stat = zookeeper.exists(dir + "/" + child, null, true);
            priorityQueue.insertWithOverflow(stat.getMzxid());
        }
        long topElementMzxId = priorityQueue.top();
        for (String child : children) {
            Stat stat = zookeeper.exists(dir + "/" + child, null, true);
            if (stat.getMzxid() <= topElementMzxId) {
                zookeeper.delete(dir + "/" + child, -1, true);
            }
        }
    }
    super.put(trackingId, data);
}
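The inverted lessThan (a > b) combined with insertWithOverflow is a top-k selection: the queue retains the cleanupSize smallest mzxids, and top() is the largest of those, i.e. the cutoff at or below which the oldest ~10% of entries fall. A minimal sketch of the same trick, with KthSmallest and kthSmallest as hypothetical names and only org.apache.lucene.util.PriorityQueue assumed:

import org.apache.lucene.util.PriorityQueue;

public final class KthSmallest {
    // Returns the k-th smallest value, assuming values.length >= k.
    // Because lessThan() is inverted, top() is the LARGEST of the k kept
    // values and insertWithOverflow() evicts it when a smaller value arrives,
    // so the queue converges on the k smallest values seen.
    public static long kthSmallest(long[] values, int k) {
        PriorityQueue<Long> pq = new PriorityQueue<Long>(k) {
            @Override
            protected boolean lessThan(Long a, Long b) {
                return a > b; // inverted: the largest kept value sits on top
            }
        };
        for (long v : values) {
            pq.insertWithOverflow(v);
        }
        return pq.top();
    }

    public static void main(String[] args) {
        System.out.println(kthSmallest(new long[] {9, 1, 7, 3, 5}, 2)); // prints 3
    }
}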
Use of org.apache.lucene.util.PriorityQueue in project jackrabbit by apache.
The class MoreLikeThis, method retrieveInterestingTerms.
/**
* @see #retrieveInterestingTerms(java.io.Reader)
*/
public String[] retrieveInterestingTerms(int docNum) throws IOException {
    List<String> al = new ArrayList<String>(maxQueryTerms);
    PriorityQueue pq = retrieveTerms(docNum);
    Object cur;
    // have to be careful, retrieveTerms returns all words but that's probably not useful to our caller...
    int lim = maxQueryTerms;
    // we just want to return the top words
    while (((cur = pq.pop()) != null) && lim-- > 0) {
        Object[] ar = (Object[]) cur;
        // the 1st entry is the interesting word
        al.add((String) ar[0]);
    }
    return al.toArray(new String[al.size()]);
}
Use of org.apache.lucene.util.PriorityQueue in project jackrabbit by apache.
The class MoreLikeThis, method retrieveInterestingTerms.
/**
* Convenience routine to make it easy to return the most interesting words in a document.
* More advanced users will call {@link #retrieveTerms(java.io.Reader) retrieveTerms()} directly.
* @param r the source document
* @return the most interesting words in the document
*
* @see #retrieveTerms(java.io.Reader)
* @see #setMaxQueryTerms
*/
public String[] retrieveInterestingTerms(Reader r) throws IOException {
    List<String> al = new ArrayList<String>(maxQueryTerms);
    PriorityQueue pq = retrieveTerms(r);
    Object cur;
    // have to be careful, retrieveTerms returns all words but that's probably not useful to our caller...
    int lim = maxQueryTerms;
    // we just want to return the top words
    while (((cur = pq.pop()) != null) && lim-- > 0) {
        Object[] ar = (Object[]) cur;
        // the 1st entry is the interesting word
        al.add((String) ar[0]);
    }
    return al.toArray(new String[al.size()]);
}
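The two overloads above differ only in how the queue is built (retrieveTerms(docNum) vs. retrieveTerms(r)); the drain loop is duplicated verbatim. A hypothetical shared helper could factor it out, keeping the old raw-typed Lucene API in which pop() returns Object:

// Hypothetical helper -- not part of MoreLikeThis; both overloads above
// could delegate their drain loop to it unchanged.
private String[] drainTopWords(PriorityQueue pq) {
    List<String> al = new ArrayList<String>(maxQueryTerms);
    int lim = maxQueryTerms; // we just want the top maxQueryTerms words
    Object cur;
    while (((cur = pq.pop()) != null) && lim-- > 0) {
        Object[] ar = (Object[]) cur;
        al.add((String) ar[0]); // the 1st entry is the interesting word
    }
    return al.toArray(new String[al.size()]);
}

Note one quirk preserved here: the loop pops before checking lim, so once the limit is reached one extra element is popped and discarded.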