Use of org.apache.lucene.util.PriorityQueue in project elasticsearch by elastic.
The class InternalHistogram, method reduceBuckets.
private List<Bucket> reduceBuckets(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
    final PriorityQueue<IteratorAndCurrent> pq = new PriorityQueue<IteratorAndCurrent>(aggregations.size()) {
        @Override
        protected boolean lessThan(IteratorAndCurrent a, IteratorAndCurrent b) {
            return a.current.key < b.current.key;
        }
    };
    for (InternalAggregation aggregation : aggregations) {
        InternalHistogram histogram = (InternalHistogram) aggregation;
        if (histogram.buckets.isEmpty() == false) {
            pq.add(new IteratorAndCurrent(histogram.buckets.iterator()));
        }
    }
    List<Bucket> reducedBuckets = new ArrayList<>();
    if (pq.size() > 0) {
        // list of buckets coming from different shards that have the same key
        List<Bucket> currentBuckets = new ArrayList<>();
        double key = pq.top().current.key;
        do {
            final IteratorAndCurrent top = pq.top();
            if (top.current.key != key) {
                // the key changes, reduce what we already buffered and reset the buffer for current buckets
                final Bucket reduced = currentBuckets.get(0).reduce(currentBuckets, reduceContext);
                if (reduced.getDocCount() >= minDocCount || reduceContext.isFinalReduce() == false) {
                    reducedBuckets.add(reduced);
                }
                currentBuckets.clear();
                key = top.current.key;
            }
            currentBuckets.add(top.current);
            if (top.iterator.hasNext()) {
                final Bucket next = top.iterator.next();
                assert next.key > top.current.key : "shards must return data sorted by key";
                top.current = next;
                pq.updateTop();
            } else {
                pq.pop();
            }
        } while (pq.size() > 0);
        if (currentBuckets.isEmpty() == false) {
            final Bucket reduced = currentBuckets.get(0).reduce(currentBuckets, reduceContext);
            if (reduced.getDocCount() >= minDocCount || reduceContext.isFinalReduce() == false) {
                reducedBuckets.add(reduced);
            }
        }
    }
    return reducedBuckets;
}
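The method above is a k-way merge of per-shard bucket streams: each queue entry holds an iterator plus its current element, lessThan() keeps the smallest key on top, and the winner is advanced by mutating top() in place and then calling updateTop(). The following is a minimal, self-contained sketch of that same pattern over plain integer iterators; the SortedIteratorMerger and IteratorAndCurrent names here are illustrative only and are not part of the Elasticsearch code.

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.lucene.util.PriorityQueue;

class SortedIteratorMerger {

    private static final class IteratorAndCurrent {
        final Iterator<Integer> iterator;
        Integer current;

        IteratorAndCurrent(Iterator<Integer> iterator) {
            this.iterator = iterator;
            this.current = iterator.next(); // caller guarantees a non-empty iterator
        }
    }

    /** Merges several individually sorted iterators into one sorted list. */
    static List<Integer> merge(List<Iterator<Integer>> iterators) {
        PriorityQueue<IteratorAndCurrent> pq = new PriorityQueue<IteratorAndCurrent>(iterators.size()) {
            @Override
            protected boolean lessThan(IteratorAndCurrent a, IteratorAndCurrent b) {
                return a.current < b.current; // keep the smallest current value on top
            }
        };
        for (Iterator<Integer> it : iterators) {
            if (it.hasNext()) {
                pq.add(new IteratorAndCurrent(it));
            }
        }
        List<Integer> merged = new ArrayList<>();
        while (pq.size() > 0) {
            IteratorAndCurrent top = pq.top();
            merged.add(top.current);
            if (top.iterator.hasNext()) {
                top.current = top.iterator.next();
                pq.updateTop(); // re-heapify after mutating the top in place, cheaper than pop() + add()
            } else {
                pq.pop();
            }
        }
        return merged;
    }
}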
Use of org.apache.lucene.util.PriorityQueue in project lucene-solr by apache.
The class FacetFieldProcessor, method findTopSlots.
/** Processes the collected data to find the top slots, and composes them into the response NamedList. */
SimpleOrderedMap<Object> findTopSlots(final int numSlots, final int slotCardinality, IntFunction<Comparable> bucketValFromSlotNumFunc, Function<Comparable, String> fieldQueryValFunc) throws IOException {
    int numBuckets = 0;
    final int off = fcontext.isShard() ? 0 : (int) freq.offset;
    // use max-int instead of max-long to avoid overflow
    long effectiveLimit = Integer.MAX_VALUE;
    if (freq.limit >= 0) {
        effectiveLimit = freq.limit;
        if (fcontext.isShard()) {
            if (freq.overrequest == -1) {
                // add over-request if this is a shard request and if we have a small offset
                // (large offsets will already be gathering many more buckets than needed)
                if (freq.offset < 10) {
                    // default: add 10% plus 4 (to overrequest for very small limits)
                    effectiveLimit = (long) (effectiveLimit * 1.1 + 4);
                }
            } else {
                effectiveLimit += freq.overrequest;
            }
        }
    }
    final int sortMul = freq.sortDirection.getMultiplier();
    int maxTopVals = (int) (effectiveLimit >= 0 ? Math.min(freq.offset + effectiveLimit, Integer.MAX_VALUE - 1) : Integer.MAX_VALUE - 1);
    maxTopVals = Math.min(maxTopVals, slotCardinality);
    final SlotAcc sortAcc = this.sortAcc, indexOrderAcc = this.indexOrderAcc;
    final BiPredicate<Slot, Slot> orderPredicate;
    if (indexOrderAcc != null && indexOrderAcc != sortAcc) {
        orderPredicate = (a, b) -> {
            int cmp = sortAcc.compare(a.slot, b.slot) * sortMul;
            return cmp == 0 ? (indexOrderAcc.compare(a.slot, b.slot) > 0) : cmp < 0;
        };
    } else {
        orderPredicate = (a, b) -> {
            int cmp = sortAcc.compare(a.slot, b.slot) * sortMul;
            return cmp == 0 ? b.slot < a.slot : cmp < 0;
        };
    }
    final PriorityQueue<Slot> queue = new PriorityQueue<Slot>(maxTopVals) {
        @Override
        protected boolean lessThan(Slot a, Slot b) {
            return orderPredicate.test(a, b);
        }
    };
    // note: we avoid object allocation by having a Slot and re-using the 'bottom'
    Slot bottom = null;
    Slot scratchSlot = new Slot();
    for (int slotNum = 0; slotNum < numSlots; slotNum++) {
        // screen out buckets not matching mincount
        if (effectiveMincount > 0) {
            int count = countAcc.getCount(slotNum);
            if (count < effectiveMincount) {
                if (count > 0) {
                    // still increment numBuckets as long as we have some count;
                    // this is for consistency between distrib and non-distrib mode
                    numBuckets++;
                }
                continue;
            }
        }
        numBuckets++;
        if (bottom != null) {
            // scratchSlot is only used to hold this slotNum for the following line
            scratchSlot.slot = slotNum;
            if (orderPredicate.test(bottom, scratchSlot)) {
                bottom.slot = slotNum;
                bottom = queue.updateTop();
            }
        } else if (effectiveLimit > 0) {
            // queue not full
            Slot s = new Slot();
            s.slot = slotNum;
            queue.add(s);
            if (queue.size() >= maxTopVals) {
                bottom = queue.top();
            }
        }
    }
    assert queue.size() <= numBuckets;
    SimpleOrderedMap<Object> res = new SimpleOrderedMap<>();
    if (freq.numBuckets) {
        if (!fcontext.isShard()) {
            res.add("numBuckets", numBuckets);
        } else {
            calculateNumBuckets(res);
        }
    }
    FacetDebugInfo fdebug = fcontext.getDebugInfo();
    if (fdebug != null) {
        fdebug.putInfoItem("numBuckets", (long) numBuckets);
    }
    if (freq.allBuckets) {
        SimpleOrderedMap<Object> allBuckets = new SimpleOrderedMap<>();
        // countAcc.setValues(allBuckets, allBucketsSlot);
        allBuckets.add("count", allBucketsAcc.getSpecialCount());
        // -1 slotNum is unused for SpecialSlotAcc
        allBucketsAcc.setValues(allBuckets, -1);
        // allBuckets currently doesn't execute sub-facets (because it doesn't change the domain?)
        res.add("allBuckets", allBuckets);
    }
    if (freq.missing) {
        // TODO: it would be more efficient to build up a missing DocSet if we need it here anyway
        SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
        fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null, false, null);
        res.add("missing", missingBucket);
    }
    // if we are deep paging, we don't have to order the highest "offset" counts
    int collectCount = Math.max(0, queue.size() - off);
    assert collectCount <= maxTopVals;
    int[] sortedSlots = new int[collectCount];
    for (int i = collectCount - 1; i >= 0; i--) {
        sortedSlots[i] = queue.pop().slot;
    }
    ArrayList<SimpleOrderedMap> bucketList = new ArrayList<>(collectCount);
    res.add("buckets", bucketList);
    boolean needFilter = deferredAggs != null || freq.getSubFacets().size() > 0;
    for (int slotNum : sortedSlots) {
        SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
        Comparable val = bucketValFromSlotNumFunc.apply(slotNum);
        bucket.add("val", val);
        Query filter = needFilter ? sf.getType().getFieldQuery(null, sf, fieldQueryValFunc.apply(val)) : null;
        fillBucket(bucket, countAcc.getCount(slotNum), slotNum, null, filter);
        bucketList.add(bucket);
    }
    return res;
}
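findTopSlots keeps at most maxTopVals candidates in its queue and, once the queue is full, overwrites the current bottom element and calls updateTop() rather than allocating a new Slot per candidate. Below is a minimal sketch of that bounded top-N pattern reduced to an array of counts; the TopNCounts and Entry names and the topN > 0 assumption are illustrative and not taken from FacetFieldProcessor.

import org.apache.lucene.util.PriorityQueue;

class TopNCounts {

    private static final class Entry {
        int slot;
        long count;
    }

    /** Returns the slot numbers with the topN largest counts, sorted by descending count. Assumes topN > 0. */
    static int[] topSlots(long[] counts, int topN) {
        PriorityQueue<Entry> queue = new PriorityQueue<Entry>(topN) {
            @Override
            protected boolean lessThan(Entry a, Entry b) {
                return a.count < b.count; // the smallest kept count sits on top and is evicted first
            }
        };
        Entry bottom = null;
        for (int slot = 0; slot < counts.length; slot++) {
            if (bottom != null) {
                // queue is full: only replace the current bottom if this candidate beats it
                if (counts[slot] > bottom.count) {
                    bottom.slot = slot;
                    bottom.count = counts[slot];
                    bottom = queue.updateTop(); // re-heapify and track the new bottom
                }
            } else {
                // queue not full: allocate a new entry
                Entry e = new Entry();
                e.slot = slot;
                e.count = counts[slot];
                queue.add(e);
                if (queue.size() >= topN) {
                    bottom = queue.top();
                }
            }
        }
        int[] result = new int[queue.size()];
        for (int i = result.length - 1; i >= 0; i--) {
            result[i] = queue.pop().slot; // pop() yields ascending counts, so fill from the end
        }
        return result;
    }
}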
Use of org.apache.lucene.util.PriorityQueue in project lucene-solr by apache.
The class MultiSorter, method sort.
/** Does a merge sort of the leaves of the incoming readers, returning a {@link DocMap} to map each leaf's
 *  documents into the merged segment. The documents for each incoming leaf reader must already be sorted by
 *  the same sort! Returns null if the merge sort is not needed (the segments are already in index sort order). */
static MergeState.DocMap[] sort(Sort sort, List<CodecReader> readers) throws IOException {
    // TODO: optimize if only 1 reader is incoming, though that's a rare case
    SortField[] fields = sort.getSort();
    final ComparableProvider[][] comparables = new ComparableProvider[fields.length][];
    for (int i = 0; i < fields.length; i++) {
        comparables[i] = getComparableProviders(readers, fields[i]);
    }
    int leafCount = readers.size();
    PriorityQueue<LeafAndDocID> queue = new PriorityQueue<LeafAndDocID>(leafCount) {
        @Override
        public boolean lessThan(LeafAndDocID a, LeafAndDocID b) {
            for (int i = 0; i < comparables.length; i++) {
                int cmp = a.values[i].compareTo(b.values[i]);
                if (cmp != 0) {
                    return cmp < 0;
                }
            }
            // tie-break by docID natural order:
            if (a.readerIndex != b.readerIndex) {
                return a.readerIndex < b.readerIndex;
            } else {
                return a.docID < b.docID;
            }
        }
    };
    PackedLongValues.Builder[] builders = new PackedLongValues.Builder[leafCount];
    for (int i = 0; i < leafCount; i++) {
        CodecReader reader = readers.get(i);
        LeafAndDocID leaf = new LeafAndDocID(i, reader.getLiveDocs(), reader.maxDoc(), comparables.length);
        for (int j = 0; j < comparables.length; j++) {
            leaf.values[j] = comparables[j][i].getComparable(leaf.docID);
            assert leaf.values[j] != null;
        }
        queue.add(leaf);
        builders[i] = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
    }
    // merge sort:
    int mappedDocID = 0;
    int lastReaderIndex = 0;
    boolean isSorted = true;
    while (queue.size() != 0) {
        LeafAndDocID top = queue.top();
        if (lastReaderIndex > top.readerIndex) {
            // merge sort is needed
            isSorted = false;
        }
        lastReaderIndex = top.readerIndex;
        builders[top.readerIndex].add(mappedDocID);
        if (top.liveDocs == null || top.liveDocs.get(top.docID)) {
            mappedDocID++;
        }
        top.docID++;
        if (top.docID < top.maxDoc) {
            for (int j = 0; j < comparables.length; j++) {
                top.values[j] = comparables[j][top.readerIndex].getComparable(top.docID);
                assert top.values[j] != null;
            }
            queue.updateTop();
        } else {
            queue.pop();
        }
    }
    if (isSorted) {
        return null;
    }
    MergeState.DocMap[] docMaps = new MergeState.DocMap[leafCount];
    for (int i = 0; i < leafCount; i++) {
        final PackedLongValues remapped = builders[i].build();
        final Bits liveDocs = readers.get(i).getLiveDocs();
        docMaps[i] = new MergeState.DocMap() {
            @Override
            public int get(int docID) {
                if (liveDocs == null || liveDocs.get(docID)) {
                    return (int) remapped.get(docID);
                } else {
                    return -1;
                }
            }
        };
    }
    return docMaps;
}
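Besides the priority queue, sort() stores each leaf's doc-ID remapping in a PackedLongValues built with monotonicBuilder(PackedInts.COMPACT), which packs a roughly increasing sequence of longs into minimal memory. The short sketch below shows that builder/reader round trip in isolation; the class and method names are assumptions for illustration, not part of MultiSorter.

import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;

class DocRemapSketch {

    /** Builds a packed, memory-compact remapping table and reads one entry back. */
    static int remap(int[] newDocIDs, int oldDocID) {
        // PackedInts.COMPACT favors the smallest storage over decode speed,
        // the same choice MultiSorter.sort() makes for its per-leaf builders
        PackedLongValues.Builder builder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
        for (int newDocID : newDocIDs) {
            builder.add(newDocID); // appended in old-doc-ID order, mirroring builders[top.readerIndex].add(mappedDocID)
        }
        PackedLongValues remapped = builder.build();
        return (int) remapped.get(oldDocID);
    }
}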
Use of org.apache.lucene.util.PriorityQueue in project lucene-solr by apache.
The class CommonTermsQueryTest, method testRandomIndex.
public void testRandomIndex() throws IOException {
    Directory dir = newDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer);
    createRandomIndex(atLeast(50), w, random().nextLong());
    w.forceMerge(1);
    DirectoryReader reader = w.getReader();
    LeafReader wrapper = getOnlyLeafReader(reader);
    String field = "body";
    Terms terms = wrapper.terms(field);
    PriorityQueue<TermAndFreq> lowFreqQueue = new PriorityQueue<CommonTermsQueryTest.TermAndFreq>(5) {
        @Override
        protected boolean lessThan(TermAndFreq a, TermAndFreq b) {
            return a.freq > b.freq;
        }
    };
    PriorityQueue<TermAndFreq> highFreqQueue = new PriorityQueue<CommonTermsQueryTest.TermAndFreq>(5) {
        @Override
        protected boolean lessThan(TermAndFreq a, TermAndFreq b) {
            return a.freq < b.freq;
        }
    };
    try {
        TermsEnum iterator = terms.iterator();
        while (iterator.next() != null) {
            if (highFreqQueue.size() < 5) {
                highFreqQueue.add(new TermAndFreq(BytesRef.deepCopyOf(iterator.term()), iterator.docFreq()));
                lowFreqQueue.add(new TermAndFreq(BytesRef.deepCopyOf(iterator.term()), iterator.docFreq()));
            } else {
                if (highFreqQueue.top().freq < iterator.docFreq()) {
                    highFreqQueue.top().freq = iterator.docFreq();
                    highFreqQueue.top().term = BytesRef.deepCopyOf(iterator.term());
                    highFreqQueue.updateTop();
                }
                if (lowFreqQueue.top().freq > iterator.docFreq()) {
                    lowFreqQueue.top().freq = iterator.docFreq();
                    lowFreqQueue.top().term = BytesRef.deepCopyOf(iterator.term());
                    lowFreqQueue.updateTop();
                }
            }
        }
        int lowFreq = lowFreqQueue.top().freq;
        int highFreq = highFreqQueue.top().freq;
        assumeTrue("unlucky index", highFreq - 1 > lowFreq);
        List<TermAndFreq> highTerms = queueToList(highFreqQueue);
        List<TermAndFreq> lowTerms = queueToList(lowFreqQueue);
        IndexSearcher searcher = newSearcher(reader);
        Occur lowFreqOccur = randomOccur(random());
        BooleanQuery.Builder verifyQuery = new BooleanQuery.Builder();
        CommonTermsQuery cq = new CommonTermsQuery(randomOccur(random()), lowFreqOccur, highFreq - 1);
        for (TermAndFreq termAndFreq : lowTerms) {
            cq.add(new Term(field, termAndFreq.term));
            verifyQuery.add(new BooleanClause(new TermQuery(new Term(field, termAndFreq.term)), lowFreqOccur));
        }
        for (TermAndFreq termAndFreq : highTerms) {
            cq.add(new Term(field, termAndFreq.term));
        }
        TopDocs cqSearch = searcher.search(cq, reader.maxDoc());
        TopDocs verifySearch = searcher.search(verifyQuery.build(), reader.maxDoc());
        assertEquals(verifySearch.totalHits, cqSearch.totalHits);
        Set<Integer> hits = new HashSet<>();
        for (ScoreDoc doc : verifySearch.scoreDocs) {
            hits.add(doc.doc);
        }
        for (ScoreDoc doc : cqSearch.scoreDocs) {
            assertTrue(hits.remove(doc.doc));
        }
        assertTrue(hits.isEmpty());
        /*
         * need to force merge here since QueryUtils adds checks based
         * on leaf readers, which have different statistics than the top
         * level reader if we have more than one segment. This could
         * result in a different query / results.
         */
        w.forceMerge(1);
        DirectoryReader reader2 = w.getReader();
        QueryUtils.check(random(), cq, newSearcher(reader2));
        reader2.close();
    } finally {
        IOUtils.close(reader, w, dir, analyzer);
    }
}
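The test keeps the five highest and five lowest document frequencies with two size-5 queues whose lessThan orderings are mirror images of each other, manually mutating top() and calling updateTop() once a queue is full. A minimal sketch of the same idea follows, using insertWithOverflow() to handle the "queue full" case instead of mutating the top entry; the TermCount holder and the collect() method are hypothetical and not part of CommonTermsQueryTest.

import org.apache.lucene.util.PriorityQueue;

class HighLowFreqSketch {

    static final class TermCount {
        final String term;
        final int freq;

        TermCount(String term, int freq) {
            this.term = term;
            this.freq = freq;
        }
    }

    static void collect(Iterable<TermCount> terms, int n) {
        // keeps the n highest frequencies: the smallest of the kept entries sits on top
        PriorityQueue<TermCount> high = new PriorityQueue<TermCount>(n) {
            @Override
            protected boolean lessThan(TermCount a, TermCount b) {
                return a.freq < b.freq;
            }
        };
        // keeps the n lowest frequencies: the largest of the kept entries sits on top
        PriorityQueue<TermCount> low = new PriorityQueue<TermCount>(n) {
            @Override
            protected boolean lessThan(TermCount a, TermCount b) {
                return a.freq > b.freq;
            }
        };
        for (TermCount tc : terms) {
            high.insertWithOverflow(tc); // evicts the current lowest of the "high" set when full
            low.insertWithOverflow(tc);  // evicts the current highest of the "low" set when full
        }
        System.out.println("lowest kept high freq: " + (high.size() > 0 ? high.top().freq : -1));
        System.out.println("highest kept low freq: " + (low.size() > 0 ? low.top().freq : -1));
    }
}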