Use of org.apache.lucene.util.InPlaceMergeSorter in project lucene-solr by apache.
From the class TestDrillSideways, method getTopNOrds:
private int[] getTopNOrds(final int[] counts, final String[] values, int topN) {
  final int[] ids = new int[counts.length];
  for (int i = 0; i < ids.length; i++) {
    ids[i] = i;
  }
  // Naive (on purpose, to reduce bug in tester/gold):
  // sort all ids, then return top N slice:
  new InPlaceMergeSorter() {
    @Override
    protected void swap(int i, int j) {
      int id = ids[i];
      ids[i] = ids[j];
      ids[j] = id;
    }

    @Override
    protected int compare(int i, int j) {
      int counti = counts[ids[i]];
      int countj = counts[ids[j]];
      // Sort by count descending...
      if (counti > countj) {
        return -1;
      } else if (counti < countj) {
        return 1;
      } else {
        // ... then by label ascending:
        return new BytesRef(values[ids[i]]).compareTo(new BytesRef(values[ids[j]]));
      }
    }
  }.sort(0, ids.length);
  if (topN > ids.length) {
    topN = ids.length;
  }
  int numSet = topN;
  for (int i = 0; i < topN; i++) {
    if (counts[ids[i]] == 0) {
      numSet = i;
      break;
    }
  }
  int[] topNIDs = new int[numSet];
  System.arraycopy(ids, 0, topNIDs, 0, topNIDs.length);
  return topNIDs;
}
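To see the sorter's contract outside the test harness, here is a minimal, self-contained sketch of the same idiom: sorting an id permutation by count descending, breaking ties by label ascending. The class name and sample data are invented for illustration.

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.InPlaceMergeSorter;

public class TopNOrdsDemo {
  public static void main(String[] args) {
    final int[] counts = {3, 0, 3, 7};
    final String[] values = {"banana", "dropped", "apple", "cherry"};
    final int[] ids = {0, 1, 2, 3};
    new InPlaceMergeSorter() {
      @Override
      protected void swap(int i, int j) {
        int tmp = ids[i];
        ids[i] = ids[j];
        ids[j] = tmp;
      }

      @Override
      protected int compare(int i, int j) {
        // Count descending, then label ascending, as in getTopNOrds:
        int cmp = Integer.compare(counts[ids[j]], counts[ids[i]]);
        return cmp != 0 ? cmp
            : new BytesRef(values[ids[i]]).compareTo(new BytesRef(values[ids[j]]));
      }
    }.sort(0, ids.length);
    // Prints ids in order 3, 2, 0, 1 (cherry:7, apple:3, banana:3, dropped:0).
    for (int id : ids) {
      System.out.println(id + " " + values[id] + ":" + counts[id]);
    }
  }
}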
Use of org.apache.lucene.util.InPlaceMergeSorter in project lucene-solr by apache.
From the class UnifiedHighlighter, method copyAndSortFieldsWithMaxPassages:
private void copyAndSortFieldsWithMaxPassages(String[] fieldsIn, int[] maxPassagesIn, final String[] fields, final int[] maxPassages) {
  System.arraycopy(fieldsIn, 0, fields, 0, fieldsIn.length);
  System.arraycopy(maxPassagesIn, 0, maxPassages, 0, maxPassagesIn.length);
  new InPlaceMergeSorter() {
    @Override
    protected void swap(int i, int j) {
      String tmp = fields[i];
      fields[i] = fields[j];
      fields[j] = tmp;
      int tmp2 = maxPassages[i];
      maxPassages[i] = maxPassages[j];
      maxPassages[j] = tmp2;
    }

    @Override
    protected int compare(int i, int j) {
      return fields[i].compareTo(fields[j]);
    }
  }.sort(0, fields.length);
}
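The helper exists to co-sort two parallel arrays so that fields[i] stays aligned with its maxPassages[i] after sorting by field name. A hedged usage sketch of the in/out contract (the method is private to UnifiedHighlighter, so this would only compile inside that class; the sample values are invented):

String[] fieldsIn = {"title", "body", "author"};
int[] maxPassagesIn = {1, 3, 2};
String[] fields = new String[fieldsIn.length];
int[] maxPassages = new int[maxPassagesIn.length];
copyAndSortFieldsWithMaxPassages(fieldsIn, maxPassagesIn, fields, maxPassages);
// fields is now {"author", "body", "title"} and maxPassages is {2, 3, 1},
// so each field still carries its own passage limit.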
Use of org.apache.lucene.util.InPlaceMergeSorter in project lucene-solr by apache.
From the class NumericDocValuesFieldUpdates, method iterator:
@Override
public Iterator iterator() {
  final PagedMutable docs = this.docs;
  final PagedGrowableWriter values = this.values;
  new InPlaceMergeSorter() {
    @Override
    protected void swap(int i, int j) {
      long tmpDoc = docs.get(j);
      docs.set(j, docs.get(i));
      docs.set(i, tmpDoc);
      long tmpVal = values.get(j);
      values.set(j, values.get(i));
      values.set(i, tmpVal);
    }

    @Override
    protected int compare(int i, int j) {
      int x = (int) docs.get(i);
      int y = (int) docs.get(j);
      return (x < y) ? -1 : ((x == y) ? 0 : 1);
    }
  }.sort(0, size);
  return new Iterator(size, values, docs);
}
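InPlaceMergeSorter is a merge sort and therefore stable, so entries with equal doc ids keep their insertion order after sorting; that ordering is presumably what lets a later update to the same document take precedence when the iterator is consumed. A standalone sketch of the same tandem-sort pattern on plain long[] arrays, standing in for the PagedMutable/PagedGrowableWriter pair (class name and data invented):

import org.apache.lucene.util.InPlaceMergeSorter;

public class TandemSortDemo {
  public static void main(String[] args) {
    final long[] docs = {5, 2, 5, 1};
    final long[] values = {10, 20, 30, 40};
    new InPlaceMergeSorter() {
      @Override
      protected void swap(int i, int j) {
        long tmp = docs[i]; docs[i] = docs[j]; docs[j] = tmp;
        tmp = values[i]; values[i] = values[j]; values[j] = tmp;
      }

      @Override
      protected int compare(int i, int j) {
        return Long.compare(docs[i], docs[j]);
      }
    }.sort(0, docs.length);
    // docs is now {1, 2, 5, 5}; stability keeps the two doc-5 entries in their
    // original order (values 10 then 30), so the latest value for a given
    // document remains last.
    for (int k = 0; k < docs.length; k++) {
      System.out.println(docs[k] + " -> " + values[k]);
    }
  }
}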
Use of org.apache.lucene.util.InPlaceMergeSorter in project elasticsearch by elastic.
From the class BlendedTermQuery, method blend:
protected void blend(final TermContext[] contexts, int maxDoc, IndexReader reader) throws IOException {
  if (contexts.length <= 1) {
    return;
  }
  int max = 0;
  long minSumTTF = Long.MAX_VALUE;
  for (int i = 0; i < contexts.length; i++) {
    TermContext ctx = contexts[i];
    int df = ctx.docFreq();
    // we use the max here since it's the only "true" estimation we can make:
    // at least max(df) documents have that term. Sums or averages don't seem
    // to have a significant meaning here.
    // TODO: Maybe it could also make sense to assume independent distributions of documents and eg. have:
    // df = df1 + df2 - (df1 * df2 / maxDoc)?
    max = Math.max(df, max);
    if (minSumTTF != -1 && ctx.totalTermFreq() != -1) {
      // we need to find out the minimum sumTTF to adjust the statistics,
      // otherwise the statistics don't match
      minSumTTF = Math.min(minSumTTF, reader.getSumTotalTermFreq(terms[i].field()));
    } else {
      minSumTTF = -1;
    }
  }
  if (minSumTTF != -1 && maxDoc > minSumTTF) {
    maxDoc = (int) minSumTTF;
  }
  if (max == 0) {
    // we are done, that term doesn't exist at all
    return;
  }
  long sumTTF = minSumTTF == -1 ? -1 : 0;
  final int[] tieBreak = new int[contexts.length];
  for (int i = 0; i < tieBreak.length; ++i) {
    tieBreak[i] = i;
  }
  new InPlaceMergeSorter() {
    @Override
    protected void swap(int i, int j) {
      final int tmp = tieBreak[i];
      tieBreak[i] = tieBreak[j];
      tieBreak[j] = tmp;
    }

    @Override
    protected int compare(int i, int j) {
      return Integer.compare(contexts[tieBreak[j]].docFreq(), contexts[tieBreak[i]].docFreq());
    }
  }.sort(0, tieBreak.length);
  int prev = contexts[tieBreak[0]].docFreq();
  int actualDf = Math.min(maxDoc, max);
  assert actualDf >= 0 : "DF must be >= 0";
  // here we try to add a little bias towards the more popular (more frequent)
  // fields; that acts as a tie breaker
  for (int i : tieBreak) {
    TermContext ctx = contexts[i];
    if (ctx.docFreq() == 0) {
      break;
    }
    final int current = ctx.docFreq();
    if (prev > current) {
      actualDf++;
    }
    contexts[i] = ctx = adjustDF(reader.getContext(), ctx, Math.min(maxDoc, actualDf));
    prev = current;
    if (sumTTF >= 0 && ctx.totalTermFreq() >= 0) {
      sumTTF += ctx.totalTermFreq();
    } else {
      // omit once TF is omitted anywhere!
      sumTTF = -1;
    }
  }
  sumTTF = Math.min(sumTTF, minSumTTF);
  for (int i = 0; i < contexts.length; i++) {
    int df = contexts[i].docFreq();
    if (df == 0) {
      continue;
    }
    // the blended sumTTF can't be greater than the sumTTF on the field
    final long fixedTTF = sumTTF == -1 ? -1 : sumTTF;
    contexts[i] = adjustTTF(reader.getContext(), contexts[i], fixedTTF);
  }
}
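Note that the sorter never moves the contexts themselves: it orders the tieBreak permutation instead, and it reverses the operands of Integer.compare to sort by document frequency descending. A minimal sketch of that indirection idiom (class name and data invented):

import org.apache.lucene.util.InPlaceMergeSorter;

public class DescendingPermutationDemo {
  public static void main(String[] args) {
    final int[] docFreqs = {3, 9, 1};
    final int[] order = {0, 1, 2};
    new InPlaceMergeSorter() {
      @Override
      protected void swap(int i, int j) {
        int tmp = order[i]; order[i] = order[j]; order[j] = tmp;
      }

      @Override
      protected int compare(int i, int j) {
        // Operands swapped relative to an ascending sort => descending order:
        return Integer.compare(docFreqs[order[j]], docFreqs[order[i]]);
      }
    }.sort(0, order.length);
    // order is now {1, 0, 2} (docFreqs 9, 3, 1); docFreqs itself is untouched.
    for (int idx : order) {
      System.out.println(idx + " -> " + docFreqs[idx]);
    }
  }
}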
Use of org.apache.lucene.util.InPlaceMergeSorter in project lucene-solr by apache.
From the class Passage, method sort:
/** @lucene.internal */
public void sort() {
  final int[] starts = matchStarts;
  final int[] ends = matchEnds;
  final BytesRef[] terms = matchTerms;
  new InPlaceMergeSorter() {
    @Override
    protected void swap(int i, int j) {
      int temp = starts[i];
      starts[i] = starts[j];
      starts[j] = temp;
      temp = ends[i];
      ends[i] = ends[j];
      ends[j] = temp;
      BytesRef tempTerm = terms[i];
      terms[i] = terms[j];
      terms[j] = tempTerm;
    }

    @Override
    protected int compare(int i, int j) {
      return Integer.compare(starts[i], starts[j]);
    }
  }.sort(0, numMatches);
}
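The call is sort(0, numMatches) rather than sort(0, starts.length) because the match buffers can be over-allocated; only the live prefix is ordered. A small illustration of sorting just a prefix (class name and data invented):

import org.apache.lucene.util.InPlaceMergeSorter;

public class PrefixSortDemo {
  public static void main(String[] args) {
    final int[] starts = {17, 4, 9, -1, -1}; // only the first 3 slots are live
    final int numMatches = 3;
    new InPlaceMergeSorter() {
      @Override
      protected void swap(int i, int j) {
        int tmp = starts[i]; starts[i] = starts[j]; starts[j] = tmp;
      }

      @Override
      protected int compare(int i, int j) {
        return Integer.compare(starts[i], starts[j]);
      }
    }.sort(0, numMatches);
    // starts is now {4, 9, 17, -1, -1}; the unused tail is never inspected.
    System.out.println(java.util.Arrays.toString(starts));
  }
}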