Search in sources :

Example 1 with InPlaceMergeSorter

use of org.apache.lucene.util.InPlaceMergeSorter in project lucene-solr by apache.

the class TestDrillSideways method getTopNOrds.

/**
 * Computes the expected "top N" ordinals for the tester.
 *
 * <p>Every index of {@code counts} is ranked by count (highest first), with ties
 * resolved by comparing the corresponding entries of {@code values} as
 * {@link BytesRef}s (lowest first). The result is the leading slice of that
 * ranking, cut off at {@code topN} entries or at the first entry whose count is
 * zero, whichever comes first.
 *
 * @param counts per-ordinal counts
 * @param values per-ordinal labels, consulted only when two counts are equal
 * @param topN   maximum number of ordinals to return
 * @return the ranked ordinals with a non-zero count, at most {@code topN} of them
 */
private int[] getTopNOrds(final int[] counts, final String[] values, int topN) {
    final int[] ids = new int[counts.length];
    for (int ord = 0; ord < ids.length; ord++) {
        ids[ord] = ord;
    }

    // Deliberately naive (keeps the gold implementation easy to trust):
    // rank every id first, slice afterwards.
    final InPlaceMergeSorter ranker = new InPlaceMergeSorter() {

        @Override
        protected int compare(int i, int j) {
            final int left = ids[i];
            final int right = ids[j];
            // Primary key: count, descending.
            if (counts[left] > counts[right]) {
                return -1;
            }
            if (counts[left] < counts[right]) {
                return 1;
            }
            // Secondary key: label, ascending.
            return new BytesRef(values[left]).compareTo(new BytesRef(values[right]));
        }

        @Override
        protected void swap(int i, int j) {
            final int held = ids[i];
            ids[i] = ids[j];
            ids[j] = held;
        }
    };
    ranker.sort(0, ids.length);

    // Never look past the end of the ranking.
    final int limit = Math.min(topN, ids.length);

    // Zero counts sort last, so the first zero marks the end of the useful slice.
    int numSet = limit;
    for (int pos = 0; pos < limit; pos++) {
        if (counts[ids[pos]] == 0) {
            numSet = pos;
            break;
        }
    }

    final int[] topNIDs = new int[numSet];
    System.arraycopy(ids, 0, topNIDs, 0, numSet);
    return topNIDs;
}
Also used : InPlaceMergeSorter(org.apache.lucene.util.InPlaceMergeSorter) BytesRef(org.apache.lucene.util.BytesRef)

Example 2 with InPlaceMergeSorter

use of org.apache.lucene.util.InPlaceMergeSorter in project lucene-solr by apache.

the class UnifiedHighlighter method copyAndSortFieldsWithMaxPassages.

/**
 * Copies the input arrays into the output arrays and then sorts the outputs
 * together, ordered by field name ({@link String#compareTo}).
 *
 * <p>{@code fields} and {@code maxPassages} are treated as parallel arrays: whenever
 * two field names trade places, the values at the same indices of
 * {@code maxPassages} trade places too.
 *
 * @param fieldsIn      source field names (copied from, not sorted)
 * @param maxPassagesIn source per-field values (copied from, not sorted)
 * @param fields        destination for the sorted field names
 * @param maxPassages   destination for the per-field values, reordered with {@code fields}
 */
private void copyAndSortFieldsWithMaxPassages(String[] fieldsIn, int[] maxPassagesIn, final String[] fields, final int[] maxPassages) {
    System.arraycopy(fieldsIn, 0, fields, 0, fieldsIn.length);
    System.arraycopy(maxPassagesIn, 0, maxPassages, 0, maxPassagesIn.length);

    final InPlaceMergeSorter parallelSorter = new InPlaceMergeSorter() {

        @Override
        protected int compare(int i, int j) {
            final String left = fields[i];
            final String right = fields[j];
            return left.compareTo(right);
        }

        @Override
        protected void swap(int i, int j) {
            // Field names first ...
            final String heldField = fields[i];
            fields[i] = fields[j];
            fields[j] = heldField;
            // ... then the value that travels with each name.
            final int heldPassages = maxPassages[i];
            maxPassages[i] = maxPassages[j];
            maxPassages[j] = heldPassages;
        }
    };
    parallelSorter.sort(0, fields.length);
}
Also used : InPlaceMergeSorter(org.apache.lucene.util.InPlaceMergeSorter)

Example 3 with InPlaceMergeSorter

use of org.apache.lucene.util.InPlaceMergeSorter in project lucene-solr by apache.

the class NumericDocValuesFieldUpdates method iterator.

/**
 * Sorts the first {@code size} buffered entries by document id (ascending, compared
 * as {@code int}) and returns a new {@code Iterator} built from the sorted data.
 *
 * <p>The sort is performed in place on this instance's {@code docs} and
 * {@code values}; entries of the two structures are always swapped together.
 */
@Override
public Iterator iterator() {
    final PagedMutable docIds = this.docs;
    final PagedGrowableWriter docValues = this.values;

    final InPlaceMergeSorter byDocId = new InPlaceMergeSorter() {

        @Override
        protected int compare(int i, int j) {
            final int left = (int) docIds.get(i);
            final int right = (int) docIds.get(j);
            return Integer.compare(left, right);
        }

        @Override
        protected void swap(int i, int j) {
            // Exchange the doc ids ...
            final long heldDoc = docIds.get(j);
            docIds.set(j, docIds.get(i));
            docIds.set(i, heldDoc);
            // ... and the values stored at the same positions.
            final long heldValue = docValues.get(j);
            docValues.set(j, docValues.get(i));
            docValues.set(i, heldValue);
        }
    };
    byDocId.sort(0, size);

    return new Iterator(size, docValues, docIds);
}
Also used : PagedMutable(org.apache.lucene.util.packed.PagedMutable) InPlaceMergeSorter(org.apache.lucene.util.InPlaceMergeSorter) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) PagedGrowableWriter(org.apache.lucene.util.packed.PagedGrowableWriter)

Example 4 with InPlaceMergeSorter

use of org.apache.lucene.util.InPlaceMergeSorter in project elasticsearch by elastic.

the class BlendedTermQuery method blend.

/**
 * Rewrites the statistics of the given term contexts in place so that they share
 * blended document-frequency and total-term-frequency values.
 *
 * <p>Outline of what the code below does:
 * <ol>
 *   <li>Returns immediately when there is at most one context.</li>
 *   <li>Finds the largest {@code docFreq()} across all contexts and, as long as every
 *       context reports a total term frequency, the smallest
 *       {@code reader.getSumTotalTermFreq(field)} over the fields of {@code terms}.</li>
 *   <li>Caps {@code maxDoc} at that smallest sum and returns if no context has any
 *       documents.</li>
 *   <li>Visits the contexts in descending {@code docFreq()} order and replaces each with
 *       the result of {@code adjustDF}, accumulating the resulting total term
 *       frequencies.</li>
 *   <li>Replaces every context with a non-zero {@code docFreq()} with the result of
 *       {@code adjustTTF}, using the accumulated sum capped by the smallest field
 *       sum.</li>
 * </ol>
 *
 * <p>Throughout, {@code -1} is used as the "total term frequency unavailable" marker.
 *
 * @param contexts term contexts to adjust; elements are replaced in place. Presumably
 *                 parallel to the {@code terms} field (both are indexed with the same
 *                 {@code i}) -- confirm against the enclosing class.
 * @param maxDoc   upper bound applied to the blended document frequency
 * @param reader   reader used for per-field statistics and for {@code getContext()}
 * @throws IOException declared by this method; which call raises it is not visible here
 */
protected void blend(final TermContext[] contexts, int maxDoc, IndexReader reader) throws IOException {
    if (contexts.length <= 1) {
        return;
    }
    int max = 0;
    long minSumTTF = Long.MAX_VALUE;
    for (int i = 0; i < contexts.length; i++) {
        TermContext ctx = contexts[i];
        int df = ctx.docFreq();
        // we use the max here since it's the only "true" estimation we can make here
        // at least max(df) documents have that term. Sum or Averages don't seem
        // to have a significant meaning here.
        // TODO: Maybe it could also make sense to assume independent distributions of documents and eg. have:
        //   df = df1 + df2 - (df1 * df2 / maxDoc)?
        max = Math.max(df, max);
        if (minSumTTF != -1 && ctx.totalTermFreq() != -1) {
            // we need to find out the minimum sumTTF to adjust the statistics
            // otherwise the statistics don't match
            minSumTTF = Math.min(minSumTTF, reader.getSumTotalTermFreq(terms[i].field()));
        } else {
            // once any context lacks a total term frequency, minSumTTF stays at -1
            minSumTTF = -1;
        }
    }
    // cap maxDoc at the smallest field-level sumTTF, when that is known
    if (minSumTTF != -1 && maxDoc > minSumTTF) {
        maxDoc = (int) minSumTTF;
    }
    if (max == 0) {
        // we are done that term doesn't exist at all
        return;
    }
    // start accumulating at 0 only if TTF statistics are available; otherwise stay at -1
    long sumTTF = minSumTTF == -1 ? -1 : 0;
    // tieBreak holds indices into contexts; it is sorted below by docFreq, highest first
    final int[] tieBreak = new int[contexts.length];
    for (int i = 0; i < tieBreak.length; ++i) {
        tieBreak[i] = i;
    }
    new InPlaceMergeSorter() {

        @Override
        protected void swap(int i, int j) {
            final int tmp = tieBreak[i];
            tieBreak[i] = tieBreak[j];
            tieBreak[j] = tmp;
        }

        @Override
        protected int compare(int i, int j) {
            // arguments are reversed (j before i) to obtain descending docFreq order
            return Integer.compare(contexts[tieBreak[j]].docFreq(), contexts[tieBreak[i]].docFreq());
        }
    }.sort(0, tieBreak.length);
    int prev = contexts[tieBreak[0]].docFreq();
    int actualDf = Math.min(maxDoc, max);
    assert actualDf >= 0 : "DF must be >= 0";
    // Walk the contexts from highest to lowest docFreq. Each time the docFreq drops
    // relative to the previously visited context, actualDf is bumped by one before it
    // is passed (capped at maxDoc) to adjustDF;
    // that acts as a tie breaker
    for (int i : tieBreak) {
        TermContext ctx = contexts[i];
        if (ctx.docFreq() == 0) {
            // sorted descending, so every remaining context also has docFreq == 0
            break;
        }
        final int current = ctx.docFreq();
        if (prev > current) {
            actualDf++;
        }
        contexts[i] = ctx = adjustDF(reader.getContext(), ctx, Math.min(maxDoc, actualDf));
        prev = current;
        if (sumTTF >= 0 && ctx.totalTermFreq() >= 0) {
            sumTTF += ctx.totalTermFreq();
        } else {
            // omit once TF is omitted anywhere!
            sumTTF = -1;
        }
    }
    // the blended sumTTF can't be greater than the sumTTF on the field
    // (if either side is -1, the minimum is -1 and TTF remains "unavailable")
    sumTTF = Math.min(sumTTF, minSumTTF);
    for (int i = 0; i < contexts.length; i++) {
        int df = contexts[i].docFreq();
        if (df == 0) {
            continue;
        }
        // NOTE(review): this conditional yields sumTTF in both branches, so fixedTTF == sumTTF
        final long fixedTTF = sumTTF == -1 ? -1 : sumTTF;
        contexts[i] = adjustTTF(reader.getContext(), contexts[i], fixedTTF);
    }
}
Also used : InPlaceMergeSorter(org.apache.lucene.util.InPlaceMergeSorter) TermContext(org.apache.lucene.index.TermContext)

Example 5 with InPlaceMergeSorter

use of org.apache.lucene.util.InPlaceMergeSorter in project lucene-solr by apache.

the class Passage method sort.

/**
 * Reorders the first {@code numMatches} recorded matches by ascending start offset.
 *
 * <p>{@code matchStarts}, {@code matchEnds} and {@code matchTerms} are parallel
 * arrays; entries at the same index are always moved together.
 *
 * @lucene.internal
 */
public void sort() {
    final int[] startOffsets = matchStarts;
    final int[] endOffsets = matchEnds;
    final BytesRef[] termRefs = matchTerms;

    final InPlaceMergeSorter byStartOffset = new InPlaceMergeSorter() {

        @Override
        protected int compare(int i, int j) {
            final int left = startOffsets[i];
            final int right = startOffsets[j];
            return Integer.compare(left, right);
        }

        @Override
        protected void swap(int i, int j) {
            final int heldStart = startOffsets[i];
            startOffsets[i] = startOffsets[j];
            startOffsets[j] = heldStart;

            final int heldEnd = endOffsets[i];
            endOffsets[i] = endOffsets[j];
            endOffsets[j] = heldEnd;

            final BytesRef heldTerm = termRefs[i];
            termRefs[i] = termRefs[j];
            termRefs[j] = heldTerm;
        }
    };
    byStartOffset.sort(0, numMatches);
}
Also used : InPlaceMergeSorter(org.apache.lucene.util.InPlaceMergeSorter) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

InPlaceMergeSorter (org.apache.lucene.util.InPlaceMergeSorter): 8, BytesRef (org.apache.lucene.util.BytesRef): 3, DocIdSetIterator (org.apache.lucene.search.DocIdSetIterator): 2, PagedGrowableWriter (org.apache.lucene.util.packed.PagedGrowableWriter): 2, PagedMutable (org.apache.lucene.util.packed.PagedMutable): 2, IndexReaderContext (org.apache.lucene.index.IndexReaderContext): 1, LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 1, TermContext (org.apache.lucene.index.TermContext): 1, LeafFieldComparator (org.apache.lucene.search.LeafFieldComparator): 1, Sort (org.apache.lucene.search.Sort): 1, SortField (org.apache.lucene.search.SortField): 1, NamedList (org.apache.solr.common.util.NamedList): 1, SolrQueryRequest (org.apache.solr.request.SolrQueryRequest): 1, SolrQueryResponse (org.apache.solr.response.SolrQueryResponse): 1, FieldType (org.apache.solr.schema.FieldType): 1, SchemaField (org.apache.solr.schema.SchemaField): 1, DocIterator (org.apache.solr.search.DocIterator): 1, DocList (org.apache.solr.search.DocList): 1, SortSpec (org.apache.solr.search.SortSpec): 1