Search in sources :

Example 1 with DocIdSet

use of org.apache.lucene.search.DocIdSet in project lucene-solr by apache.

the class TestFilteredDocIdSet method testNullIteratorFilteredDocIdSet.

/**
 * Checks that a query filtered by a {@code FilteredDocIdSet} whose wrapped set
 * hands back a {@code null} iterator matches no documents (and does not blow up).
 */
public void testNullIteratorFilteredDocIdSet() throws Exception {
    // Index exactly one document.
    Directory directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
    Document singleDoc = new Document();
    singleDoc.add(newStringField("c", "val", Field.Store.NO));
    indexWriter.addDocument(singleDoc);
    IndexReader indexReader = indexWriter.getReader();
    indexWriter.close();

    // Sanity check: without any filter the single document is found.
    IndexSearcher indexSearcher = newSearcher(indexReader);
    Assert.assertEquals(1, indexSearcher.search(new MatchAllDocsQuery(), 10).totalHits);

    // Filter whose per-leaf DocIdSet wraps a set with a null iterator.
    Filter nullIteratorFilter = new Filter() {

        @Override
        public int hashCode() {
            return System.identityHashCode(this);
        }

        @Override
        public boolean equals(Object other) {
            return other == this;
        }

        @Override
        public String toString(String field) {
            return "nullDocIdSetFilter";
        }

        @Override
        public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) {
            // The wrapped set: reports no memory and has no iterator at all.
            final DocIdSet nullIteratorSet = new DocIdSet() {

                @Override
                public long ramBytesUsed() {
                    return 0L;
                }

                @Override
                public DocIdSetIterator iterator() {
                    return null;
                }
            };
            // The wrapper accepts every doc id it is asked about.
            FilteredDocIdSet acceptEverything = new FilteredDocIdSet(nullIteratorSet) {

                @Override
                protected boolean match(int doc) {
                    return true;
                }
            };
            return acceptEverything;
        }
    };

    // MUST match-all combined with the FILTER clause above.
    BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
    queryBuilder.add(new MatchAllDocsQuery(), Occur.MUST);
    queryBuilder.add(nullIteratorFilter, Occur.FILTER);
    Query filteredQuery = queryBuilder.build();
    Assert.assertEquals(0, indexSearcher.search(filteredQuery, 10).totalHits);

    indexReader.close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) DocIdSet(org.apache.lucene.search.DocIdSet) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) IndexReader(org.apache.lucene.index.IndexReader) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Bits(org.apache.lucene.util.Bits) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 2 with DocIdSet

use of org.apache.lucene.search.DocIdSet in project lucene-solr by apache.

the class TestFilteredDocIdSet method testFilteredDocIdSet.

/**
 * Wraps a set enumerating doc ids 0..9 in a {@code FilteredDocIdSet} that keeps only
 * even ids, advances to 3, drains the iterator and expects exactly {4, 6, 8}.
 */
public void testFilteredDocIdSet() throws Exception {
    final int maxdoc = 10;

    // Source set: yields 0, 1, ..., maxdoc - 1 in order.
    final DocIdSet innerSet = new DocIdSet() {

        @Override
        public DocIdSetIterator iterator() {
            return new DocIdSetIterator() {

                int current = -1;

                @Override
                public long cost() {
                    return 1;
                }

                @Override
                public int advance(int target) throws IOException {
                    return slowAdvance(target);
                }

                @Override
                public int nextDoc() {
                    current++;
                    if (current < maxdoc) {
                        return current;
                    }
                    current = NO_MORE_DOCS;
                    return current;
                }

                @Override
                public int docID() {
                    return current;
                }
            };
        }

        @Override
        public long ramBytesUsed() {
            return 0L;
        }
    };

    DocIdSet filteredSet = new FilteredDocIdSet(innerSet) {

        @Override
        protected boolean match(int candidate) {
            // only even doc ids pass the filter
            return candidate % 2 == 0;
        }
    };

    // Position at the first match >= 3, then collect everything up to exhaustion.
    DocIdSetIterator iter = filteredSet.iterator();
    ArrayList<Integer> collected = new ArrayList<>();
    for (int doc = iter.advance(3); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iter.nextDoc()) {
        collected.add(doc);
    }

    // Unbox into a primitive array so it can be compared with Arrays.equals.
    int[] docs = new int[collected.size()];
    for (int idx = 0; idx < docs.length; idx++) {
        docs[idx] = collected.get(idx);
    }

    int[] answer = new int[] { 4, 6, 8 };
    if (!Arrays.equals(answer, docs)) {
        System.out.println("answer: " + Arrays.toString(answer));
        System.out.println("gotten: " + Arrays.toString(docs));
        fail();
    }
}
Also used : ArrayList(java.util.ArrayList) DocIdSet(org.apache.lucene.search.DocIdSet) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator)

Example 3 with DocIdSet

use of org.apache.lucene.search.DocIdSet in project lucene-solr by apache.

the class TestDocSet method doFilterTest.

/**
 * Builds two DocSets from the same random bit set, takes the top filter of each, and
 * checks via {@code doTestIteratorEqual} that both filters produce equal DocIdSets for
 * every leaf of {@code reader} - first visiting the leaves in order, then picking
 * leaves at random.
 *
 * Only per-leaf comparison is done; the former top-level comparison was disabled
 * because top level filters are no longer supported.
 */
public void doFilterTest(IndexReader reader) throws IOException {
    final IndexReaderContext rootContext = reader.getContext();
    final FixedBitSet randomBits = getRandomSet(reader.maxDoc(), rand.nextInt(reader.maxDoc() + 1));

    // Two DocSets derived from identical bits, and their filters.
    final DocSet bitBacked = new BitDocSet(randomBits);
    final DocSet intBacked = getIntDocSet(randomBits);
    final Filter bitFilter = bitBacked.getTopFilter();
    final Filter intFilter = intBacked.getTopFilter();

    final List<LeafReaderContext> leafContexts = rootContext.leaves();
    final int leafCount = leafContexts.size();

    // Pass 1: sub readers in sequence.
    for (int ord = 0; ord < leafCount; ord++) {
        final LeafReaderContext leaf = leafContexts.get(ord);
        doTestIteratorEqual(bitFilter.getDocIdSet(leaf, null), intFilter.getDocIdSet(leaf, null));
    }

    // Pass 2: same number of checks, but against randomly chosen sub readers.
    for (int remaining = leafCount; remaining > 0; remaining--) {
        final LeafReaderContext leaf = leafContexts.get(rand.nextInt(leafCount));
        doTestIteratorEqual(bitFilter.getDocIdSet(leaf, null), intFilter.getDocIdSet(leaf, null));
    }
}
Also used : FixedBitSet(org.apache.lucene.util.FixedBitSet) DocIdSet(org.apache.lucene.search.DocIdSet) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) IndexReaderContext(org.apache.lucene.index.IndexReaderContext)

Example 4 with DocIdSet

use of org.apache.lucene.search.DocIdSet in project lucene-solr by apache.

the class DocValuesFacets method getCounts.

/**
 * Computes per-term facet counts for {@code fieldName} over the documents in {@code docs},
 * reading term ordinals from sorted (or sorted-set) doc values, one index leaf at a time.
 *
 * @param searcher   supplies the schema, the top-level doc values used for term lookups, and the leaves
 * @param docs       the documents to count; nothing is counted when {@code docs.size() < mincount}
 * @param fieldName  the field to facet on
 * @param offset     number of qualifying terms to skip before adding results
 * @param limit      maximum number of terms to add; a negative value is treated as unlimited
 * @param mincount   terms whose count is below this value are not added
 * @param missing    passed through unchanged to {@code finalize}
 * @param sort       when equal to {@code FacetParams.FACET_SORT_COUNT} or
 *                   {@code FacetParams.FACET_SORT_COUNT_LEGACY} results are ordered by count,
 *                   otherwise they are added in index (ordinal) order; must not be null when
 *                   there is anything to count, since {@code sort.equals} is called on it
 * @param prefix     when non-empty, only the ordinal range located via {@code lookupTerm} on this
 *                   prefix is considered; an empty string is treated like {@code null}
 * @param termFilter when non-null, terms for which {@code test} returns false are skipped
 * @param fdebug     when non-null, receives a {@code "numBuckets"} info item
 * @return whatever {@code finalize} returns for the accumulated list
 * @throws UnsupportedOperationException if the field has {@code Integer.MAX_VALUE} or more unique values
 */
public static NamedList<Integer> getCounts(SolrIndexSearcher searcher, DocSet docs, String fieldName, int offset, int limit, int mincount, boolean missing, String sort, String prefix, Predicate<BytesRef> termFilter, FacetDebugInfo fdebug) throws IOException {
    SchemaField schemaField = searcher.getSchema().getField(fieldName);
    FieldType ft = schemaField.getType();
    NamedList<Integer> res = new NamedList<>();
    // TODO: remove multiValuedFieldCache(), check dv type / uninversion type?
    final boolean multiValued = schemaField.multiValued() || ft.multiValuedFieldCache();
    // for term lookups only
    final SortedSetDocValues si;
    // for mapping per-segment ords to global ones
    OrdinalMap ordinalMap = null;
    if (multiValued) {
        si = searcher.getSlowAtomicReader().getSortedSetDocValues(fieldName);
        if (si instanceof MultiDocValues.MultiSortedSetDocValues) {
            ordinalMap = ((MultiSortedSetDocValues) si).mapping;
        }
    } else {
        SortedDocValues single = searcher.getSlowAtomicReader().getSortedDocValues(fieldName);
        // view the single-valued field through the sorted-set API so the rest of the method has one code path
        si = single == null ? null : DocValues.singleton(single);
        if (single instanceof MultiDocValues.MultiSortedDocValues) {
            ordinalMap = ((MultiDocValues.MultiSortedDocValues) single).mapping;
        }
    }
    if (si == null) {
        // no doc values for this field: nothing to count, missingCount is reported as -1
        return finalize(res, searcher, schemaField, docs, -1, missing);
    }
    if (si.getValueCount() >= Integer.MAX_VALUE) {
        throw new UnsupportedOperationException("Currently this faceting method is limited to " + Integer.MAX_VALUE + " unique terms");
    }
    final BytesRefBuilder prefixRef;
    if (prefix == null) {
        prefixRef = null;
    } else if (prefix.length() == 0) {
        // an empty prefix restricts nothing; normalise it to "no prefix"
        prefix = null;
        prefixRef = null;
    } else {
        prefixRef = new BytesRefBuilder();
        prefixRef.copyChars(prefix);
    }
    int startTermIndex, endTermIndex;
    if (prefix != null) {
        startTermIndex = (int) si.lookupTerm(prefixRef.get());
        // a negative result is converted to a non-negative ordinal via -result - 1
        if (startTermIndex < 0)
            startTermIndex = -startTermIndex - 1;
        prefixRef.append(UnicodeUtil.BIG_TERM);
        endTermIndex = (int) si.lookupTerm(prefixRef.get());
        // prefix + BIG_TERM is asserted not to be an existing term, so the lookup must be negative
        assert endTermIndex < 0;
        endTermIndex = -endTermIndex - 1;
    } else {
        // starting at -1 makes counts one slot larger than the value count; slot 0 is
        // read back as the missing count below
        startTermIndex = -1;
        endTermIndex = (int) si.getValueCount();
    }
    final int nTerms = endTermIndex - startTermIndex;
    int missingCount = -1;
    final CharsRefBuilder charsRef = new CharsRefBuilder();
    if (nTerms > 0 && docs.size() >= mincount) {
        // count collection array only needs to be as big as the number of terms we are
        // going to collect counts for.
        final int[] counts = new int[nTerms];
        if (fdebug != null) {
            fdebug.putInfoItem("numBuckets", nTerms);
        }
        Filter filter = docs.getTopFilter();
        List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
        for (int subIndex = 0; subIndex < leaves.size(); subIndex++) {
            LeafReaderContext leaf = leaves.get(subIndex);
            // solr docsets already exclude any deleted docs
            DocIdSet dis = filter.getDocIdSet(leaf, null);
            DocIdSetIterator disi = null;
            if (dis != null) {
                disi = dis.iterator();
            }
            // both a null DocIdSet and a null iterator mean there is nothing to count in this leaf
            if (disi != null) {
                if (multiValued) {
                    SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(fieldName);
                    if (sub == null) {
                        sub = DocValues.emptySortedSet();
                    }
                    final SortedDocValues singleton = DocValues.unwrapSingleton(sub);
                    if (singleton != null) {
                        // some codecs may optimize SORTED_SET storage for single-valued fields
                        accumSingle(counts, startTermIndex, singleton, disi, subIndex, ordinalMap);
                    } else {
                        accumMulti(counts, startTermIndex, sub, disi, subIndex, ordinalMap);
                    }
                } else {
                    SortedDocValues sub = leaf.reader().getSortedDocValues(fieldName);
                    if (sub == null) {
                        sub = DocValues.emptySorted();
                    }
                    accumSingle(counts, startTermIndex, sub, disi, subIndex, ordinalMap);
                }
            }
        }
        if (startTermIndex == -1) {
            // NOTE(review): presumably accumSingle/accumMulti tally docs without a value
            // (ord -1) into slot 0 when startTermIndex is -1 - confirm against those helpers
            missingCount = counts[0];
        }
        // IDEA: we could also maintain a count of "other"... everything that fell outside
        // of the top 'N'
        int off = offset;
        int lim = limit >= 0 ? limit : Integer.MAX_VALUE;
        if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
            int maxsize = limit > 0 ? offset + limit : Integer.MAX_VALUE - 1;
            maxsize = Math.min(maxsize, nTerms);
            LongPriorityQueue queue = new LongPriorityQueue(Math.min(maxsize, 1000), maxsize, Long.MIN_VALUE);
            // the smallest value in the top 'N' values
            int min = mincount - 1;
            // skip slot 0 when it is the missing-count slot rather than a real term
            for (int i = (startTermIndex == -1) ? 1 : 0; i < nTerms; i++) {
                int c = counts[i];
                if (c > min) {
                    if (termFilter != null) {
                        final BytesRef term = si.lookupOrd(startTermIndex + i);
                        if (!termFilter.test(term)) {
                            continue;
                        }
                    }
                    // smaller term numbers sort higher, so subtract the term number instead
                    // (count goes in the high 32 bits, Integer.MAX_VALUE - i in the low bits)
                    long pair = (((long) c) << 32) + (Integer.MAX_VALUE - i);
                    boolean displaced = queue.insert(pair);
                    if (displaced)
                        // raise the bar to the count of the queue's current top entry
                        min = (int) (queue.top() >>> 32);
                }
            }
            // if we are deep paging, we don't have to order the highest "offset" counts.
            int collectCount = Math.max(0, queue.size() - off);
            assert collectCount <= lim;
            // the start and end indexes of our list "sorted" (starting with the highest value)
            // NOTE(review): these bounds depend on the layout of the array returned by
            // LongPriorityQueue.sort - confirm against that class before changing them
            int sortedIdxStart = queue.size() - (collectCount - 1);
            int sortedIdxEnd = queue.size() + 1;
            final long[] sorted = queue.sort(collectCount);
            for (int i = sortedIdxStart; i < sortedIdxEnd; i++) {
                long pair = sorted[i];
                // unpack: high 32 bits are the count, low 32 bits hold Integer.MAX_VALUE - index
                int c = (int) (pair >>> 32);
                int tnum = Integer.MAX_VALUE - (int) pair;
                final BytesRef term = si.lookupOrd(startTermIndex + tnum);
                ft.indexedToReadable(term, charsRef);
                res.add(charsRef.toString(), c);
            }
        } else {
            // add results in index order
            int i = (startTermIndex == -1) ? 1 : 0;
            if (mincount <= 0 && termFilter == null) {
                // if mincount<=0 and we're not examining the values for the term filter, then
                // we won't discard any terms and we know exactly where to start.
                i += off;
                off = 0;
            }
            for (; i < nTerms; i++) {
                int c = counts[i];
                if (c < mincount)
                    continue;
                BytesRef term = null;
                if (termFilter != null) {
                    term = si.lookupOrd(startTermIndex + i);
                    if (!termFilter.test(term)) {
                        continue;
                    }
                }
                // consume the remaining offset first, then the limit
                if (--off >= 0)
                    continue;
                if (--lim < 0)
                    break;
                // the term was only looked up above when a termFilter needed it
                if (term == null) {
                    term = si.lookupOrd(startTermIndex + i);
                }
                ft.indexedToReadable(term, charsRef);
                res.add(charsRef.toString(), c);
            }
        }
    }
    return finalize(res, searcher, schemaField, docs, missingCount, missing);
}
Also used : MultiSortedSetDocValues(org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues) DocIdSet(org.apache.lucene.search.DocIdSet) MultiDocValues(org.apache.lucene.index.MultiDocValues) OrdinalMap(org.apache.lucene.index.MultiDocValues.OrdinalMap) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) BytesRef(org.apache.lucene.util.BytesRef) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) LongPriorityQueue(org.apache.solr.util.LongPriorityQueue) NamedList(org.apache.solr.common.util.NamedList) SortedDocValues(org.apache.lucene.index.SortedDocValues) FieldType(org.apache.solr.schema.FieldType) SchemaField(org.apache.solr.schema.SchemaField) MultiSortedSetDocValues(org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) Filter(org.apache.solr.search.Filter) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator)

Example 5 with DocIdSet

use of org.apache.lucene.search.DocIdSet in project lucene-solr by apache.

the class DocValuesStats method getCounts.

/**
 * Computes statistics for the field behind {@code statsField} over the documents in
 * {@code docs}, reading term ordinals from sorted (or sorted-set) doc values one index
 * leaf at a time, and optionally breaking the statistics down by the given facet fields.
 *
 * @param searcher   supplies the schema, the top-level doc values used for term lookups, and the leaves
 * @param statsField the stats request; its {@code SchemaField} must be non-null
 * @param docs       the documents to accumulate over
 * @param facet      names of fields to facet the stats on; each must be single-valued
 * @return the accumulated stats, including the missing count and one facet entry per facet field
 * @throws SolrException with {@code BAD_REQUEST} if any facet field is multi-valued
 * @throws UnsupportedOperationException if the field has {@code Integer.MAX_VALUE} or more unique values
 */
public static StatsValues getCounts(SolrIndexSearcher searcher, StatsField statsField, DocSet docs, String[] facet) throws IOException {
    final SchemaField schemaField = statsField.getSchemaField();
    assert null != schemaField : "DocValuesStats requires a StatsField using a SchemaField";
    final String fieldName = schemaField.getName();
    final FieldType ft = schemaField.getType();
    final StatsValues res = StatsValuesFactory.createStatsValues(statsField);
    //Initialize facetstats, if facets have been passed in
    final FieldFacetStats[] facetStats = new FieldFacetStats[facet.length];
    int upto = 0;
    for (String facetField : facet) {
        // resolve the facet field once and use it for both validation and construction
        final SchemaField facetSchemaField = searcher.getSchema().getField(facetField);
        if (facetSchemaField.multiValued()) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Stats can only facet on single-valued fields, not: " + facetField);
        }
        facetStats[upto++] = new FieldFacetStats(searcher, facetSchemaField, statsField);
    }
    // TODO: remove multiValuedFieldCache(), check dv type / uninversion type?
    final boolean multiValued = schemaField.multiValued() || ft.multiValuedFieldCache();
    // for term lookups only
    SortedSetDocValues si;
    // for mapping per-segment ords to global ones
    OrdinalMap ordinalMap = null;
    if (multiValued) {
        si = searcher.getSlowAtomicReader().getSortedSetDocValues(fieldName);
        if (si instanceof MultiDocValues.MultiSortedSetDocValues) {
            ordinalMap = ((MultiDocValues.MultiSortedSetDocValues) si).mapping;
        }
    } else {
        SortedDocValues single = searcher.getSlowAtomicReader().getSortedDocValues(fieldName);
        // view the single-valued field through the sorted-set API so term lookups share one code path
        si = single == null ? null : DocValues.singleton(single);
        if (single instanceof MultiDocValues.MultiSortedDocValues) {
            ordinalMap = ((MultiDocValues.MultiSortedDocValues) single).mapping;
        }
    }
    if (si == null) {
        // no doc values for this field: continue with an empty set so the leaf loop still runs
        si = DocValues.emptySortedSet();
    }
    if (si.getValueCount() >= Integer.MAX_VALUE) {
        throw new UnsupportedOperationException("Currently this stats method is limited to " + Integer.MAX_VALUE + " unique terms");
    }
    int missingDocCountTotal = 0;
    final int nTerms = (int) si.getValueCount();
    // count collection array only needs to be as big as the number of terms we are
    // going to collect counts for.
    final int[] counts = new int[nTerms];
    Filter filter = docs.getTopFilter();
    List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
    for (int subIndex = 0; subIndex < leaves.size(); subIndex++) {
        LeafReaderContext leaf = leaves.get(subIndex);
        // solr docsets already exclude any deleted docs
        DocIdSet dis = filter.getDocIdSet(leaf, null);
        DocIdSetIterator disi = null;
        if (dis != null) {
            disi = dis.iterator();
        }
        // both a null DocIdSet and a null iterator mean there is nothing to accumulate in this leaf
        if (disi != null) {
            int docBase = leaf.docBase;
            if (multiValued) {
                SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(fieldName);
                if (sub == null) {
                    sub = DocValues.emptySortedSet();
                }
                SortedDocValues singleton = DocValues.unwrapSingleton(sub);
                if (singleton != null) {
                    // some codecs may optimize SORTED_SET storage for single-valued fields
                    missingDocCountTotal += accumSingle(counts, docBase, facetStats, singleton, disi, subIndex, ordinalMap);
                } else {
                    missingDocCountTotal += accumMulti(counts, docBase, facetStats, sub, disi, subIndex, ordinalMap);
                }
            } else {
                SortedDocValues sub = leaf.reader().getSortedDocValues(fieldName);
                if (sub == null) {
                    sub = DocValues.emptySorted();
                }
                missingDocCountTotal += accumSingle(counts, docBase, facetStats, sub, disi, subIndex, ordinalMap);
            }
        }
    }
    // add results in index order
    for (int ord = 0; ord < counts.length; ord++) {
        int count = counts[ord];
        if (count > 0) {
            final BytesRef value = si.lookupOrd(ord);
            res.accumulate(value, count);
            for (FieldFacetStats f : facetStats) {
                f.accumulateTermNum(ord, value);
            }
        }
    }
    res.addMissing(missingDocCountTotal);
    // iterating an empty array is a no-op, so no explicit length check is needed
    for (FieldFacetStats f : facetStats) {
        // the map reference is taken before accumulateMissing(), matching the original statement order
        Map<String, StatsValues> facetStatsValues = f.facetStatsValues;
        f.accumulateMissing();
        res.addFacet(f.name, facetStatsValues);
    }
    return res;
}
Also used : MultiSortedSetDocValues(org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues) DocIdSet(org.apache.lucene.search.DocIdSet) MultiDocValues(org.apache.lucene.index.MultiDocValues) OrdinalMap(org.apache.lucene.index.MultiDocValues.OrdinalMap) SortedDocValues(org.apache.lucene.index.SortedDocValues) FieldType(org.apache.solr.schema.FieldType) SchemaField(org.apache.solr.schema.SchemaField) FieldFacetStats(org.apache.solr.handler.component.FieldFacetStats) MultiSortedSetDocValues(org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) Filter(org.apache.solr.search.Filter) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) StatsValues(org.apache.solr.handler.component.StatsValues) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) SolrException(org.apache.solr.common.SolrException) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

DocIdSet (org.apache.lucene.search.DocIdSet) 27, LeafReaderContext (org.apache.lucene.index.LeafReaderContext) 16, DocIdSetIterator (org.apache.lucene.search.DocIdSetIterator) 14, Filter (org.apache.solr.search.Filter) 6, Bits (org.apache.lucene.util.Bits) 5, FixedBitSet (org.apache.lucene.util.FixedBitSet) 5, LeafReader (org.apache.lucene.index.LeafReader) 4, SortedDocValues (org.apache.lucene.index.SortedDocValues) 4, SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues) 4, BitDocIdSet (org.apache.lucene.util.BitDocIdSet) 4, IOException (java.io.IOException) 3, ConstantScoreScorer (org.apache.lucene.search.ConstantScoreScorer) 3, IndexSearcher (org.apache.lucene.search.IndexSearcher) 3, Scorer (org.apache.lucene.search.Scorer) 3, Weight (org.apache.lucene.search.Weight) 3, IndexReader (org.apache.lucene.index.IndexReader) 2, IndexReaderContext (org.apache.lucene.index.IndexReaderContext) 2, MultiDocValues (org.apache.lucene.index.MultiDocValues) 2, MultiSortedSetDocValues (org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues) 2, OrdinalMap (org.apache.lucene.index.MultiDocValues.OrdinalMap) 2