Search in sources :

Example 26 with DocSet

Use of DocSet in project lucene-solr by Apache.

the class SimpleFacets method computeDocSet.

/**
 * Computes the doc set to facet over once the queries registered under the given exclusion tags
 * have been left out.
 *
 * <p>Returns {@code baseDocSet} unchanged when the request context has no "tags" map, when
 * {@code rb} is null, or when no query ended up in the exclusion set. Otherwise a new doc set is
 * fetched from the searcher for {@code qlist}; when grouping with group truncation is active, the
 * result is a {@code BitDocSet} built from the group heads instead.
 *
 * <p>NOTE(review): this listing appears to have lost lines during extraction (closing braces, the
 * bodies of several {@code if} statements, and part of the collector line near the end). The code
 * is left exactly as found; verify against the upstream SimpleFacets source before relying on it.
 *
 * @param baseDocSet     doc set returned as-is when nothing is excluded
 * @param excludeTagList tag names looked up in the request's "tags" map
 * @return the doc set facets should be computed against
 * @throws SyntaxError declared by the signature; presumably raised by {@code qp.getQuery()} - confirm
 * @throws IOException declared by the signature; presumably raised by searcher access - confirm
 */
protected DocSet computeDocSet(DocSet baseDocSet, List<String> excludeTagList) throws SyntaxError, IOException {
    Map<?, ?> tagMap = (Map<?, ?>) req.getContext().get("tags");
    // rb can be null if facets are being calculated from a RequestHandler e.g. MoreLikeThisHandler
    if (tagMap == null || rb == null) {
        return baseDocSet;
    // IdentityHashMap compares keys by reference (==), so exclusion matches the exact Query
    // instances obtained from the tagged parsers rather than merely equal queries.
    IdentityHashMap<Query, Boolean> excludeSet = new IdentityHashMap<>();
    for (String excludeTag : excludeTagList) {
        Object olst = tagMap.get(excludeTag);
        // tagMap has entries of List<String,List<QParser>>, but subject to change in the future
        // NOTE(review): the statement guarded by this type check is missing from the listing
        // (presumably a skip of non-Collection entries).
        if (!(olst instanceof Collection))
        for (Object o : (Collection<?>) olst) {
            // NOTE(review): guarded statement missing here as well (presumably a skip of non-QParser items).
            if (!(o instanceof QParser))
            QParser qp = (QParser) o;
            excludeSet.put(qp.getQuery(), Boolean.TRUE);
    // Nothing matched the requested tags: the original doc set is still valid.
    if (excludeSet.size() == 0)
        return baseDocSet;
    List<Query> qlist = new ArrayList<>();
    // add the base query
    // NOTE(review): the body that would add rb.getQuery() to qlist is not visible in this listing.
    if (!excludeSet.containsKey(rb.getQuery())) {
    // add the filters
    // NOTE(review): likewise, the body that would add each non-excluded filter is not visible.
    if (rb.getFilters() != null) {
        for (Query q : rb.getFilters()) {
            if (!excludeSet.containsKey(q)) {
    // get the new base docset for this facet
    DocSet base = searcher.getDocSet(qlist);
    // With group truncation enabled, the result is narrowed to the group-head documents of the
    // first grouping field (or, failing that, the first grouping function).
    if (rb.grouping() && rb.getGroupingSpec().isTruncateGroups()) {
        Grouping grouping = new Grouping(searcher, null, rb.getQueryCommand(), false, 0, false);
        if (rb.getGroupingSpec().getFields().length > 0) {
            grouping.addFieldCommand(rb.getGroupingSpec().getFields()[0], req);
        } else if (rb.getGroupingSpec().getFunctions().length > 0) {
            grouping.addFunctionCommand(rb.getGroupingSpec().getFunctions()[0], req);
        } else {
            // Neither a grouping field nor a grouping function: nothing to truncate by.
            return base;
        // NOTE(review): the tail of this line is extraction residue; a call that used the
        // collector (presumably a search over `base`) seems to have been cut - confirm upstream.
        AllGroupHeadsCollector allGroupHeadsCollector = grouping.getCommands().get(0).createAllGroupCollector();, allGroupHeadsCollector);
        return new BitDocSet(allGroupHeadsCollector.retrieveGroupHeads(searcher.maxDoc()));
    } else {
        return base;
Also used : Query( BooleanQuery( IdentityHashMap(java.util.IdentityHashMap) ArrayList(java.util.ArrayList) Grouping( AllGroupHeadsCollector( BitDocSet( QParser( Collection(java.util.Collection) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) Map(java.util.Map) IdentityHashMap(java.util.IdentityHashMap) HashMap(java.util.HashMap) HashDocSet( DocSet( SortedIntDocSet( BitDocSet(

Example 27 with DocSet

Use of DocSet in project lucene-solr by Apache.

the class SimpleFacets method getFieldMissingCount.

   * Returns a count of the documents in the set which do not have any
   * terms for the specified field.
   * @see FacetParams#FACET_MISSING
// Counts the documents of `docs` that are absent from the doc set matched by an unbounded range
// query on `fieldName`.
// NOTE(review): the closing brace of this method is missing from the listing.
public static int getFieldMissingCount(SolrIndexSearcher searcher, DocSet docs, String fieldName) throws IOException {
    SchemaField sf = searcher.getSchema().getField(fieldName);
    // Range query with null lower and upper bounds; presumably matches every document that has
    // any value in the field - verify against FieldType#getRangeQuery.
    DocSet hasVal = searcher.getDocSet(sf.getType().getRangeQuery(null, sf, null, null, false, false));
    // Size of (docs AND NOT hasVal), i.e. documents in `docs` not matched by the range query.
    return docs.andNotSize(hasVal);
Also used : SchemaField(org.apache.solr.schema.SchemaField) HashDocSet( DocSet( SortedIntDocSet( BitDocSet(

Example 28 with DocSet

Use of DocSet in project lucene-solr by Apache.

the class SimpleFacets method getTermCounts.

   * Term counts for use in field faceting that respects the specified mincount - 
   * if mincount is null, the "zeros" param is consulted for the appropriate backcompat 
   * default
   * @see FacetParams#FACET_ZEROS
/**
 * Computes term facet counts for one field, dispatching to the faceting implementation chosen by
 * {@code selectFacetMethod} (or to grouped faceting when the per-field group-facet param is set).
 *
 * <p>Per-field parameters read here: offset (default 0), limit (default 100), zeros, missing
 * (default false), sort, prefix, exists (default false) and the requested facet method. A limit
 * of 0 returns an empty list immediately.
 *
 * <p>NOTE(review): this listing appears to have lost lines during extraction: closing braces,
 * the right-hand side of two expressions, and the {@code break}/{@code default} lines of both
 * {@code switch} statements. The code is left exactly as found; verify against the upstream
 * SimpleFacets source.
 *
 * @param field    name of the field to facet on
 * @param mincount minimum count for a term to be reported; when null it is derived from the
 *                 per-field "zeros" param (1 only if zeros is explicitly false, otherwise 0)
 * @param parsed   parsed request state supplying the params and thread count used below
 * @return the counts produced by the selected implementation
 * @throws IOException declared by the signature; presumably from index access - confirm
 * @throws SolrException with BAD_REQUEST when the field is a point field without docValues, or
 *         when a prefix / term filter is supplied on the numeric FCS path
 */
private NamedList<Integer> getTermCounts(String field, Integer mincount, ParsedParams parsed) throws IOException {
    final SolrParams params = parsed.params;
    // NOTE(review): right-hand side lost in extraction (presumably the doc set carried by `parsed`).
    final DocSet docs =;
    final int threads = parsed.threads;
    int offset = params.getFieldInt(field, FacetParams.FACET_OFFSET, 0);
    int limit = params.getFieldInt(field, FacetParams.FACET_LIMIT, 100);
    // limit == 0 means no terms were asked for, so skip all further work.
    if (limit == 0)
        return new NamedList<>();
    if (mincount == null) {
        Boolean zeros = params.getFieldBool(field, FacetParams.FACET_ZEROS);
        // mincount = (zeros!=null && zeros) ? 0 : 1;
        // Only an explicit zeros=false raises mincount to 1; an absent param keeps zeros included.
        mincount = (zeros != null && !zeros) ? 1 : 0;
    // current default is to include zeros.
    boolean missing = params.getFieldBool(field, FacetParams.FACET_MISSING, false);
    // default to sorting if there is a limit.
    String sort = params.getFieldParam(field, FacetParams.FACET_SORT, limit > 0 ? FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX);
    String prefix = params.getFieldParam(field, FacetParams.FACET_PREFIX);
    final Predicate<BytesRef> termFilter = newBytesRefFilter(field, params);
    boolean exists = params.getFieldBool(field, FacetParams.FACET_EXISTS, false);
    NamedList<Integer> counts;
    SchemaField sf = searcher.getSchema().getField(field);
    // Point fields can only be faceted through docValues; reject the request up front otherwise.
    if (sf.getType().isPointField() && !sf.hasDocValues()) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can't facet on a PointField without docValues");
    FieldType ft = sf.getType();
    // determine what type of faceting method to use
    final String methodStr = params.getFieldParam(field, FacetParams.FACET_METHOD);
    final FacetMethod requestedMethod;
    if (FacetParams.FACET_METHOD_enum.equals(methodStr)) {
        requestedMethod = FacetMethod.ENUM;
    } else if (FacetParams.FACET_METHOD_fcs.equals(methodStr)) {
        requestedMethod = FacetMethod.FCS;
    } else if (FacetParams.FACET_METHOD_fc.equals(methodStr)) {
        requestedMethod = FacetMethod.FC;
    } else if (FacetParams.FACET_METHOD_uif.equals(methodStr)) {
        requestedMethod = FacetMethod.UIF;
    } else {
        // Unrecognised or absent method string: leave the choice to selectFacetMethod.
        requestedMethod = null;
    final boolean multiToken = sf.multiValued() || ft.multiValuedFieldCache();
    // The requested method is only an input; selectFacetMethod decides what is actually applied.
    FacetMethod appliedFacetMethod = selectFacetMethod(field, sf, requestedMethod, mincount, exists);
    RTimer timer = null;
    // Debug info and timing are recorded only when fdebug is set.
    if (fdebug != null) {
        // NOTE(review): the else-branch of this ternary was lost in extraction.
        fdebug.putInfoItem("requestedMethod", requestedMethod == null ? "not specified" :;
        fdebug.putInfoItem("inputDocSetSize", docs.size());
        fdebug.putInfoItem("field", field);
        timer = new RTimer();
    if (params.getFieldBool(field, GroupParams.GROUP_FACET, false)) {
        counts = getGroupedCounts(searcher, docs, field, multiToken, offset, limit, mincount, missing, sort, prefix, termFilter);
    } else {
        assert appliedFacetMethod != null;
        // NOTE(review): no `break` is visible between the cases below; they were most likely
        // dropped by the extraction rather than being intentional fall-through - confirm upstream.
        switch(appliedFacetMethod) {
            case ENUM:
                assert TrieField.getMainValuePrefix(ft) == null;
                counts = getFacetTermEnumCounts(searcher, docs, field, offset, limit, mincount, missing, sort, prefix, termFilter, exists);
            case FCS:
                assert ft.isPointField() || !multiToken;
                // Numeric path: point fields, or single-valued fields with a number type.
                if (ft.isPointField() || (ft.getNumberType() != null && !sf.multiValued())) {
                    if (prefix != null) {
                        throw new SolrException(ErrorCode.BAD_REQUEST, FacetParams.FACET_PREFIX + " is not supported on numeric types");
                    if (termFilter != null) {
                        throw new SolrException(ErrorCode.BAD_REQUEST, "BytesRef term filters (" + FacetParams.FACET_CONTAINS + ", " + FacetParams.FACET_EXCLUDETERMS + ") are not supported on numeric types");
                    //            We should do this, but mincount=0 is currently the default
                    //            if (ft.isPointField() && mincount <= 0) {
                    //              throw new SolrException(ErrorCode.BAD_REQUEST, FacetParams.FACET_MINCOUNT + " <= 0 is not supported on point types");
                    //            }
                    counts = NumericFacets.getCounts(searcher, docs, field, offset, limit, mincount, missing, sort);
                } else {
                    PerSegmentSingleValuedFaceting ps = new PerSegmentSingleValuedFaceting(searcher, docs, field, offset, limit, mincount, missing, sort, prefix, termFilter);
                    // threads == 0 selects directExecutor; any other value selects facetExecutor.
                    Executor executor = threads == 0 ? directExecutor : facetExecutor;
                    counts = ps.getFacetCounts(executor);
            case UIF:
                //Emulate the JSON Faceting structure so we can use the same parsing classes
                Map<String, Object> jsonFacet = new HashMap<>(13);
                jsonFacet.put("type", "terms");
                jsonFacet.put("field", field);
                jsonFacet.put("offset", offset);
                jsonFacet.put("limit", limit);
                jsonFacet.put("mincount", mincount);
                jsonFacet.put("missing", missing);
                jsonFacet.put("prefix", prefix);
                jsonFacet.put("numBuckets", params.getFieldBool(field, "numBuckets", false));
                jsonFacet.put("allBuckets", params.getFieldBool(field, "allBuckets", false));
                jsonFacet.put("method", "uif");
                jsonFacet.put("cacheDf", 0);
                jsonFacet.put("perSeg", false);
                // Map the legacy sort names onto the current ones; other values pass through.
                // NOTE(review): the `break`/`default:` lines of this switch are missing from the listing.
                final String sortVal;
                switch(sort) {
                    case FacetParams.FACET_SORT_COUNT_LEGACY:
                        sortVal = FacetParams.FACET_SORT_COUNT;
                    case FacetParams.FACET_SORT_INDEX_LEGACY:
                        sortVal = FacetParams.FACET_SORT_INDEX;
                        sortVal = sort;
                jsonFacet.put(SORT, sortVal);
                Map<String, Object> topLevel = new HashMap<>();
                topLevel.put(field, jsonFacet);
                topLevel.put("processEmpty", true);
                FacetProcessor fproc = // rb.getResults().docSet
                FacetProcessor.createProcessor(// rb.getResults().docSet
                rb.req, // rb.getResults().docSet
                topLevel, docs);
                //TODO do we handle debug?  Should probably already be handled by the legacy code
                //Go through the response to build the expected output for SimpleFacets
                Object res = fproc.getResponse();
                counts = new NamedList<Integer>();
                if (res != null) {
                    // Unchecked casts: the response is assumed to be field -> {buckets, missing}
                    // as laid out by the facet processor - TODO confirm against FacetProcessor.
                    SimpleOrderedMap<Object> som = (SimpleOrderedMap<Object>) res;
                    SimpleOrderedMap<Object> asdf = (SimpleOrderedMap<Object>) som.get(field);
                    List<SimpleOrderedMap<Object>> buckets = (List<SimpleOrderedMap<Object>>) asdf.get("buckets");
                    for (SimpleOrderedMap<Object> b : buckets) {
                        counts.add(b.get("val").toString(), (Integer) b.get("count"));
                    if (missing) {
                        SimpleOrderedMap<Object> missingCounts = (SimpleOrderedMap<Object>) asdf.get("missing");
                        // The missing count is stored under a null key.
                        counts.add(null, (Integer) missingCounts.get("count"));
            case FC:
                counts = DocValuesFacets.getCounts(searcher, docs, field, offset, limit, mincount, missing, sort, prefix, termFilter, fdebug);
                // NOTE(review): presumably this throw belonged to a `default:` label lost in extraction.
                throw new AssertionError();
    if (fdebug != null) {
        // NOTE(review): timeElapsed is computed but its use is not visible in this listing.
        long timeElapsed = (long) timer.getTime();
    return counts;
Also used : IdentityHashMap(java.util.IdentityHashMap) HashMap(java.util.HashMap) FacetProcessor( SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) Executor(java.util.concurrent.Executor) List(java.util.List) ArrayList(java.util.ArrayList) NamedList(org.apache.solr.common.util.NamedList) BytesRef(org.apache.lucene.util.BytesRef) SolrException(org.apache.solr.common.SolrException) RTimer(org.apache.solr.util.RTimer) FieldType(org.apache.solr.schema.FieldType) SchemaField(org.apache.solr.schema.SchemaField) RequiredSolrParams(org.apache.solr.common.params.RequiredSolrParams) SolrParams(org.apache.solr.common.params.SolrParams) HashDocSet( DocSet( SortedIntDocSet( BitDocSet(

Example 29 with DocSet

Use of DocSet in project lucene-solr by Apache.

the class SolrRangeQuery method createDocSet.

/**
 * Builds a doc set by feeding the terms enum of every index segment into a
 * {@code DocSetBuilder}, then hands the result to {@code DocSetUtil.getDocSet}.
 *
 * <p>NOTE(review): the listing is missing the loop's closing brace and the else-branch of the
 * final ternary; the code is left exactly as found.
 *
 * @param searcher searcher supplying maxDoc, live docs and the segment leaves
 * @param cost     passed straight to the {@code DocSetBuilder} constructor
 * @return the doc set returned by {@code DocSetUtil.getDocSet}
 * @throws IOException declared by the signature; presumably from index access - confirm
 */
private DocSet createDocSet(SolrIndexSearcher searcher, long cost) throws IOException {
    int maxDoc = searcher.maxDoc();
    BitDocSet liveDocs = searcher.getLiveDocs();
    // When every document is live (size == maxDoc) no live-docs bitset is passed along (null).
    FixedBitSet liveBits = liveDocs.size() == maxDoc ? null : liveDocs.getBits();
    DocSetBuilder builder = new DocSetBuilder(maxDoc, cost);
    List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
    // Tracks the largest number of terms visited in any single segment.
    int maxTermsPerSegment = 0;
    for (LeafReaderContext ctx : leaves) {
        TermsEnum te = getTermsEnum(ctx);
        // ctx.docBase is the segment's doc-id base passed to the builder alongside its terms.
        int termsVisited = builder.add(te, ctx.docBase);
        maxTermsPerSegment = Math.max(maxTermsPerSegment, termsVisited);
    // At most one term per segment selects buildUniqueInOrder.
    // NOTE(review): the alternative branch of this ternary was lost in extraction.
    DocSet set = maxTermsPerSegment <= 1 ? builder.buildUniqueInOrder(liveBits) :;
    return DocSetUtil.getDocSet(set, searcher);
Also used : BitDocSet( FixedBitSet(org.apache.lucene.util.FixedBitSet) DocSetBuilder( LeafReaderContext(org.apache.lucene.index.LeafReaderContext) BitDocSet( DocSet( TermsEnum(org.apache.lucene.index.TermsEnum)

Example 30 with DocSet

Use of DocSet in project lucene-solr by Apache.

the class BlockJoinDocSetFacetComponent method process.

/**
 * When the request context holds a block-join query under {@code bjqKey}, derives the set of
 * matching child documents for the current parent results and stores a
 * {@code BlockJoinFacetAccsHolder} in the request context under {@code COLLECTOR_CONTEXT_PARAM}.
 * Does nothing when no such query is present.
 *
 * <p>NOTE(review): the body of {@code if (iter.hasNext())} and the closing braces are missing
 * from this listing; the code is left exactly as found.
 *
 * @param rb response builder giving access to the request, its searcher and the query results
 * @throws IOException declared by the signature; presumably from index access - confirm
 */
public void process(ResponseBuilder rb) throws IOException {
    final BlockJoinParentQParser.AllParentsAware bjq = (BlockJoinParentQParser.AllParentsAware) rb.req.getContext().get(bjqKey);
    if (bjq != null) {
        final DocSet parentResult = rb.getResults().docSet;
        final BitDocSet allParentsBitsDocSet = rb.req.getSearcher().getDocSetBits(bjq.getParentQuery());
        // Expand the matched parents to their children; the third argument is the doc set of a
        // match-all query.
        final DocSet allChildren = BlockJoin.toChildren(parentResult, allParentsBitsDocSet, rb.req.getSearcher().getDocSetBits(new MatchAllDocsQuery()), QueryContext.newContext(rb.req.getSearcher()));
        final DocSet childQueryDocSet = rb.req.getSearcher().getDocSet(bjq.getChildQuery());
        // Keep only the children that also match the child query.
        final DocSet selectedChildren = allChildren.intersection(childQueryDocSet);
        // don't include parent into facet counts
        //childResult = childResult.union(parentResult);// just to mimic the current logic
        final List<LeafReaderContext> leaves = rb.req.getSearcher().getIndexReader().leaves();
        Filter filter = selectedChildren.getTopFilter();
        final BlockJoinFacetAccsHolder facetCounter = new BlockJoinFacetAccsHolder(rb.req);
        // Walk the index segment by segment, building a per-segment iterator over the selected children.
        for (int subIdx = 0; subIdx < leaves.size(); subIdx++) {
            LeafReaderContext subCtx = leaves.get(subIdx);
            // solr docsets already exclude any deleted docs
            DocIdSet dis = filter.getDocIdSet(subCtx, null);
            AggregatableDocIter iter = new SegmentChildren(subCtx, dis, allParentsBitsDocSet);
            // NOTE(review): the statements run for a non-empty iterator are not visible here
            // (presumably they feed `iter` to facetCounter) - confirm upstream.
            if (iter.hasNext()) {
        rb.req.getContext().put(COLLECTOR_CONTEXT_PARAM, facetCounter);
Also used : AggregatableDocIter( DocIdSet( MatchAllDocsQuery( BitDocSet( Filter( LeafReaderContext(org.apache.lucene.index.LeafReaderContext) BitDocSet( DocSet(


DocSet ( BitDocSet ( Query ( Term (org.apache.lucene.index.Term)12 TermQuery ( FixedBitSet (org.apache.lucene.util.FixedBitSet)9 DocIterator ( BooleanQuery ( SchemaField (org.apache.solr.schema.SchemaField)8 SolrIndexSearcher ( LeafReaderContext (org.apache.lucene.index.LeafReaderContext)6 HashDocSet ( SortedIntDocSet ( WrappedQuery ( FieldType (org.apache.solr.schema.FieldType)5 ArrayList (java.util.ArrayList)4 BytesRef (org.apache.lucene.util.BytesRef)4 NamedList (org.apache.solr.common.util.NamedList)4 SimpleOrderedMap (org.apache.solr.common.util.SimpleOrderedMap)4 IdentityHashMap (java.util.IdentityHashMap)3