Search in sources :

Example 26 with DocSet

use of org.apache.solr.search.DocSet in project lucene-solr by apache.

the class SimpleFacets method computeDocSet.

/**
 * Computes the doc set that facet counts should be based on after the queries registered
 * under the given exclusion tags have been left out.
 *
 * @param baseDocSet     doc set returned unchanged when there is nothing to exclude
 * @param excludeTagList tag names whose tagged queries must not restrict the result
 * @return {@code baseDocSet} when no tagged query was found (or no tags / no response
 *         builder are available); otherwise a doc set rebuilt from the remaining queries,
 *         reduced to group heads when group truncation is requested
 */
protected DocSet computeDocSet(DocSet baseDocSet, List<String> excludeTagList) throws SyntaxError, IOException {
    final Map<?, ?> taggedQueries = (Map<?, ?>) req.getContext().get("tags");
    // rb can be null if facets are being calculated from a RequestHandler e.g. MoreLikeThisHandler
    if (rb == null || taggedQueries == null) {
        return baseDocSet;
    }

    // Identity-based on purpose: only the exact Query instances produced by the tagged
    // parsers are excluded, not queries that merely compare equal.
    final IdentityHashMap<Query, Boolean> excluded = new IdentityHashMap<>();
    for (String tag : excludeTagList) {
        final Object tagged = taggedQueries.get(tag);
        // entries are expected to be collections of QParser, but that is subject to change,
        // so anything of another shape is silently skipped
        if (tagged instanceof Collection) {
            for (Object candidate : (Collection<?>) tagged) {
                if (candidate instanceof QParser) {
                    excluded.put(((QParser) candidate).getQuery(), Boolean.TRUE);
                }
            }
        }
    }
    if (excluded.isEmpty()) {
        return baseDocSet;
    }

    final List<Query> remaining = new ArrayList<>();
    // main query first, unless it is itself excluded
    final Query mainQuery = rb.getQuery();
    if (!excluded.containsKey(mainQuery)) {
        remaining.add(mainQuery);
    }
    // then every filter that survived the exclusion
    if (rb.getFilters() != null) {
        for (Query filter : rb.getFilters()) {
            if (!excluded.containsKey(filter)) {
                remaining.add(filter);
            }
        }
    }

    // the new base docset for this facet
    final DocSet rebuilt = searcher.getDocSet(remaining);
    if (!(rb.grouping() && rb.getGroupingSpec().isTruncateGroups())) {
        return rebuilt;
    }

    // Group truncation: keep only the head document of each group.
    final Grouping grouping = new Grouping(searcher, null, rb.getQueryCommand(), false, 0, false);
    grouping.setWithinGroupSort(rb.getGroupingSpec().getSortWithinGroup());
    if (rb.getGroupingSpec().getFields().length > 0) {
        grouping.addFieldCommand(rb.getGroupingSpec().getFields()[0], req);
    } else if (rb.getGroupingSpec().getFunctions().length > 0) {
        grouping.addFunctionCommand(rb.getGroupingSpec().getFunctions()[0], req);
    } else {
        // neither a field nor a function to group by
        return rebuilt;
    }
    final AllGroupHeadsCollector groupHeads = grouping.getCommands().get(0).createAllGroupCollector();
    searcher.search(rebuilt.getTopFilter(), groupHeads);
    return new BitDocSet(groupHeads.retrieveGroupHeads(searcher.maxDoc()));
}
Also used : Query(org.apache.lucene.search.Query) BooleanQuery(org.apache.lucene.search.BooleanQuery) IdentityHashMap(java.util.IdentityHashMap) ArrayList(java.util.ArrayList) Grouping(org.apache.solr.search.Grouping) AllGroupHeadsCollector(org.apache.lucene.search.grouping.AllGroupHeadsCollector) BitDocSet(org.apache.solr.search.BitDocSet) QParser(org.apache.solr.search.QParser) Collection(java.util.Collection) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) Map(java.util.Map) IdentityHashMap(java.util.IdentityHashMap) HashMap(java.util.HashMap) HashDocSet(org.apache.solr.search.HashDocSet) DocSet(org.apache.solr.search.DocSet) SortedIntDocSet(org.apache.solr.search.SortedIntDocSet) BitDocSet(org.apache.solr.search.BitDocSet)

Example 27 with DocSet

use of org.apache.solr.search.DocSet in project lucene-solr by apache.

the class SimpleFacets method getFieldMissingCount.

/**
 * Returns a count of the documents in the set which do not have any
 * terms for the specified field.
 *
 * @param searcher  searcher used to resolve the field's schema and to build doc sets
 * @param docs      the documents to inspect
 * @param fieldName name of the field to check
 * @return the size of {@code docs} minus the documents matched by the field's range query
 * @see FacetParams#FACET_MISSING
 */
public static int getFieldMissingCount(SolrIndexSearcher searcher, DocSet docs, String fieldName) throws IOException {
    final SchemaField field = searcher.getSchema().getField(fieldName);
    // Range query with both bounds null; its matches are treated as "documents with a value".
    final Query anyValue = field.getType().getRangeQuery(null, field, null, null, false, false);
    return docs.andNotSize(searcher.getDocSet(anyValue));
}
Also used : SchemaField(org.apache.solr.schema.SchemaField) HashDocSet(org.apache.solr.search.HashDocSet) DocSet(org.apache.solr.search.DocSet) SortedIntDocSet(org.apache.solr.search.SortedIntDocSet) BitDocSet(org.apache.solr.search.BitDocSet)

Example 28 with DocSet

use of org.apache.solr.search.DocSet in project lucene-solr by apache.

the class SimpleFacets method getTermCounts.

/**
 * Term counts for use in field faceting that respects the specified mincount -
 * if mincount is null, the "zeros" param is consulted for the appropriate backcompat
 * default.
 *
 * <p>Reads the per-field offset, limit, missing, sort, prefix, exists and method parameters,
 * then either computes grouped counts (when {@code GroupParams.GROUP_FACET} is enabled for
 * the field) or dispatches to the implementation chosen by {@code selectFacetMethod}.
 *
 * @param field    name of the field to facet on
 * @param mincount minimum count handed to the counting implementation, or {@code null} to
 *                 derive it from {@code FacetParams.FACET_ZEROS}
 * @param parsed   supplies the request params, the doc set to count over and the thread count
 * @return the computed counts; an empty list when the per-field limit is 0
 * @throws SolrException with {@code BAD_REQUEST} when the field is a point field without
 *         docValues, or when a prefix or term filter is combined with the numeric FCS path
 * @see FacetParams#FACET_ZEROS
 */
private NamedList<Integer> getTermCounts(String field, Integer mincount, ParsedParams parsed) throws IOException {
    final SolrParams params = parsed.params;
    final DocSet docs = parsed.docs;
    final int threads = parsed.threads;
    int offset = params.getFieldInt(field, FacetParams.FACET_OFFSET, 0);
    int limit = params.getFieldInt(field, FacetParams.FACET_LIMIT, 100);
    // a limit of 0 means nothing can be returned, so skip all work
    if (limit == 0)
        return new NamedList<>();
    if (mincount == null) {
        Boolean zeros = params.getFieldBool(field, FacetParams.FACET_ZEROS);
        // current default is to include zeros: only an explicit zeros=false raises mincount to 1
        mincount = (zeros != null && !zeros) ? 1 : 0;
    }
    boolean missing = params.getFieldBool(field, FacetParams.FACET_MISSING, false);
    // default to sorting by count if there is a positive limit, by index otherwise.
    String sort = params.getFieldParam(field, FacetParams.FACET_SORT, limit > 0 ? FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX);
    String prefix = params.getFieldParam(field, FacetParams.FACET_PREFIX);
    // may be null (the FCS numeric branch below checks for that)
    final Predicate<BytesRef> termFilter = newBytesRefFilter(field, params);
    boolean exists = params.getFieldBool(field, FacetParams.FACET_EXISTS, false);
    NamedList<Integer> counts;
    SchemaField sf = searcher.getSchema().getField(field);
    if (sf.getType().isPointField() && !sf.hasDocValues()) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can't facet on a PointField without docValues");
    }
    FieldType ft = sf.getType();
    // determine what type of faceting method to use;
    // an absent or unrecognized method string leaves requestedMethod null
    final String methodStr = params.getFieldParam(field, FacetParams.FACET_METHOD);
    final FacetMethod requestedMethod;
    if (FacetParams.FACET_METHOD_enum.equals(methodStr)) {
        requestedMethod = FacetMethod.ENUM;
    } else if (FacetParams.FACET_METHOD_fcs.equals(methodStr)) {
        requestedMethod = FacetMethod.FCS;
    } else if (FacetParams.FACET_METHOD_fc.equals(methodStr)) {
        requestedMethod = FacetMethod.FC;
    } else if (FacetParams.FACET_METHOD_uif.equals(methodStr)) {
        requestedMethod = FacetMethod.UIF;
    } else {
        requestedMethod = null;
    }
    final boolean multiToken = sf.multiValued() || ft.multiValuedFieldCache();
    // the method actually used may differ from the requested one
    FacetMethod appliedFacetMethod = selectFacetMethod(field, sf, requestedMethod, mincount, exists);
    // timer is only created when debug info is collected; it is read again under the same check below
    RTimer timer = null;
    if (fdebug != null) {
        fdebug.putInfoItem("requestedMethod", requestedMethod == null ? "not specified" : requestedMethod.name());
        fdebug.putInfoItem("appliedMethod", appliedFacetMethod.name());
        fdebug.putInfoItem("inputDocSetSize", docs.size());
        fdebug.putInfoItem("field", field);
        timer = new RTimer();
    }
    if (params.getFieldBool(field, GroupParams.GROUP_FACET, false)) {
        // grouped faceting bypasses the method dispatch entirely
        counts = getGroupedCounts(searcher, docs, field, multiToken, offset, limit, mincount, missing, sort, prefix, termFilter);
    } else {
        assert appliedFacetMethod != null;
        switch(appliedFacetMethod) {
            case ENUM:
                assert TrieField.getMainValuePrefix(ft) == null;
                counts = getFacetTermEnumCounts(searcher, docs, field, offset, limit, mincount, missing, sort, prefix, termFilter, exists);
                break;
            case FCS:
                assert ft.isPointField() || !multiToken;
                // point fields and single-valued numeric fields go through NumericFacets,
                // which takes neither a prefix nor a term filter
                if (ft.isPointField() || (ft.getNumberType() != null && !sf.multiValued())) {
                    if (prefix != null) {
                        throw new SolrException(ErrorCode.BAD_REQUEST, FacetParams.FACET_PREFIX + " is not supported on numeric types");
                    }
                    if (termFilter != null) {
                        throw new SolrException(ErrorCode.BAD_REQUEST, "BytesRef term filters (" + FacetParams.FACET_CONTAINS + ", " + FacetParams.FACET_EXCLUDETERMS + ") are not supported on numeric types");
                    }
                    //            We should do this, but mincount=0 is currently the default
                    //            if (ft.isPointField() && mincount <= 0) {
                    //              throw new SolrException(ErrorCode.BAD_REQUEST, FacetParams.FACET_MINCOUNT + " <= 0 is not supported on point types");
                    //            }
                    counts = NumericFacets.getCounts(searcher, docs, field, offset, limit, mincount, missing, sort);
                } else {
                    PerSegmentSingleValuedFaceting ps = new PerSegmentSingleValuedFaceting(searcher, docs, field, offset, limit, mincount, missing, sort, prefix, termFilter);
                    // threads == 0 selects directExecutor, anything else facetExecutor
                    Executor executor = threads == 0 ? directExecutor : facetExecutor;
                    ps.setNumThreads(threads);
                    counts = ps.getFacetCounts(executor);
                }
                break;
            case UIF:
                //Emulate the JSON Faceting structure so we can use the same parsing classes
                Map<String, Object> jsonFacet = new HashMap<>(13);
                jsonFacet.put("type", "terms");
                jsonFacet.put("field", field);
                jsonFacet.put("offset", offset);
                jsonFacet.put("limit", limit);
                jsonFacet.put("mincount", mincount);
                jsonFacet.put("missing", missing);
                jsonFacet.put("prefix", prefix);
                jsonFacet.put("numBuckets", params.getFieldBool(field, "numBuckets", false));
                jsonFacet.put("allBuckets", params.getFieldBool(field, "allBuckets", false));
                jsonFacet.put("method", "uif");
                jsonFacet.put("cacheDf", 0);
                jsonFacet.put("perSeg", false);
                // map the legacy sort names onto the current ones; any other value is passed through
                final String sortVal;
                switch(sort) {
                    case FacetParams.FACET_SORT_COUNT_LEGACY:
                        sortVal = FacetParams.FACET_SORT_COUNT;
                        break;
                    case FacetParams.FACET_SORT_INDEX_LEGACY:
                        sortVal = FacetParams.FACET_SORT_INDEX;
                        break;
                    default:
                        sortVal = sort;
                }
                jsonFacet.put(SORT, sortVal);
                // the emulated request: one facet keyed by the field name, plus processEmpty=true
                Map<String, Object> topLevel = new HashMap<>();
                topLevel.put(field, jsonFacet);
                topLevel.put("processEmpty", true);
                // NOTE(review): the original carried the remark "rb.getResults().docSet" here,
                // presumably naming an alternative to passing 'docs' - confirm before changing
                FacetProcessor fproc =
                FacetProcessor.createProcessor(
                rb.req,
                topLevel, docs);
                //TODO do we handle debug?  Should probably already be handled by the legacy code
                fproc.process();
                //Go through the response to build the expected output for SimpleFacets
                Object res = fproc.getResponse();
                counts = new NamedList<Integer>();
                if (res != null) {
                    // unchecked casts: the response shape is assumed, not verified
                    SimpleOrderedMap<Object> som = (SimpleOrderedMap<Object>) res;
                    // asdf holds the response entry stored under the field name
                    SimpleOrderedMap<Object> asdf = (SimpleOrderedMap<Object>) som.get(field);
                    List<SimpleOrderedMap<Object>> buckets = (List<SimpleOrderedMap<Object>>) asdf.get("buckets");
                    for (SimpleOrderedMap<Object> b : buckets) {
                        counts.add(b.get("val").toString(), (Integer) b.get("count"));
                    }
                    if (missing) {
                        // the missing count is added under a null name
                        SimpleOrderedMap<Object> missingCounts = (SimpleOrderedMap<Object>) asdf.get("missing");
                        counts.add(null, (Integer) missingCounts.get("count"));
                    }
                }
                break;
            case FC:
                counts = DocValuesFacets.getCounts(searcher, docs, field, offset, limit, mincount, missing, sort, prefix, termFilter, fdebug);
                break;
            default:
                throw new AssertionError();
        }
    }
    if (fdebug != null) {
        // timer is non-null here: it was created under the same fdebug check above
        long timeElapsed = (long) timer.getTime();
        fdebug.setElapse(timeElapsed);
    }
    return counts;
}
Also used : IdentityHashMap(java.util.IdentityHashMap) HashMap(java.util.HashMap) FacetProcessor(org.apache.solr.search.facet.FacetProcessor) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) Executor(java.util.concurrent.Executor) List(java.util.List) ArrayList(java.util.ArrayList) NamedList(org.apache.solr.common.util.NamedList) BytesRef(org.apache.lucene.util.BytesRef) SolrException(org.apache.solr.common.SolrException) RTimer(org.apache.solr.util.RTimer) FieldType(org.apache.solr.schema.FieldType) SchemaField(org.apache.solr.schema.SchemaField) RequiredSolrParams(org.apache.solr.common.params.RequiredSolrParams) SolrParams(org.apache.solr.common.params.SolrParams) HashDocSet(org.apache.solr.search.HashDocSet) DocSet(org.apache.solr.search.DocSet) SortedIntDocSet(org.apache.solr.search.SortedIntDocSet) BitDocSet(org.apache.solr.search.BitDocSet)

Example 29 with DocSet

use of org.apache.solr.search.DocSet in project lucene-solr by apache.

the class SolrRangeQuery method createDocSet.

/**
 * Builds a doc set by feeding the terms enum of every index segment into a
 * {@link DocSetBuilder}.
 *
 * @param searcher searcher whose segments, live docs and maxDoc are used
 * @param cost     cost value handed to the {@link DocSetBuilder}
 * @return the built set, passed through {@code DocSetUtil.getDocSet}
 */
private DocSet createDocSet(SolrIndexSearcher searcher, long cost) throws IOException {
    final int maxDoc = searcher.maxDoc();
    final BitDocSet live = searcher.getLiveDocs();
    // When every document is live, null is passed to the builder instead of the live bits.
    FixedBitSet acceptBits = null;
    if (live.size() != maxDoc) {
        acceptBits = live.getBits();
    }

    final DocSetBuilder collected = new DocSetBuilder(maxDoc, cost);
    int mostTermsInOneSegment = 0;
    for (LeafReaderContext leaf : searcher.getTopReaderContext().leaves()) {
        final int visited = collected.add(getTermsEnum(leaf), leaf.docBase);
        if (visited > mostTermsInOneSegment) {
            mostTermsInOneSegment = visited;
        }
    }

    // With at most one term per segment the builder's unique-in-order variant is used.
    final DocSet result;
    if (mostTermsInOneSegment > 1) {
        result = collected.build(acceptBits);
    } else {
        result = collected.buildUniqueInOrder(acceptBits);
    }
    return DocSetUtil.getDocSet(result, searcher);
}
Also used : BitDocSet(org.apache.solr.search.BitDocSet) FixedBitSet(org.apache.lucene.util.FixedBitSet) DocSetBuilder(org.apache.solr.search.DocSetBuilder) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) BitDocSet(org.apache.solr.search.BitDocSet) DocSet(org.apache.solr.search.DocSet) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 30 with DocSet

use of org.apache.solr.search.DocSet in project lucene-solr by apache.

the class BlockJoinDocSetFacetComponent method process.

/**
 * Counts facets over the child documents selected by the block-join query stored in the
 * request context, publishes the counter in the request context and then delegates to the
 * superclass. Does nothing at all when no such query is present under {@code bjqKey}.
 *
 * @param rb response builder giving access to the request, its searcher and the results
 */
@Override
public void process(ResponseBuilder rb) throws IOException {
    final BlockJoinParentQParser.AllParentsAware bjq = (BlockJoinParentQParser.AllParentsAware) rb.req.getContext().get(bjqKey);
    if (bjq == null) {
        return;
    }

    final DocSet matchedParents = rb.getResults().docSet;
    final BitDocSet allParents = rb.req.getSearcher().getDocSetBits(bjq.getParentQuery());
    final DocSet childrenOfMatches = BlockJoin.toChildren(matchedParents, allParents, rb.req.getSearcher().getDocSetBits(new MatchAllDocsQuery()), QueryContext.newContext(rb.req.getSearcher()));
    final DocSet childQueryMatches = rb.req.getSearcher().getDocSet(bjq.getChildQuery());
    // only children are counted; parents are deliberately not merged into the set
    final DocSet countedChildren = childrenOfMatches.intersection(childQueryMatches);

    final List<LeafReaderContext> segments = rb.req.getSearcher().getIndexReader().leaves();
    final Filter childFilter = countedChildren.getTopFilter();
    final BlockJoinFacetAccsHolder accumulators = new BlockJoinFacetAccsHolder(rb.req);
    for (LeafReaderContext segment : segments) {
        // solr docsets already exclude any deleted docs, hence no accept-docs are passed
        final DocIdSet segmentDocs = childFilter.getDocIdSet(segment, null);
        final AggregatableDocIter children = new SegmentChildren(segment, segmentDocs, allParents);
        // segments without selected children are skipped entirely
        if (children.hasNext()) {
            accumulators.doSetNextReader(segment);
            accumulators.countFacets(children);
        }
    }
    accumulators.finish();

    rb.req.getContext().put(COLLECTOR_CONTEXT_PARAM, accumulators);
    super.process(rb);
}
Also used : AggregatableDocIter(org.apache.solr.search.join.BlockJoinFieldFacetAccumulator.AggregatableDocIter) DocIdSet(org.apache.lucene.search.DocIdSet) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) BitDocSet(org.apache.solr.search.BitDocSet) Filter(org.apache.solr.search.Filter) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) BitDocSet(org.apache.solr.search.BitDocSet) DocSet(org.apache.solr.search.DocSet)

Aggregations

DocSet (org.apache.solr.search.DocSet)37 BitDocSet (org.apache.solr.search.BitDocSet)19 Query (org.apache.lucene.search.Query)15 Term (org.apache.lucene.index.Term)12 TermQuery (org.apache.lucene.search.TermQuery)11 FixedBitSet (org.apache.lucene.util.FixedBitSet)9 DocIterator (org.apache.solr.search.DocIterator)9 BooleanQuery (org.apache.lucene.search.BooleanQuery)8 SchemaField (org.apache.solr.schema.SchemaField)8 SolrIndexSearcher (org.apache.solr.search.SolrIndexSearcher)7 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)6 HashDocSet (org.apache.solr.search.HashDocSet)6 SortedIntDocSet (org.apache.solr.search.SortedIntDocSet)6 WrappedQuery (org.apache.solr.search.WrappedQuery)6 FieldType (org.apache.solr.schema.FieldType)5 ArrayList (java.util.ArrayList)4 BytesRef (org.apache.lucene.util.BytesRef)4 NamedList (org.apache.solr.common.util.NamedList)4 SimpleOrderedMap (org.apache.solr.common.util.SimpleOrderedMap)4 IdentityHashMap (java.util.IdentityHashMap)3