Use of org.apache.solr.search.DocSet in project lucene-solr by apache.
Class SimpleFacets, method computeDocSet.
protected DocSet computeDocSet(DocSet baseDocSet, List<String> excludeTagList) throws SyntaxError, IOException {
  Map<?, ?> tagMap = (Map<?, ?>) req.getContext().get("tags");
  // rb can be null if facets are being calculated from a RequestHandler e.g. MoreLikeThisHandler
  if (tagMap == null || rb == null) {
    return baseDocSet;
  }
  IdentityHashMap<Query, Boolean> excludeSet = new IdentityHashMap<>();
  for (String excludeTag : excludeTagList) {
    Object olst = tagMap.get(excludeTag);
    // tagMap has entries of List<String,List<QParser>>, but subject to change in the future
    if (!(olst instanceof Collection))
      continue;
    for (Object o : (Collection<?>) olst) {
      if (!(o instanceof QParser))
        continue;
      QParser qp = (QParser) o;
      excludeSet.put(qp.getQuery(), Boolean.TRUE);
    }
  }
  if (excludeSet.size() == 0)
    return baseDocSet;
  List<Query> qlist = new ArrayList<>();
  // add the base query
  if (!excludeSet.containsKey(rb.getQuery())) {
    qlist.add(rb.getQuery());
  }
  // add the filters
  if (rb.getFilters() != null) {
    for (Query q : rb.getFilters()) {
      if (!excludeSet.containsKey(q)) {
        qlist.add(q);
      }
    }
  }
  // get the new base docset for this facet
  DocSet base = searcher.getDocSet(qlist);
  if (rb.grouping() && rb.getGroupingSpec().isTruncateGroups()) {
    Grouping grouping = new Grouping(searcher, null, rb.getQueryCommand(), false, 0, false);
    grouping.setWithinGroupSort(rb.getGroupingSpec().getSortWithinGroup());
    if (rb.getGroupingSpec().getFields().length > 0) {
      grouping.addFieldCommand(rb.getGroupingSpec().getFields()[0], req);
    } else if (rb.getGroupingSpec().getFunctions().length > 0) {
      grouping.addFunctionCommand(rb.getGroupingSpec().getFunctions()[0], req);
    } else {
      return base;
    }
    AllGroupHeadsCollector allGroupHeadsCollector = grouping.getCommands().get(0).createAllGroupCollector();
    searcher.search(base.getTopFilter(), allGroupHeadsCollector);
    return new BitDocSet(allGroupHeadsCollector.retrieveGroupHeads(searcher.maxDoc()));
  } else {
    return base;
  }
}
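The excludeTagList handled above is driven by the "{!ex=...}" local param on facet.field, paired with "{!tag=...}" on a filter query: the tagged QParser instances land in the "tags" context map that the method inspects. A minimal SolrJ sketch of a request that exercises this path; the core name "products" and the field names are hypothetical:

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;

public class TaggedFacetExample {
  public static void main(String[] args) throws Exception {
    try (HttpSolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr/products").build()) {
      SolrQuery q = new SolrQuery("*:*");
      q.addFilterQuery("{!tag=color}color:red");   // tag the filter; its QParser is recorded in the "tags" context map
      q.setFacet(true);
      q.addFacetField("{!ex=color}color");         // exclude that tag, so computeDocSet drops the filter for this facet
      QueryResponse rsp = client.query(q);
      System.out.println(rsp.getFacetField("color").getValues());
    }
  }
}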
Use of org.apache.solr.search.DocSet in project lucene-solr by apache.
Class SimpleFacets, method getFieldMissingCount.
/**
 * Returns a count of the documents in the set which do not have any
 * terms for the specified field.
 *
 * @see FacetParams#FACET_MISSING
 */
public static int getFieldMissingCount(SolrIndexSearcher searcher, DocSet docs, String fieldName) throws IOException {
  SchemaField sf = searcher.getSchema().getField(fieldName);
  DocSet hasVal = searcher.getDocSet(sf.getType().getRangeQuery(null, sf, null, null, false, false));
  return docs.andNotSize(hasVal);
}
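The range query built here with null bounds is the DocSet analogue of fq=field:[* TO *]: it matches every document that has at least one value in the field, so andNotSize returns how many documents in docs have none. A hedged usage sketch; the field name "price" is illustrative:

// Inside code that already holds a SolrIndexSearcher and a base DocSet,
// e.g. a custom SearchComponent; "price" is a hypothetical field name.
int missingPrice = SimpleFacets.getFieldMissingCount(searcher, baseDocSet, "price");
// This is the number that facet.missing=true reports for that field.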
Use of org.apache.solr.search.DocSet in project lucene-solr by apache.
Class SimpleFacets, method getTermCounts.
/**
 * Term counts for use in field faceting that respects the specified mincount.
 * If mincount is null, the "zeros" param is consulted for the appropriate
 * backcompat default.
 *
 * @see FacetParams#FACET_ZEROS
 */
private NamedList<Integer> getTermCounts(String field, Integer mincount, ParsedParams parsed) throws IOException {
  final SolrParams params = parsed.params;
  final DocSet docs = parsed.docs;
  final int threads = parsed.threads;
  int offset = params.getFieldInt(field, FacetParams.FACET_OFFSET, 0);
  int limit = params.getFieldInt(field, FacetParams.FACET_LIMIT, 100);
  if (limit == 0)
    return new NamedList<>();
  if (mincount == null) {
    Boolean zeros = params.getFieldBool(field, FacetParams.FACET_ZEROS);
    // mincount = (zeros!=null && zeros) ? 0 : 1;
    mincount = (zeros != null && !zeros) ? 1 : 0;
    // current default is to include zeros.
  }
  boolean missing = params.getFieldBool(field, FacetParams.FACET_MISSING, false);
  // default to sorting if there is a limit.
  String sort = params.getFieldParam(field, FacetParams.FACET_SORT, limit > 0 ? FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX);
  String prefix = params.getFieldParam(field, FacetParams.FACET_PREFIX);
  final Predicate<BytesRef> termFilter = newBytesRefFilter(field, params);
  boolean exists = params.getFieldBool(field, FacetParams.FACET_EXISTS, false);
  NamedList<Integer> counts;
  SchemaField sf = searcher.getSchema().getField(field);
  if (sf.getType().isPointField() && !sf.hasDocValues()) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can't facet on a PointField without docValues");
  }
  FieldType ft = sf.getType();
  // determine what type of faceting method to use
  final String methodStr = params.getFieldParam(field, FacetParams.FACET_METHOD);
  final FacetMethod requestedMethod;
  if (FacetParams.FACET_METHOD_enum.equals(methodStr)) {
    requestedMethod = FacetMethod.ENUM;
  } else if (FacetParams.FACET_METHOD_fcs.equals(methodStr)) {
    requestedMethod = FacetMethod.FCS;
  } else if (FacetParams.FACET_METHOD_fc.equals(methodStr)) {
    requestedMethod = FacetMethod.FC;
  } else if (FacetParams.FACET_METHOD_uif.equals(methodStr)) {
    requestedMethod = FacetMethod.UIF;
  } else {
    requestedMethod = null;
  }
  final boolean multiToken = sf.multiValued() || ft.multiValuedFieldCache();
  FacetMethod appliedFacetMethod = selectFacetMethod(field, sf, requestedMethod, mincount, exists);
  RTimer timer = null;
  if (fdebug != null) {
    fdebug.putInfoItem("requestedMethod", requestedMethod == null ? "not specified" : requestedMethod.name());
    fdebug.putInfoItem("appliedMethod", appliedFacetMethod.name());
    fdebug.putInfoItem("inputDocSetSize", docs.size());
    fdebug.putInfoItem("field", field);
    timer = new RTimer();
  }
  if (params.getFieldBool(field, GroupParams.GROUP_FACET, false)) {
    counts = getGroupedCounts(searcher, docs, field, multiToken, offset, limit, mincount, missing, sort, prefix, termFilter);
  } else {
    assert appliedFacetMethod != null;
    switch (appliedFacetMethod) {
      case ENUM:
        assert TrieField.getMainValuePrefix(ft) == null;
        counts = getFacetTermEnumCounts(searcher, docs, field, offset, limit, mincount, missing, sort, prefix, termFilter, exists);
        break;
      case FCS:
        assert ft.isPointField() || !multiToken;
        if (ft.isPointField() || (ft.getNumberType() != null && !sf.multiValued())) {
          if (prefix != null) {
            throw new SolrException(ErrorCode.BAD_REQUEST, FacetParams.FACET_PREFIX + " is not supported on numeric types");
          }
          if (termFilter != null) {
            throw new SolrException(ErrorCode.BAD_REQUEST, "BytesRef term filters (" + FacetParams.FACET_CONTAINS + ", " + FacetParams.FACET_EXCLUDETERMS + ") are not supported on numeric types");
          }
          // We should do this, but mincount=0 is currently the default
          // if (ft.isPointField() && mincount <= 0) {
          //   throw new SolrException(ErrorCode.BAD_REQUEST, FacetParams.FACET_MINCOUNT + " <= 0 is not supported on point types");
          // }
          counts = NumericFacets.getCounts(searcher, docs, field, offset, limit, mincount, missing, sort);
        } else {
          PerSegmentSingleValuedFaceting ps = new PerSegmentSingleValuedFaceting(searcher, docs, field, offset, limit, mincount, missing, sort, prefix, termFilter);
          Executor executor = threads == 0 ? directExecutor : facetExecutor;
          ps.setNumThreads(threads);
          counts = ps.getFacetCounts(executor);
        }
        break;
      case UIF:
        // Emulate the JSON Faceting structure so we can use the same parsing classes
        Map<String, Object> jsonFacet = new HashMap<>(13);
        jsonFacet.put("type", "terms");
        jsonFacet.put("field", field);
        jsonFacet.put("offset", offset);
        jsonFacet.put("limit", limit);
        jsonFacet.put("mincount", mincount);
        jsonFacet.put("missing", missing);
        jsonFacet.put("prefix", prefix);
        jsonFacet.put("numBuckets", params.getFieldBool(field, "numBuckets", false));
        jsonFacet.put("allBuckets", params.getFieldBool(field, "allBuckets", false));
        jsonFacet.put("method", "uif");
        jsonFacet.put("cacheDf", 0);
        jsonFacet.put("perSeg", false);
        final String sortVal;
        switch (sort) {
          case FacetParams.FACET_SORT_COUNT_LEGACY:
            sortVal = FacetParams.FACET_SORT_COUNT;
            break;
          case FacetParams.FACET_SORT_INDEX_LEGACY:
            sortVal = FacetParams.FACET_SORT_INDEX;
            break;
          default:
            sortVal = sort;
        }
        jsonFacet.put(SORT, sortVal);
        Map<String, Object> topLevel = new HashMap<>();
        topLevel.put(field, jsonFacet);
        topLevel.put("processEmpty", true);
        FacetProcessor fproc = FacetProcessor.createProcessor(rb.req, topLevel, docs); // rb.getResults().docSet
        // TODO do we handle debug? Should probably already be handled by the legacy code
        fproc.process();
        // Go through the response to build the expected output for SimpleFacets
        Object res = fproc.getResponse();
        counts = new NamedList<Integer>();
        if (res != null) {
          SimpleOrderedMap<Object> som = (SimpleOrderedMap<Object>) res;
          SimpleOrderedMap<Object> asdf = (SimpleOrderedMap<Object>) som.get(field);
          List<SimpleOrderedMap<Object>> buckets = (List<SimpleOrderedMap<Object>>) asdf.get("buckets");
          for (SimpleOrderedMap<Object> b : buckets) {
            counts.add(b.get("val").toString(), (Integer) b.get("count"));
          }
          if (missing) {
            SimpleOrderedMap<Object> missingCounts = (SimpleOrderedMap<Object>) asdf.get("missing");
            counts.add(null, (Integer) missingCounts.get("count"));
          }
        }
        break;
      case FC:
        counts = DocValuesFacets.getCounts(searcher, docs, field, offset, limit, mincount, missing, sort, prefix, termFilter, fdebug);
        break;
      default:
        throw new AssertionError();
    }
  }
  if (fdebug != null) {
    long timeElapsed = (long) timer.getTime();
    fdebug.setElapse(timeElapsed);
  }
  return counts;
}
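The parameters consumed at the top of this method can all be set per field with the f.<field>. prefix. A hedged SolrJ sketch; the field name "category" is illustrative, and facet.method selects the branch taken in the switch above, subject to selectFacetMethod's overrides:

import org.apache.solr.client.solrj.SolrQuery;

SolrQuery q = new SolrQuery("*:*");
q.setFacet(true);
q.addFacetField("category");
q.set("f.category.facet.limit", "20");      // FacetParams.FACET_LIMIT
q.set("f.category.facet.mincount", "1");    // FacetParams.FACET_MINCOUNT
q.set("f.category.facet.missing", "true");  // FacetParams.FACET_MISSING
q.set("f.category.facet.method", "uif");    // requestedMethod = FacetMethod.UIF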
Use of org.apache.solr.search.DocSet in project lucene-solr by apache.
Class SolrRangeQuery, method createDocSet.
private DocSet createDocSet(SolrIndexSearcher searcher, long cost) throws IOException {
  int maxDoc = searcher.maxDoc();
  BitDocSet liveDocs = searcher.getLiveDocs();
  FixedBitSet liveBits = liveDocs.size() == maxDoc ? null : liveDocs.getBits();
  DocSetBuilder builder = new DocSetBuilder(maxDoc, cost);
  List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
  int maxTermsPerSegment = 0;
  for (LeafReaderContext ctx : leaves) {
    TermsEnum te = getTermsEnum(ctx);
    int termsVisited = builder.add(te, ctx.docBase);
    maxTermsPerSegment = Math.max(maxTermsPerSegment, termsVisited);
  }
  DocSet set = maxTermsPerSegment <= 1 ? builder.buildUniqueInOrder(liveBits) : builder.build(liveBits);
  return DocSetUtil.getDocSet(set, searcher);
}
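The buildUniqueInOrder path is valid only because at most one term was visited per segment: a single postings list delivers doc IDs in order and without duplicates, so the builder can skip the sort-and-deduplicate pass that build performs. For callers outside this class, the usual way to obtain such a DocSet is through the searcher; a hedged sketch, with "price" as a hypothetical numeric field:

import java.io.IOException;
import org.apache.lucene.search.Query;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrIndexSearcher;

// Build a range query through the field type (the same entry point that produces
// a SolrRangeQuery for string fields) and ask the searcher for its DocSet;
// the result may be served from the filter cache.
static DocSet priceRangeDocSet(SolrIndexSearcher searcher) throws IOException {
  SchemaField sf = searcher.getSchema().getField("price");
  Query range = sf.getType().getRangeQuery(null, sf, "10", "100", true, true);
  return searcher.getDocSet(range);
}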
Use of org.apache.solr.search.DocSet in project lucene-solr by apache.
Class BlockJoinDocSetFacetComponent, method process.
@Override
public void process(ResponseBuilder rb) throws IOException {
  final BlockJoinParentQParser.AllParentsAware bjq = (BlockJoinParentQParser.AllParentsAware) rb.req.getContext().get(bjqKey);
  if (bjq != null) {
    final DocSet parentResult = rb.getResults().docSet;
    final BitDocSet allParentsBitsDocSet = rb.req.getSearcher().getDocSetBits(bjq.getParentQuery());
    final DocSet allChildren = BlockJoin.toChildren(parentResult, allParentsBitsDocSet, rb.req.getSearcher().getDocSetBits(new MatchAllDocsQuery()), QueryContext.newContext(rb.req.getSearcher()));
    final DocSet childQueryDocSet = rb.req.getSearcher().getDocSet(bjq.getChildQuery());
    final DocSet selectedChildren = allChildren.intersection(childQueryDocSet);
    // don't include parent into facet counts
    // childResult = childResult.union(parentResult); // just to mimic the current logic
    final List<LeafReaderContext> leaves = rb.req.getSearcher().getIndexReader().leaves();
    Filter filter = selectedChildren.getTopFilter();
    final BlockJoinFacetAccsHolder facetCounter = new BlockJoinFacetAccsHolder(rb.req);
    for (int subIdx = 0; subIdx < leaves.size(); subIdx++) {
      LeafReaderContext subCtx = leaves.get(subIdx);
      // solr docsets already exclude any deleted docs
      DocIdSet dis = filter.getDocIdSet(subCtx, null);
      AggregatableDocIter iter = new SegmentChildren(subCtx, dis, allParentsBitsDocSet);
      if (iter.hasNext()) {
        facetCounter.doSetNextReader(subCtx);
        facetCounter.countFacets(iter);
      }
    }
    facetCounter.finish();
    rb.req.getContext().put(COLLECTOR_CONTEXT_PARAM, facetCounter);
    super.process(rb);
  }
}
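A hedged sketch of a request this component responds to: a block-join parent query plus a child-level facet field, using the child.facet.field parameter of the block-join facet components. The field names are illustrative:

import org.apache.solr.client.solrj.SolrQuery;

SolrQuery q = new SolrQuery("{!parent which=type_s:parent}color_s:red");
q.set("child.facet.field", "size_s");  // facet over the children selected by the child query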