Search in sources :

Example 21 with FieldType

use of org.apache.solr.schema.FieldType in project lucene-solr by apache.

the class SimpleFacets method getFacetTermEnumCounts.

/**
   * Returns a list of terms in the specified field along with the
   * corresponding count of documents in the set that match that constraint.
   * This method uses the FilterCache to get the intersection count between <code>docs</code>
   * and the DocSet for each term in the filter.
   *
   * <p>Terms are walked in the order produced by the field's {@code TermsEnum}. When sorting by
   * count, candidates are collected in a bounded queue of size {@code offset + limit} and the
   * first {@code offset} entries are skipped when the result is built; otherwise qualifying
   * terms are emitted directly in enumeration order, skipping the first {@code offset}.
   *
   * @param searcher the searcher whose schema and reader are used to enumerate terms
   * @param docs the base document set each term is intersected with
   * @param field the name of the field whose terms are enumerated
   * @param offset the number of qualifying terms to skip before results are emitted
   * @param limit the maximum number of terms to return; a negative value means no limit
   * @param mincount the minimum count a term needs in order to be returned
   * @param missing if true, an extra entry with a <code>null</code> key is appended holding the
   *        value of <code>getFieldMissingCount</code> for the field
   * @param sort <code>"count"</code> or <code>"true"</code> to sort by count; any other value
   *        keeps enumeration order. NOTE(review): dereferenced without a null check.
   * @param prefix if non-null, only terms starting with the indexed form of this prefix are considered
   * @param termFilter if non-null, only terms accepted by this predicate are considered
   * @param intersectsCheck if true, each count is only 1 or 0 (whether the term matches any
   *        document in <code>docs</code>) rather than the full intersection size
   * @return the terms (in readable form) mapped to their counts
   * @throws IOException propagated from the underlying index access
   *
   * @see FacetParams#FACET_LIMIT
   * @see FacetParams#FACET_ZEROS
   * @see FacetParams#FACET_MISSING
   */
public NamedList<Integer> getFacetTermEnumCounts(SolrIndexSearcher searcher, DocSet docs, String field, int offset, int limit, int mincount, boolean missing, String sort, String prefix, Predicate<BytesRef> termFilter, boolean intersectsCheck) throws IOException {
    /* :TODO: potential optimization...
    * cache the Terms with the highest docFreq and try them first
    * don't enum if we get our max from them
    */
    // Minimum term docFreq in order to use the filterCache for that term.
    int minDfFilterCache = global.getFieldInt(field, FacetParams.FACET_ENUM_CACHE_MINDF, 0);
    // make sure we have a set that is fast for random access, if we will use it for that
    // (the exists() lookups below only happen for terms with df < minDfFilterCache,
    // so the conversion is only worthwhile when minDfFilterCache > 0)
    DocSet fastForRandomSet = docs;
    if (minDfFilterCache > 0 && docs instanceof SortedIntDocSet) {
        SortedIntDocSet sset = (SortedIntDocSet) docs;
        fastForRandomSet = new HashDocSet(sset.getDocs(), 0, sset.size());
    }
    IndexSchema schema = searcher.getSchema();
    FieldType ft = schema.getFieldType(field);
    // only enforced when assertions are enabled
    assert !ft.isPointField() : "Point Fields don't support enum method";
    LeafReader r = searcher.getSlowAtomicReader();
    boolean sortByCount = sort.equals("count") || sort.equals("true");
    // capacity of the count-sorted queue: enough entries to cover offset + limit
    final int maxsize = limit >= 0 ? offset + limit : Integer.MAX_VALUE - 1;
    // only allocated when sorting by count; null otherwise
    final BoundedTreeSet<CountPair<BytesRef, Integer>> queue = sortByCount ? new BoundedTreeSet<CountPair<BytesRef, Integer>>(maxsize) : null;
    final NamedList<Integer> res = new NamedList<>();
    // the smallest value in the top 'N' values
    // (starts just below mincount and is raised once the queue is full)
    int min = mincount - 1;
    // remaining number of entries to skip / to emit
    int off = offset;
    int lim = limit >= 0 ? limit : Integer.MAX_VALUE;
    // indexed (internal) form of the requested prefix, or null when no prefix was given
    BytesRef prefixTermBytes = null;
    if (prefix != null) {
        String indexedPrefix = ft.toInternal(prefix);
        prefixTermBytes = new BytesRef(indexedPrefix);
    }
    Fields fields = r.fields();
    Terms terms = fields == null ? null : fields.terms(field);
    TermsEnum termsEnum = null;
    SolrIndexSearcher.DocsEnumState deState = null;
    // current term; stays null when the field has no terms or nothing is at/after the prefix
    BytesRef term = null;
    if (terms != null) {
        termsEnum = terms.iterator();
        if (prefixTermBytes != null) {
            // position termsEnum on the first term at or after the prefix
            if (termsEnum.seekCeil(prefixTermBytes) == TermsEnum.SeekStatus.END) {
                termsEnum = null;
            } else {
                term = termsEnum.term();
            }
        } else {
            // position termsEnum on first term
            term = termsEnum.next();
        }
    }
    // reused across terms by both counting paths below
    PostingsEnum postingsEnum = null;
    CharsRefBuilder charsRef = new CharsRefBuilder();
    // if the base set itself has fewer than mincount docs, no term can reach mincount
    if (docs.size() >= mincount) {
        while (term != null) {
            // stop as soon as the current term no longer starts with the prefix
            if (prefixTermBytes != null && !StringHelper.startsWith(term, prefixTermBytes))
                break;
            if (termFilter == null || termFilter.test(term)) {
                int df = termsEnum.docFreq();
                // Skip terms whose docFreq does not exceed the current threshold before doing
                // any intersection work (presumably because the count cannot exceed df).
                // For certain term distributions this can
                // make a large difference (for example, many terms with df=1).
                if (df > 0 && df > min) {
                    // count for this term (1 or 0 when intersectsCheck is set)
                    int c;
                    if (df >= minDfFilterCache) {
                        // frequent term: delegate to the searcher (filterCache path)
                        if (deState == null) {
                            // lazily created once and shared by all subsequent terms
                            deState = new SolrIndexSearcher.DocsEnumState();
                            deState.fieldName = field;
                            deState.liveDocs = r.getLiveDocs();
                            deState.termsEnum = termsEnum;
                            deState.postingsEnum = postingsEnum;
                        }
                        if (intersectsCheck) {
                            c = searcher.intersects(docs, deState) ? 1 : 0;
                        } else {
                            c = searcher.numDocs(docs, deState);
                        }
                        // keep whatever enum the searcher left in the state for later reuse
                        postingsEnum = deState.postingsEnum;
                    } else {
                        // iterate over TermDocs to calculate the intersection
                        // TODO: specialize when base docset is a bitset or hash set (skipDocs)?  or does it matter for this?
                        // TODO: do this per-segment for better efficiency (MultiDocsEnum just uses base class impl)
                        // TODO: would passing deleted docs lead to better efficiency over checking the fastForRandomSet?
                        postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
                        c = 0;
                        if (postingsEnum instanceof MultiPostingsEnum) {
                            // walk each sub-enum directly instead of going through the multi enum
                            MultiPostingsEnum.EnumWithSlice[] subs = ((MultiPostingsEnum) postingsEnum).getSubs();
                            int numSubs = ((MultiPostingsEnum) postingsEnum).getNumSubs();
                            SEGMENTS_LOOP: for (int subindex = 0; subindex < numSubs; subindex++) {
                                MultiPostingsEnum.EnumWithSlice sub = subs[subindex];
                                if (sub.postingsEnum == null)
                                    continue;
                                // doc ids from the sub-enum are offset by the slice start before the
                                // lookup (presumably segment-local ids -> top-level ids; confirm)
                                int base = sub.slice.start;
                                int docid;
                                while ((docid = sub.postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                                    if (fastForRandomSet.exists(docid + base)) {
                                        c++;
                                        if (intersectsCheck) {
                                            // a single hit is enough; abandon all remaining sub-enums
                                            assert c == 1;
                                            break SEGMENTS_LOOP;
                                        }
                                    }
                                }
                            }
                        } else {
                            int docid;
                            while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                                if (fastForRandomSet.exists(docid)) {
                                    c++;
                                    if (intersectsCheck) {
                                        // a single hit is enough
                                        assert c == 1;
                                        break;
                                    }
                                }
                            }
                        }
                    }
                    if (sortByCount) {
                        if (c > min) {
                            // copy the term bytes: the enum's BytesRef is not kept across next()
                            BytesRef termCopy = BytesRef.deepCopyOf(term);
                            queue.add(new CountPair<>(termCopy, c));
                            // once the queue is full, its last entry becomes the new threshold
                            if (queue.size() >= maxsize)
                                min = queue.last().val;
                        }
                    } else {
                        // enumeration order: skip the first 'offset' qualifying terms,
                        // then emit until the limit is exhausted
                        if (c >= mincount && --off < 0) {
                            if (--lim < 0)
                                break;
                            ft.indexedToReadable(term, charsRef);
                            res.add(charsRef.toString(), c);
                        }
                    }
                }
            }
            term = termsEnum.next();
        }
    }
    if (sortByCount) {
        // drain the queue in its iteration order, applying offset and limit
        for (CountPair<BytesRef, Integer> p : queue) {
            if (--off >= 0)
                continue;
            if (--lim < 0)
                break;
            ft.indexedToReadable(p.key, charsRef);
            res.add(charsRef.toString(), p.val);
        }
    }
    if (missing) {
        // the missing count is reported under a null key
        res.add(null, getFieldMissingCount(searcher, docs, field));
    }
    return res;
}
Also used : SortedIntDocSet(org.apache.solr.search.SortedIntDocSet) HashDocSet(org.apache.solr.search.HashDocSet) TermsEnum(org.apache.lucene.index.TermsEnum) CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) MultiPostingsEnum(org.apache.lucene.index.MultiPostingsEnum) PostingsEnum(org.apache.lucene.index.PostingsEnum) BytesRef(org.apache.lucene.util.BytesRef) LeafReader(org.apache.lucene.index.LeafReader) NamedList(org.apache.solr.common.util.NamedList) Terms(org.apache.lucene.index.Terms) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) FieldType(org.apache.solr.schema.FieldType) Fields(org.apache.lucene.index.Fields) MultiPostingsEnum(org.apache.lucene.index.MultiPostingsEnum) IndexSchema(org.apache.solr.schema.IndexSchema) HashDocSet(org.apache.solr.search.HashDocSet) DocSet(org.apache.solr.search.DocSet) SortedIntDocSet(org.apache.solr.search.SortedIntDocSet) BitDocSet(org.apache.solr.search.BitDocSet)

Example 22 with FieldType

use of org.apache.solr.schema.FieldType in project lucene-solr by apache.

the class SimpleFacets method selectFacetMethod.

/**
   * Picks the facet method that will actually be used for a field, overriding the
   * requested one whenever it cannot work for that field.
   *
   * N.B. the returned method may differ from the one passed as a request parameter.
   *
   * @param field field we are faceting
   * @param method the facet method passed as a request parameter, or null if none was given
   * @param mincount the minimum value a facet should have to be returned
   * @return the FacetMethod to use
   */
static FacetMethod selectFacetMethod(SchemaField field, FacetMethod method, Integer mincount) {
    final FieldType fieldType = field.getType();
    // Point fields only support FCS for now, whatever was requested.
    if (fieldType.isPointField()) {
        return FacetMethod.FCS;
    }

    FacetMethod chosen = method;

    // No preference given by the user: derive a default from the field.
    if (chosen == null) {
        if (fieldType instanceof BoolField && (field.indexed() || !field.hasDocValues())) {
            // Booleans that are not DocValues-only have very few values, so filters are used.
            chosen = FacetMethod.ENUM;
        } else if (fieldType.getNumberType() == null || field.multiValued()) {
            // TODO: default to per-segment or not?
            chosen = FacetMethod.FC;
        } else {
            // Single-valued numerics: the per-segment approach is optimal since there are
            // no global ords to merge and no expensive top-level reader to create.
            chosen = FacetMethod.FCS;
        }
    }

    // FC without docValues does not support single valued numeric facets.
    if (chosen == FacetMethod.FC && fieldType.getNumberType() != null && !field.multiValued()) {
        chosen = FacetMethod.FCS;
    }

    // UIF without DocValues can't deal with mincount=0: buckets are built from the values
    // present in the result set, so values outside the result set would never be seen.
    if (chosen == FacetMethod.UIF && !field.hasDocValues() && mincount == 0) {
        if (field.multiValued()) {
            chosen = FacetMethod.FC;
        } else {
            chosen = FacetMethod.FCS;
        }
    }

    // ENUM can't deal with trie fields that index several terms per value.
    if (chosen == FacetMethod.ENUM && TrieField.getMainValuePrefix(fieldType) != null) {
        if (field.multiValued()) {
            chosen = FacetMethod.FC;
        } else {
            chosen = FacetMethod.FCS;
        }
    }

    // FCS can't deal with multi token fields.
    final boolean multiToken = field.multiValued() || fieldType.multiValuedFieldCache();
    return (chosen == FacetMethod.FCS && multiToken) ? FacetMethod.FC : chosen;
}
Also used : BoolField(org.apache.solr.schema.BoolField) FieldType(org.apache.solr.schema.FieldType)

Example 23 with FieldType

use of org.apache.solr.schema.FieldType in project lucene-solr by apache.

the class ChildDocTransformer method transform.

/**
 * Attaches the child documents of {@code doc} to it.
 *
 * The parent's id value is turned into a field query, wrapped in a
 * ToChildBlockJoinQuery, and every matching child (up to {@code limit},
 * filtered by {@code childFilterQuery}) is converted and added to {@code doc}.
 * An IOException is not propagated; instead a message is stored in the
 * document under this transformer's name.
 */
@Override
public void transform(SolrDocument doc, int docid, float score) {
    final FieldType idType = idField.getType();
    final Object rawParentId = doc.getFirstValue(idField.getName());

    // The id may still be an index-level field object, or an already converted value.
    final String parentId;
    if (rawParentId instanceof IndexableField) {
        parentId = idType.toExternal((IndexableField) rawParentId);
    } else {
        parentId = rawParentId.toString();
    }

    try {
        final Query matchParent = idType.getFieldQuery(null, idField, parentId);
        final Query matchChildren = new ToChildBlockJoinQuery(matchParent, parentsFilter);
        final DocList children = context.getSearcher().getDocList(matchChildren, childFilterQuery, new Sort(), 0, limit);
        if (children.matches() <= 0) {
            return;
        }
        for (DocIterator it = children.iterator(); it.hasNext(); ) {
            final Integer childId = it.next();
            final Document luceneChild = context.getSearcher().doc(childId);
            // TODO: future enhancement...
            // support an fl local param in the transformer, which is used to build
            // a private ReturnFields instance that we use to prune unwanted field
            // names from the converted child document
            final SolrDocument solrChild = DocsStreamer.convertLuceneDocToSolrDoc(luceneChild, schema);
            doc.addChildDocument(solrChild);
        }
    } catch (IOException e) {
        // best effort: report the failure inside the document instead of failing the response
        doc.put(name, "Could not fetch child Documents");
    }
}
Also used : DocIterator(org.apache.solr.search.DocIterator) Query(org.apache.lucene.search.Query) ToChildBlockJoinQuery(org.apache.lucene.search.join.ToChildBlockJoinQuery) IOException(java.io.IOException) SolrDocument(org.apache.solr.common.SolrDocument) Document(org.apache.lucene.document.Document) ToChildBlockJoinQuery(org.apache.lucene.search.join.ToChildBlockJoinQuery) FieldType(org.apache.solr.schema.FieldType) IndexableField(org.apache.lucene.index.IndexableField) SolrDocument(org.apache.solr.common.SolrDocument) Sort(org.apache.lucene.search.Sort) DocList(org.apache.solr.search.DocList)

Example 24 with FieldType

use of org.apache.solr.schema.FieldType in project lucene-solr by apache.

the class DocsStreamer method getValue.

/**
 * Extracts the value of an index field for inclusion in a returned document.
 *
 * When a field type is available, the value comes from {@code toObject} if the
 * type's class is listed in KNOWN_TYPES, otherwise from {@code toExternal}.
 * Without a field type (field not in the schema), the binary value is returned
 * as a byte array if present, else the string value.
 *
 * @param sf the schema field, may be null
 * @param f the index field to read from
 * @return the extracted value
 */
public static Object getValue(SchemaField sf, IndexableField f) {
    final FieldType type = (sf == null) ? null : sf.getType();

    if (type != null) {
        return KNOWN_TYPES.contains(type.getClass()) ? type.toObject(f) : type.toExternal(f);
    }

    // handle fields not in the schema
    final BytesRef raw = f.binaryValue();
    if (raw == null) {
        return f.stringValue();
    }

    // The backing array is handed out as-is when the ref spans all of it.
    final boolean spansWholeArray = raw.offset == 0 && raw.length == raw.bytes.length;
    if (spansWholeArray) {
        return raw.bytes;
    }

    // Otherwise copy just the referenced slice.
    final byte[] slice = new byte[raw.length];
    System.arraycopy(raw.bytes, raw.offset, slice, 0, raw.length);
    return slice;
}
Also used : BytesRef(org.apache.lucene.util.BytesRef) FieldType(org.apache.solr.schema.FieldType)

Example 25 with FieldType

use of org.apache.solr.schema.FieldType in project lucene-solr by apache.

the class Grouping method addFieldCommand.

/**
   * Registers a grouping command for the given field.
   * When the field's value source is not a {@link StrFieldSource}, the field cannot be
   * handled by {@link CommandField} and the call is delegated to
   * {@link #addFunctionCommand(String, org.apache.solr.request.SolrQueryRequest)} instead.
   *
   * @param field The fieldname to group by.
   */
public void addFieldCommand(String field, SolrQueryRequest request) throws SyntaxError {
    // Throws an exception when field doesn't exist. Bad request.
    final SchemaField groupField = searcher.getSchema().getField(field);
    final FieldType groupFieldType = groupField.getType();
    final ValueSource source = groupFieldType.getValueSource(groupField, null);

    final boolean usableAsFieldCommand = source instanceof StrFieldSource;
    if (!usableAsFieldCommand) {
        addFunctionCommand(field, request);
        return;
    }

    final Grouping.CommandField command = new CommandField();

    // what to group on
    command.groupBy = field;
    command.key = field;

    // sorting
    command.groupSort = groupSort;
    command.withinGroupSort = withinGroupSort;

    // paging and sizing
    command.numGroups = limitDefault;
    command.docsPerGroup = docsPerGroupDefault;
    command.offset = cmd.getOffset();
    command.totalCount = defaultTotalCount;

    // the main command is always rendered in the simple format
    if (main) {
        command.main = true;
        command.format = Grouping.Format.simple;
    } else {
        command.format = defaultFormat;
    }

    // a group offset doesn't make sense for the simple format
    command.groupOffset = (command.format == Grouping.Format.simple) ? 0 : groupOffsetDefault;

    commands.add(command);
}
Also used : SchemaField(org.apache.solr.schema.SchemaField) QueryValueSource(org.apache.lucene.queries.function.valuesource.QueryValueSource) ValueSource(org.apache.lucene.queries.function.ValueSource) StrFieldSource(org.apache.solr.schema.StrFieldSource) FieldType(org.apache.solr.schema.FieldType)

Aggregations

FieldType (org.apache.solr.schema.FieldType)93 SchemaField (org.apache.solr.schema.SchemaField)37 SolrException (org.apache.solr.common.SolrException)29 ArrayList (java.util.ArrayList)23 BytesRef (org.apache.lucene.util.BytesRef)23 NamedList (org.apache.solr.common.util.NamedList)23 IOException (java.io.IOException)18 SimpleOrderedMap (org.apache.solr.common.util.SimpleOrderedMap)15 IndexSchema (org.apache.solr.schema.IndexSchema)14 Query (org.apache.lucene.search.Query)13 BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder)13 Analyzer (org.apache.lucene.analysis.Analyzer)12 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)10 CharsRefBuilder (org.apache.lucene.util.CharsRefBuilder)10 StrField (org.apache.solr.schema.StrField)8 HashMap (java.util.HashMap)7 List (java.util.List)7 Map (java.util.Map)7 DocIterator (org.apache.solr.search.DocIterator)7 DocList (org.apache.solr.search.DocList)7