Search in sources :

Example 21 with SchemaField

use of org.apache.solr.schema.SchemaField in project lucene-solr by apache.

the class NumericFacets method getCountsSingleValue.

/**
 * Computes facet counts for a single-valued numeric field over the given document set,
 * reading the per-document values from numeric doc values.
 *
 * <p>The work is done in three steps: (1) accumulate a count per distinct value into a hash table,
 * (2) select the top entries with a bounded priority queue, (3) turn the selected entries into
 * readable names, optionally merging in zero-count values taken from the terms dictionary.
 *
 * @param searcher  searcher providing the schema, the index leaves and the terms dictionary
 * @param docs      documents to count over
 * @param fieldName name of the numeric field to facet on
 * @param offset    number of leading facet entries to skip
 * @param limit     maximum number of entries to return; a negative value means "no limit"
 * @param mincount  minimum count for an entry to be returned; a value {@code <= 0} requests
 *                  zero-count values as well (only honoured for indexed, non-point fields)
 * @param missing   when true, a trailing entry with a {@code null} name holds the number of
 *                  documents in {@code docs} that have no value for the field
 * @param sort      {@code FacetParams.FACET_SORT_COUNT} (or its legacy form) to order by count;
 *                  any other value orders by value
 * @return the facet entries, in order, as (readable value, count) pairs
 * @throws IllegalStateException if the field type reports no number type
 * @throws IOException if reading doc values or terms fails
 */
private static NamedList<Integer> getCountsSingleValue(SolrIndexSearcher searcher, DocSet docs, String fieldName, int offset, int limit, int mincount, boolean missing, String sort) throws IOException {
    boolean zeros = mincount <= 0;
    // Values accumulated from documents always have a count of at least 1.
    mincount = Math.max(mincount, 1);
    final SchemaField sf = searcher.getSchema().getField(fieldName);
    final FieldType ft = sf.getType();
    final NumberType numericType = ft.getNumberType();
    if (numericType == null) {
        throw new IllegalStateException("Cannot compute numeric facets on field " + fieldName + ": its field type has no number type");
    }
    // We don't return zeros when using PointFields or when index=false
    zeros = zeros && !ft.isPointField() && sf.indexed();
    final boolean sortByCount = FacetParams.FACET_SORT_COUNT.equals(sort) || FacetParams.FACET_SORT_COUNT_LEGACY.equals(sort);
    final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
    // 1. accumulate
    final HashTable hashTable = new HashTable(true);
    final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
    LeafReaderContext ctx = null;
    NumericDocValues longs = null;
    int missingCount = 0;
    for (DocIterator docsIt = docs.iterator(); docsIt.hasNext(); ) {
        final int doc = docsIt.nextDoc();
        if (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc()) {
            // The doc lies beyond the current leaf: move forward to the leaf that contains it
            // and open that leaf's doc values.
            do {
                ctx = ctxIt.next();
            } while (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc());
            assert doc >= ctx.docBase;
            switch(numericType) {
                case LONG:
                case DATE:
                case INTEGER:
                    // Long, Date and Integer
                    longs = DocValues.getNumeric(ctx.reader(), fieldName);
                    break;
                case FLOAT:
                case DOUBLE:
                    // Float and Double share the same handling: for negative raw values every bit
                    // except the sign bit is flipped (presumably so that the long ordering used by
                    // the priority queue matches numeric ordering -- confirm against the encoding).
                    // TODO: this bit flipping should probably be moved to tie-break in the PQ comparator
                    longs = new FilterNumericDocValues(DocValues.getNumeric(ctx.reader(), fieldName)) {

                        @Override
                        public long longValue() throws IOException {
                            long bits = super.longValue();
                            if (bits < 0) {
                                bits ^= 0x7fffffffffffffffL;
                            }
                            return bits;
                        }
                    };
                    break;
                default:
                    throw new AssertionError("Unexpected type: " + numericType);
            }
        }
        // Doc values are positioned with leaf-local doc ids.
        int valuesDocID = longs.docID();
        if (valuesDocID < doc - ctx.docBase) {
            valuesDocID = longs.advance(doc - ctx.docBase);
        }
        if (valuesDocID == doc - ctx.docBase) {
            hashTable.add(doc, longs.longValue(), 1);
        } else {
            ++missingCount;
        }
    }
    // 2. select top-k facet values
    final int pqSize = limit < 0 ? hashTable.size : Math.min(offset + limit, hashTable.size);
    final PriorityQueue<Entry> pq;
    if (sortByCount) {
        pq = new PriorityQueue<Entry>(pqSize) {

            @Override
            protected boolean lessThan(Entry a, Entry b) {
                // Lower count is "less"; on equal counts the larger value is "less".
                return a.count < b.count || (a.count == b.count && a.bits > b.bits);
            }
        };
    } else {
        pq = new PriorityQueue<Entry>(pqSize) {

            @Override
            protected boolean lessThan(Entry a, Entry b) {
                return a.bits > b.bits;
            }
        };
    }
    // The Entry handed back by insertWithOverflow is reused for the next candidate.
    Entry e = null;
    for (int i = 0; i < hashTable.bits.length; ++i) {
        if (hashTable.counts[i] >= mincount) {
            if (e == null) {
                e = new Entry();
            }
            e.bits = hashTable.bits[i];
            e.count = hashTable.counts[i];
            e.docID = hashTable.docIDs[i];
            e = pq.insertWithOverflow(e);
        }
    }
    // 3. build the NamedList
    final ValueSource vs = ft.getValueSource(sf, null);
    final NamedList<Integer> result = new NamedList<>();
    // to be merged with terms from the terms dict
    if (!zeros || sortByCount) {
        // Only keep items we're interested in
        final Deque<Entry> counts = new ArrayDeque<>();
        while (pq.size() > offset) {
            counts.addFirst(pq.pop());
        }
        // Entries from the PQ first, then using the terms dictionary
        for (Entry entry : counts) {
            // The readable name is looked up through the document recorded for the entry.
            final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
            final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
            result.add(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
        }
        if (zeros && (limit < 0 || result.size() < limit)) {
            // need to merge with the term dict
            // NOTE: zeros already implies sf.indexed(), so this guard is purely defensive.
            if (!sf.indexed() && !sf.hasDocValues()) {
                throw new IllegalStateException("Cannot use " + FacetParams.FACET_MINCOUNT + "=0 on field " + sf.getName() + " which is neither indexed nor docValues");
            }
            // Add zeros until there are limit results
            final Set<String> alreadySeen = new HashSet<>();
            // Entries still in the PQ are the ones skipped by offset; they have non-zero counts
            // and must not be reported as zeros.
            while (pq.size() > 0) {
                Entry entry = pq.pop();
                final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
                final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
                alreadySeen.add(values.strVal(entry.docID - leaves.get(readerIdx).docBase));
            }
            for (int i = 0; i < result.size(); ++i) {
                alreadySeen.add(result.getName(i));
            }
            final Terms terms = searcher.getSlowAtomicReader().terms(fieldName);
            if (terms != null) {
                final String prefixStr = TrieField.getMainValuePrefix(ft);
                final BytesRef prefix;
                if (prefixStr != null) {
                    prefix = new BytesRef(prefixStr);
                } else {
                    prefix = new BytesRef();
                }
                final TermsEnum termsEnum = terms.iterator();
                BytesRef term;
                switch(termsEnum.seekCeil(prefix)) {
                    case FOUND:
                    case NOT_FOUND:
                        term = termsEnum.term();
                        break;
                    case END:
                        term = null;
                        break;
                    default:
                        throw new AssertionError();
                }
                final CharsRefBuilder spare = new CharsRefBuilder();
                // Skip zero-count terms until offset is consumed; the values that had counts
                // already account for hashTable.size skipped positions.
                for (int skipped = hashTable.size; skipped < offset && term != null && StringHelper.startsWith(term, prefix); ) {
                    ft.indexedToReadable(term, spare);
                    final String termStr = spare.toString();
                    if (!alreadySeen.contains(termStr)) {
                        ++skipped;
                    }
                    term = termsEnum.next();
                }
                for (; term != null && StringHelper.startsWith(term, prefix) && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
                    ft.indexedToReadable(term, spare);
                    final String termStr = spare.toString();
                    if (!alreadySeen.contains(termStr)) {
                        result.add(termStr, 0);
                    }
                }
            }
        }
    } else {
        // => Merge the PQ and the terms dictionary on the fly
        // NOTE: this branch is only reached when zeros is true, which already implies
        // sf.indexed(); the guard is purely defensive.
        if (!sf.indexed()) {
            throw new IllegalStateException("Cannot use " + FacetParams.FACET_SORT + "=" + FacetParams.FACET_SORT_INDEX + " on a field which is not indexed");
        }
        final Map<String, Integer> counts = new HashMap<>();
        while (pq.size() > 0) {
            final Entry entry = pq.pop();
            final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
            final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
            counts.put(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
        }
        final Terms terms = searcher.getSlowAtomicReader().terms(fieldName);
        if (terms != null) {
            final String prefixStr = TrieField.getMainValuePrefix(ft);
            final BytesRef prefix;
            if (prefixStr != null) {
                prefix = new BytesRef(prefixStr);
            } else {
                prefix = new BytesRef();
            }
            final TermsEnum termsEnum = terms.iterator();
            BytesRef term;
            switch(termsEnum.seekCeil(prefix)) {
                case FOUND:
                case NOT_FOUND:
                    term = termsEnum.term();
                    break;
                case END:
                    term = null;
                    break;
                default:
                    throw new AssertionError();
            }
            final CharsRefBuilder spare = new CharsRefBuilder();
            // Skip the first offset terms, then emit terms in dictionary order with their
            // accumulated count (0 when the value was not seen in docs).
            for (int i = 0; i < offset && term != null && StringHelper.startsWith(term, prefix); ++i) {
                term = termsEnum.next();
            }
            for (; term != null && StringHelper.startsWith(term, prefix) && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
                ft.indexedToReadable(term, spare);
                final String termStr = spare.toString();
                Integer count = counts.get(termStr);
                if (count == null) {
                    count = 0;
                }
                result.add(termStr, count);
            }
        }
    }
    if (missing) {
        result.add(null, missingCount);
    }
    return result;
}
Also used : FilterNumericDocValues(org.apache.lucene.index.FilterNumericDocValues) NumericDocValues(org.apache.lucene.index.NumericDocValues) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) DocIterator(org.apache.solr.search.DocIterator) HashMap(java.util.HashMap) TermsEnum(org.apache.lucene.index.TermsEnum) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) BytesRef(org.apache.lucene.util.BytesRef) HashSet(java.util.HashSet) NamedList(org.apache.solr.common.util.NamedList) Terms(org.apache.lucene.index.Terms) IOException(java.io.IOException) FilterNumericDocValues(org.apache.lucene.index.FilterNumericDocValues) ArrayDeque(java.util.ArrayDeque) FieldType(org.apache.solr.schema.FieldType) SchemaField(org.apache.solr.schema.SchemaField) NumberType(org.apache.solr.schema.NumberType) ValueSource(org.apache.lucene.queries.function.ValueSource) FunctionValues(org.apache.lucene.queries.function.FunctionValues)

Example 22 with SchemaField

use of org.apache.solr.schema.SchemaField in project lucene-solr by apache.

the class SolrQueryParserBase method getFieldQuery.

/**
 * Builds the query for one or more unquoted terms on a single field.
 *
 * <p>Resolution order:
 * <ol>
 *   <li>A field name starting with {@code '_'} that maps to a {@code MagicFieldName} is handed to a
 *       sub-parser (only when a parser is available).</li>
 *   <li>A schema field that is tokenized and indexed is analyzed through {@code newFieldQuery}.</li>
 *   <li>Any other schema field yields a {@code RawQuery} when {@code raw} is set; otherwise each
 *       term is delegated to the field type, and multiple terms are combined into a boolean query
 *       using MUST when the default operator is AND and SHOULD otherwise.</li>
 *   <li>A field unknown to the schema falls back to a plain {@code newFieldQuery}.</li>
 * </ol>
 *
 * <p>The most recently resolved {@code SchemaField} is cached in {@code lastFieldName}/{@code lastField}.
 *
 * Assumption: quoted is always false
 *
 * @param field      field name to query
 * @param queryTerms the terms for that field
 * @param raw        whether to return a {@code RawQuery} for non-tokenized (or non-indexed) schema fields
 * @return the resulting query
 * @throws SyntaxError if query construction fails
 */
protected Query getFieldQuery(String field, List<String> queryTerms, boolean raw) throws SyntaxError {
    checkNullField(field);
    SchemaField sf;
    if (field.equals(lastFieldName)) {
        // only look up the SchemaField on a field change... this helps with memory allocation of dynamic fields
        // and large queries like foo_i:(1 2 3 4 5 6 7 8 9 10) when we are passed "foo_i" each time.
        sf = lastField;
    } else {
        // Field names beginning with '_' may be "magic" names that are routed to a sub-parser.
        // startsWith is used instead of charAt(0) so that an empty field name does not throw
        // StringIndexOutOfBoundsException.
        if (field.startsWith("_") && parser != null) {
            MagicFieldName magic = MagicFieldName.get(field);
            if (null != magic) {
                subQParser = parser.subQuery(String.join(" ", queryTerms), magic.subParser);
                return subQParser.getQuery();
            }
        }
        lastFieldName = field;
        sf = lastField = schema.getFieldOrNull(field);
    }
    if (sf != null) {
        FieldType ft = sf.getType();
        // delegate to type for everything except tokenized fields
        if (ft.isTokenized() && sf.indexed()) {
            String queryText = queryTerms.size() == 1 ? queryTerms.get(0) : String.join(" ", queryTerms);
            boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField) ft).getAutoGeneratePhraseQueries();
            boolean fieldEnableGraphQueries = ft instanceof TextField && ((TextField) ft).getEnableGraphQueries();
            return newFieldQuery(getAnalyzer(), field, queryText, false, fieldAutoGenPhraseQueries, fieldEnableGraphQueries);
        } else {
            if (raw) {
                return new RawQuery(sf, queryTerms);
            } else {
                if (queryTerms.size() == 1) {
                    return ft.getFieldQuery(parser, sf, queryTerms.get(0));
                } else {
                    List<Query> subqs = new ArrayList<>();
                    for (String queryTerm : queryTerms) {
                        try {
                            subqs.add(ft.getFieldQuery(parser, sf, queryTerm));
                        } catch (Exception ignored) {
                            // Deliberately best-effort: a term the field type cannot parse is dropped.
                            // assumption: raw = false only when called from ExtendedDismaxQueryParser.getQuery()
                            // for edismax: ignore parsing failures
                        }
                    }
                    if (subqs.size() == 1) {
                        return subqs.get(0);
                    } else {
                        // delay building boolean query until we must
                        final BooleanClause.Occur occur = operator == AND_OPERATOR ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
                        BooleanQuery.Builder booleanBuilder = newBooleanQuery();
                        subqs.forEach(subq -> booleanBuilder.add(subq, occur));
                        return booleanBuilder.build();
                    }
                }
            }
        }
    }
    // default to a normal field query
    String queryText = queryTerms.size() == 1 ? queryTerms.get(0) : String.join(" ", queryTerms);
    return newFieldQuery(getAnalyzer(), field, queryText, false, false, true);
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) AutomatonQuery(org.apache.lucene.search.AutomatonQuery) SolrConstantScoreQuery(org.apache.solr.search.SolrConstantScoreQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) RegexpQuery(org.apache.lucene.search.RegexpQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) FilterQuery(org.apache.solr.query.FilterQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) ArrayList(java.util.ArrayList) SolrException(org.apache.solr.common.SolrException) FieldType(org.apache.solr.schema.FieldType) SchemaField(org.apache.solr.schema.SchemaField) BooleanClause(org.apache.lucene.search.BooleanClause) TextField(org.apache.solr.schema.TextField)

Example 23 with SchemaField

use of org.apache.solr.schema.SchemaField in project lucene-solr by apache.

the class ReverseOrdFieldSource method getValues.

/**
 * Returns per-document "reverse ordinal" values for {@code field}: for a document that has a
 * value the result is {@code valueCount - ord - 1}; for a document without a value it is
 * {@code valueCount}.
 *
 * <p>Ordinals are resolved against a top-level (whole index) reader, so the leaf-local doc id
 * passed to {@code intVal} is shifted by the leaf's {@code docBase} before lookup.
 *
 * @param context       request context; a {@code SolrIndexSearcher} under the key {@code "searcher"}
 *                      is used when present
 * @param readerContext the leaf whose documents will be queried
 * @throws IOException if the underlying doc values cannot be read
 */
@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
    final int base = readerContext.docBase;
    final Object searcherObj = context.get("searcher");
    final LeafReader topLevel;
    if (!(searcherObj instanceof SolrIndexSearcher)) {
        // No Solr searcher in the context: wrap the top-level reader of this leaf.
        topLevel = SlowCompositeReaderWrapper.wrap(ReaderUtil.getTopLevelContext(readerContext).reader());
    } else {
        final SolrIndexSearcher solrSearcher = (SolrIndexSearcher) searcherObj;
        final SchemaField schemaField = solrSearcher.getSchema().getFieldOrNull(field);
        final boolean singleValuedNumericWithoutDocValues = schemaField != null
            && !schemaField.hasDocValues()
            && !schemaField.multiValued()
            && schemaField.getType().getNumberType() != null;
        if (singleValuedNumericWithoutDocValues) {
            // it's a single-valued numeric field: we must currently create insanity :(
            final List<LeafReaderContext> leaves = solrSearcher.getIndexReader().leaves();
            final LeafReader[] wrappedLeaves = new LeafReader[leaves.size()];
            for (int i = 0; i < wrappedLeaves.length; i++) {
                wrappedLeaves[i] = Insanity.wrapInsanity(leaves.get(i).reader(), field);
            }
            topLevel = SlowCompositeReaderWrapper.wrap(new MultiReader(wrappedLeaves));
        } else {
            // reuse ordinalmap
            topLevel = solrSearcher.getSlowAtomicReader();
        }
    }
    // if it's e.g. tokenized/multivalued, emulate old behavior of single-valued fc
    final SortedDocValues ords = SortedSetSelector.wrap(DocValues.getSortedSet(topLevel, field), SortedSetSelector.Type.MIN);
    final int numOrds = ords.getValueCount();
    return new IntDocValues(this) {

        @Override
        public int intVal(int doc) throws IOException {
            // Translate the leaf-local doc id into the top-level doc id space.
            final int target = doc + base;
            if (target > ords.docID()) {
                ords.advance(target);
            }
            return target == ords.docID() ? (numOrds - ords.ordValue() - 1) : numOrds;
        }
    };
}
Also used : SchemaField(org.apache.solr.schema.SchemaField) LeafReader(org.apache.lucene.index.LeafReader) MultiReader(org.apache.lucene.index.MultiReader) IndexReader(org.apache.lucene.index.IndexReader) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) IntDocValues(org.apache.lucene.queries.function.docvalues.IntDocValues) SortedDocValues(org.apache.lucene.index.SortedDocValues)

Example 24 with SchemaField

use of org.apache.solr.schema.SchemaField in project lucene-solr by apache.

the class FieldUtil method getSortedDocValues.

/**
 * Convenience overload: resolves {@code field} through the searcher's schema, creates a new
 * {@code QueryContext} for the searcher, and delegates to the three-argument overload with a
 * {@code null} third argument.
 */
public static SortedDocValues getSortedDocValues(SolrIndexSearcher searcher, String field) throws IOException {
    final SchemaField schemaField = searcher.getSchema().getField(field);
    return getSortedDocValues(QueryContext.newContext(searcher), schemaField, null);
}
Also used : SchemaField(org.apache.solr.schema.SchemaField) QueryContext(org.apache.solr.search.QueryContext)

Example 25 with SchemaField

use of org.apache.solr.schema.SchemaField in project lucene-solr by apache.

the class DeleteUpdateCommand method getIndexedId.

/**
 * Returns the indexed form of this delete's ID, computing it on first use from the schema's
 * unique key field and caching it. The returned BytesRef is retained across multiple calls and
 * should not be modified. The result stays {@code null} when the schema has no unique key field
 * or no ID has been set.
 */
public BytesRef getIndexedId() {
    if (indexedId != null) {
        return indexedId;
    }
    final SchemaField uniqueKeyField = req.getSchema().getUniqueKeyField();
    if (uniqueKeyField == null || id == null) {
        return indexedId;
    }
    final BytesRefBuilder builder = new BytesRefBuilder();
    uniqueKeyField.getType().readableToIndexed(id, builder);
    indexedId = builder.get();
    return indexedId;
}
Also used : SchemaField(org.apache.solr.schema.SchemaField) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) IndexSchema(org.apache.solr.schema.IndexSchema)

Aggregations

SchemaField (org.apache.solr.schema.SchemaField)182 SolrException (org.apache.solr.common.SolrException)48 ArrayList (java.util.ArrayList)42 FieldType (org.apache.solr.schema.FieldType)41 IndexSchema (org.apache.solr.schema.IndexSchema)35 NamedList (org.apache.solr.common.util.NamedList)29 Query (org.apache.lucene.search.Query)23 IOException (java.io.IOException)22 BytesRef (org.apache.lucene.util.BytesRef)21 SimpleOrderedMap (org.apache.solr.common.util.SimpleOrderedMap)21 SolrIndexSearcher (org.apache.solr.search.SolrIndexSearcher)21 Document (org.apache.lucene.document.Document)20 SolrParams (org.apache.solr.common.params.SolrParams)19 IndexableField (org.apache.lucene.index.IndexableField)18 HashMap (java.util.HashMap)17 SolrInputDocument (org.apache.solr.common.SolrInputDocument)16 SolrDocument (org.apache.solr.common.SolrDocument)15 SolrQueryRequest (org.apache.solr.request.SolrQueryRequest)15 Map (java.util.Map)14 Term (org.apache.lucene.index.Term)14