use of org.apache.solr.schema.SchemaField in project lucene-solr by apache.
the class NumericFacets method getCountsSingleValue.
private static NamedList<Integer> getCountsSingleValue(SolrIndexSearcher searcher, DocSet docs, String fieldName, int offset, int limit, int mincount, boolean missing, String sort) throws IOException {
boolean zeros = mincount <= 0;
mincount = Math.max(mincount, 1);
final SchemaField sf = searcher.getSchema().getField(fieldName);
final FieldType ft = sf.getType();
final NumberType numericType = ft.getNumberType();
if (numericType == null) {
throw new IllegalStateException();
}
// We don't return zeros when using PointFields or when index=false
zeros = zeros && !ft.isPointField() && sf.indexed();
final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
// 1. accumulate
final HashTable hashTable = new HashTable(true);
final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
LeafReaderContext ctx = null;
NumericDocValues longs = null;
int missingCount = 0;
for (DocIterator docsIt = docs.iterator(); docsIt.hasNext(); ) {
final int doc = docsIt.nextDoc();
if (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc()) {
do {
ctx = ctxIt.next();
} while (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc());
assert doc >= ctx.docBase;
switch(numericType) {
case LONG:
case DATE:
case INTEGER:
// Long, Date and Integer
longs = DocValues.getNumeric(ctx.reader(), fieldName);
break;
case FLOAT:
// TODO: this bit flipping should probably be moved to tie-break in the PQ comparator
longs = new FilterNumericDocValues(DocValues.getNumeric(ctx.reader(), fieldName)) {
@Override
public long longValue() throws IOException {
long bits = super.longValue();
if (bits < 0)
bits ^= 0x7fffffffffffffffL;
return bits;
}
};
break;
case DOUBLE:
// TODO: this bit flipping should probably be moved to tie-break in the PQ comparator
longs = new FilterNumericDocValues(DocValues.getNumeric(ctx.reader(), fieldName)) {
@Override
public long longValue() throws IOException {
long bits = super.longValue();
if (bits < 0)
bits ^= 0x7fffffffffffffffL;
return bits;
}
};
break;
default:
throw new AssertionError("Unexpected type: " + numericType);
}
}
int valuesDocID = longs.docID();
if (valuesDocID < doc - ctx.docBase) {
valuesDocID = longs.advance(doc - ctx.docBase);
}
if (valuesDocID == doc - ctx.docBase) {
hashTable.add(doc, longs.longValue(), 1);
} else {
++missingCount;
}
}
// 2. select top-k facet values
final int pqSize = limit < 0 ? hashTable.size : Math.min(offset + limit, hashTable.size);
final PriorityQueue<Entry> pq;
if (FacetParams.FACET_SORT_COUNT.equals(sort) || FacetParams.FACET_SORT_COUNT_LEGACY.equals(sort)) {
pq = new PriorityQueue<Entry>(pqSize) {
@Override
protected boolean lessThan(Entry a, Entry b) {
if (a.count < b.count || (a.count == b.count && a.bits > b.bits)) {
return true;
} else {
return false;
}
}
};
} else {
pq = new PriorityQueue<Entry>(pqSize) {
@Override
protected boolean lessThan(Entry a, Entry b) {
return a.bits > b.bits;
}
};
}
Entry e = null;
for (int i = 0; i < hashTable.bits.length; ++i) {
if (hashTable.counts[i] >= mincount) {
if (e == null) {
e = new Entry();
}
e.bits = hashTable.bits[i];
e.count = hashTable.counts[i];
e.docID = hashTable.docIDs[i];
e = pq.insertWithOverflow(e);
}
}
// 4. build the NamedList
final ValueSource vs = ft.getValueSource(sf, null);
final NamedList<Integer> result = new NamedList<>();
// to be merged with terms from the terms dict
if (!zeros || FacetParams.FACET_SORT_COUNT.equals(sort) || FacetParams.FACET_SORT_COUNT_LEGACY.equals(sort)) {
// Only keep items we're interested in
final Deque<Entry> counts = new ArrayDeque<>();
while (pq.size() > offset) {
counts.addFirst(pq.pop());
}
// Entries from the PQ first, then using the terms dictionary
for (Entry entry : counts) {
final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
result.add(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
}
if (zeros && (limit < 0 || result.size() < limit)) {
// need to merge with the term dict
if (!sf.indexed() && !sf.hasDocValues()) {
throw new IllegalStateException("Cannot use " + FacetParams.FACET_MINCOUNT + "=0 on field " + sf.getName() + " which is neither indexed nor docValues");
}
// Add zeros until there are limit results
final Set<String> alreadySeen = new HashSet<>();
while (pq.size() > 0) {
Entry entry = pq.pop();
final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
alreadySeen.add(values.strVal(entry.docID - leaves.get(readerIdx).docBase));
}
for (int i = 0; i < result.size(); ++i) {
alreadySeen.add(result.getName(i));
}
final Terms terms = searcher.getSlowAtomicReader().terms(fieldName);
if (terms != null) {
final String prefixStr = TrieField.getMainValuePrefix(ft);
final BytesRef prefix;
if (prefixStr != null) {
prefix = new BytesRef(prefixStr);
} else {
prefix = new BytesRef();
}
final TermsEnum termsEnum = terms.iterator();
BytesRef term;
switch(termsEnum.seekCeil(prefix)) {
case FOUND:
case NOT_FOUND:
term = termsEnum.term();
break;
case END:
term = null;
break;
default:
throw new AssertionError();
}
final CharsRefBuilder spare = new CharsRefBuilder();
for (int skipped = hashTable.size; skipped < offset && term != null && StringHelper.startsWith(term, prefix); ) {
ft.indexedToReadable(term, spare);
final String termStr = spare.toString();
if (!alreadySeen.contains(termStr)) {
++skipped;
}
term = termsEnum.next();
}
for (; term != null && StringHelper.startsWith(term, prefix) && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
ft.indexedToReadable(term, spare);
final String termStr = spare.toString();
if (!alreadySeen.contains(termStr)) {
result.add(termStr, 0);
}
}
}
}
} else {
// => Merge the PQ and the terms dictionary on the fly
if (!sf.indexed()) {
throw new IllegalStateException("Cannot use " + FacetParams.FACET_SORT + "=" + FacetParams.FACET_SORT_INDEX + " on a field which is not indexed");
}
final Map<String, Integer> counts = new HashMap<>();
while (pq.size() > 0) {
final Entry entry = pq.pop();
final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
counts.put(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
}
final Terms terms = searcher.getSlowAtomicReader().terms(fieldName);
if (terms != null) {
final String prefixStr = TrieField.getMainValuePrefix(ft);
final BytesRef prefix;
if (prefixStr != null) {
prefix = new BytesRef(prefixStr);
} else {
prefix = new BytesRef();
}
final TermsEnum termsEnum = terms.iterator();
BytesRef term;
switch(termsEnum.seekCeil(prefix)) {
case FOUND:
case NOT_FOUND:
term = termsEnum.term();
break;
case END:
term = null;
break;
default:
throw new AssertionError();
}
final CharsRefBuilder spare = new CharsRefBuilder();
for (int i = 0; i < offset && term != null && StringHelper.startsWith(term, prefix); ++i) {
term = termsEnum.next();
}
for (; term != null && StringHelper.startsWith(term, prefix) && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
ft.indexedToReadable(term, spare);
final String termStr = spare.toString();
Integer count = counts.get(termStr);
if (count == null) {
count = 0;
}
result.add(termStr, count);
}
}
}
if (missing) {
result.add(null, missingCount);
}
return result;
}
use of org.apache.solr.schema.SchemaField in project lucene-solr by apache.
the class SolrQueryParserBase method getFieldQuery.
// Assumption: quoted is always false
protected Query getFieldQuery(String field, List<String> queryTerms, boolean raw) throws SyntaxError {
checkNullField(field);
SchemaField sf;
if (field.equals(lastFieldName)) {
// only look up the SchemaField on a field change... this helps with memory allocation of dynamic fields
// and large queries like foo_i:(1 2 3 4 5 6 7 8 9 10) when we are passed "foo_i" each time.
sf = lastField;
} else {
// own functions.
if (field.charAt(0) == '_' && parser != null) {
MagicFieldName magic = MagicFieldName.get(field);
if (null != magic) {
subQParser = parser.subQuery(String.join(" ", queryTerms), magic.subParser);
return subQParser.getQuery();
}
}
lastFieldName = field;
sf = lastField = schema.getFieldOrNull(field);
}
if (sf != null) {
FieldType ft = sf.getType();
// delegate to type for everything except tokenized fields
if (ft.isTokenized() && sf.indexed()) {
String queryText = queryTerms.size() == 1 ? queryTerms.get(0) : String.join(" ", queryTerms);
boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField) ft).getAutoGeneratePhraseQueries();
boolean fieldEnableGraphQueries = ft instanceof TextField && ((TextField) ft).getEnableGraphQueries();
return newFieldQuery(getAnalyzer(), field, queryText, false, fieldAutoGenPhraseQueries, fieldEnableGraphQueries);
} else {
if (raw) {
return new RawQuery(sf, queryTerms);
} else {
if (queryTerms.size() == 1) {
return ft.getFieldQuery(parser, sf, queryTerms.get(0));
} else {
List<Query> subqs = new ArrayList<>();
for (String queryTerm : queryTerms) {
try {
subqs.add(ft.getFieldQuery(parser, sf, queryTerm));
} catch (Exception e) {
// assumption: raw = false only when called from ExtendedDismaxQueryParser.getQuery()
// for edismax: ignore parsing failures
}
}
if (subqs.size() == 1) {
return subqs.get(0);
} else {
// delay building boolean query until we must
final BooleanClause.Occur occur = operator == AND_OPERATOR ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
BooleanQuery.Builder booleanBuilder = newBooleanQuery();
subqs.forEach(subq -> booleanBuilder.add(subq, occur));
return booleanBuilder.build();
}
}
}
}
}
// default to a normal field query
String queryText = queryTerms.size() == 1 ? queryTerms.get(0) : String.join(" ", queryTerms);
return newFieldQuery(getAnalyzer(), field, queryText, false, false, true);
}
use of org.apache.solr.schema.SchemaField in project lucene-solr by apache.
the class ReverseOrdFieldSource method getValues.
@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
final int off = readerContext.docBase;
final LeafReader r;
Object o = context.get("searcher");
if (o instanceof SolrIndexSearcher) {
SolrIndexSearcher is = (SolrIndexSearcher) o;
SchemaField sf = is.getSchema().getFieldOrNull(field);
if (sf != null && sf.hasDocValues() == false && sf.multiValued() == false && sf.getType().getNumberType() != null) {
// it's a single-valued numeric field: we must currently create insanity :(
List<LeafReaderContext> leaves = is.getIndexReader().leaves();
LeafReader[] insaneLeaves = new LeafReader[leaves.size()];
int upto = 0;
for (LeafReaderContext raw : leaves) {
insaneLeaves[upto++] = Insanity.wrapInsanity(raw.reader(), field);
}
r = SlowCompositeReaderWrapper.wrap(new MultiReader(insaneLeaves));
} else {
// reuse ordinalmap
r = ((SolrIndexSearcher) o).getSlowAtomicReader();
}
} else {
IndexReader topReader = ReaderUtil.getTopLevelContext(readerContext).reader();
r = SlowCompositeReaderWrapper.wrap(topReader);
}
// if it's e.g. tokenized/multivalued, emulate old behavior of single-valued fc
final SortedDocValues sindex = SortedSetSelector.wrap(DocValues.getSortedSet(r, field), SortedSetSelector.Type.MIN);
final int end = sindex.getValueCount();
return new IntDocValues(this) {
@Override
public int intVal(int doc) throws IOException {
if (doc + off > sindex.docID()) {
sindex.advance(doc + off);
}
if (doc + off == sindex.docID()) {
return (end - sindex.ordValue() - 1);
} else {
return end;
}
}
};
}
use of org.apache.solr.schema.SchemaField in project lucene-solr by apache.
the class FieldUtil method getSortedDocValues.
/** Simpler method that creates a request context and looks up the field for you */
public static SortedDocValues getSortedDocValues(SolrIndexSearcher searcher, String field) throws IOException {
SchemaField sf = searcher.getSchema().getField(field);
QueryContext qContext = QueryContext.newContext(searcher);
return getSortedDocValues(qContext, sf, null);
}
use of org.apache.solr.schema.SchemaField in project lucene-solr by apache.
the class DeleteUpdateCommand method getIndexedId.
/** Returns the indexed ID for this delete. The returned BytesRef is retained across multiple calls, and should not be modified. */
public BytesRef getIndexedId() {
if (indexedId == null) {
IndexSchema schema = req.getSchema();
SchemaField sf = schema.getUniqueKeyField();
if (sf != null && id != null) {
BytesRefBuilder b = new BytesRefBuilder();
sf.getType().readableToIndexed(id, b);
indexedId = b.get();
}
}
return indexedId;
}
Aggregations