Use of org.apache.solr.schema.FieldType in project lucene-solr by apache.
The class IntervalFacets, method getCountNumeric.
private void getCountNumeric() throws IOException {
final FieldType ft = schemaField.getType();
final String fieldName = schemaField.getName();
final NumberType numericType = ft.getNumberType();
if (numericType == null) {
throw new IllegalStateException();
}
final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
LeafReaderContext ctx = null;
NumericDocValues longs = null;
for (DocIterator docsIt = docs.iterator(); docsIt.hasNext(); ) {
final int doc = docsIt.nextDoc();
if (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc()) {
do {
ctx = ctxIt.next();
} while (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc());
assert doc >= ctx.docBase;
switch(numericType) {
case LONG:
case DATE:
case INTEGER:
longs = DocValues.getNumeric(ctx.reader(), fieldName);
break;
case FLOAT:
// TODO: this bit flipping should probably be moved to tie-break in the PQ comparator
longs = new FilterNumericDocValues(DocValues.getNumeric(ctx.reader(), fieldName)) {
@Override
public long longValue() throws IOException {
long bits = super.longValue();
if (bits < 0)
bits ^= 0x7fffffffffffffffL;
return bits;
}
};
break;
case DOUBLE:
// TODO: this bit flipping should probably be moved to tie-break in the PQ comparator
longs = new FilterNumericDocValues(DocValues.getNumeric(ctx.reader(), fieldName)) {
@Override
public long longValue() throws IOException {
long bits = super.longValue();
if (bits < 0)
bits ^= 0x7fffffffffffffffL;
return bits;
}
};
break;
default:
throw new AssertionError();
}
}
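// NumericDocValues is a forward-only iterator (Lucene 7+): check docID()/advance()
// before reading longValue(), and never rewind within a segment.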
int valuesDocID = longs.docID();
if (valuesDocID < doc - ctx.docBase) {
valuesDocID = longs.advance(doc - ctx.docBase);
}
if (valuesDocID == doc - ctx.docBase) {
accumIntervalWithValue(longs.longValue());
}
}
}
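The FLOAT and DOUBLE branches above rely on the "sortable bits" trick: the raw IEEE-754 bit pattern of a negative value compares higher as a signed long than that of a smaller negative value, so flipping the non-sign bits restores numeric order. Below is a minimal, self-contained sketch of the same transform (equivalent to Lucene's NumericUtils.sortableDoubleBits; the SortableBitsDemo class and sample values are illustrative only, not part of the Solr code).

public class SortableBitsDemo {
  // same transform as the anonymous FilterNumericDocValues above, and as
  // org.apache.lucene.util.NumericUtils.sortableDoubleBits(long)
  static long sortable(long bits) {
    return bits < 0 ? bits ^ 0x7fffffffffffffffL : bits;
  }

  public static void main(String[] args) {
    double[] values = { -10.5, -0.25, 0.0, 0.25, 10.5 };
    long previous = Long.MIN_VALUE;
    for (double v : values) {
      long s = sortable(Double.doubleToLongBits(v));
      // the transformed bits must grow strictly with the numeric value
      if (s <= previous) {
        throw new AssertionError(v + " broke the ordering");
      }
      previous = s;
    }
    System.out.println("sortable bits preserve numeric order");
  }
}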
Use of org.apache.solr.schema.FieldType in project lucene-solr by apache.
The class NumericFacets, method getCountsSingleValue.
private static NamedList<Integer> getCountsSingleValue(SolrIndexSearcher searcher, DocSet docs, String fieldName, int offset, int limit, int mincount, boolean missing, String sort) throws IOException {
boolean zeros = mincount <= 0;
mincount = Math.max(mincount, 1);
final SchemaField sf = searcher.getSchema().getField(fieldName);
final FieldType ft = sf.getType();
final NumberType numericType = ft.getNumberType();
if (numericType == null) {
throw new IllegalStateException();
}
// We don't return zeros when using PointFields or when index=false
zeros = zeros && !ft.isPointField() && sf.indexed();
final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
// 1. accumulate
final HashTable hashTable = new HashTable(true);
final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
LeafReaderContext ctx = null;
NumericDocValues longs = null;
int missingCount = 0;
for (DocIterator docsIt = docs.iterator(); docsIt.hasNext(); ) {
final int doc = docsIt.nextDoc();
if (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc()) {
do {
ctx = ctxIt.next();
} while (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc());
assert doc >= ctx.docBase;
switch(numericType) {
case LONG:
case DATE:
case INTEGER:
// Long, Date and Integer
longs = DocValues.getNumeric(ctx.reader(), fieldName);
break;
case FLOAT:
// TODO: this bit flipping should probably be moved to tie-break in the PQ comparator
longs = new FilterNumericDocValues(DocValues.getNumeric(ctx.reader(), fieldName)) {
@Override
public long longValue() throws IOException {
long bits = super.longValue();
if (bits < 0)
bits ^= 0x7fffffffffffffffL;
return bits;
}
};
break;
case DOUBLE:
// TODO: this bit flipping should probably be moved to tie-break in the PQ comparator
longs = new FilterNumericDocValues(DocValues.getNumeric(ctx.reader(), fieldName)) {
@Override
public long longValue() throws IOException {
long bits = super.longValue();
if (bits < 0)
bits ^= 0x7fffffffffffffffL;
return bits;
}
};
break;
default:
throw new AssertionError("Unexpected type: " + numericType);
}
}
int valuesDocID = longs.docID();
if (valuesDocID < doc - ctx.docBase) {
valuesDocID = longs.advance(doc - ctx.docBase);
}
if (valuesDocID == doc - ctx.docBase) {
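// key the count by the (sortable) long bits and remember one docID per distinct value,
// so the value can later be rendered back to a readable string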
hashTable.add(doc, longs.longValue(), 1);
} else {
++missingCount;
}
}
// 2. select top-k facet values
final int pqSize = limit < 0 ? hashTable.size : Math.min(offset + limit, hashTable.size);
final PriorityQueue<Entry> pq;
if (FacetParams.FACET_SORT_COUNT.equals(sort) || FacetParams.FACET_SORT_COUNT_LEGACY.equals(sort)) {
pq = new PriorityQueue<Entry>(pqSize) {
@Override
protected boolean lessThan(Entry a, Entry b) {
if (a.count < b.count || (a.count == b.count && a.bits > b.bits)) {
return true;
} else {
return false;
}
}
};
} else {
pq = new PriorityQueue<Entry>(pqSize) {
@Override
protected boolean lessThan(Entry a, Entry b) {
return a.bits > b.bits;
}
};
}
Entry e = null;
for (int i = 0; i < hashTable.bits.length; ++i) {
if (hashTable.counts[i] >= mincount) {
if (e == null) {
e = new Entry();
}
e.bits = hashTable.bits[i];
e.count = hashTable.counts[i];
e.docID = hashTable.docIDs[i];
e = pq.insertWithOverflow(e);
}
}
// 4. build the NamedList
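// ft.getValueSource gives per-segment FunctionValues whose strVal(doc) renders the numeric value in its readable (external) form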
final ValueSource vs = ft.getValueSource(sf, null);
final NamedList<Integer> result = new NamedList<>();
// This part is complicated because, if facet.mincount=0, the counts need to be merged with terms from the terms dict
if (!zeros || FacetParams.FACET_SORT_COUNT.equals(sort) || FacetParams.FACET_SORT_COUNT_LEGACY.equals(sort)) {
// Only keep items we're interested in
final Deque<Entry> counts = new ArrayDeque<>();
while (pq.size() > offset) {
counts.addFirst(pq.pop());
}
// Entries from the PQ first, then using the terms dictionary
for (Entry entry : counts) {
final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
result.add(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
}
if (zeros && (limit < 0 || result.size() < limit)) {
// need to merge with the term dict
if (!sf.indexed() && !sf.hasDocValues()) {
throw new IllegalStateException("Cannot use " + FacetParams.FACET_MINCOUNT + "=0 on field " + sf.getName() + " which is neither indexed nor docValues");
}
// Add zeros until there are limit results
final Set<String> alreadySeen = new HashSet<>();
while (pq.size() > 0) {
Entry entry = pq.pop();
final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
alreadySeen.add(values.strVal(entry.docID - leaves.get(readerIdx).docBase));
}
for (int i = 0; i < result.size(); ++i) {
alreadySeen.add(result.getName(i));
}
final Terms terms = searcher.getSlowAtomicReader().terms(fieldName);
if (terms != null) {
final String prefixStr = TrieField.getMainValuePrefix(ft);
final BytesRef prefix;
if (prefixStr != null) {
prefix = new BytesRef(prefixStr);
} else {
prefix = new BytesRef();
}
final TermsEnum termsEnum = terms.iterator();
BytesRef term;
switch(termsEnum.seekCeil(prefix)) {
case FOUND:
case NOT_FOUND:
term = termsEnum.term();
break;
case END:
term = null;
break;
default:
throw new AssertionError();
}
final CharsRefBuilder spare = new CharsRefBuilder();
for (int skipped = hashTable.size; skipped < offset && term != null && StringHelper.startsWith(term, prefix); ) {
ft.indexedToReadable(term, spare);
final String termStr = spare.toString();
if (!alreadySeen.contains(termStr)) {
++skipped;
}
term = termsEnum.next();
}
for (; term != null && StringHelper.startsWith(term, prefix) && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
ft.indexedToReadable(term, spare);
final String termStr = spare.toString();
if (!alreadySeen.contains(termStr)) {
result.add(termStr, 0);
}
}
}
}
} else {
// sort=index and mincount=0 => merge the PQ and the terms dictionary on the fly
if (!sf.indexed()) {
throw new IllegalStateException("Cannot use " + FacetParams.FACET_SORT + "=" + FacetParams.FACET_SORT_INDEX + " on a field which is not indexed");
}
final Map<String, Integer> counts = new HashMap<>();
while (pq.size() > 0) {
final Entry entry = pq.pop();
final int readerIdx = ReaderUtil.subIndex(entry.docID, leaves);
final FunctionValues values = vs.getValues(Collections.emptyMap(), leaves.get(readerIdx));
counts.put(values.strVal(entry.docID - leaves.get(readerIdx).docBase), entry.count);
}
final Terms terms = searcher.getSlowAtomicReader().terms(fieldName);
if (terms != null) {
final String prefixStr = TrieField.getMainValuePrefix(ft);
final BytesRef prefix;
if (prefixStr != null) {
prefix = new BytesRef(prefixStr);
} else {
prefix = new BytesRef();
}
final TermsEnum termsEnum = terms.iterator();
BytesRef term;
switch(termsEnum.seekCeil(prefix)) {
case FOUND:
case NOT_FOUND:
term = termsEnum.term();
break;
case END:
term = null;
break;
default:
throw new AssertionError();
}
final CharsRefBuilder spare = new CharsRefBuilder();
for (int i = 0; i < offset && term != null && StringHelper.startsWith(term, prefix); ++i) {
term = termsEnum.next();
}
for (; term != null && StringHelper.startsWith(term, prefix) && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
ft.indexedToReadable(term, spare);
final String termStr = spare.toString();
Integer count = counts.get(termStr);
if (count == null) {
count = 0;
}
result.add(termStr, count);
}
}
}
if (missing) {
result.add(null, missingCount);
}
return result;
}
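Step 2 above uses Lucene's size-bounded PriorityQueue together with the insertWithOverflow reuse idiom: the queue keeps only the top pqSize entries under the lessThan ordering and hands back either the rejected entry or the evicted head, so a single Entry instance can be recycled across insertions. A small standalone sketch of that idiom follows (the TopCountsDemo class and sample data are made up; it assumes the org.apache.lucene.util.PriorityQueue API used in the snippet above).

import org.apache.lucene.util.PriorityQueue;

public class TopCountsDemo {
  static final class Entry {
    long bits;
    int count;
  }

  public static void main(String[] args) {
    // keep the 3 highest counts; ties are broken toward keeping the smaller bits value
    PriorityQueue<Entry> pq = new PriorityQueue<Entry>(3) {
      @Override
      protected boolean lessThan(Entry a, Entry b) {
        return a.count < b.count || (a.count == b.count && a.bits > b.bits);
      }
    };
    long[] bits = { 1, 2, 3, 4, 5 };
    int[] counts = { 7, 2, 9, 2, 5 };
    Entry e = null;
    for (int i = 0; i < bits.length; i++) {
      if (e == null) {
        e = new Entry();
      }
      e.bits = bits[i];
      e.count = counts[i];
      e = pq.insertWithOverflow(e); // returns an entry that is free to be reused
    }
    while (pq.size() > 0) {
      Entry top = pq.pop(); // popped from weakest to strongest under lessThan
      System.out.println("value bits " + top.bits + " -> count " + top.count);
    }
  }
}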
Use of org.apache.solr.schema.FieldType in project lucene-solr by apache.
The class IndexSortedFacetCollector, method getFacetCounts.
NamedList<Integer> getFacetCounts(Executor executor) throws IOException {
CompletionService<SegFacet> completionService = new ExecutorCompletionService<>(executor);
// reuse the translation logic to go from top level set to per-segment set
baseSet = docs.getTopFilter();
final List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
// The list of pending tasks that aren't immediately submitted
// TODO: Is there a completion service, or a delegating executor that can
// limit the number of concurrent tasks submitted to a bigger executor?
LinkedList<Callable<SegFacet>> pending = new LinkedList<>();
int threads = nThreads <= 0 ? Integer.MAX_VALUE : nThreads;
for (final LeafReaderContext leave : leaves) {
final SegFacet segFacet = new SegFacet(leave);
Callable<SegFacet> task = () -> {
segFacet.countTerms();
return segFacet;
};
if (--threads >= 0) {
completionService.submit(task);
} else {
pending.add(task);
}
}
// now merge the per-segment results
PriorityQueue<SegFacet> queue = new PriorityQueue<SegFacet>(leaves.size()) {
@Override
protected boolean lessThan(SegFacet a, SegFacet b) {
return a.tempBR.compareTo(b.tempBR) < 0;
}
};
boolean hasMissingCount = false;
int missingCount = 0;
for (int i = 0, c = leaves.size(); i < c; i++) {
SegFacet seg = null;
try {
Future<SegFacet> future = completionService.take();
seg = future.get();
if (!pending.isEmpty()) {
completionService.submit(pending.removeFirst());
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
} catch (ExecutionException e) {
Throwable cause = e.getCause();
if (cause instanceof RuntimeException) {
throw (RuntimeException) cause;
} else {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error in per-segment faceting on field: " + fieldName, cause);
}
}
if (seg.startTermIndex < seg.endTermIndex) {
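// startTermIndex == -1 means ordinal -1 (docs with no value for the field) was included
// in the counting, so counts[0] holds this segment's missing count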
if (seg.startTermIndex == -1) {
hasMissingCount = true;
missingCount += seg.counts[0];
seg.pos = 0;
} else {
seg.pos = seg.startTermIndex;
}
if (seg.pos < seg.endTermIndex && (mincount < 1 || seg.hasAnyCount)) {
seg.tenum = seg.si.termsEnum();
seg.tenum.seekExact(seg.pos);
seg.tempBR = seg.tenum.term();
queue.add(seg);
}
}
}
FacetCollector collector;
if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
collector = new CountSortedFacetCollector(offset, limit, mincount);
} else {
collector = new IndexSortedFacetCollector(offset, limit, mincount);
}
BytesRefBuilder val = new BytesRefBuilder();
while (queue.size() > 0) {
SegFacet seg = queue.top();
boolean collect = termFilter == null || termFilter.test(seg.tempBR);
// copy the term bytes before advancing this segment's TermsEnum, since the returned BytesRef may be shared across calls.
if (collect) {
val.copyBytes(seg.tempBR);
}
int count = 0;
do {
if (collect) {
count += seg.counts[seg.pos - seg.startTermIndex];
}
// if mincount>0 then seg.pos++ can skip ahead to the next non-zero entry.
do {
++seg.pos;
} while (
(seg.pos < seg.endTermIndex) && // stop incrementing before we run off the end
(seg.tenum.next() != null || true) && // move term enum forward with position -- don't care about value
(mincount > 0) && // only skip ahead if mincount > 0
(seg.counts[seg.pos - seg.startTermIndex] == 0)); // check zero count
if (seg.pos >= seg.endTermIndex) {
queue.pop();
seg = queue.top();
} else {
seg.tempBR = seg.tenum.term();
seg = queue.updateTop();
}
} while (seg != null && val.get().compareTo(seg.tempBR) == 0);
if (collect) {
boolean stop = collector.collect(val.get(), count);
if (stop)
break;
}
}
NamedList<Integer> res = collector.getFacetCounts();
// convert labels to readable form
FieldType ft = searcher.getSchema().getFieldType(fieldName);
int sz = res.size();
for (int i = 0; i < sz; i++) {
res.setName(i, ft.indexedToReadable(res.getName(i)));
}
if (missing) {
if (!hasMissingCount) {
missingCount = SimpleFacets.getFieldMissingCount(searcher, docs, fieldName);
}
res.add(null, missingCount);
}
return res;
}
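The loop above throttles work by submitting at most nThreads tasks to the ExecutorCompletionService up front and releasing one pending task each time a completed segment is taken, which is one answer to the TODO about limiting concurrency on top of a larger shared executor. Here is a self-contained sketch of the same pattern with made-up task payloads (ThrottledCompletionDemo and maxInFlight are hypothetical names, not Solr code).

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class ThrottledCompletionDemo {
  public static void main(String[] args) throws Exception {
    ExecutorService executor = Executors.newCachedThreadPool();
    CompletionService<Integer> completion = new ExecutorCompletionService<>(executor);

    List<Callable<Integer>> tasks = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
      final int n = i;
      tasks.add(() -> n * n);
    }

    int maxInFlight = 3; // analogous to nThreads above
    Deque<Callable<Integer>> pending = new ArrayDeque<>(tasks);
    for (int i = 0; i < maxInFlight && !pending.isEmpty(); i++) {
      completion.submit(pending.removeFirst());
    }

    int total = 0;
    for (int done = 0; done < tasks.size(); done++) {
      total += completion.take().get();           // blocks until some task finishes
      if (!pending.isEmpty()) {
        completion.submit(pending.removeFirst()); // keep at most maxInFlight running
      }
    }
    System.out.println("sum of squares = " + total);
    executor.shutdown();
  }
}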
Use of org.apache.solr.schema.FieldType in project lucene-solr by apache.
The class SolrQueryParserBase, method getFieldQuery.
// Assumption: quoted is always false
protected Query getFieldQuery(String field, List<String> queryTerms, boolean raw) throws SyntaxError {
checkNullField(field);
SchemaField sf;
if (field.equals(lastFieldName)) {
// only look up the SchemaField on a field change... this helps with memory allocation of dynamic fields
// and large queries like foo_i:(1 2 3 4 5 6 7 8 9 10) when we are passed "foo_i" each time.
sf = lastField;
} else {
// intercept magic field names (leading '_') to use as a hook for our own functions.
if (field.charAt(0) == '_' && parser != null) {
MagicFieldName magic = MagicFieldName.get(field);
if (null != magic) {
subQParser = parser.subQuery(String.join(" ", queryTerms), magic.subParser);
return subQParser.getQuery();
}
}
lastFieldName = field;
sf = lastField = schema.getFieldOrNull(field);
}
if (sf != null) {
FieldType ft = sf.getType();
// delegate to type for everything except tokenized fields
if (ft.isTokenized() && sf.indexed()) {
String queryText = queryTerms.size() == 1 ? queryTerms.get(0) : String.join(" ", queryTerms);
boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField) ft).getAutoGeneratePhraseQueries();
boolean fieldEnableGraphQueries = ft instanceof TextField && ((TextField) ft).getEnableGraphQueries();
return newFieldQuery(getAnalyzer(), field, queryText, false, fieldAutoGenPhraseQueries, fieldEnableGraphQueries);
} else {
if (raw) {
return new RawQuery(sf, queryTerms);
} else {
if (queryTerms.size() == 1) {
return ft.getFieldQuery(parser, sf, queryTerms.get(0));
} else {
List<Query> subqs = new ArrayList<>();
for (String queryTerm : queryTerms) {
try {
subqs.add(ft.getFieldQuery(parser, sf, queryTerm));
} catch (Exception e) {
// assumption: raw = false only when called from ExtendedDismaxQueryParser.getQuery()
// for edismax: ignore parsing failures
}
}
if (subqs.size() == 1) {
return subqs.get(0);
} else {
// delay building boolean query until we must
final BooleanClause.Occur occur = operator == AND_OPERATOR ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
BooleanQuery.Builder booleanBuilder = newBooleanQuery();
subqs.forEach(subq -> booleanBuilder.add(subq, occur));
return booleanBuilder.build();
}
}
}
}
}
// default to a normal field query
String queryText = queryTerms.size() == 1 ? queryTerms.get(0) : String.join(" ", queryTerms);
return newFieldQuery(getAnalyzer(), field, queryText, false, false, true);
}
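The non-tokenized branch above ends by folding one sub-query per term into a BooleanQuery whose occur value follows the default operator. Below is a plain-Lucene sketch of that combining step, using TermQuery as a stand-in for ft.getFieldQuery(parser, sf, term) (the CombineTermsDemo class is illustrative only).

import java.util.Arrays;
import java.util.List;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

public class CombineTermsDemo {
  static Query combine(String field, List<String> terms, boolean andOperator) {
    if (terms.size() == 1) {
      // single term: no boolean wrapper needed
      return new TermQuery(new Term(field, terms.get(0)));
    }
    BooleanClause.Occur occur = andOperator ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    for (String t : terms) {
      builder.add(new TermQuery(new Term(field, t)), occur);
    }
    return builder.build();
  }

  public static void main(String[] args) {
    // prints the three SHOULD clauses on the field
    System.out.println(combine("foo_i", Arrays.asList("1", "2", "3"), false));
  }
}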
Use of org.apache.solr.schema.FieldType in project lucene-solr by apache.
The class FileFloatSource, method getFloats.
private static float[] getFloats(FileFloatSource ffs, IndexReader reader) {
float[] vals = new float[reader.maxDoc()];
if (ffs.defVal != 0) {
Arrays.fill(vals, ffs.defVal);
}
InputStream is;
String fname = "external_" + ffs.field.getName();
try {
is = VersionedFile.getLatestFile(ffs.dataDir, fname);
} catch (IOException e) {
// log, use defaults
log.error("Error opening external value source file: " + e);
return vals;
}
BufferedReader r = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8));
String idName = ffs.keyField.getName();
FieldType idType = ffs.keyField.getType();
// warning: lucene's termEnum.skipTo() is not optimized... it simply does a next()
// because of this, simply ask the reader for a new termEnum rather than
// trying to use skipTo()
List<String> notFound = new ArrayList<>();
int notFoundCount = 0;
int otherErrors = 0;
char delimiter = '=';
BytesRefBuilder internalKey = new BytesRefBuilder();
try {
TermsEnum termsEnum = MultiFields.getTerms(reader, idName).iterator();
PostingsEnum postingsEnum = null;
for (String line; (line = r.readLine()) != null; ) {
int delimIndex = line.lastIndexOf(delimiter);
if (delimIndex < 0)
continue;
int endIndex = line.length();
String key = line.substring(0, delimIndex);
String val = line.substring(delimIndex + 1, endIndex);
float fval;
try {
idType.readableToIndexed(key, internalKey);
fval = Float.parseFloat(val);
} catch (Exception e) {
if (++otherErrors <= 10) {
log.error("Error loading external value source + fileName + " + e + (otherErrors < 10 ? "" : "\tSkipping future errors for this file."));
}
// go to next line in file.. leave values as default.
continue;
}
if (!termsEnum.seekExact(internalKey.get())) {
if (notFoundCount < 10) {
// collect first 10 not found for logging
notFound.add(key);
}
notFoundCount++;
continue;
}
postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
int doc;
while ((doc = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
vals[doc] = fval;
}
}
} catch (IOException e) {
// log, use defaults
log.error("Error loading external value source: " + e);
} finally {
// swallow exceptions on close so we don't hide any exceptions that happened in the loop
try {
r.close();
} catch (Exception e) {
}
}
log.info("Loaded external value source " + fname + (notFoundCount == 0 ? "" : " :" + notFoundCount + " missing keys " + notFound));
return vals;
}
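For reference, the external file read above is treated as one key=value pair per line, split on the last '=' so keys may themselves contain '='; lines that fail to parse are skipped and those documents keep the default value. A tiny sketch of just that parsing rule follows (ExternalFileParseDemo is a hypothetical helper, not part of FileFloatSource).

import java.util.LinkedHashMap;
import java.util.Map;

public class ExternalFileParseDemo {
  static Map<String, Float> parse(Iterable<String> lines) {
    Map<String, Float> scores = new LinkedHashMap<>();
    for (String line : lines) {
      int delim = line.lastIndexOf('=');   // split on the LAST '=' so keys may contain '='
      if (delim < 0) continue;             // no delimiter: ignore the line
      try {
        scores.put(line.substring(0, delim), Float.parseFloat(line.substring(delim + 1)));
      } catch (NumberFormatException e) {
        // bad value: skip the line, mirroring the "continue" in getFloats
      }
    }
    return scores;
  }

  public static void main(String[] args) {
    // "doc=1=0.5" parses as key "doc=1" with value 0.5
    System.out.println(parse(java.util.Arrays.asList("doc=1=0.5", "doc2=2.0", "broken")));
  }
}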