Use of org.apache.solr.schema.FieldType in project lucene-solr by apache.
The class TermsComponent, method process.
@Override
public void process(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (!params.get(TermsParams.TERMS, "false").equals("true")) {
    return;
  }

  String[] fields = params.getParams(TermsParams.TERMS_FIELD);
  NamedList<Object> termsResult = new SimpleOrderedMap<>();
  rb.rsp.add("terms", termsResult);
  if (fields == null || fields.length == 0)
    return;

  boolean termStats = params.getBool(TermsParams.TERMS_STATS, false);
  if (termStats) {
    NamedList<Number> stats = new SimpleOrderedMap<>();
    rb.rsp.add("indexstats", stats);
    collectStats(rb.req.getSearcher(), stats);
  }

  String termList = params.get(TermsParams.TERMS_LIST);
  if (termList != null) {
    boolean includeTotalTermFreq = params.getBool(TermsParams.TERMS_TTF, false);
    fetchTerms(rb.req.getSearcher(), fields, termList, includeTotalTermFreq, termsResult);
    return;
  }

  int limit = params.getInt(TermsParams.TERMS_LIMIT, 10);
  if (limit < 0) {
    limit = Integer.MAX_VALUE;
  }

  String lowerStr = params.get(TermsParams.TERMS_LOWER);
  String upperStr = params.get(TermsParams.TERMS_UPPER);
  boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false);
  boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true);
  boolean sort = !TermsParams.TERMS_SORT_INDEX.equals(
      params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT));
  int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1);
  int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT);
  if (freqmax < 0) {
    freqmax = Integer.MAX_VALUE;
  }

  String prefix = params.get(TermsParams.TERMS_PREFIX_STR);
  String regexp = params.get(TermsParams.TERMS_REGEXP_STR);
  Pattern pattern = regexp != null ? Pattern.compile(regexp, resolveRegexpFlags(params)) : null;
  boolean raw = params.getBool(TermsParams.TERMS_RAW, false);

  final LeafReader indexReader = rb.req.getSearcher().getSlowAtomicReader();
  Fields lfields = indexReader.fields();

  for (String field : fields) {
    NamedList<Integer> fieldTerms = new NamedList<>();
    termsResult.add(field, fieldTerms);

    Terms terms = lfields.terms(field);
    if (terms == null) {
      // field does not exist
      continue;
    }

    FieldType ft = raw ? null : rb.req.getSchema().getFieldTypeNoEx(field);
    if (ft == null)
      ft = new StrField();

    // prefix must currently be text
    BytesRef prefixBytes = prefix == null ? null : new BytesRef(prefix);

    BytesRef upperBytes = null;
    if (upperStr != null) {
      BytesRefBuilder b = new BytesRefBuilder();
      ft.readableToIndexed(upperStr, b);
      upperBytes = b.get();
    }

    BytesRef lowerBytes;
    if (lowerStr == null) {
      // If no lower bound was specified, use the prefix
      lowerBytes = prefixBytes;
    } else {
      lowerBytes = new BytesRef();
      if (raw) {
        // TODO: how to handle binary? perhaps we don't for "raw"... or if the field exists
        // perhaps we detect if the FieldType is non-character and expect hex if so?
        lowerBytes = new BytesRef(lowerStr);
      } else {
        BytesRefBuilder b = new BytesRefBuilder();
        ft.readableToIndexed(lowerStr, b);
        lowerBytes = b.get();
      }
    }

    TermsEnum termsEnum = terms.iterator();
    BytesRef term = null;

    if (lowerBytes != null) {
      if (termsEnum.seekCeil(lowerBytes) == TermsEnum.SeekStatus.END) {
        termsEnum = null;
      } else {
        term = termsEnum.term();
        // Only advance the enum if we are excluding the lower bound and the lower Term actually matches
        if (lowerIncl == false && term.equals(lowerBytes)) {
          term = termsEnum.next();
        }
      }
    } else {
      // position termsEnum on first term
      term = termsEnum.next();
    }

    int i = 0;
    BoundedTreeSet<CountPair<BytesRef, Integer>> queue =
        (sort ? new BoundedTreeSet<CountPair<BytesRef, Integer>>(limit) : null);
    CharsRefBuilder external = new CharsRefBuilder();

    while (term != null && (i < limit || sort)) {
      // did we fill in "external" yet for this term?
      boolean externalized = false;

      // stop if the prefix doesn't match
      if (prefixBytes != null && !StringHelper.startsWith(term, prefixBytes))
        break;

      if (pattern != null) {
        // indexed text or external text?
        // TODO: support "raw" mode?
        ft.indexedToReadable(term, external);
        externalized = true;
        if (!pattern.matcher(external.get()).matches()) {
          term = termsEnum.next();
          continue;
        }
      }

      if (upperBytes != null) {
        int upperCmp = term.compareTo(upperBytes);
        // if we are past the upper term, or equal to it when the upper bound is not inclusive, then stop
        if (upperCmp > 0 || (upperCmp == 0 && !upperIncl))
          break;
      }

      // This is a good term in the range. Check if mincount/maxcount conditions are satisfied.
      int docFreq = termsEnum.docFreq();
      if (docFreq >= freqmin && docFreq <= freqmax) {
        // add the term to the list
        if (sort) {
          queue.add(new CountPair<>(BytesRef.deepCopyOf(term), docFreq));
        } else {
          // TODO: handle raw somehow
          if (!externalized) {
            ft.indexedToReadable(term, external);
          }
          fieldTerms.add(external.toString(), docFreq);
          i++;
        }
      }

      term = termsEnum.next();
    }

    if (sort) {
      for (CountPair<BytesRef, Integer> item : queue) {
        if (i >= limit)
          break;
        ft.indexedToReadable(item.key, external);
        fieldTerms.add(external.toString(), item.val);
        i++;
      }
    }
  }
}
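The FieldType calls that carry this example are readableToIndexed, which turns the user-supplied lower/upper/prefix strings into their indexed BytesRef form, and indexedToReadable, which converts matching terms back to external text. Below is a minimal round-trip sketch of that pair, separate from TermsComponent itself; the helper name roundTrip and the way the request and field name are obtained are assumptions made for illustration.

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.FieldType;

// Convert an external (readable) value into its indexed representation and back again.
static String roundTrip(SolrQueryRequest req, String fieldName, String externalValue) {
  FieldType ft = req.getSchema().getFieldType(fieldName); // throws if the field is unknown to the schema
  BytesRefBuilder indexed = new BytesRefBuilder();
  ft.readableToIndexed(externalValue, indexed);           // external text -> indexed bytes
  BytesRef term = indexed.get();
  CharsRefBuilder readable = new CharsRefBuilder();
  ft.indexedToReadable(term, readable);                   // indexed bytes -> external text
  return readable.toString();
}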
Use of org.apache.solr.schema.FieldType in project lucene-solr by apache.
The class RealTimeGetComponent, method process.
@Override
public void process(ResponseBuilder rb) throws IOException {
  SolrQueryRequest req = rb.req;
  SolrQueryResponse rsp = rb.rsp;
  SolrParams params = req.getParams();
  CloudDescriptor cloudDesc = req.getCore().getCoreDescriptor().getCloudDescriptor();

  if (cloudDesc != null) {
    Replica.Type replicaType = cloudDesc.getReplicaType();
    if (replicaType != null) {
      if (replicaType == Replica.Type.PULL) {
        throw new SolrException(ErrorCode.BAD_REQUEST,
            String.format(Locale.ROOT, "%s can't handle realtime get requests. Replicas of type %s do not support these type of requests",
                cloudDesc.getCoreNodeName(), Replica.Type.PULL));
      }
      // non-leader TLOG replicas should not respond to distrib /get requests, but internal requests are OK
    }
  }

  if (!params.getBool(COMPONENT_NAME, true)) {
    return;
  }
  // This seems rather kludgy; maybe there is a better way to indicate
  // that a replica can support handling version ranges
  String val = params.get("checkCanHandleVersionRanges");
  if (val != null) {
    rb.rsp.add("canHandleVersionRanges", true);
    return;
  }

  val = params.get("getFingerprint");
  if (val != null) {
    processGetFingeprint(rb);
    return;
  }

  val = params.get("getVersions");
  if (val != null) {
    processGetVersions(rb);
    return;
  }

  val = params.get("getUpdates");
  if (val != null) {
    // solrcloud_debug
    if (log.isDebugEnabled()) {
      try {
        RefCounted<SolrIndexSearcher> searchHolder = req.getCore().getNewestSearcher(false);
        SolrIndexSearcher searcher = searchHolder.get();
        try {
          log.debug(req.getCore().getCoreContainer().getZkController().getNodeName()
              + " min count to sync to (from most recent searcher view) "
              + searcher.search(new MatchAllDocsQuery(), 1).totalHits);
        } finally {
          searchHolder.decref();
        }
      } catch (Exception e) {
        log.debug("Error in solrcloud_debug block", e);
      }
    }
    processGetUpdates(rb);
    return;
  }

  val = params.get("getInputDocument");
  if (val != null) {
    processGetInputDocument(rb);
    return;
  }

  final IdsRequsted reqIds = IdsRequsted.parseParams(req);
  if (reqIds.allIds.isEmpty()) {
    return;
  }

  // parse any existing filters
  try {
    String[] fqs = req.getParams().getParams(CommonParams.FQ);
    if (fqs != null && fqs.length != 0) {
      List<Query> filters = rb.getFilters();
      // if filters already exist, make a copy instead of modifying the original
      filters = filters == null ? new ArrayList<Query>(fqs.length) : new ArrayList<>(filters);
      for (String fq : fqs) {
        if (fq != null && fq.trim().length() != 0) {
          QParser fqp = QParser.getParser(fq, req);
          filters.add(fqp.getQuery());
        }
      }
      if (!filters.isEmpty()) {
        rb.setFilters(filters);
      }
    }
  } catch (SyntaxError e) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
  }

  final SolrCore core = req.getCore();
  SchemaField idField = core.getLatestSchema().getUniqueKeyField();
  FieldType fieldType = idField.getType();

  SolrDocumentList docList = new SolrDocumentList();
  UpdateLog ulog = core.getUpdateHandler().getUpdateLog();

  SearcherInfo searcherInfo = new SearcherInfo(core);
  // this is initialized & set on the context *after* any searcher (re-)opening
  ResultContext resultContext = null;
  final DocTransformer transformer = rsp.getReturnFields().getTransformer();
  // true in any situation where we have to use a realtime searcher rather than returning docs
  // directly from the UpdateLog
  final boolean mustUseRealtimeSearcher =
      // if we have filters, we need to check those against the indexed form of the doc
      (rb.getFilters() != null) || ((null != transformer) && transformer.needsSolrIndexSearcher());

  try {
    BytesRefBuilder idBytes = new BytesRefBuilder();
    for (String idStr : reqIds.allIds) {
      fieldType.readableToIndexed(idStr, idBytes);
      if (ulog != null) {
        Object o = ulog.lookup(idBytes.get());
        if (o != null) {
          // should currently be a List<Oper,Ver,Doc/Id>
          List entry = (List) o;
          assert entry.size() >= 3;
          int oper = (Integer) entry.get(UpdateLog.FLAGS_IDX) & UpdateLog.OPERATION_MASK;
          switch (oper) {
            case UpdateLog.UPDATE_INPLACE:
              // fall through to ADD
            case UpdateLog.ADD:
              if (mustUseRealtimeSearcher) {
                // close handles to current searchers & result context
                searcherInfo.clear();
                resultContext = null;
                // force open a new realtime searcher
                ulog.openRealtimeSearcher();
                // pretend we never found this record and fall through to use the searcher
                o = null;
                break;
              }
              SolrDocument doc;
              if (oper == UpdateLog.ADD) {
                doc = toSolrDoc((SolrInputDocument) entry.get(entry.size() - 1), core.getLatestSchema());
              } else if (oper == UpdateLog.UPDATE_INPLACE) {
                assert entry.size() == 5;
                // For the in-place update case, we have obtained only the partial document so far. We need to
                // resolve it to a full document to be returned to the user.
                doc = resolveFullDocument(core, idBytes.get(), rsp.getReturnFields(),
                    (SolrInputDocument) entry.get(entry.size() - 1), entry, null);
                if (doc == null) {
                  // document has been deleted as the resolve was going on
                  break;
                }
              } else {
                throw new SolrException(ErrorCode.INVALID_STATE, "Expected ADD or UPDATE_INPLACE. Got: " + oper);
              }
              if (transformer != null) {
                // unknown docID
                transformer.transform(doc, -1, 0);
              }
              docList.add(doc);
              break;
            case UpdateLog.DELETE:
              break;
            default:
              throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown Operation! " + oper);
          }
          if (o != null)
            continue;
        }
      }

      // didn't find it in the update log, so it should be in the newest searcher opened
      searcherInfo.init();
      // don't bother with ResultContext yet, we won't need it if doc doesn't match filters

      int docid = -1;
      long segAndId = searcherInfo.getSearcher().lookupId(idBytes.get());
      if (segAndId >= 0) {
        int segid = (int) segAndId;
        LeafReaderContext ctx = searcherInfo.getSearcher().getTopReaderContext().leaves().get((int) (segAndId >> 32));
        docid = segid + ctx.docBase;
        if (rb.getFilters() != null) {
          for (Query raw : rb.getFilters()) {
            Query q = raw.rewrite(searcherInfo.getSearcher().getIndexReader());
            Scorer scorer = searcherInfo.getSearcher().createWeight(q, false, 1f).scorer(ctx);
            if (scorer == null || segid != scorer.iterator().advance(segid)) {
              // filter doesn't match.
              docid = -1;
              break;
            }
          }
        }
      }

      if (docid < 0)
        continue;

      Document luceneDocument = searcherInfo.getSearcher().doc(docid, rsp.getReturnFields().getLuceneFieldNames());
      SolrDocument doc = toSolrDoc(luceneDocument, core.getLatestSchema());
      SolrDocumentFetcher docFetcher = searcherInfo.getSearcher().getDocFetcher();
      docFetcher.decorateDocValueFields(doc, docid, docFetcher.getNonStoredDVs(true));
      if (null != transformer) {
        if (null == resultContext) {
          // either first pass, or we've re-opened searcher - either way now we setContext
          resultContext = new RTGResultContext(rsp.getReturnFields(), searcherInfo.getSearcher(), req);
          transformer.setContext(resultContext);
        }
        transformer.transform(doc, docid, 0);
      }
      docList.add(doc);
    }
  } finally {
    searcherInfo.clear();
  }

  addDocListToResponse(rb, docList);
}
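The only FieldType work in this method is converting each requested id into the indexed BytesRef that both the update-log lookup and the searcher's lookupId expect, via the unique key field's type. A minimal sketch of just that step follows; the helper name uniqueKeyToIndexed is hypothetical and it assumes a SolrCore is already available.

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;

// Turn an external unique-key value into the indexed bytes used for realtime-get lookups.
static BytesRef uniqueKeyToIndexed(SolrCore core, String idStr) {
  SchemaField idField = core.getLatestSchema().getUniqueKeyField();
  FieldType idType = idField.getType();
  BytesRefBuilder idBytes = new BytesRefBuilder();
  idType.readableToIndexed(idStr, idBytes); // e.g. numeric ids get their sortable byte encoding
  return BytesRef.deepCopyOf(idBytes.get()); // copy, since the builder's buffer may be reused
}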
Use of org.apache.solr.schema.FieldType in project lucene-solr by apache.
The class SpellCheckComponent, method inform.
@Override
public void inform(SolrCore core) {
  if (initParams != null) {
    LOG.info("Initializing spell checkers");
    boolean hasDefault = false;
    for (int i = 0; i < initParams.size(); i++) {
      if (initParams.getName(i).equals("spellchecker")) {
        Object cfg = initParams.getVal(i);
        if (cfg instanceof NamedList) {
          addSpellChecker(core, hasDefault, (NamedList) cfg);
        } else if (cfg instanceof Map) {
          addSpellChecker(core, hasDefault, new NamedList((Map) cfg));
        } else if (cfg instanceof List) {
          for (Object o : (List) cfg) {
            if (o instanceof Map) {
              addSpellChecker(core, hasDefault, new NamedList((Map) o));
            }
          }
        }
      }
    }

    Map<String, QueryConverter> queryConverters = new HashMap<>();
    core.initPlugins(queryConverters, QueryConverter.class);

    // ensure that there is at least one query converter defined
    if (queryConverters.size() == 0) {
      LOG.trace("No queryConverter defined, using default converter");
      queryConverters.put("queryConverter", new SpellingQueryConverter());
    }

    // there should only be one
    if (queryConverters.size() == 1) {
      queryConverter = queryConverters.values().iterator().next();
      IndexSchema schema = core.getLatestSchema();
      String fieldTypeName = (String) initParams.get("queryAnalyzerFieldType");
      FieldType fieldType = schema.getFieldTypes().get(fieldTypeName);
      Analyzer analyzer = fieldType == null ? new WhitespaceAnalyzer() : fieldType.getQueryAnalyzer();
      // TODO: There's got to be a better way! Where's Spring when you need it?
      queryConverter.setAnalyzer(analyzer);
    }
  }
}
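Here FieldType serves only as a source of analyzers: the configured queryAnalyzerFieldType name is resolved against the schema's field-type map, and its query-time analyzer (or a WhitespaceAnalyzer fallback) is handed to the query converter. A minimal sketch of that resolution, with a helper name invented for illustration:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;

// Resolve a query-time analyzer from a schema field type, with a whitespace fallback.
static Analyzer queryAnalyzerFor(IndexSchema schema, String fieldTypeName) {
  FieldType fieldType = schema.getFieldTypes().get(fieldTypeName); // null if the type is not defined
  return fieldType == null ? new WhitespaceAnalyzer() : fieldType.getQueryAnalyzer();
}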
Use of org.apache.solr.schema.FieldType in project lucene-solr by apache.
The class LukeRequestHandler, method getIndexedFieldsInfo.
private static SimpleOrderedMap<Object> getIndexedFieldsInfo(SolrQueryRequest req) throws Exception {
  SolrIndexSearcher searcher = req.getSearcher();
  SolrParams params = req.getParams();

  Set<String> fields = null;
  String fl = params.get(CommonParams.FL);
  if (fl != null) {
    fields = new TreeSet<>(Arrays.asList(fl.split("[,\\s]+")));
  }

  LeafReader reader = searcher.getSlowAtomicReader();
  IndexSchema schema = searcher.getSchema();

  // Don't be tempted to put this in the loop below, the whole point here is to alphabetize the fields!
  Set<String> fieldNames = new TreeSet<>();
  for (FieldInfo fieldInfo : reader.getFieldInfos()) {
    fieldNames.add(fieldInfo.name);
  }

  // Walk the term enum and keep a priority queue for each map in our set
  SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<>();
  for (String fieldName : fieldNames) {
    if (fields != null && !fields.contains(fieldName) && !fields.contains("*")) {
      // we're not interested in this field (still an issue here)
      continue;
    }

    SimpleOrderedMap<Object> fieldMap = new SimpleOrderedMap<>();

    SchemaField sfield = schema.getFieldOrNull(fieldName);
    FieldType ftype = (sfield == null) ? null : sfield.getType();

    fieldMap.add("type", (ftype == null) ? null : ftype.getTypeName());
    fieldMap.add("schema", getFieldFlags(sfield));
    if (sfield != null && schema.isDynamicField(sfield.getName()) && schema.getDynamicPattern(sfield.getName()) != null) {
      fieldMap.add("dynamicBase", schema.getDynamicPattern(sfield.getName()));
    }

    Terms terms = reader.fields().terms(fieldName);
    if (terms == null) {
      // Not indexed, so we need to report what we can (it made it through the fl param if specified)
      finfo.add(fieldName, fieldMap);
      continue;
    }

    if (sfield != null && sfield.indexed()) {
      if (params.getBool(INCLUDE_INDEX_FIELD_FLAGS, true)) {
        Document doc = getFirstLiveDoc(terms, reader);
        if (doc != null) {
          // Found a document with this field
          try {
            IndexableField fld = doc.getField(fieldName);
            if (fld != null) {
              fieldMap.add("index", getFieldFlags(fld));
            } else {
              // it is a non-stored field...
              fieldMap.add("index", "(unstored field)");
            }
          } catch (Exception ex) {
            log.warn("error reading field: " + fieldName);
          }
        }
      }
      fieldMap.add("docs", terms.getDocCount());
    }

    if (fields != null && (fields.contains(fieldName) || fields.contains("*"))) {
      getDetailedFieldInfo(req, fieldName, fieldMap);
    }

    // Add the field
    finfo.add(fieldName, fieldMap);
  }
  return finfo;
}
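FieldType usage in this handler is descriptive: each field name found in the index is resolved to a SchemaField if the schema declares one, and the type name is reported null-safely so fields that exist only in the index are still listed. A small sketch of that lookup; the helper name is made up for illustration.

import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;

// Report a field's schema type name, or null when the field is not declared in the schema.
static String typeNameOrNull(IndexSchema schema, String fieldName) {
  SchemaField sfield = schema.getFieldOrNull(fieldName); // unlike getField(), never throws
  FieldType ftype = (sfield == null) ? null : sfield.getType();
  return (ftype == null) ? null : ftype.getTypeName();
}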
Use of org.apache.solr.schema.FieldType in project lucene-solr by apache.
The class LukeRequestHandler, method getDocumentFieldsInfo.
private static SimpleOrderedMap<Object> getDocumentFieldsInfo(Document doc, int docId, IndexReader reader, IndexSchema schema) throws IOException {
  final CharsRefBuilder spare = new CharsRefBuilder();
  SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<>();
  for (Object o : doc.getFields()) {
    Field field = (Field) o;
    SimpleOrderedMap<Object> f = new SimpleOrderedMap<>();

    SchemaField sfield = schema.getFieldOrNull(field.name());
    FieldType ftype = (sfield == null) ? null : sfield.getType();

    f.add("type", (ftype == null) ? null : ftype.getTypeName());
    f.add("schema", getFieldFlags(sfield));
    f.add("flags", getFieldFlags(field));
    f.add("value", (ftype == null) ? null : ftype.toExternal(field));

    // TODO: this really should be "stored"
    // may be a binary number
    f.add("internal", field.stringValue());

    BytesRef bytes = field.binaryValue();
    if (bytes != null) {
      f.add("binary", Base64.byteArrayToBase64(bytes.bytes, bytes.offset, bytes.length));
    }
    // ftype may be null for fields not declared in the schema; guard before calling isPointField()
    if (ftype == null || !ftype.isPointField()) {
      Term t = new Term(field.name(), ftype != null ? ftype.storedToIndexed(field) : field.stringValue());
      // this can be 0 for non-indexed fields
      f.add("docFreq", t.text() == null ? 0 : reader.docFreq(t));
    }
    // If we have a term vector, return that
    if (field.fieldType().storeTermVectors()) {
      try {
        Terms v = reader.getTermVector(docId, field.name());
        if (v != null) {
          SimpleOrderedMap<Integer> tfv = new SimpleOrderedMap<>();
          final TermsEnum termsEnum = v.iterator();
          BytesRef text;
          while ((text = termsEnum.next()) != null) {
            final int freq = (int) termsEnum.totalTermFreq();
            spare.copyUTF8Bytes(text);
            tfv.add(spare.toString(), freq);
          }
          f.add("termVector", tfv);
        }
      } catch (Exception ex) {
        log.warn("error writing term vector", ex);
      }
    }

    finfo.add(field.name(), f);
  }
  return finfo;
}
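For stored fields, FieldType provides both renderings used above: toExternal for the human-readable value and storedToIndexed for the indexed term text behind the docFreq lookup. A minimal sketch combining the two, assuming the IndexableField and its (possibly null) FieldType were obtained as in the method above; the helper name is hypothetical.

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.solr.schema.FieldType;

// Render a stored field externally and look up the docFreq of its indexed term form.
static int docFreqOfStoredField(IndexReader reader, IndexableField field, FieldType ftype) throws IOException {
  String external = (ftype == null) ? field.stringValue() : ftype.toExternal(field);    // readable value
  String indexed = (ftype == null) ? field.stringValue() : ftype.storedToIndexed(field); // indexed term text
  System.out.println(field.name() + " = " + external);
  Term t = new Term(field.name(), indexed);
  return (t.text() == null) ? 0 : reader.docFreq(t); // 0 for non-indexed fields
}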