use of org.apache.solr.search.SolrIndexSearcher in project lucene-solr by apache.
the class SimpleMLTQParser method parse.
public Query parse() {
String defaultField = req.getSchema().getUniqueKeyField().getName();
String uniqueValue = localParams.get(QueryParsing.V);
String[] qf = localParams.getParams("qf");
SolrIndexSearcher searcher = req.getSearcher();
Query docIdQuery = createIdQuery(defaultField, uniqueValue);
Map<String, Float> boostFields = new HashMap<>();
try {
TopDocs td = searcher.search(docIdQuery, 1);
if (td.totalHits != 1)
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error completing MLT request. Could not fetch " + "document with id [" + uniqueValue + "]");
ScoreDoc[] scoreDocs = td.scoreDocs;
MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader());
mlt.setMinTermFreq(localParams.getInt("mintf", MoreLikeThis.DEFAULT_MIN_TERM_FREQ));
mlt.setMinDocFreq(localParams.getInt("mindf", MoreLikeThis.DEFAULT_MIN_DOC_FREQ));
mlt.setMinWordLen(localParams.getInt("minwl", MoreLikeThis.DEFAULT_MIN_WORD_LENGTH));
mlt.setMaxWordLen(localParams.getInt("maxwl", MoreLikeThis.DEFAULT_MAX_WORD_LENGTH));
mlt.setMaxQueryTerms(localParams.getInt("maxqt", MoreLikeThis.DEFAULT_MAX_QUERY_TERMS));
mlt.setMaxNumTokensParsed(localParams.getInt("maxntp", MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED));
mlt.setMaxDocFreq(localParams.getInt("maxdf", MoreLikeThis.DEFAULT_MAX_DOC_FREQ));
Boolean boost = localParams.getBool("boost", false);
mlt.setBoost(boost);
String[] fieldNames;
if (qf != null) {
ArrayList<String> fields = new ArrayList<>();
for (String fieldName : qf) {
if (!StringUtils.isEmpty(fieldName)) {
String[] strings = splitList.split(fieldName);
for (String string : strings) {
if (!StringUtils.isEmpty(string)) {
fields.add(string);
}
}
}
}
// Parse field names and boosts from the fields
boostFields = SolrPluginUtils.parseFieldBoosts(fields.toArray(new String[0]));
fieldNames = boostFields.keySet().toArray(new String[0]);
} else {
Map<String, SchemaField> fieldDefinitions = req.getSearcher().getSchema().getFields();
ArrayList<String> fields = new ArrayList();
for (String fieldName : fieldDefinitions.keySet()) {
if (fieldDefinitions.get(fieldName).indexed() && fieldDefinitions.get(fieldName).stored())
if (fieldDefinitions.get(fieldName).getType().getNumberType() == null)
fields.add(fieldName);
}
fieldNames = fields.toArray(new String[0]);
}
if (fieldNames.length < 1) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "MoreLikeThis requires at least one similarity field: qf");
}
mlt.setFieldNames(fieldNames);
mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());
Query rawMLTQuery = mlt.like(scoreDocs[0].doc);
BooleanQuery boostedMLTQuery = (BooleanQuery) rawMLTQuery;
if (boost && boostFields.size() > 0) {
BooleanQuery.Builder newQ = new BooleanQuery.Builder();
newQ.setMinimumNumberShouldMatch(boostedMLTQuery.getMinimumNumberShouldMatch());
for (BooleanClause clause : boostedMLTQuery) {
Query q = clause.getQuery();
float originalBoost = 1f;
if (q instanceof BoostQuery) {
BoostQuery bq = (BoostQuery) q;
q = bq.getQuery();
originalBoost = bq.getBoost();
}
Float fieldBoost = boostFields.get(((TermQuery) q).getTerm().field());
q = ((fieldBoost != null) ? new BoostQuery(q, fieldBoost * originalBoost) : clause.getQuery());
newQ.add(q, clause.getOccur());
}
boostedMLTQuery = newQ.build();
}
// exclude current document from results
BooleanQuery.Builder realMLTQuery = new BooleanQuery.Builder();
realMLTQuery.add(boostedMLTQuery, BooleanClause.Occur.MUST);
realMLTQuery.add(docIdQuery, BooleanClause.Occur.MUST_NOT);
return realMLTQuery.build();
} catch (IOException e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error completing MLT request" + e.getMessage());
}
}
use of org.apache.solr.search.SolrIndexSearcher in project lucene-solr by apache.
the class CarrotClusteringEngine method getDocuments.
/**
* Prepares Carrot2 documents for clustering.
*/
private List<Document> getDocuments(SolrDocumentList solrDocList, Map<SolrDocument, Integer> docIds, Query query, final SolrQueryRequest sreq) throws IOException {
SolrHighlighter highlighter = null;
SolrParams solrParams = sreq.getParams();
SolrCore core = sreq.getCore();
String urlField = solrParams.get(CarrotParams.URL_FIELD_NAME, "url");
String titleFieldSpec = solrParams.get(CarrotParams.TITLE_FIELD_NAME, "title");
String snippetFieldSpec = solrParams.get(CarrotParams.SNIPPET_FIELD_NAME, titleFieldSpec);
String languageField = solrParams.get(CarrotParams.LANGUAGE_FIELD_NAME, null);
// Maps Solr field names to Carrot2 custom field names
Map<String, String> customFields = getCustomFieldsMap(solrParams);
// Parse language code map string into a map
Map<String, String> languageCodeMap = new HashMap<>();
if (StringUtils.isNotBlank(languageField)) {
for (String pair : solrParams.get(CarrotParams.LANGUAGE_CODE_MAP, "").split("[, ]")) {
final String[] split = pair.split(":");
if (split.length == 2 && StringUtils.isNotBlank(split[0]) && StringUtils.isNotBlank(split[1])) {
languageCodeMap.put(split[0], split[1]);
} else {
log.warn("Unsupported format for " + CarrotParams.LANGUAGE_CODE_MAP + ": '" + pair + "'. Skipping this mapping.");
}
}
}
// Get the documents
boolean produceSummary = solrParams.getBool(CarrotParams.PRODUCE_SUMMARY, false);
SolrQueryRequest req = null;
String[] snippetFieldAry = null;
if (produceSummary) {
highlighter = HighlightComponent.getHighlighter(core);
if (highlighter != null) {
Map<String, Object> args = new HashMap<>();
snippetFieldAry = snippetFieldSpec.split("[, ]");
args.put(HighlightParams.FIELDS, snippetFieldAry);
args.put(HighlightParams.HIGHLIGHT, "true");
//we don't care about actually highlighting the area
args.put(HighlightParams.SIMPLE_PRE, "");
args.put(HighlightParams.SIMPLE_POST, "");
args.put(HighlightParams.FRAGSIZE, solrParams.getInt(CarrotParams.SUMMARY_FRAGSIZE, solrParams.getInt(HighlightParams.FRAGSIZE, 100)));
args.put(HighlightParams.SNIPPETS, solrParams.getInt(CarrotParams.SUMMARY_SNIPPETS, solrParams.getInt(HighlightParams.SNIPPETS, 1)));
req = new LocalSolrQueryRequest(core, query.toString(), "", 0, 1, args) {
@Override
public SolrIndexSearcher getSearcher() {
return sreq.getSearcher();
}
};
} else {
log.warn("No highlighter configured, cannot produce summary");
produceSummary = false;
}
}
Iterator<SolrDocument> docsIter = solrDocList.iterator();
List<Document> result = new ArrayList<>(solrDocList.size());
float[] scores = { 1.0f };
int[] docsHolder = new int[1];
Query theQuery = query;
while (docsIter.hasNext()) {
SolrDocument sdoc = docsIter.next();
String snippet = null;
// See comment in ClusteringComponent#finishStage().
if (produceSummary && docIds != null) {
docsHolder[0] = docIds.get(sdoc).intValue();
DocList docAsList = new DocSlice(0, 1, docsHolder, scores, 1, 1.0f);
NamedList<Object> highlights = highlighter.doHighlighting(docAsList, theQuery, req, snippetFieldAry);
if (highlights != null && highlights.size() == 1) {
// should only be one value given our setup
// should only be one document
@SuppressWarnings("unchecked") NamedList<String[]> tmp = (NamedList<String[]>) highlights.getVal(0);
final StringBuilder sb = new StringBuilder();
for (int j = 0; j < snippetFieldAry.length; j++) {
// Join fragments with a period, so that Carrot2 does not create
// cross-fragment phrases, such phrases rarely make sense.
String[] highlt = tmp.get(snippetFieldAry[j]);
if (highlt != null && highlt.length > 0) {
for (int i = 0; i < highlt.length; i++) {
sb.append(highlt[i]);
sb.append(" . ");
}
}
}
snippet = sb.toString();
}
}
// If summaries not enabled or summary generation failed, use full content.
if (snippet == null) {
snippet = getConcatenated(sdoc, snippetFieldSpec);
}
// Create a Carrot2 document
Document carrotDocument = new Document(getConcatenated(sdoc, titleFieldSpec), snippet, ObjectUtils.toString(sdoc.getFieldValue(urlField), ""));
// Store Solr id of the document, we need it to map document instances
// found in clusters back to identifiers.
carrotDocument.setField(SOLR_DOCUMENT_ID, sdoc.getFieldValue(idFieldName));
// Set language
if (StringUtils.isNotBlank(languageField)) {
Collection<Object> languages = sdoc.getFieldValues(languageField);
if (languages != null) {
// Use the first Carrot2-supported language
for (Object l : languages) {
String lang = ObjectUtils.toString(l, "");
if (languageCodeMap.containsKey(lang)) {
lang = languageCodeMap.get(lang);
}
// language variants, such as 'zh-cn', but Carrot2 uses underscores.
if (lang.indexOf('-') > 0) {
lang = lang.replace('-', '_');
}
// If the language is supported by Carrot2, we'll get a non-null value
final LanguageCode carrot2Language = LanguageCode.forISOCode(lang);
if (carrot2Language != null) {
carrotDocument.setLanguage(carrot2Language);
break;
}
}
}
}
// Add custom fields
if (customFields != null) {
for (Entry<String, String> entry : customFields.entrySet()) {
carrotDocument.setField(entry.getValue(), sdoc.getFieldValue(entry.getKey()));
}
}
result.add(carrotDocument);
}
return result;
}
use of org.apache.solr.search.SolrIndexSearcher in project lucene-solr by apache.
the class ReplicationHandler method inform.
@Override
@SuppressWarnings("unchecked")
public void inform(SolrCore core) {
this.core = core;
registerCloseHook();
Object nbtk = initArgs.get(NUMBER_BACKUPS_TO_KEEP_INIT_PARAM);
if (nbtk != null) {
numberBackupsToKeep = Integer.parseInt(nbtk.toString());
} else {
numberBackupsToKeep = 0;
}
NamedList slave = (NamedList) initArgs.get("slave");
boolean enableSlave = isEnabled(slave);
if (enableSlave) {
currentIndexFetcher = pollingIndexFetcher = new IndexFetcher(slave, this, core);
setupPolling((String) slave.get(POLL_INTERVAL));
isSlave = true;
}
NamedList master = (NamedList) initArgs.get("master");
boolean enableMaster = isEnabled(master);
if (enableMaster || enableSlave) {
if (core.getCoreContainer().getZkController() != null) {
LOG.warn("SolrCloud is enabled for core " + core.getName() + " but so is old-style replication. Make sure you" + " intend this behavior, it usually indicates a mis-configuration. Master setting is " + Boolean.toString(enableMaster) + " and slave setting is " + Boolean.toString(enableSlave));
}
}
if (!enableSlave && !enableMaster) {
enableMaster = true;
master = new NamedList<>();
}
if (enableMaster) {
includeConfFiles = (String) master.get(CONF_FILES);
if (includeConfFiles != null && includeConfFiles.trim().length() > 0) {
List<String> files = Arrays.asList(includeConfFiles.split(","));
for (String file : files) {
if (file.trim().length() == 0)
continue;
String[] strs = file.trim().split(":");
// if there is an alias add it or it is null
confFileNameAlias.add(strs[0], strs.length > 1 ? strs[1] : null);
}
LOG.info("Replication enabled for following config files: " + includeConfFiles);
}
List backup = master.getAll("backupAfter");
boolean backupOnCommit = backup.contains("commit");
boolean backupOnOptimize = !backupOnCommit && backup.contains("optimize");
List replicateAfter = master.getAll(REPLICATE_AFTER);
replicateOnCommit = replicateAfter.contains("commit");
replicateOnOptimize = !replicateOnCommit && replicateAfter.contains("optimize");
if (!replicateOnCommit && !replicateOnOptimize) {
replicateOnCommit = true;
}
// save the last optimized commit point.
if (replicateOnOptimize) {
IndexDeletionPolicyWrapper wrapper = core.getDeletionPolicy();
IndexDeletionPolicy policy = wrapper == null ? null : wrapper.getWrappedDeletionPolicy();
if (policy instanceof SolrDeletionPolicy) {
SolrDeletionPolicy solrPolicy = (SolrDeletionPolicy) policy;
if (solrPolicy.getMaxOptimizedCommitsToKeep() < 1) {
solrPolicy.setMaxOptimizedCommitsToKeep(1);
}
} else {
LOG.warn("Replication can't call setMaxOptimizedCommitsToKeep on " + policy);
}
}
if (replicateOnOptimize || backupOnOptimize) {
core.getUpdateHandler().registerOptimizeCallback(getEventListener(backupOnOptimize, replicateOnOptimize));
}
if (replicateOnCommit || backupOnCommit) {
replicateOnCommit = true;
core.getUpdateHandler().registerCommitCallback(getEventListener(backupOnCommit, replicateOnCommit));
}
if (replicateAfter.contains("startup")) {
replicateOnStart = true;
RefCounted<SolrIndexSearcher> s = core.getNewestSearcher(false);
try {
DirectoryReader reader = s == null ? null : s.get().getIndexReader();
if (reader != null && reader.getIndexCommit() != null && reader.getIndexCommit().getGeneration() != 1L) {
try {
if (replicateOnOptimize) {
Collection<IndexCommit> commits = DirectoryReader.listCommits(reader.directory());
for (IndexCommit ic : commits) {
if (ic.getSegmentCount() == 1) {
if (indexCommitPoint == null || indexCommitPoint.getGeneration() < ic.getGeneration())
indexCommitPoint = ic;
}
}
} else {
indexCommitPoint = reader.getIndexCommit();
}
} finally {
// We don't need to save commit points for replication, the SolrDeletionPolicy
// always saves the last commit point (and the last optimized commit point, if needed)
/***
if(indexCommitPoint != null){
core.getDeletionPolicy().saveCommitPoint(indexCommitPoint.getGeneration());
}
***/
}
}
// ensure the writer is init'd so that we have a list of commit points
RefCounted<IndexWriter> iw = core.getUpdateHandler().getSolrCoreState().getIndexWriter(core);
iw.decref();
} catch (IOException e) {
LOG.warn("Unable to get IndexCommit on startup", e);
} finally {
if (s != null)
s.decref();
}
}
String reserve = (String) master.get(RESERVE);
if (reserve != null && !reserve.trim().equals("")) {
reserveCommitDuration = readIntervalMs(reserve);
}
LOG.info("Commits will be reserved for " + reserveCommitDuration);
isMaster = true;
}
}
use of org.apache.solr.search.SolrIndexSearcher in project lucene-solr by apache.
the class IndexFetcher method openNewSearcherAndUpdateCommitPoint.
private void openNewSearcherAndUpdateCommitPoint() throws IOException {
RefCounted<SolrIndexSearcher> searcher = null;
IndexCommit commitPoint;
// must get the latest solrCore object because the one we have might be closed because of a reload
// todo stop keeping solrCore around
SolrCore core = solrCore.getCoreContainer().getCore(solrCore.getName());
try {
Future[] waitSearcher = new Future[1];
searcher = core.getSearcher(true, true, waitSearcher, true);
if (waitSearcher[0] != null) {
try {
waitSearcher[0].get();
} catch (InterruptedException | ExecutionException e) {
SolrException.log(LOG, e);
}
}
commitPoint = searcher.get().getIndexReader().getIndexCommit();
} finally {
if (searcher != null) {
searcher.decref();
}
core.close();
}
// update the commit point in replication handler
replicationHandler.indexCommitPoint = commitPoint;
}
use of org.apache.solr.search.SolrIndexSearcher in project lucene-solr by apache.
the class CreateSnapshotOp method execute.
@Override
public void execute(CoreAdminHandler.CallInfo it) throws Exception {
CoreContainer cc = it.handler.getCoreContainer();
final SolrParams params = it.req.getParams();
String commitName = params.required().get(CoreAdminParams.COMMIT_NAME);
String cname = params.required().get(CoreAdminParams.CORE);
try (SolrCore core = cc.getCore(cname)) {
if (core == null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unable to locate core " + cname);
}
String indexDirPath = core.getIndexDir();
IndexCommit ic = core.getDeletionPolicy().getLatestCommit();
if (ic == null) {
RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
try {
ic = searcher.get().getIndexReader().getIndexCommit();
} finally {
searcher.decref();
}
}
SolrSnapshotMetaDataManager mgr = core.getSnapshotMetaDataManager();
mgr.snapshot(commitName, indexDirPath, ic.getGeneration());
it.rsp.add(CoreAdminParams.CORE, core.getName());
it.rsp.add(CoreAdminParams.COMMIT_NAME, commitName);
it.rsp.add(SolrSnapshotManager.INDEX_DIR_PATH, indexDirPath);
it.rsp.add(SolrSnapshotManager.GENERATION_NUM, ic.getGeneration());
it.rsp.add(SolrSnapshotManager.FILE_LIST, ic.getFileNames());
}
}
Aggregations