Search in sources :

Example 11 with SolrIndexSearcher

use of org.apache.solr.search.SolrIndexSearcher in project lucene-solr by apache.

the class SimpleMLTQParser method parse.

public Query parse() {
    String defaultField = req.getSchema().getUniqueKeyField().getName();
    String uniqueValue = localParams.get(QueryParsing.V);
    String[] qf = localParams.getParams("qf");
    SolrIndexSearcher searcher = req.getSearcher();
    Query docIdQuery = createIdQuery(defaultField, uniqueValue);
    Map<String, Float> boostFields = new HashMap<>();
    try {
        TopDocs td = searcher.search(docIdQuery, 1);
        if (td.totalHits != 1)
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error completing MLT request. Could not fetch " + "document with id [" + uniqueValue + "]");
        ScoreDoc[] scoreDocs = td.scoreDocs;
        MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader());
        mlt.setMinTermFreq(localParams.getInt("mintf", MoreLikeThis.DEFAULT_MIN_TERM_FREQ));
        mlt.setMinDocFreq(localParams.getInt("mindf", MoreLikeThis.DEFAULT_MIN_DOC_FREQ));
        mlt.setMinWordLen(localParams.getInt("minwl", MoreLikeThis.DEFAULT_MIN_WORD_LENGTH));
        mlt.setMaxWordLen(localParams.getInt("maxwl", MoreLikeThis.DEFAULT_MAX_WORD_LENGTH));
        mlt.setMaxQueryTerms(localParams.getInt("maxqt", MoreLikeThis.DEFAULT_MAX_QUERY_TERMS));
        mlt.setMaxNumTokensParsed(localParams.getInt("maxntp", MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED));
        mlt.setMaxDocFreq(localParams.getInt("maxdf", MoreLikeThis.DEFAULT_MAX_DOC_FREQ));
        Boolean boost = localParams.getBool("boost", false);
        mlt.setBoost(boost);
        String[] fieldNames;
        if (qf != null) {
            ArrayList<String> fields = new ArrayList<>();
            for (String fieldName : qf) {
                if (!StringUtils.isEmpty(fieldName)) {
                    String[] strings = splitList.split(fieldName);
                    for (String string : strings) {
                        if (!StringUtils.isEmpty(string)) {
                            fields.add(string);
                        }
                    }
                }
            }
            // Parse field names and boosts from the fields
            boostFields = SolrPluginUtils.parseFieldBoosts(fields.toArray(new String[0]));
            fieldNames = boostFields.keySet().toArray(new String[0]);
        } else {
            Map<String, SchemaField> fieldDefinitions = req.getSearcher().getSchema().getFields();
            ArrayList<String> fields = new ArrayList();
            for (String fieldName : fieldDefinitions.keySet()) {
                if (fieldDefinitions.get(fieldName).indexed() && fieldDefinitions.get(fieldName).stored())
                    if (fieldDefinitions.get(fieldName).getType().getNumberType() == null)
                        fields.add(fieldName);
            }
            fieldNames = fields.toArray(new String[0]);
        }
        if (fieldNames.length < 1) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "MoreLikeThis requires at least one similarity field: qf");
        }
        mlt.setFieldNames(fieldNames);
        mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());
        Query rawMLTQuery = mlt.like(scoreDocs[0].doc);
        BooleanQuery boostedMLTQuery = (BooleanQuery) rawMLTQuery;
        if (boost && boostFields.size() > 0) {
            BooleanQuery.Builder newQ = new BooleanQuery.Builder();
            newQ.setMinimumNumberShouldMatch(boostedMLTQuery.getMinimumNumberShouldMatch());
            for (BooleanClause clause : boostedMLTQuery) {
                Query q = clause.getQuery();
                float originalBoost = 1f;
                if (q instanceof BoostQuery) {
                    BoostQuery bq = (BoostQuery) q;
                    q = bq.getQuery();
                    originalBoost = bq.getBoost();
                }
                Float fieldBoost = boostFields.get(((TermQuery) q).getTerm().field());
                q = ((fieldBoost != null) ? new BoostQuery(q, fieldBoost * originalBoost) : clause.getQuery());
                newQ.add(q, clause.getOccur());
            }
            boostedMLTQuery = newQ.build();
        }
        // exclude current document from results
        BooleanQuery.Builder realMLTQuery = new BooleanQuery.Builder();
        realMLTQuery.add(boostedMLTQuery, BooleanClause.Occur.MUST);
        realMLTQuery.add(docIdQuery, BooleanClause.Occur.MUST_NOT);
        return realMLTQuery.build();
    } catch (IOException e) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error completing MLT request" + e.getMessage());
    }
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) HashMap(java.util.HashMap) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) ArrayList(java.util.ArrayList) MoreLikeThis(org.apache.lucene.queries.mlt.MoreLikeThis) BoostQuery(org.apache.lucene.search.BoostQuery) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) SolrException(org.apache.solr.common.SolrException) TermQuery(org.apache.lucene.search.TermQuery) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) IOException(java.io.IOException) SchemaField(org.apache.solr.schema.SchemaField) BooleanClause(org.apache.lucene.search.BooleanClause)

Example 12 with SolrIndexSearcher

use of org.apache.solr.search.SolrIndexSearcher in project lucene-solr by apache.

the class CarrotClusteringEngine method getDocuments.

/**
   * Prepares Carrot2 documents for clustering.
   */
private List<Document> getDocuments(SolrDocumentList solrDocList, Map<SolrDocument, Integer> docIds, Query query, final SolrQueryRequest sreq) throws IOException {
    SolrHighlighter highlighter = null;
    SolrParams solrParams = sreq.getParams();
    SolrCore core = sreq.getCore();
    String urlField = solrParams.get(CarrotParams.URL_FIELD_NAME, "url");
    String titleFieldSpec = solrParams.get(CarrotParams.TITLE_FIELD_NAME, "title");
    String snippetFieldSpec = solrParams.get(CarrotParams.SNIPPET_FIELD_NAME, titleFieldSpec);
    String languageField = solrParams.get(CarrotParams.LANGUAGE_FIELD_NAME, null);
    // Maps Solr field names to Carrot2 custom field names
    Map<String, String> customFields = getCustomFieldsMap(solrParams);
    // Parse language code map string into a map
    Map<String, String> languageCodeMap = new HashMap<>();
    if (StringUtils.isNotBlank(languageField)) {
        for (String pair : solrParams.get(CarrotParams.LANGUAGE_CODE_MAP, "").split("[, ]")) {
            final String[] split = pair.split(":");
            if (split.length == 2 && StringUtils.isNotBlank(split[0]) && StringUtils.isNotBlank(split[1])) {
                languageCodeMap.put(split[0], split[1]);
            } else {
                log.warn("Unsupported format for " + CarrotParams.LANGUAGE_CODE_MAP + ": '" + pair + "'. Skipping this mapping.");
            }
        }
    }
    // Get the documents
    boolean produceSummary = solrParams.getBool(CarrotParams.PRODUCE_SUMMARY, false);
    SolrQueryRequest req = null;
    String[] snippetFieldAry = null;
    if (produceSummary) {
        highlighter = HighlightComponent.getHighlighter(core);
        if (highlighter != null) {
            Map<String, Object> args = new HashMap<>();
            snippetFieldAry = snippetFieldSpec.split("[, ]");
            args.put(HighlightParams.FIELDS, snippetFieldAry);
            args.put(HighlightParams.HIGHLIGHT, "true");
            //we don't care about actually highlighting the area
            args.put(HighlightParams.SIMPLE_PRE, "");
            args.put(HighlightParams.SIMPLE_POST, "");
            args.put(HighlightParams.FRAGSIZE, solrParams.getInt(CarrotParams.SUMMARY_FRAGSIZE, solrParams.getInt(HighlightParams.FRAGSIZE, 100)));
            args.put(HighlightParams.SNIPPETS, solrParams.getInt(CarrotParams.SUMMARY_SNIPPETS, solrParams.getInt(HighlightParams.SNIPPETS, 1)));
            req = new LocalSolrQueryRequest(core, query.toString(), "", 0, 1, args) {

                @Override
                public SolrIndexSearcher getSearcher() {
                    return sreq.getSearcher();
                }
            };
        } else {
            log.warn("No highlighter configured, cannot produce summary");
            produceSummary = false;
        }
    }
    Iterator<SolrDocument> docsIter = solrDocList.iterator();
    List<Document> result = new ArrayList<>(solrDocList.size());
    float[] scores = { 1.0f };
    int[] docsHolder = new int[1];
    Query theQuery = query;
    while (docsIter.hasNext()) {
        SolrDocument sdoc = docsIter.next();
        String snippet = null;
        // See comment in ClusteringComponent#finishStage().
        if (produceSummary && docIds != null) {
            docsHolder[0] = docIds.get(sdoc).intValue();
            DocList docAsList = new DocSlice(0, 1, docsHolder, scores, 1, 1.0f);
            NamedList<Object> highlights = highlighter.doHighlighting(docAsList, theQuery, req, snippetFieldAry);
            if (highlights != null && highlights.size() == 1) {
                // should only be one value given our setup
                // should only be one document
                @SuppressWarnings("unchecked") NamedList<String[]> tmp = (NamedList<String[]>) highlights.getVal(0);
                final StringBuilder sb = new StringBuilder();
                for (int j = 0; j < snippetFieldAry.length; j++) {
                    // Join fragments with a period, so that Carrot2 does not create
                    // cross-fragment phrases, such phrases rarely make sense.
                    String[] highlt = tmp.get(snippetFieldAry[j]);
                    if (highlt != null && highlt.length > 0) {
                        for (int i = 0; i < highlt.length; i++) {
                            sb.append(highlt[i]);
                            sb.append(" . ");
                        }
                    }
                }
                snippet = sb.toString();
            }
        }
        // If summaries not enabled or summary generation failed, use full content.
        if (snippet == null) {
            snippet = getConcatenated(sdoc, snippetFieldSpec);
        }
        // Create a Carrot2 document
        Document carrotDocument = new Document(getConcatenated(sdoc, titleFieldSpec), snippet, ObjectUtils.toString(sdoc.getFieldValue(urlField), ""));
        // Store Solr id of the document, we need it to map document instances 
        // found in clusters back to identifiers.
        carrotDocument.setField(SOLR_DOCUMENT_ID, sdoc.getFieldValue(idFieldName));
        // Set language
        if (StringUtils.isNotBlank(languageField)) {
            Collection<Object> languages = sdoc.getFieldValues(languageField);
            if (languages != null) {
                // Use the first Carrot2-supported language
                for (Object l : languages) {
                    String lang = ObjectUtils.toString(l, "");
                    if (languageCodeMap.containsKey(lang)) {
                        lang = languageCodeMap.get(lang);
                    }
                    // language variants, such as 'zh-cn', but Carrot2 uses underscores.
                    if (lang.indexOf('-') > 0) {
                        lang = lang.replace('-', '_');
                    }
                    // If the language is supported by Carrot2, we'll get a non-null value
                    final LanguageCode carrot2Language = LanguageCode.forISOCode(lang);
                    if (carrot2Language != null) {
                        carrotDocument.setLanguage(carrot2Language);
                        break;
                    }
                }
            }
        }
        // Add custom fields
        if (customFields != null) {
            for (Entry<String, String> entry : customFields.entrySet()) {
                carrotDocument.setField(entry.getValue(), sdoc.getFieldValue(entry.getKey()));
            }
        }
        result.add(carrotDocument);
    }
    return result;
}
Also used : Query(org.apache.lucene.search.Query) HashMap(java.util.HashMap) SolrCore(org.apache.solr.core.SolrCore) ArrayList(java.util.ArrayList) Document(org.carrot2.core.Document) SolrDocument(org.apache.solr.common.SolrDocument) DocSlice(org.apache.solr.search.DocSlice) LanguageCode(org.carrot2.core.LanguageCode) SolrDocument(org.apache.solr.common.SolrDocument) NamedList(org.apache.solr.common.util.NamedList) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) SolrHighlighter(org.apache.solr.highlight.SolrHighlighter) LocalSolrQueryRequest(org.apache.solr.request.LocalSolrQueryRequest) SolrQueryRequest(org.apache.solr.request.SolrQueryRequest) LocalSolrQueryRequest(org.apache.solr.request.LocalSolrQueryRequest) SolrParams(org.apache.solr.common.params.SolrParams) DocList(org.apache.solr.search.DocList)

Example 13 with SolrIndexSearcher

use of org.apache.solr.search.SolrIndexSearcher in project lucene-solr by apache.

the class ReplicationHandler method inform.

@Override
@SuppressWarnings("unchecked")
public void inform(SolrCore core) {
    this.core = core;
    registerCloseHook();
    Object nbtk = initArgs.get(NUMBER_BACKUPS_TO_KEEP_INIT_PARAM);
    if (nbtk != null) {
        numberBackupsToKeep = Integer.parseInt(nbtk.toString());
    } else {
        numberBackupsToKeep = 0;
    }
    NamedList slave = (NamedList) initArgs.get("slave");
    boolean enableSlave = isEnabled(slave);
    if (enableSlave) {
        currentIndexFetcher = pollingIndexFetcher = new IndexFetcher(slave, this, core);
        setupPolling((String) slave.get(POLL_INTERVAL));
        isSlave = true;
    }
    NamedList master = (NamedList) initArgs.get("master");
    boolean enableMaster = isEnabled(master);
    if (enableMaster || enableSlave) {
        if (core.getCoreContainer().getZkController() != null) {
            LOG.warn("SolrCloud is enabled for core " + core.getName() + " but so is old-style replication. Make sure you" + " intend this behavior, it usually indicates a mis-configuration. Master setting is " + Boolean.toString(enableMaster) + " and slave setting is " + Boolean.toString(enableSlave));
        }
    }
    if (!enableSlave && !enableMaster) {
        enableMaster = true;
        master = new NamedList<>();
    }
    if (enableMaster) {
        includeConfFiles = (String) master.get(CONF_FILES);
        if (includeConfFiles != null && includeConfFiles.trim().length() > 0) {
            List<String> files = Arrays.asList(includeConfFiles.split(","));
            for (String file : files) {
                if (file.trim().length() == 0)
                    continue;
                String[] strs = file.trim().split(":");
                // if there is an alias add it or it is null
                confFileNameAlias.add(strs[0], strs.length > 1 ? strs[1] : null);
            }
            LOG.info("Replication enabled for following config files: " + includeConfFiles);
        }
        List backup = master.getAll("backupAfter");
        boolean backupOnCommit = backup.contains("commit");
        boolean backupOnOptimize = !backupOnCommit && backup.contains("optimize");
        List replicateAfter = master.getAll(REPLICATE_AFTER);
        replicateOnCommit = replicateAfter.contains("commit");
        replicateOnOptimize = !replicateOnCommit && replicateAfter.contains("optimize");
        if (!replicateOnCommit && !replicateOnOptimize) {
            replicateOnCommit = true;
        }
        // save the last optimized commit point.
        if (replicateOnOptimize) {
            IndexDeletionPolicyWrapper wrapper = core.getDeletionPolicy();
            IndexDeletionPolicy policy = wrapper == null ? null : wrapper.getWrappedDeletionPolicy();
            if (policy instanceof SolrDeletionPolicy) {
                SolrDeletionPolicy solrPolicy = (SolrDeletionPolicy) policy;
                if (solrPolicy.getMaxOptimizedCommitsToKeep() < 1) {
                    solrPolicy.setMaxOptimizedCommitsToKeep(1);
                }
            } else {
                LOG.warn("Replication can't call setMaxOptimizedCommitsToKeep on " + policy);
            }
        }
        if (replicateOnOptimize || backupOnOptimize) {
            core.getUpdateHandler().registerOptimizeCallback(getEventListener(backupOnOptimize, replicateOnOptimize));
        }
        if (replicateOnCommit || backupOnCommit) {
            replicateOnCommit = true;
            core.getUpdateHandler().registerCommitCallback(getEventListener(backupOnCommit, replicateOnCommit));
        }
        if (replicateAfter.contains("startup")) {
            replicateOnStart = true;
            RefCounted<SolrIndexSearcher> s = core.getNewestSearcher(false);
            try {
                DirectoryReader reader = s == null ? null : s.get().getIndexReader();
                if (reader != null && reader.getIndexCommit() != null && reader.getIndexCommit().getGeneration() != 1L) {
                    try {
                        if (replicateOnOptimize) {
                            Collection<IndexCommit> commits = DirectoryReader.listCommits(reader.directory());
                            for (IndexCommit ic : commits) {
                                if (ic.getSegmentCount() == 1) {
                                    if (indexCommitPoint == null || indexCommitPoint.getGeneration() < ic.getGeneration())
                                        indexCommitPoint = ic;
                                }
                            }
                        } else {
                            indexCommitPoint = reader.getIndexCommit();
                        }
                    } finally {
                    // We don't need to save commit points for replication, the SolrDeletionPolicy
                    // always saves the last commit point (and the last optimized commit point, if needed)
                    /***
              if(indexCommitPoint != null){
                core.getDeletionPolicy().saveCommitPoint(indexCommitPoint.getGeneration());
              }
              ***/
                    }
                }
                // ensure the writer is init'd so that we have a list of commit points
                RefCounted<IndexWriter> iw = core.getUpdateHandler().getSolrCoreState().getIndexWriter(core);
                iw.decref();
            } catch (IOException e) {
                LOG.warn("Unable to get IndexCommit on startup", e);
            } finally {
                if (s != null)
                    s.decref();
            }
        }
        String reserve = (String) master.get(RESERVE);
        if (reserve != null && !reserve.trim().equals("")) {
            reserveCommitDuration = readIntervalMs(reserve);
        }
        LOG.info("Commits will be reserved for  " + reserveCommitDuration);
        isMaster = true;
    }
}
Also used : DirectoryReader(org.apache.lucene.index.DirectoryReader) NamedList(org.apache.solr.common.util.NamedList) IndexDeletionPolicyWrapper(org.apache.solr.core.IndexDeletionPolicyWrapper) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) IOException(java.io.IOException) IndexCommit(org.apache.lucene.index.IndexCommit) SolrIndexWriter(org.apache.solr.update.SolrIndexWriter) IndexWriter(org.apache.lucene.index.IndexWriter) SolrDeletionPolicy(org.apache.solr.core.SolrDeletionPolicy) List(java.util.List) ArrayList(java.util.ArrayList) NamedList(org.apache.solr.common.util.NamedList) IndexDeletionPolicy(org.apache.lucene.index.IndexDeletionPolicy)

Example 14 with SolrIndexSearcher

use of org.apache.solr.search.SolrIndexSearcher in project lucene-solr by apache.

the class IndexFetcher method openNewSearcherAndUpdateCommitPoint.

private void openNewSearcherAndUpdateCommitPoint() throws IOException {
    RefCounted<SolrIndexSearcher> searcher = null;
    IndexCommit commitPoint;
    // must get the latest solrCore object because the one we have might be closed because of a reload
    // todo stop keeping solrCore around
    SolrCore core = solrCore.getCoreContainer().getCore(solrCore.getName());
    try {
        Future[] waitSearcher = new Future[1];
        searcher = core.getSearcher(true, true, waitSearcher, true);
        if (waitSearcher[0] != null) {
            try {
                waitSearcher[0].get();
            } catch (InterruptedException | ExecutionException e) {
                SolrException.log(LOG, e);
            }
        }
        commitPoint = searcher.get().getIndexReader().getIndexCommit();
    } finally {
        if (searcher != null) {
            searcher.decref();
        }
        core.close();
    }
    // update the commit point in replication handler
    replicationHandler.indexCommitPoint = commitPoint;
}
Also used : SolrCore(org.apache.solr.core.SolrCore) Future(java.util.concurrent.Future) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) ExecutionException(java.util.concurrent.ExecutionException) IndexCommit(org.apache.lucene.index.IndexCommit)

Example 15 with SolrIndexSearcher

use of org.apache.solr.search.SolrIndexSearcher in project lucene-solr by apache.

the class CreateSnapshotOp method execute.

@Override
public void execute(CoreAdminHandler.CallInfo it) throws Exception {
    CoreContainer cc = it.handler.getCoreContainer();
    final SolrParams params = it.req.getParams();
    String commitName = params.required().get(CoreAdminParams.COMMIT_NAME);
    String cname = params.required().get(CoreAdminParams.CORE);
    try (SolrCore core = cc.getCore(cname)) {
        if (core == null) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unable to locate core " + cname);
        }
        String indexDirPath = core.getIndexDir();
        IndexCommit ic = core.getDeletionPolicy().getLatestCommit();
        if (ic == null) {
            RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
            try {
                ic = searcher.get().getIndexReader().getIndexCommit();
            } finally {
                searcher.decref();
            }
        }
        SolrSnapshotMetaDataManager mgr = core.getSnapshotMetaDataManager();
        mgr.snapshot(commitName, indexDirPath, ic.getGeneration());
        it.rsp.add(CoreAdminParams.CORE, core.getName());
        it.rsp.add(CoreAdminParams.COMMIT_NAME, commitName);
        it.rsp.add(SolrSnapshotManager.INDEX_DIR_PATH, indexDirPath);
        it.rsp.add(SolrSnapshotManager.GENERATION_NUM, ic.getGeneration());
        it.rsp.add(SolrSnapshotManager.FILE_LIST, ic.getFileNames());
    }
}
Also used : SolrSnapshotMetaDataManager(org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager) CoreContainer(org.apache.solr.core.CoreContainer) SolrCore(org.apache.solr.core.SolrCore) SolrParams(org.apache.solr.common.params.SolrParams) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) SolrException(org.apache.solr.common.SolrException) IndexCommit(org.apache.lucene.index.IndexCommit)

Aggregations

SolrIndexSearcher (org.apache.solr.search.SolrIndexSearcher)125 SolrCore (org.apache.solr.core.SolrCore)33 NamedList (org.apache.solr.common.util.NamedList)32 SolrException (org.apache.solr.common.SolrException)31 IOException (java.io.IOException)29 ArrayList (java.util.ArrayList)24 SolrParams (org.apache.solr.common.params.SolrParams)22 SchemaField (org.apache.solr.schema.SchemaField)22 Test (org.junit.Test)22 Document (org.apache.lucene.document.Document)21 SolrInputDocument (org.apache.solr.common.SolrInputDocument)17 Term (org.apache.lucene.index.Term)16 IndexReader (org.apache.lucene.index.IndexReader)14 SolrQueryRequest (org.apache.solr.request.SolrQueryRequest)14 IndexSchema (org.apache.solr.schema.IndexSchema)14 DocList (org.apache.solr.search.DocList)14 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)13 SolrDocument (org.apache.solr.common.SolrDocument)13 FieldType (org.apache.solr.schema.FieldType)13 Query (org.apache.lucene.search.Query)12