Search in sources :

Example 21 with SolrDocument

use of org.apache.solr.common.SolrDocument in project lucene-solr by apache.

the class ClusteringComponent method process.

@Override
public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (!params.getBool(COMPONENT_NAME, false)) {
        return;
    }
    final String name = getClusteringEngineName(rb);
    boolean useResults = params.getBool(ClusteringParams.USE_SEARCH_RESULTS, false);
    if (useResults == true) {
        SearchClusteringEngine engine = searchClusteringEngines.get(name);
        if (engine != null) {
            checkAvailable(name, engine);
            DocListAndSet results = rb.getResults();
            Map<SolrDocument, Integer> docIds = new HashMap<>(results.docList.size());
            SolrDocumentList solrDocList = docListToSolrDocumentList(results.docList, rb.req.getSearcher(), engine.getFieldsToLoad(rb.req), docIds);
            Object clusters = engine.cluster(rb.getQuery(), solrDocList, docIds, rb.req);
            rb.rsp.add("clusters", clusters);
        } else {
            log.warn("No engine named: " + name);
        }
    }
    boolean useCollection = params.getBool(ClusteringParams.USE_COLLECTION, false);
    if (useCollection == true) {
        DocumentClusteringEngine engine = documentClusteringEngines.get(name);
        if (engine != null) {
            checkAvailable(name, engine);
            boolean useDocSet = params.getBool(ClusteringParams.USE_DOC_SET, false);
            NamedList<?> nl = null;
            // TODO: This likely needs to be made into a background task that runs in an executor
            if (useDocSet == true) {
                nl = engine.cluster(rb.getResults().docSet, params);
            } else {
                nl = engine.cluster(params);
            }
            rb.rsp.add("clusters", nl);
        } else {
            log.warn("No engine named: " + name);
        }
    }
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DocListAndSet(org.apache.solr.search.DocListAndSet) SolrDocumentList(org.apache.solr.common.SolrDocumentList) SolrDocument(org.apache.solr.common.SolrDocument) SolrParams(org.apache.solr.common.params.SolrParams)

Example 22 with SolrDocument

use of org.apache.solr.common.SolrDocument in project lucene-solr by apache.

the class CarrotClusteringEngine method cluster.

@Override
public Object cluster(Query query, SolrDocumentList solrDocList, Map<SolrDocument, Integer> docIds, SolrQueryRequest sreq) {
    try {
        // Prepare attributes for Carrot2 clustering call
        Map<String, Object> attributes = new HashMap<>();
        List<Document> documents = getDocuments(solrDocList, docIds, query, sreq);
        attributes.put(AttributeNames.DOCUMENTS, documents);
        attributes.put(AttributeNames.QUERY, query.toString());
        // Pass the fields on which clustering runs.
        attributes.put("solrFieldNames", getFieldsForClustering(sreq));
        // Pass extra overriding attributes from the request, if any
        extractCarrotAttributes(sreq.getParams(), attributes);
        // Perform clustering and convert to an output structure of clusters.
        //
        // Carrot2 uses current thread's context class loader to get
        // certain classes (e.g. custom tokenizer/stemmer) at runtime.
        // To make sure classes from contrib JARs are available,
        // we swap the context class loader for the time of clustering.
        Thread ct = Thread.currentThread();
        ClassLoader prev = ct.getContextClassLoader();
        try {
            ct.setContextClassLoader(core.getResourceLoader().getClassLoader());
            return clustersToNamedList(controller.process(attributes, clusteringAlgorithmClass).getClusters(), sreq.getParams());
        } finally {
            ct.setContextClassLoader(prev);
        }
    } catch (Exception e) {
        log.error("Carrot2 clustering failed", e);
        throw new SolrException(ErrorCode.SERVER_ERROR, "Carrot2 clustering failed", e);
    }
}
Also used : HashMap(java.util.HashMap) Document(org.carrot2.core.Document) SolrDocument(org.apache.solr.common.SolrDocument) SolrException(org.apache.solr.common.SolrException) IOException(java.io.IOException) SolrException(org.apache.solr.common.SolrException)

Example 23 with SolrDocument

use of org.apache.solr.common.SolrDocument in project lucene-solr by apache.

the class CarrotClusteringEngine method getDocuments.

/**
   * Prepares Carrot2 documents for clustering.
   */
private List<Document> getDocuments(SolrDocumentList solrDocList, Map<SolrDocument, Integer> docIds, Query query, final SolrQueryRequest sreq) throws IOException {
    SolrHighlighter highlighter = null;
    SolrParams solrParams = sreq.getParams();
    SolrCore core = sreq.getCore();
    String urlField = solrParams.get(CarrotParams.URL_FIELD_NAME, "url");
    String titleFieldSpec = solrParams.get(CarrotParams.TITLE_FIELD_NAME, "title");
    String snippetFieldSpec = solrParams.get(CarrotParams.SNIPPET_FIELD_NAME, titleFieldSpec);
    String languageField = solrParams.get(CarrotParams.LANGUAGE_FIELD_NAME, null);
    // Maps Solr field names to Carrot2 custom field names
    Map<String, String> customFields = getCustomFieldsMap(solrParams);
    // Parse language code map string into a map
    Map<String, String> languageCodeMap = new HashMap<>();
    if (StringUtils.isNotBlank(languageField)) {
        for (String pair : solrParams.get(CarrotParams.LANGUAGE_CODE_MAP, "").split("[, ]")) {
            final String[] split = pair.split(":");
            if (split.length == 2 && StringUtils.isNotBlank(split[0]) && StringUtils.isNotBlank(split[1])) {
                languageCodeMap.put(split[0], split[1]);
            } else {
                log.warn("Unsupported format for " + CarrotParams.LANGUAGE_CODE_MAP + ": '" + pair + "'. Skipping this mapping.");
            }
        }
    }
    // Get the documents
    boolean produceSummary = solrParams.getBool(CarrotParams.PRODUCE_SUMMARY, false);
    SolrQueryRequest req = null;
    String[] snippetFieldAry = null;
    if (produceSummary) {
        highlighter = HighlightComponent.getHighlighter(core);
        if (highlighter != null) {
            Map<String, Object> args = new HashMap<>();
            snippetFieldAry = snippetFieldSpec.split("[, ]");
            args.put(HighlightParams.FIELDS, snippetFieldAry);
            args.put(HighlightParams.HIGHLIGHT, "true");
            //we don't care about actually highlighting the area
            args.put(HighlightParams.SIMPLE_PRE, "");
            args.put(HighlightParams.SIMPLE_POST, "");
            args.put(HighlightParams.FRAGSIZE, solrParams.getInt(CarrotParams.SUMMARY_FRAGSIZE, solrParams.getInt(HighlightParams.FRAGSIZE, 100)));
            args.put(HighlightParams.SNIPPETS, solrParams.getInt(CarrotParams.SUMMARY_SNIPPETS, solrParams.getInt(HighlightParams.SNIPPETS, 1)));
            req = new LocalSolrQueryRequest(core, query.toString(), "", 0, 1, args) {

                @Override
                public SolrIndexSearcher getSearcher() {
                    return sreq.getSearcher();
                }
            };
        } else {
            log.warn("No highlighter configured, cannot produce summary");
            produceSummary = false;
        }
    }
    Iterator<SolrDocument> docsIter = solrDocList.iterator();
    List<Document> result = new ArrayList<>(solrDocList.size());
    float[] scores = { 1.0f };
    int[] docsHolder = new int[1];
    Query theQuery = query;
    while (docsIter.hasNext()) {
        SolrDocument sdoc = docsIter.next();
        String snippet = null;
        // See comment in ClusteringComponent#finishStage().
        if (produceSummary && docIds != null) {
            docsHolder[0] = docIds.get(sdoc).intValue();
            DocList docAsList = new DocSlice(0, 1, docsHolder, scores, 1, 1.0f);
            NamedList<Object> highlights = highlighter.doHighlighting(docAsList, theQuery, req, snippetFieldAry);
            if (highlights != null && highlights.size() == 1) {
                // should only be one value given our setup
                // should only be one document
                @SuppressWarnings("unchecked") NamedList<String[]> tmp = (NamedList<String[]>) highlights.getVal(0);
                final StringBuilder sb = new StringBuilder();
                for (int j = 0; j < snippetFieldAry.length; j++) {
                    // Join fragments with a period, so that Carrot2 does not create
                    // cross-fragment phrases, such phrases rarely make sense.
                    String[] highlt = tmp.get(snippetFieldAry[j]);
                    if (highlt != null && highlt.length > 0) {
                        for (int i = 0; i < highlt.length; i++) {
                            sb.append(highlt[i]);
                            sb.append(" . ");
                        }
                    }
                }
                snippet = sb.toString();
            }
        }
        // If summaries not enabled or summary generation failed, use full content.
        if (snippet == null) {
            snippet = getConcatenated(sdoc, snippetFieldSpec);
        }
        // Create a Carrot2 document
        Document carrotDocument = new Document(getConcatenated(sdoc, titleFieldSpec), snippet, ObjectUtils.toString(sdoc.getFieldValue(urlField), ""));
        // Store Solr id of the document, we need it to map document instances 
        // found in clusters back to identifiers.
        carrotDocument.setField(SOLR_DOCUMENT_ID, sdoc.getFieldValue(idFieldName));
        // Set language
        if (StringUtils.isNotBlank(languageField)) {
            Collection<Object> languages = sdoc.getFieldValues(languageField);
            if (languages != null) {
                // Use the first Carrot2-supported language
                for (Object l : languages) {
                    String lang = ObjectUtils.toString(l, "");
                    if (languageCodeMap.containsKey(lang)) {
                        lang = languageCodeMap.get(lang);
                    }
                    // language variants, such as 'zh-cn', but Carrot2 uses underscores.
                    if (lang.indexOf('-') > 0) {
                        lang = lang.replace('-', '_');
                    }
                    // If the language is supported by Carrot2, we'll get a non-null value
                    final LanguageCode carrot2Language = LanguageCode.forISOCode(lang);
                    if (carrot2Language != null) {
                        carrotDocument.setLanguage(carrot2Language);
                        break;
                    }
                }
            }
        }
        // Add custom fields
        if (customFields != null) {
            for (Entry<String, String> entry : customFields.entrySet()) {
                carrotDocument.setField(entry.getValue(), sdoc.getFieldValue(entry.getKey()));
            }
        }
        result.add(carrotDocument);
    }
    return result;
}
Also used : Query(org.apache.lucene.search.Query) HashMap(java.util.HashMap) SolrCore(org.apache.solr.core.SolrCore) ArrayList(java.util.ArrayList) Document(org.carrot2.core.Document) SolrDocument(org.apache.solr.common.SolrDocument) DocSlice(org.apache.solr.search.DocSlice) LanguageCode(org.carrot2.core.LanguageCode) SolrDocument(org.apache.solr.common.SolrDocument) NamedList(org.apache.solr.common.util.NamedList) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) SolrHighlighter(org.apache.solr.highlight.SolrHighlighter) LocalSolrQueryRequest(org.apache.solr.request.LocalSolrQueryRequest) SolrQueryRequest(org.apache.solr.request.SolrQueryRequest) LocalSolrQueryRequest(org.apache.solr.request.LocalSolrQueryRequest) SolrParams(org.apache.solr.common.params.SolrParams) DocList(org.apache.solr.search.DocList)

Example 24 with SolrDocument

use of org.apache.solr.common.SolrDocument in project lucene-solr by apache.

the class TextResponseWriter method writeVal.

public final void writeVal(String name, Object val) throws IOException {
    // go in order of most common to least common
    if (val == null) {
        writeNull(name);
    } else if (val instanceof String) {
        writeStr(name, val.toString(), true);
    // micro-optimization... using toString() avoids a cast first
    } else if (val instanceof IndexableField) {
        IndexableField f = (IndexableField) val;
        SchemaField sf = schema.getFieldOrNull(f.name());
        if (sf != null) {
            sf.getType().write(this, name, f);
        } else {
            writeStr(name, f.stringValue(), true);
        }
    } else if (val instanceof Number) {
        writeNumber(name, (Number) val);
    } else if (val instanceof Boolean) {
        writeBool(name, (Boolean) val);
    } else if (val instanceof Date) {
        writeDate(name, (Date) val);
    } else if (val instanceof Document) {
        SolrDocument doc = DocsStreamer.convertLuceneDocToSolrDoc((Document) val, schema);
        writeSolrDocument(name, doc, returnFields, 0);
    } else if (val instanceof SolrDocument) {
        writeSolrDocument(name, (SolrDocument) val, returnFields, 0);
    } else if (val instanceof ResultContext) {
        // requires access to IndexReader
        writeDocuments(name, (ResultContext) val);
    } else if (val instanceof DocList) {
        // Should not happen normally
        ResultContext ctx = new BasicResultContext((DocList) val, returnFields, null, null, req);
        writeDocuments(name, ctx);
    // }
    // else if (val instanceof DocSet) {
    // how do we know what fields to read?
    // todo: have a DocList/DocSet wrapper that
    // restricts the fields to write...?
    } else if (val instanceof SolrDocumentList) {
        writeSolrDocumentList(name, (SolrDocumentList) val, returnFields);
    } else if (val instanceof Map) {
        writeMap(name, (Map) val, false, true);
    } else if (val instanceof NamedList) {
        writeNamedList(name, (NamedList) val);
    } else if (val instanceof Path) {
        writeStr(name, ((Path) val).toAbsolutePath().toString(), true);
    } else if (val instanceof IteratorWriter) {
        writeIterator((IteratorWriter) val);
    } else if (val instanceof Iterable) {
        writeArray(name, ((Iterable) val).iterator());
    } else if (val instanceof Object[]) {
        writeArray(name, (Object[]) val);
    } else if (val instanceof Iterator) {
        writeArray(name, (Iterator) val);
    } else if (val instanceof byte[]) {
        byte[] arr = (byte[]) val;
        writeByteArr(name, arr, 0, arr.length);
    } else if (val instanceof BytesRef) {
        BytesRef arr = (BytesRef) val;
        writeByteArr(name, arr.bytes, arr.offset, arr.length);
    } else if (val instanceof EnumFieldValue) {
        writeStr(name, val.toString(), true);
    } else if (val instanceof WriteableValue) {
        ((WriteableValue) val).write(name, this);
    } else if (val instanceof MapWriter) {
        writeMap((MapWriter) val);
    } else if (val instanceof MapSerializable) {
        //todo find a better way to reuse the map more efficiently
        writeMap(name, ((MapSerializable) val).toMap(new LinkedHashMap<>()), false, true);
    } else {
        // default... for debugging only
        writeStr(name, val.getClass().getName() + ':' + val.toString(), true);
    }
}
Also used : MapWriter(org.apache.solr.common.MapWriter) EnumFieldValue(org.apache.solr.common.EnumFieldValue) Document(org.apache.lucene.document.Document) SolrDocument(org.apache.solr.common.SolrDocument) SolrDocument(org.apache.solr.common.SolrDocument) IteratorWriter(org.apache.solr.common.IteratorWriter) Iterator(java.util.Iterator) BytesRef(org.apache.lucene.util.BytesRef) Path(java.nio.file.Path) MapSerializable(org.apache.solr.common.MapSerializable) NamedList(org.apache.solr.common.util.NamedList) SolrDocumentList(org.apache.solr.common.SolrDocumentList) Date(java.util.Date) IndexableField(org.apache.lucene.index.IndexableField) SchemaField(org.apache.solr.schema.SchemaField) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) DocList(org.apache.solr.search.DocList)

Example 25 with SolrDocument

use of org.apache.solr.common.SolrDocument in project lucene-solr by apache.

the class TextResponseWriter method writeDocuments.

public final void writeDocuments(String name, ResultContext res) throws IOException {
    DocList ids = res.getDocList();
    Iterator<SolrDocument> docsStreamer = res.getProcessedDocuments();
    writeStartDocumentList(name, ids.offset(), ids.size(), ids.matches(), res.wantsScores() ? new Float(ids.maxScore()) : null);
    int idx = 0;
    while (docsStreamer.hasNext()) {
        writeSolrDocument(null, docsStreamer.next(), res.getReturnFields(), idx);
        idx++;
    }
    writeEndDocumentList();
}
Also used : SolrDocument(org.apache.solr.common.SolrDocument) DocList(org.apache.solr.search.DocList)

Aggregations

SolrDocument (org.apache.solr.common.SolrDocument)230 SolrDocumentList (org.apache.solr.common.SolrDocumentList)93 QueryResponse (org.apache.solr.client.solrj.response.QueryResponse)81 ArrayList (java.util.ArrayList)50 SolrQuery (org.apache.solr.client.solrj.SolrQuery)47 Test (org.junit.Test)46 SolrParams (org.apache.solr.common.params.SolrParams)38 SolrServerException (org.apache.solr.client.solrj.SolrServerException)35 ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams)35 IOException (java.io.IOException)32 SolrInputDocument (org.apache.solr.common.SolrInputDocument)28 HashMap (java.util.HashMap)26 HttpSolrClient (org.apache.solr.client.solrj.impl.HttpSolrClient)24 NamedList (org.apache.solr.common.util.NamedList)21 Map (java.util.Map)20 List (java.util.List)18 SolrClient (org.apache.solr.client.solrj.SolrClient)13 ByteArrayInputStream (java.io.ByteArrayInputStream)12 UpdateRequest (org.apache.solr.client.solrj.request.UpdateRequest)12 SolrException (org.apache.solr.common.SolrException)12