Search in sources :

Example 1 with DocList

use of org.apache.solr.search.DocList in project lucene-solr by apache.

the class CarrotClusteringEngine method getDocuments.

/**
   * Prepares Carrot2 documents for clustering.
   */
private List<Document> getDocuments(SolrDocumentList solrDocList, Map<SolrDocument, Integer> docIds, Query query, final SolrQueryRequest sreq) throws IOException {
    SolrHighlighter highlighter = null;
    SolrParams solrParams = sreq.getParams();
    SolrCore core = sreq.getCore();
    String urlField = solrParams.get(CarrotParams.URL_FIELD_NAME, "url");
    String titleFieldSpec = solrParams.get(CarrotParams.TITLE_FIELD_NAME, "title");
    String snippetFieldSpec = solrParams.get(CarrotParams.SNIPPET_FIELD_NAME, titleFieldSpec);
    String languageField = solrParams.get(CarrotParams.LANGUAGE_FIELD_NAME, null);
    // Maps Solr field names to Carrot2 custom field names
    Map<String, String> customFields = getCustomFieldsMap(solrParams);
    // Parse language code map string into a map
    Map<String, String> languageCodeMap = new HashMap<>();
    if (StringUtils.isNotBlank(languageField)) {
        for (String pair : solrParams.get(CarrotParams.LANGUAGE_CODE_MAP, "").split("[, ]")) {
            final String[] split = pair.split(":");
            if (split.length == 2 && StringUtils.isNotBlank(split[0]) && StringUtils.isNotBlank(split[1])) {
                languageCodeMap.put(split[0], split[1]);
            } else {
                log.warn("Unsupported format for " + CarrotParams.LANGUAGE_CODE_MAP + ": '" + pair + "'. Skipping this mapping.");
            }
        }
    }
    // Get the documents
    boolean produceSummary = solrParams.getBool(CarrotParams.PRODUCE_SUMMARY, false);
    SolrQueryRequest req = null;
    String[] snippetFieldAry = null;
    if (produceSummary) {
        highlighter = HighlightComponent.getHighlighter(core);
        if (highlighter != null) {
            Map<String, Object> args = new HashMap<>();
            snippetFieldAry = snippetFieldSpec.split("[, ]");
            args.put(HighlightParams.FIELDS, snippetFieldAry);
            args.put(HighlightParams.HIGHLIGHT, "true");
            //we don't care about actually highlighting the area
            args.put(HighlightParams.SIMPLE_PRE, "");
            args.put(HighlightParams.SIMPLE_POST, "");
            args.put(HighlightParams.FRAGSIZE, solrParams.getInt(CarrotParams.SUMMARY_FRAGSIZE, solrParams.getInt(HighlightParams.FRAGSIZE, 100)));
            args.put(HighlightParams.SNIPPETS, solrParams.getInt(CarrotParams.SUMMARY_SNIPPETS, solrParams.getInt(HighlightParams.SNIPPETS, 1)));
            req = new LocalSolrQueryRequest(core, query.toString(), "", 0, 1, args) {

                @Override
                public SolrIndexSearcher getSearcher() {
                    return sreq.getSearcher();
                }
            };
        } else {
            log.warn("No highlighter configured, cannot produce summary");
            produceSummary = false;
        }
    }
    Iterator<SolrDocument> docsIter = solrDocList.iterator();
    List<Document> result = new ArrayList<>(solrDocList.size());
    float[] scores = { 1.0f };
    int[] docsHolder = new int[1];
    Query theQuery = query;
    while (docsIter.hasNext()) {
        SolrDocument sdoc = docsIter.next();
        String snippet = null;
        // See comment in ClusteringComponent#finishStage().
        if (produceSummary && docIds != null) {
            docsHolder[0] = docIds.get(sdoc).intValue();
            DocList docAsList = new DocSlice(0, 1, docsHolder, scores, 1, 1.0f);
            NamedList<Object> highlights = highlighter.doHighlighting(docAsList, theQuery, req, snippetFieldAry);
            if (highlights != null && highlights.size() == 1) {
                // should only be one value given our setup
                // should only be one document
                @SuppressWarnings("unchecked") NamedList<String[]> tmp = (NamedList<String[]>) highlights.getVal(0);
                final StringBuilder sb = new StringBuilder();
                for (int j = 0; j < snippetFieldAry.length; j++) {
                    // Join fragments with a period, so that Carrot2 does not create
                    // cross-fragment phrases, such phrases rarely make sense.
                    String[] highlt = tmp.get(snippetFieldAry[j]);
                    if (highlt != null && highlt.length > 0) {
                        for (int i = 0; i < highlt.length; i++) {
                            sb.append(highlt[i]);
                            sb.append(" . ");
                        }
                    }
                }
                snippet = sb.toString();
            }
        }
        // If summaries not enabled or summary generation failed, use full content.
        if (snippet == null) {
            snippet = getConcatenated(sdoc, snippetFieldSpec);
        }
        // Create a Carrot2 document
        Document carrotDocument = new Document(getConcatenated(sdoc, titleFieldSpec), snippet, ObjectUtils.toString(sdoc.getFieldValue(urlField), ""));
        // Store Solr id of the document, we need it to map document instances 
        // found in clusters back to identifiers.
        carrotDocument.setField(SOLR_DOCUMENT_ID, sdoc.getFieldValue(idFieldName));
        // Set language
        if (StringUtils.isNotBlank(languageField)) {
            Collection<Object> languages = sdoc.getFieldValues(languageField);
            if (languages != null) {
                // Use the first Carrot2-supported language
                for (Object l : languages) {
                    String lang = ObjectUtils.toString(l, "");
                    if (languageCodeMap.containsKey(lang)) {
                        lang = languageCodeMap.get(lang);
                    }
                    // language variants, such as 'zh-cn', but Carrot2 uses underscores.
                    if (lang.indexOf('-') > 0) {
                        lang = lang.replace('-', '_');
                    }
                    // If the language is supported by Carrot2, we'll get a non-null value
                    final LanguageCode carrot2Language = LanguageCode.forISOCode(lang);
                    if (carrot2Language != null) {
                        carrotDocument.setLanguage(carrot2Language);
                        break;
                    }
                }
            }
        }
        // Add custom fields
        if (customFields != null) {
            for (Entry<String, String> entry : customFields.entrySet()) {
                carrotDocument.setField(entry.getValue(), sdoc.getFieldValue(entry.getKey()));
            }
        }
        result.add(carrotDocument);
    }
    return result;
}
Also used : Query(org.apache.lucene.search.Query) HashMap(java.util.HashMap) SolrCore(org.apache.solr.core.SolrCore) ArrayList(java.util.ArrayList) Document(org.carrot2.core.Document) SolrDocument(org.apache.solr.common.SolrDocument) DocSlice(org.apache.solr.search.DocSlice) LanguageCode(org.carrot2.core.LanguageCode) SolrDocument(org.apache.solr.common.SolrDocument) NamedList(org.apache.solr.common.util.NamedList) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) SolrHighlighter(org.apache.solr.highlight.SolrHighlighter) LocalSolrQueryRequest(org.apache.solr.request.LocalSolrQueryRequest) SolrQueryRequest(org.apache.solr.request.SolrQueryRequest) LocalSolrQueryRequest(org.apache.solr.request.LocalSolrQueryRequest) SolrParams(org.apache.solr.common.params.SolrParams) DocList(org.apache.solr.search.DocList)

Example 2 with DocList

use of org.apache.solr.search.DocList in project lucene-solr by apache.

the class TextResponseWriter method writeVal.

public final void writeVal(String name, Object val) throws IOException {
    // go in order of most common to least common
    if (val == null) {
        writeNull(name);
    } else if (val instanceof String) {
        writeStr(name, val.toString(), true);
    // micro-optimization... using toString() avoids a cast first
    } else if (val instanceof IndexableField) {
        IndexableField f = (IndexableField) val;
        SchemaField sf = schema.getFieldOrNull(f.name());
        if (sf != null) {
            sf.getType().write(this, name, f);
        } else {
            writeStr(name, f.stringValue(), true);
        }
    } else if (val instanceof Number) {
        writeNumber(name, (Number) val);
    } else if (val instanceof Boolean) {
        writeBool(name, (Boolean) val);
    } else if (val instanceof Date) {
        writeDate(name, (Date) val);
    } else if (val instanceof Document) {
        SolrDocument doc = DocsStreamer.convertLuceneDocToSolrDoc((Document) val, schema);
        writeSolrDocument(name, doc, returnFields, 0);
    } else if (val instanceof SolrDocument) {
        writeSolrDocument(name, (SolrDocument) val, returnFields, 0);
    } else if (val instanceof ResultContext) {
        // requires access to IndexReader
        writeDocuments(name, (ResultContext) val);
    } else if (val instanceof DocList) {
        // Should not happen normally
        ResultContext ctx = new BasicResultContext((DocList) val, returnFields, null, null, req);
        writeDocuments(name, ctx);
    // }
    // else if (val instanceof DocSet) {
    // how do we know what fields to read?
    // todo: have a DocList/DocSet wrapper that
    // restricts the fields to write...?
    } else if (val instanceof SolrDocumentList) {
        writeSolrDocumentList(name, (SolrDocumentList) val, returnFields);
    } else if (val instanceof Map) {
        writeMap(name, (Map) val, false, true);
    } else if (val instanceof NamedList) {
        writeNamedList(name, (NamedList) val);
    } else if (val instanceof Path) {
        writeStr(name, ((Path) val).toAbsolutePath().toString(), true);
    } else if (val instanceof IteratorWriter) {
        writeIterator((IteratorWriter) val);
    } else if (val instanceof Iterable) {
        writeArray(name, ((Iterable) val).iterator());
    } else if (val instanceof Object[]) {
        writeArray(name, (Object[]) val);
    } else if (val instanceof Iterator) {
        writeArray(name, (Iterator) val);
    } else if (val instanceof byte[]) {
        byte[] arr = (byte[]) val;
        writeByteArr(name, arr, 0, arr.length);
    } else if (val instanceof BytesRef) {
        BytesRef arr = (BytesRef) val;
        writeByteArr(name, arr.bytes, arr.offset, arr.length);
    } else if (val instanceof EnumFieldValue) {
        writeStr(name, val.toString(), true);
    } else if (val instanceof WriteableValue) {
        ((WriteableValue) val).write(name, this);
    } else if (val instanceof MapWriter) {
        writeMap((MapWriter) val);
    } else if (val instanceof MapSerializable) {
        //todo find a better way to reuse the map more efficiently
        writeMap(name, ((MapSerializable) val).toMap(new LinkedHashMap<>()), false, true);
    } else {
        // default... for debugging only
        writeStr(name, val.getClass().getName() + ':' + val.toString(), true);
    }
}
Also used : MapWriter(org.apache.solr.common.MapWriter) EnumFieldValue(org.apache.solr.common.EnumFieldValue) Document(org.apache.lucene.document.Document) SolrDocument(org.apache.solr.common.SolrDocument) SolrDocument(org.apache.solr.common.SolrDocument) IteratorWriter(org.apache.solr.common.IteratorWriter) Iterator(java.util.Iterator) BytesRef(org.apache.lucene.util.BytesRef) Path(java.nio.file.Path) MapSerializable(org.apache.solr.common.MapSerializable) NamedList(org.apache.solr.common.util.NamedList) SolrDocumentList(org.apache.solr.common.SolrDocumentList) Date(java.util.Date) IndexableField(org.apache.lucene.index.IndexableField) SchemaField(org.apache.solr.schema.SchemaField) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) DocList(org.apache.solr.search.DocList)

Example 3 with DocList

use of org.apache.solr.search.DocList in project lucene-solr by apache.

the class TextResponseWriter method writeDocuments.

public final void writeDocuments(String name, ResultContext res) throws IOException {
    DocList ids = res.getDocList();
    Iterator<SolrDocument> docsStreamer = res.getProcessedDocuments();
    writeStartDocumentList(name, ids.offset(), ids.size(), ids.matches(), res.wantsScores() ? new Float(ids.maxScore()) : null);
    int idx = 0;
    while (docsStreamer.hasNext()) {
        writeSolrDocument(null, docsStreamer.next(), res.getReturnFields(), idx);
        idx++;
    }
    writeEndDocumentList();
}
Also used : SolrDocument(org.apache.solr.common.SolrDocument) DocList(org.apache.solr.search.DocList)

Example 4 with DocList

use of org.apache.solr.search.DocList in project lucene-solr by apache.

the class ChildDocTransformer method transform.

@Override
public void transform(SolrDocument doc, int docid, float score) {
    FieldType idFt = idField.getType();
    Object parentIdField = doc.getFirstValue(idField.getName());
    String parentIdExt = parentIdField instanceof IndexableField ? idFt.toExternal((IndexableField) parentIdField) : parentIdField.toString();
    try {
        Query parentQuery = idFt.getFieldQuery(null, idField, parentIdExt);
        Query query = new ToChildBlockJoinQuery(parentQuery, parentsFilter);
        DocList children = context.getSearcher().getDocList(query, childFilterQuery, new Sort(), 0, limit);
        if (children.matches() > 0) {
            DocIterator i = children.iterator();
            while (i.hasNext()) {
                Integer childDocNum = i.next();
                Document childDoc = context.getSearcher().doc(childDocNum);
                SolrDocument solrChildDoc = DocsStreamer.convertLuceneDocToSolrDoc(childDoc, schema);
                // TODO: future enhancement...
                // support an fl local param in the transformer, which is used to build
                // a private ReturnFields instance that we use to prune unwanted field 
                // names from solrChildDoc
                doc.addChildDocument(solrChildDoc);
            }
        }
    } catch (IOException e) {
        doc.put(name, "Could not fetch child Documents");
    }
}
Also used : DocIterator(org.apache.solr.search.DocIterator) Query(org.apache.lucene.search.Query) ToChildBlockJoinQuery(org.apache.lucene.search.join.ToChildBlockJoinQuery) IOException(java.io.IOException) SolrDocument(org.apache.solr.common.SolrDocument) Document(org.apache.lucene.document.Document) ToChildBlockJoinQuery(org.apache.lucene.search.join.ToChildBlockJoinQuery) FieldType(org.apache.solr.schema.FieldType) IndexableField(org.apache.lucene.index.IndexableField) SolrDocument(org.apache.solr.common.SolrDocument) Sort(org.apache.lucene.search.Sort) DocList(org.apache.solr.search.DocList)

Example 5 with DocList

use of org.apache.solr.search.DocList in project lucene-solr by apache.

the class XLSXWriter method writeResponse.

public void writeResponse(OutputStream out, LinkedHashMap<String, String> colNamesMap, LinkedHashMap<String, Integer> colWidthsMap) throws IOException {
    SolrParams params = req.getParams();
    Collection<String> fields = returnFields.getRequestedFieldNames();
    Object responseObj = rsp.getValues().get("response");
    boolean returnOnlyStored = false;
    if (fields == null || returnFields.hasPatternMatching()) {
        if (responseObj instanceof SolrDocumentList) {
            // get the list of fields from the SolrDocumentList
            if (fields == null) {
                fields = new LinkedHashSet<String>();
            }
            for (SolrDocument sdoc : (SolrDocumentList) responseObj) {
                fields.addAll(sdoc.getFieldNames());
            }
        } else {
            // get the list of fields from the index
            Iterable<String> all = req.getSearcher().getFieldNames();
            if (fields == null) {
                fields = Sets.newHashSet(all);
            } else {
                Iterables.addAll(fields, all);
            }
        }
        if (returnFields.wantsScore()) {
            fields.add("score");
        } else {
            fields.remove("score");
        }
        returnOnlyStored = true;
    }
    for (String field : fields) {
        if (!returnFields.wantsField(field)) {
            continue;
        }
        if (field.equals("score")) {
            XLField xlField = new XLField();
            xlField.name = "score";
            xlFields.put("score", xlField);
            continue;
        }
        SchemaField sf = schema.getFieldOrNull(field);
        if (sf == null) {
            FieldType ft = new StrField();
            sf = new SchemaField(field, ft);
        }
        // Return only stored fields, unless an explicit field list is specified
        if (returnOnlyStored && sf != null && !sf.stored()) {
            continue;
        }
        XLField xlField = new XLField();
        xlField.name = field;
        xlField.sf = sf;
        xlFields.put(field, xlField);
    }
    wb.addRow();
    //write header
    for (XLField xlField : xlFields.values()) {
        String printName = xlField.name;
        int colWidth = 14;
        String niceName = colNamesMap.get(xlField.name);
        if (niceName != null) {
            printName = niceName;
        }
        Integer niceWidth = colWidthsMap.get(xlField.name);
        if (niceWidth != null) {
            colWidth = niceWidth.intValue();
        }
        writeStr(xlField.name, printName, false);
        wb.setColWidth(colWidth);
        wb.setHeaderCell();
    }
    wb.setHeaderRow();
    wb.addRow();
    if (responseObj instanceof ResultContext) {
        writeDocuments(null, (ResultContext) responseObj);
    } else if (responseObj instanceof DocList) {
        ResultContext ctx = new BasicResultContext((DocList) responseObj, returnFields, null, null, req);
        writeDocuments(null, ctx);
    } else if (responseObj instanceof SolrDocumentList) {
        writeSolrDocumentList(null, (SolrDocumentList) responseObj, returnFields);
    }
    wb.flush(out);
    wb = null;
}
Also used : BasicResultContext(org.apache.solr.response.BasicResultContext) ResultContext(org.apache.solr.response.ResultContext) StrField(org.apache.solr.schema.StrField) SolrDocumentList(org.apache.solr.common.SolrDocumentList) FieldType(org.apache.solr.schema.FieldType) SchemaField(org.apache.solr.schema.SchemaField) BasicResultContext(org.apache.solr.response.BasicResultContext) SolrDocument(org.apache.solr.common.SolrDocument) SolrParams(org.apache.solr.common.params.SolrParams) DocList(org.apache.solr.search.DocList)

Aggregations

DocList (org.apache.solr.search.DocList)23 SolrParams (org.apache.solr.common.params.SolrParams)11 NamedList (org.apache.solr.common.util.NamedList)11 SolrIndexSearcher (org.apache.solr.search.SolrIndexSearcher)11 SolrDocument (org.apache.solr.common.SolrDocument)9 ArrayList (java.util.ArrayList)8 Query (org.apache.lucene.search.Query)8 SolrQueryRequest (org.apache.solr.request.SolrQueryRequest)8 SchemaField (org.apache.solr.schema.SchemaField)8 DocIterator (org.apache.solr.search.DocIterator)8 SolrDocumentList (org.apache.solr.common.SolrDocumentList)6 ResultContext (org.apache.solr.response.ResultContext)6 FieldType (org.apache.solr.schema.FieldType)6 Document (org.apache.lucene.document.Document)5 Sort (org.apache.lucene.search.Sort)5 SolrException (org.apache.solr.common.SolrException)5 LocalSolrQueryRequest (org.apache.solr.request.LocalSolrQueryRequest)5 SolrQueryResponse (org.apache.solr.response.SolrQueryResponse)5 IOException (java.io.IOException)4 HashMap (java.util.HashMap)4