Search in sources :

Example 6 with Encoder

use of org.apache.lucene.search.highlight.Encoder in project lucene-solr by apache.

the class FastVectorHighlighterTest method testMultiValuedSortByScore.

/**
 * Indexes one document with four values of the multi-valued field "field" and checks that,
 * with discrete multi-value highlighting enabled on a {@link ScoreOrderFragmentsBuilder},
 * the first fragment returned is always the fully matching "hero of legend" value,
 * for each tested fragment size and for both frag-list builders.
 *
 * @throws IOException if indexing or reading the index fails
 */
public void testMultiValuedSortByScore() throws IOException {
    // try-with-resources closes the writer and directory even when an assertion below fails,
    // so a failing assert does not leave index resources open.
    try (Directory dir = newDirectory();
        IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())))) {
        FieldType type = new FieldType(TextField.TYPE_STORED);
        type.setStoreTermVectorOffsets(true);
        type.setStoreTermVectorPositions(true);
        type.setStoreTermVectors(true);
        type.freeze();

        // The first two values contain the best match ("hero of legend") but total a lower
        // score (3) than the bottom two values (4).
        Document doc = new Document();
        doc.add(new Field("field", "zero if naught", type));
        doc.add(new Field("field", "hero of legend", type));
        doc.add(new Field("field", "naught of hero", type));
        doc.add(new Field("field", "naught of hero", type));
        writer.addDocument(doc);

        FastVectorHighlighter highlighter = new FastVectorHighlighter();
        ScoreOrderFragmentsBuilder fragmentsBuilder = new ScoreOrderFragmentsBuilder();
        fragmentsBuilder.setDiscreteMultiValueHighlighting(true);

        String[] preTags = new String[] { "<b>" };
        String[] postTags = new String[] { "</b>" };
        Encoder encoder = new DefaultEncoder();
        int docId = 0;
        String expected = "<b>hero</b> <b>of</b> <b>legend</b>";

        BooleanQuery.Builder query = new BooleanQuery.Builder();
        query.add(clause("field", "hero"), Occur.SHOULD);
        query.add(clause("field", "of"), Occur.SHOULD);
        query.add(clause("field", "legend"), Occur.SHOULD);

        // The reader is opened from the writer and closed before it (inner resource closes first).
        try (IndexReader reader = DirectoryReader.open(writer)) {
            FieldQuery fieldQuery = highlighter.getFieldQuery(query.build(), reader);
            FragListBuilder[] fragListBuilders = new FragListBuilder[] { new SimpleFragListBuilder(), new WeightedFragListBuilder() };
            // Fragment sizes exercised for every frag-list builder.
            int[] fragCharSizes = { 20, 28, 30000 };
            for (FragListBuilder fragListBuilder : fragListBuilders) {
                for (int fragCharSize : fragCharSizes) {
                    String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", fragCharSize, 1, fragListBuilder, fragmentsBuilder, preTags, postTags, encoder);
                    assertEquals(expected, bestFragments[0]);
                }
            }
        }
    }
}
Also used: BooleanQuery (org.apache.lucene.search.BooleanQuery), Document (org.apache.lucene.document.Document), FieldType (org.apache.lucene.document.FieldType), StoredField (org.apache.lucene.document.StoredField), Field (org.apache.lucene.document.Field), TextField (org.apache.lucene.document.TextField), MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer), IndexWriter (org.apache.lucene.index.IndexWriter), DefaultEncoder (org.apache.lucene.search.highlight.DefaultEncoder), Encoder (org.apache.lucene.search.highlight.Encoder), IndexReader (org.apache.lucene.index.IndexReader), Directory (org.apache.lucene.store.Directory)

Example 7 with Encoder

use of org.apache.lucene.search.highlight.Encoder in project lucene-solr by apache.

the class TermVectorReusingLeafReader method alternateField.

/**
 * Returns the alternate highlight object for this field, or null if none.
 *
 * <p>If no alternate field is configured for {@code fieldName}, returns null. Otherwise, when
 * highlighting of the alternate field is enabled and the alternate field differs from
 * {@code fieldName}, the alternate field is highlighted via {@code doHighlightingOfField} with a
 * single snippet, and that result is returned if it is non-null. Failing that, the stored
 * non-binary values of the alternate field (or of {@code fieldName} when the document has no
 * alternate-field values) are encoded and returned as a {@code List<String>}, truncated so
 * that the total un-encoded length does not exceed the configured alternate field length
 * (when that length is positive).
 *
 * @param doc          the stored document to read fallback values from
 * @param docId        the document id passed through to {@code doHighlightingOfField}
 * @param fieldName    the field whose alternate is requested
 * @param fvhContainer passed through to {@code doHighlightingOfField}
 * @param query        the query passed through to {@code doHighlightingOfField}
 * @param reader       the index reader passed through to {@code doHighlightingOfField}
 * @param req          the current request; its params are temporarily overridden while the
 *                     alternate field is highlighted and are always restored afterwards
 * @return the highlights of the alternate field, a list of encoded fallback texts, or null
 * @throws IOException if highlighting the alternate field fails
 */
@SuppressWarnings("unchecked")
protected Object alternateField(Document doc, int docId, String fieldName, FvhContainer fvhContainer, Query query, IndexReader reader, SolrQueryRequest req) throws IOException {
    IndexSchema schema = req.getSearcher().getSchema();
    SolrParams params = req.getParams();
    String alternateField = params.getFieldParam(fieldName, HighlightParams.ALTERNATE_FIELD);
    int alternateFieldLen = params.getFieldInt(fieldName, HighlightParams.ALTERNATE_FIELD_LENGTH, 0);
    if (alternateField == null || alternateField.length() == 0) {
        return null;
    }
    if (params.getFieldBool(fieldName, HighlightParams.HIGHLIGHT_ALTERNATE, true) && !alternateField.equals(fieldName)) {
        // Try to highlight alternate field
        SchemaField schemaField = schema.getFieldOrNull(alternateField);
        if (schemaField != null) {
            HashMap<String, String> invariants = new HashMap<>();
            invariants.put("f." + alternateField + "." + HighlightParams.SNIPPETS, "1");
            // Enforce maxAlternateFieldLength by FRAGSIZE. Minimum 18 due to FVH limitations
            invariants.put("f." + alternateField + "." + HighlightParams.FRAGSIZE, alternateFieldLen > 0 ? String.valueOf(Math.max(18, alternateFieldLen)) : String.valueOf(Integer.MAX_VALUE));
            SolrParams origParams = req.getParams();
            req.setParams(SolrParams.wrapDefaults(new MapSolrParams(invariants), origParams));
            Object fieldHighlights;
            try {
                fieldHighlights = doHighlightingOfField(doc, docId, schemaField, fvhContainer, query, reader, req, params);
            } finally {
                // Restore the request's params even if highlighting throws, so the temporary
                // overrides never outlive this call.
                req.setParams(origParams);
            }
            if (fieldHighlights != null) {
                return fieldHighlights;
            }
        }
    }
    // Fallback to static non-highlighted
    IndexableField[] docFields = doc.getFields(alternateField);
    if (docFields.length == 0) {
        // The alternate field did not exist, treat the original field as fallback instead
        docFields = doc.getFields(fieldName);
    }
    // Only values without a binary representation are used as fallback text.
    List<String> listFields = new ArrayList<>();
    for (IndexableField field : docFields) {
        if (field.binaryValue() == null) {
            listFields.add(field.stringValue());
        }
    }
    if (listFields.isEmpty()) {
        return null;
    }
    Encoder encoder = getEncoder(fieldName, params);
    List<String> altList = new ArrayList<>();
    // Running total of un-encoded characters emitted so far.
    int len = 0;
    for (String altText : listFields) {
        if (alternateFieldLen <= 0) {
            // No length limit configured: emit every value in full.
            altList.add(encoder.encodeText(altText));
        } else {
            // Truncate the value that would cross the limit, then stop once the limit is reached.
            altList.add(len + altText.length() > alternateFieldLen ? encoder.encodeText(altText.substring(0, alternateFieldLen - len)) : encoder.encodeText(altText));
            len += altText.length();
            if (len >= alternateFieldLen) {
                break;
            }
        }
    }
    return altList;
}
Also used: HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), SchemaField (org.apache.solr.schema.SchemaField), IndexableField (org.apache.lucene.index.IndexableField), MapSolrParams (org.apache.solr.common.params.MapSolrParams), Encoder (org.apache.lucene.search.highlight.Encoder), SolrParams (org.apache.solr.common.params.SolrParams), IndexSchema (org.apache.solr.schema.IndexSchema)

Aggregations

Encoder (org.apache.lucene.search.highlight.Encoder): 7, ArrayList (java.util.ArrayList): 4, Analyzer (org.apache.lucene.analysis.Analyzer): 4, FieldMapper (org.elasticsearch.index.mapper.FieldMapper): 4, FetchPhaseExecutionException (org.elasticsearch.search.fetch.FetchPhaseExecutionException): 4, FetchSubPhase (org.elasticsearch.search.fetch.FetchSubPhase): 4, SearchContext (org.elasticsearch.search.internal.SearchContext): 4, IOException (java.io.IOException): 3, HashMap (java.util.HashMap): 2, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 2, Document (org.apache.lucene.document.Document): 2, Field (org.apache.lucene.document.Field): 2, FieldType (org.apache.lucene.document.FieldType): 2, StoredField (org.apache.lucene.document.StoredField): 2, TextField (org.apache.lucene.document.TextField): 2, IndexReader (org.apache.lucene.index.IndexReader): 2, IndexWriter (org.apache.lucene.index.IndexWriter): 2, BooleanQuery (org.apache.lucene.search.BooleanQuery): 2, IndexSearcher (org.apache.lucene.search.IndexSearcher): 2, DefaultEncoder (org.apache.lucene.search.highlight.DefaultEncoder): 2