Search in sources:

Example 6 with QueryScorer

use of org.apache.lucene.search.highlight.QueryScorer in project lucene-solr by apache.

the class TermVectorReusingLeafReader method getSpanQueryScorer.

/**
   * Builds a {@link org.apache.lucene.search.highlight.QueryScorer} for the given query and field,
   * configured from the request parameters.
   * <p>
   * The scorer is restricted to {@code fieldName} only when the per-field
   * {@link HighlightParams#FIELD_MATCH} parameter is true (it defaults to false). Payload usage
   * comes from the per-field {@link HighlightParams#PAYLOADS} parameter; when that is not set,
   * it defaults to whether the field's indexed terms report payloads, or to {@code true} if the
   * field has no terms or the lookup fails with an {@link IOException}.
   *
   * @param query The current query
   * @param fieldName The name of the field
   * @param tokenStream document text tokenStream that implements reset() efficiently (e.g. CachingTokenFilter).
   *                    If it's used, call reset() first. It is not read by this implementation.
   * @param request The SolrQueryRequest
   * @return the configured scorer
   */
protected QueryScorer getSpanQueryScorer(Query query, String fieldName, TokenStream tokenStream, SolrQueryRequest request) {
    // A null field tells the scorer not to restrict matching to one field.
    final boolean requireFieldMatch = request.getParams().getFieldBool(fieldName, HighlightParams.FIELD_MATCH, false);
    final String scorerField;
    if (requireFieldMatch) {
        scorerField = fieldName;
    } else {
        scorerField = null;
    }
    final QueryScorer spanScorer = new QueryScorer(query, scorerField);

    final boolean expandMultiTerm = request.getParams().getBool(HighlightParams.HIGHLIGHT_MULTI_TERM, true);
    spanScorer.setExpandMultiTermQuery(expandMultiTerm);

    // Fallback used when the index cannot tell us anything about this field.
    boolean payloadsByDefault = true;
    try {
        // The presence of a payload attribute on the tokenStream isn't a good indicator,
        // so the indexed terms of the field are consulted instead.
        final Terms fieldTerms = request.getSearcher().getSlowAtomicReader().fields().terms(fieldName);
        if (fieldTerms != null) {
            payloadsByDefault = fieldTerms.hasPayloads();
        }
    } catch (IOException e) {
        log.error("Couldn't check for existence of payloads", e);
    }

    final boolean usePayloads = request.getParams().getFieldBool(fieldName, HighlightParams.PAYLOADS, payloadsByDefault);
    spanScorer.setUsePayloads(usePayloads);
    return spanScorer;
}
Also used : QueryScorer(org.apache.lucene.search.highlight.QueryScorer) Terms(org.apache.lucene.index.Terms) IOException(java.io.IOException)

Example 7 with QueryScorer

use of org.apache.lucene.search.highlight.QueryScorer in project elasticsearch by elastic.

the class PlainHighlighterTests method testHighlightPhrase.

/**
 * Highlights the text "bar foo bar foo" for the phrase query "foo bar" on "field" and checks
 * that the single returned fragment wraps only the in-order "foo bar" occurrence in
 * {@code <B>} tags.
 */
public void testHighlightPhrase() throws Exception {
    // Phrase "foo bar" on the field "field".
    PhraseQuery.Builder phrase = new PhraseQuery.Builder();
    phrase.add(new Term("field", "foo"));
    phrase.add(new Term("field", "bar"));
    Query query = phrase.build();

    QueryScorer queryScorer = new CustomQueryScorer(query);
    org.apache.lucene.search.highlight.Highlighter highlighter =
            new org.apache.lucene.search.highlight.Highlighter(queryScorer);

    String text = "bar foo bar foo";
    String[] frags = highlighter.getBestFragments(new MockAnalyzer(random()), "field", text, 10);

    String[] expected = { "bar <B>foo</B> <B>bar</B> foo" };
    assertArrayEquals(expected, frags);
}
Also used : Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) GeoPointDistanceQuery(org.apache.lucene.spatial.geopoint.search.GeoPointDistanceQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) GeoPointInBBoxQuery(org.apache.lucene.spatial.geopoint.search.GeoPointInBBoxQuery) GeoPointInPolygonQuery(org.apache.lucene.spatial.geopoint.search.GeoPointInPolygonQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) QueryScorer(org.apache.lucene.search.highlight.QueryScorer) CustomQueryScorer(org.elasticsearch.search.fetch.subphase.highlight.CustomQueryScorer) CustomQueryScorer(org.elasticsearch.search.fetch.subphase.highlight.CustomQueryScorer) Term(org.apache.lucene.index.Term) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer)

Example 8 with QueryScorer

use of org.apache.lucene.search.highlight.QueryScorer in project gitblit by gitblit.

the class LuceneService method getHighlightedFragment.

/**
 * Builds the HTML snippet shown for a search result: up to three highlighted fragments of
 * {@code content}, each wrapped in a {@code <pre>} element with the matching terms wrapped in
 * {@code <span class="highlight">}.
 * <p>
 * When the highlighter yields no fragments, a blob result produces an empty string and any
 * other result produces the (clipped, HTML-escaped) content itself.
 *
 * @param analyzer analyzer handed to the highlighter to tokenize {@code content}
 * @param query query whose matches are highlighted in the "content" field
 * @param content text to extract fragments from; {@code null} is treated as empty
 * @param result search result supplying the object type and, for blobs, the path
 * @return the HTML for the fragments, or the fallback described above
 * @throws IOException propagated from the highlighter
 * @throws InvalidTokenOffsetsException propagated from the highlighter
 */
private String getHighlightedFragment(Analyzer analyzer, Query query, String content, SearchResult result) throws IOException, InvalidTokenOffsetsException {
    if (content == null) {
        content = "";
    }
    int tabLength = storedSettings.getInteger(Keys.web.tabLength, 4);
    // commits get longer fragments than every other object type
    int fragmentLength = SearchObjectType.commit == result.type ? 512 : 150;
    QueryScorer scorer = new QueryScorer(query, "content");
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentLength);
    // use an artificial delimiter for the token so the fragment can be
    // html-escaped first and the real highlight tags substituted afterwards
    String termTag = "!!--[";
    String termTagEnd = "]--!!";
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(termTag, termTagEnd);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(fragmenter);
    String[] fragments = highlighter.getBestFragments(analyzer, "content", content, 3);
    if (ArrayUtils.isEmpty(fragments)) {
        if (SearchObjectType.blob == result.type) {
            return "";
        }
        // clip commit message
        String fragment = content;
        if (fragment.length() > fragmentLength) {
            fragment = fragment.substring(0, fragmentLength) + "...";
        }
        return "<pre class=\"text\">" + StringUtils.escapeForHtml(fragment, true, tabLength) + "</pre>";
    }
    // make sure we have unique fragments, keeping their original order
    Set<String> uniqueFragments = new LinkedHashSet<String>();
    for (String fragment : fragments) {
        uniqueFragments.add(fragment);
    }
    fragments = uniqueFragments.toArray(new String[uniqueFragments.size()]);
    StringBuilder sb = new StringBuilder();
    for (int i = 0, len = fragments.length; i < len; i++) {
        String fragment = fragments[i];
        String tag = "<pre class=\"text\">";
        // resurrect the raw fragment from removing the artificial delimiters
        String raw = fragment.replace(termTag, "").replace(termTagEnd, "");
        // determine position of the raw fragment in the content;
        // -1 if it cannot be located (e.g. the content itself contains the delimiter text)
        int pos = content.indexOf(raw);
        if (pos > 0) {
            // restore complete first line of fragment: the line starts right after the
            // previous newline, or at offset 0 when the fragment is on the first line
            int lineStart = content.lastIndexOf('\n', pos - 1) + 1;
            // inject leading chunk of first fragment line
            fragment = content.substring(lineStart, pos) + fragment;
        }
        if (SearchObjectType.blob == result.type) {
            // count lines as offset into the content for this fragment;
            // fall back to line 1 when the fragment could not be located
            int line = pos < 0 ? 1 : Math.max(1, StringUtils.countLines(content.substring(0, pos)));
            // create fragment tag with line number and language
            String lang = "";
            String ext = StringUtils.getFileExtension(result.path).toLowerCase();
            if (!StringUtils.isEmpty(ext)) {
                // maintain leading space!
                lang = " lang-" + ext;
            }
            tag = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}{1}\">", line, lang);
        }
        sb.append(tag);
        // replace the artificial delimiter with html tags
        String html = StringUtils.escapeForHtml(fragment, false);
        html = html.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>");
        sb.append(html);
        sb.append("</pre>");
        if (i < len - 1) {
            // separator between consecutive fragments
            sb.append("<span class=\"ellipses\">...</span><br/>");
        }
    }
    return sb.toString();
}
Also used : LinkedHashSet(java.util.LinkedHashSet) SimpleSpanFragmenter(org.apache.lucene.search.highlight.SimpleSpanFragmenter) QueryScorer(org.apache.lucene.search.highlight.QueryScorer) Fragmenter(org.apache.lucene.search.highlight.Fragmenter) SimpleSpanFragmenter(org.apache.lucene.search.highlight.SimpleSpanFragmenter) SimpleHTMLFormatter(org.apache.lucene.search.highlight.SimpleHTMLFormatter) Highlighter(org.apache.lucene.search.highlight.Highlighter)

Example 9 with QueryScorer

use of org.apache.lucene.search.highlight.QueryScorer in project ansj_seg by NLPchina.

the class IndexTest method toHighlighter.

/**
 * Highlights the query matches in the document's "text" field, wrapping each match in
 * {@code <font color="red">} tags.
 *
 * @param analyzer analyzer used to tokenize the stored field value
 * @param query query whose matches are highlighted
 * @param doc document whose "text" field is highlighted
 * @return the best highlighted fragment; the unmodified field value when the highlighter
 *         returns no fragment; {@code null} when highlighting fails with an
 *         {@link IOException} or {@link InvalidTokenOffsetsException}
 */
private String toHighlighter(Analyzer analyzer, Query query, Document doc) {
    String field = "text";
    // stays null if highlighting fails
    String highlighted = null;
    try {
        SimpleHTMLFormatter redFont = new SimpleHTMLFormatter("<font color=\"red\">", "</font>");
        QueryScorer queryScorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(redFont, queryScorer);
        StringReader fieldReader = new StringReader(doc.get(field));
        TokenStream fieldTokens = analyzer.tokenStream("text", fieldReader);
        String bestFragment = highlighter.getBestFragment(fieldTokens, doc.get(field));
        if (bestFragment != null) {
            highlighted = bestFragment;
        } else {
            // nothing to highlight: fall back to the plain field value
            highlighted = doc.get(field);
        }
    } catch (InvalidTokenOffsetsException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return highlighted;
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) QueryScorer(org.apache.lucene.search.highlight.QueryScorer) InvalidTokenOffsetsException(org.apache.lucene.search.highlight.InvalidTokenOffsetsException) StringReader(java.io.StringReader) IOException(java.io.IOException) SimpleHTMLFormatter(org.apache.lucene.search.highlight.SimpleHTMLFormatter) Highlighter(org.apache.lucene.search.highlight.Highlighter)

Example 10 with QueryScorer

use of org.apache.lucene.search.highlight.QueryScorer in project ansj_seg by NLPchina.

the class HeightLightTest method toHighlighter.

/**
 * Highlights the query matches in the document's "text" field, wrapping each match in
 * {@code <font color="red">} tags.
 * <p>
 * NOTE(review): the field value is tokenized with {@code indexAnalyzer}, which is defined
 * outside this method; the {@code analyzer} parameter is not read here - confirm this is
 * intended.
 *
 * @param analyzer not used by this method
 * @param query query whose matches are highlighted
 * @param doc document whose "text" field is highlighted
 * @return the best highlighted fragment; the unmodified field value when the highlighter
 *         returns no fragment; {@code null} when highlighting fails with an
 *         {@link IOException} or {@link InvalidTokenOffsetsException}
 */
private static String toHighlighter(Analyzer analyzer, Query query, Document doc) {
    String field = "text";
    // stays null if highlighting fails
    String highlighted = null;
    try {
        SimpleHTMLFormatter redFont = new SimpleHTMLFormatter("<font color=\"red\">", "</font>");
        QueryScorer queryScorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(redFont, queryScorer);
        StringReader fieldReader = new StringReader(doc.get(field));
        TokenStream fieldTokens = indexAnalyzer.tokenStream("text", fieldReader);
        String bestFragment = highlighter.getBestFragment(fieldTokens, doc.get(field));
        if (bestFragment != null) {
            highlighted = bestFragment;
        } else {
            // nothing to highlight: fall back to the plain field value
            highlighted = doc.get(field);
        }
    } catch (InvalidTokenOffsetsException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return highlighted;
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) QueryScorer(org.apache.lucene.search.highlight.QueryScorer) InvalidTokenOffsetsException(org.apache.lucene.search.highlight.InvalidTokenOffsetsException) StringReader(java.io.StringReader) IOException(java.io.IOException) SimpleHTMLFormatter(org.apache.lucene.search.highlight.SimpleHTMLFormatter) Highlighter(org.apache.lucene.search.highlight.Highlighter)

Aggregations

QueryScorer (org.apache.lucene.search.highlight.QueryScorer)15 IOException (java.io.IOException)12 Highlighter (org.apache.lucene.search.highlight.Highlighter)10 SimpleHTMLFormatter (org.apache.lucene.search.highlight.SimpleHTMLFormatter)10 TokenStream (org.apache.lucene.analysis.TokenStream)9 StringReader (java.io.StringReader)8 InvalidTokenOffsetsException (org.apache.lucene.search.highlight.InvalidTokenOffsetsException)8 Analyzer (org.apache.lucene.analysis.Analyzer)4 Query (org.apache.lucene.search.Query)4 IndexReader (org.apache.lucene.index.IndexReader)3 BooleanQuery (org.apache.lucene.search.BooleanQuery)3 IndexSearcher (org.apache.lucene.search.IndexSearcher)3 TermQuery (org.apache.lucene.search.TermQuery)3 WildcardQuery (org.apache.lucene.search.WildcardQuery)3 AbstractIterator (com.google.common.collect.AbstractIterator)2 ArrayList (java.util.ArrayList)2 Deque (java.util.Deque)2 Map (java.util.Map)2 Set (java.util.Set)2 SpellcheckHelper (org.apache.jackrabbit.oak.plugins.index.lucene.util.SpellcheckHelper)2