use of org.apache.lucene.search.highlight.SimpleHTMLFormatter in project ansj_seg by NLPchina.
the class IndexTest method toHighlighter.
/**
 * Highlight settings: wraps matching query terms in the given document field.
 *
 * @param analyzer the analyzer used to tokenize the field value
 * @param query    the query whose terms are highlighted
 * @param doc      the matched document
 * @return the best highlighted fragment, or the raw field value if no fragment was produced
 */
private String toHighlighter(Analyzer analyzer, Query query, Document doc) {
    String field = "text";
    try {
        SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>");
        Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query));
        TokenStream tokenStream = analyzer.tokenStream(field, new StringReader(doc.get(field)));
        String highlighterStr = highlighter.getBestFragment(tokenStream, doc.get(field));
        // Fall back to the raw field value when no fragment could be highlighted.
        return highlighterStr == null ? doc.get(field) : highlighterStr;
    } catch (IOException e) {
        e.printStackTrace();
    } catch (InvalidTokenOffsetsException e) {
        e.printStackTrace();
    }
    return null;
}
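For context, a minimal caller might look like the sketch below. It is an illustrative assumption, not part of the ansj_seg test: the index path, query string and StandardAnalyzer are placeholders (the original test wires in its own ansj analyzer and index), the usual Lucene imports are omitted to match the snippets on this page, and constructor signatures vary slightly across Lucene versions.

// Hypothetical caller, for illustration only: index path, analyzer and query
// string are assumptions, not taken from the ansj_seg test itself.
private void printHighlights() throws Exception {
    Analyzer analyzer = new StandardAnalyzer();  // placeholder; the test would use its ansj analyzer
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/tmp/ansj-index")))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Query query = new QueryParser("text", analyzer).parse("lucene");
        for (ScoreDoc hit : searcher.search(query, 10).scoreDocs) {
            Document doc = searcher.doc(hit.doc);
            // Each hit is printed with matches wrapped in <font color="red"> tags.
            System.out.println(toHighlighter(analyzer, query, doc));
        }
    }
}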
use of org.apache.lucene.search.highlight.SimpleHTMLFormatter in project zeppelin by apache.
the class LuceneSearch method query.
/* (non-Javadoc)
* @see org.apache.zeppelin.search.Search#query(java.lang.String)
*/
@Override
public List<Map<String, String>> query(String queryStr) {
    if (null == indexDirectory) {
        throw new IllegalStateException("Something went wrong at instance creation time, index dir is null");
    }
    List<Map<String, String>> result = Collections.emptyList();
    try (IndexReader indexReader = DirectoryReader.open(indexDirectory)) {
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        Analyzer analyzer = new StandardAnalyzer();
        MultiFieldQueryParser parser = new MultiFieldQueryParser(new String[] { SEARCH_FIELD_TEXT, SEARCH_FIELD_TITLE }, analyzer);
        Query query = parser.parse(queryStr);
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Searching for: {}", query.toString(SEARCH_FIELD_TEXT));
        }
        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
        Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
        result = doSearch(indexSearcher, query, analyzer, highlighter);
    } catch (IOException e) {
        LOGGER.error("Failed to open index dir {}, make sure indexing finished OK", indexDirectory, e);
    } catch (ParseException e) {
        LOGGER.error("Failed to parse query {}", queryStr, e);
    }
    return result;
}
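The doSearch helper called above is not shown on this page. A hypothetical sketch of how such a helper could consume the highlighter is given below; it is not Zeppelin's actual implementation, and the result-map keys, the hit limit and the HashMap/ArrayList choices are assumptions for illustration. SEARCH_FIELD_TEXT, SEARCH_FIELD_TITLE and LOGGER are the constants referenced by the snippet above.

// Hypothetical sketch only -- not Zeppelin's actual doSearch(). It shows how the
// highlighter built in query() could be applied to each hit.
private List<Map<String, String>> doSearch(IndexSearcher searcher, Query query,
                                           Analyzer analyzer, Highlighter highlighter) {
    List<Map<String, String>> matches = new ArrayList<>();
    try {
        TopDocs hits = searcher.search(query, 20);  // result-size limit is an arbitrary assumption
        for (ScoreDoc sd : hits.scoreDocs) {
            Document doc = searcher.doc(sd.doc);
            String text = doc.get(SEARCH_FIELD_TEXT);
            // getBestFragment returns null when the query does not match this field.
            String fragment = (text == null) ? null
                    : highlighter.getBestFragment(analyzer, SEARCH_FIELD_TEXT, text);
            Map<String, String> hit = new HashMap<>();
            hit.put("header", doc.get(SEARCH_FIELD_TITLE));   // map keys are hypothetical
            hit.put("snippet", fragment != null ? fragment : text);
            matches.add(hit);
        }
    } catch (IOException | InvalidTokenOffsetsException e) {
        LOGGER.error("Failed to extract highlighted fragments", e);
    }
    return matches;
}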
use of org.apache.lucene.search.highlight.SimpleHTMLFormatter in project sppanblog4springboot by whoismy8023.
the class LuceneSearcher method setHighlighter.
/**
 * Highlight settings: wraps matching query terms in the given document field.
 *
 * @param query the query whose terms are highlighted
 * @param doc   the matched document
 * @param field the name of the field to highlight
 * @return the best highlighted fragment, or the raw field value if no fragment was produced
 */
private String setHighlighter(Query query, Document doc, String field) {
    try {
        SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>");
        Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query));
        String fieldValue = doc.get(field);
        String highlighterStr = highlighter.getBestFragment(analyzer, field, fieldValue);
        return highlighterStr == null ? fieldValue : highlighterStr;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return null;
}
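Note that this snippet uses the Analyzer-based getBestFragment overload, while the ansj_seg example above builds the TokenStream itself; both overloads exist on the classic Highlighter and yield the same fragment. A minimal sketch of the equivalent TokenStream-based call, reusing the variable names from setHighlighter above:

// Equivalent to highlighter.getBestFragment(analyzer, field, fieldValue), with the
// TokenStream constructed explicitly (sketch; variables as in setHighlighter above).
TokenStream tokenStream = analyzer.tokenStream(field, new StringReader(fieldValue));
String highlighterStr = highlighter.getBestFragment(tokenStream, fieldValue);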
use of org.apache.lucene.search.highlight.SimpleHTMLFormatter in project jspwiki by apache.
the class LuceneSearchProvider method findPages.
/**
 * Searches pages using a particular combination of flags.
 *
 * @param query The query to perform in Lucene query language
 * @param flags A set of flags
 * @param wikiContext The wiki context used for permission checks on the results
 * @return A Collection of SearchResult instances
 * @throws ProviderException if there is a problem with the backend
 */
public Collection findPages(String query, int flags, WikiContext wikiContext) throws ProviderException {
    IndexSearcher searcher = null;
    ArrayList<SearchResult> list = null;
    Highlighter highlighter = null;
    try {
        String[] queryfields = { LUCENE_PAGE_CONTENTS, LUCENE_PAGE_NAME, LUCENE_AUTHOR, LUCENE_ATTACHMENTS };
        QueryParser qp = new MultiFieldQueryParser(Version.LUCENE_47, queryfields, getLuceneAnalyzer());
        // QueryParser qp = new QueryParser( LUCENE_PAGE_CONTENTS, getLuceneAnalyzer() );
        Query luceneQuery = qp.parse(query);
        if ((flags & FLAG_CONTEXTS) != 0) {
            highlighter = new Highlighter(new SimpleHTMLFormatter("<span class=\"searchmatch\">", "</span>"), new SimpleHTMLEncoder(), new QueryScorer(luceneQuery));
        }
        try {
            File dir = new File(m_luceneDirectory);
            Directory luceneDir = new SimpleFSDirectory(dir, null);
            IndexReader reader = DirectoryReader.open(luceneDir);
            searcher = new IndexSearcher(reader);
        } catch (Exception ex) {
            log.info("Lucene not yet ready; indexing not started", ex);
            return null;
        }
        ScoreDoc[] hits = searcher.search(luceneQuery, MAX_SEARCH_HITS).scoreDocs;
        AuthorizationManager mgr = m_engine.getAuthorizationManager();
        list = new ArrayList<SearchResult>(hits.length);
        for (int curr = 0; curr < hits.length; curr++) {
            int docID = hits[curr].doc;
            Document doc = searcher.doc(docID);
            String pageName = doc.get(LUCENE_ID);
            WikiPage page = m_engine.getPage(pageName, WikiPageProvider.LATEST_VERSION);
            if (page != null) {
                if (page instanceof Attachment) {
                    // Currently attachments don't look nice on the search-results page
                    // When the search-results are cleaned up this can be enabled again.
                }
                PagePermission pp = new PagePermission(page, PagePermission.VIEW_ACTION);
                if (mgr.checkPermission(wikiContext.getWikiSession(), pp)) {
                    int score = (int) (hits[curr].score * 100);
                    // Get highlighted search contexts
                    String text = doc.get(LUCENE_PAGE_CONTENTS);
                    String[] fragments = new String[0];
                    if (text != null && highlighter != null) {
                        TokenStream tokenStream = getLuceneAnalyzer().tokenStream(LUCENE_PAGE_CONTENTS, new StringReader(text));
                        fragments = highlighter.getBestFragments(tokenStream, text, MAX_FRAGMENTS);
                    }
                    SearchResult result = new SearchResultImpl(page, score, fragments);
                    list.add(result);
                }
            } else {
                log.error("Lucene found a result page '" + pageName + "' that could not be loaded, removing from Lucene cache");
                pageRemoved(new WikiPage(m_engine, pageName));
            }
        }
    } catch (IOException e) {
        log.error("Failed during lucene search", e);
    } catch (ParseException e) {
        log.info("Broken query; cannot parse query ", e);
        throw new ProviderException("You have entered a query Lucene cannot process: " + e.getMessage());
    } catch (InvalidTokenOffsetsException e) {
        log.error("Tokens are incompatible with provided text ", e);
    } finally {
        if (searcher != null) {
            try {
                searcher.getIndexReader().close();
            } catch (IOException e) {
                log.error(e);
            }
        }
    }
    return list;
}
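Unlike the first two examples, this one (and the OpenOLAT example below) passes a SimpleHTMLEncoder as the second Highlighter argument, so any markup already present in the indexed text is HTML-escaped before the highlight tags are inserted. A minimal sketch of the effect follows; the tag strings, sample text and sample query are illustrative assumptions, not taken from JSPWiki.

// Sketch of the SimpleHTMLEncoder effect; tags and sample text are hypothetical.
Highlighter h = new Highlighter(new SimpleHTMLFormatter("<b>", "</b>"),
        new SimpleHTMLEncoder(), new QueryScorer(luceneQuery));
// For an indexed value such as "use <script> with lucene" and a query on "lucene",
// the best fragment comes back as "use &lt;script&gt; with <b>lucene</b>":
// the page's own markup is escaped, and only the highlighter's tags remain HTML.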
use of org.apache.lucene.search.highlight.SimpleHTMLFormatter in project openolat by klemens.
the class SearchResultsImpl method doHighlight.
/**
 * Highlights (bold, color) the query words in the result document and sets the
 * highlight result for either the content or the description field.
 * @param query
 * @param analyzer
 * @param doc
 * @param resultDocument
 * @throws IOException
 */
private void doHighlight(Query query, Analyzer analyzer, Document doc, ResultDocument resultDocument) throws IOException {
    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(HIGHLIGHT_PRE_TAG, HIGHLIGHT_POST_TAG), new SimpleHTMLEncoder(), new QueryScorer(query));
    // Get the 3 best fragments of the content and separate them with "..."
    try {
        // highlight content
        String content = doc.get(AbstractOlatDocument.CONTENT_FIELD_NAME);
        TokenStream tokenStream = analyzer.tokenStream(AbstractOlatDocument.CONTENT_FIELD_NAME, new StringReader(content));
        String highlightResult = highlighter.getBestFragments(tokenStream, content, 3, HIGHLIGHT_SEPARATOR);
        // if no highlight result was found in the content => look in the description
        if (highlightResult.length() == 0) {
            String description = doc.get(AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
            tokenStream = analyzer.tokenStream(AbstractOlatDocument.DESCRIPTION_FIELD_NAME, new StringReader(description));
            highlightResult = highlighter.getBestFragments(tokenStream, description, 3, HIGHLIGHT_SEPARATOR);
            resultDocument.setHighlightingDescription(true);
        }
        resultDocument.setHighlightResult(highlightResult);
        // highlight title
        String title = doc.get(AbstractOlatDocument.TITLE_FIELD_NAME);
        title = title.trim();
        if (title.length() > 128) {
            title = FilterFactory.getHtmlTagAndDescapingFilter().filter(title);
            title = Formatter.truncate(title, 128);
        }
        tokenStream = analyzer.tokenStream(AbstractOlatDocument.TITLE_FIELD_NAME, new StringReader(title));
        String highlightTitle = highlighter.getBestFragments(tokenStream, title, 3, " ");
        resultDocument.setHighlightTitle(highlightTitle);
    } catch (InvalidTokenOffsetsException e) {
        log.warn("", e);
    }
}