use of org.apache.lucene.analysis.standard.StandardAnalyzer in project gitblit by gitblit.
the class LuceneService method search.
/**
* Searches the specified repositories for the given text or query
*
* @param text
* if the text is null or empty, null is returned
* @param page
* the page number to retrieve. page is 1-indexed.
* @param pageSize
* the number of elements to return for this page
* @param repositories
* a list of repositories to search. if no repositories are
* specified null is returned.
* @return a list of SearchResults in order from highest to the lowest score
*
*/
public List<SearchResult> search(String text, int page, int pageSize, String... repositories) {
if (StringUtils.isEmpty(text)) {
return null;
}
if (ArrayUtils.isEmpty(repositories)) {
return null;
}
Set<SearchResult> results = new LinkedHashSet<SearchResult>();
StandardAnalyzer analyzer = new StandardAnalyzer();
try {
// default search checks summary and content
BooleanQuery.Builder bldr = new BooleanQuery.Builder();
QueryParser qp;
qp = new QueryParser(FIELD_SUMMARY, analyzer);
qp.setAllowLeadingWildcard(true);
bldr.add(qp.parse(text), Occur.SHOULD);
qp = new QueryParser(FIELD_CONTENT, analyzer);
qp.setAllowLeadingWildcard(true);
bldr.add(qp.parse(text), Occur.SHOULD);
IndexSearcher searcher;
if (repositories.length == 1) {
// single repository search
searcher = getIndexSearcher(repositories[0]);
} else {
// multiple repository search
List<IndexReader> readers = new ArrayList<IndexReader>();
for (String repository : repositories) {
IndexSearcher repositoryIndex = getIndexSearcher(repository);
readers.add(repositoryIndex.getIndexReader());
}
IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]);
MultiSourceReader reader = new MultiSourceReader(rdrs);
searcher = new IndexSearcher(reader);
}
BooleanQuery query = bldr.build();
Query rewrittenQuery = searcher.rewrite(query);
logger.debug(rewrittenQuery.toString());
TopScoreDocCollector collector = TopScoreDocCollector.create(5000);
searcher.search(rewrittenQuery, collector);
int offset = Math.max(0, (page - 1) * pageSize);
ScoreDoc[] hits = collector.topDocs(offset, pageSize).scoreDocs;
int totalHits = collector.getTotalHits();
for (int i = 0; i < hits.length; i++) {
int docId = hits[i].doc;
Document doc = searcher.doc(docId);
SearchResult result = createSearchResult(doc, hits[i].score, offset + i + 1, totalHits);
if (repositories.length == 1) {
// single repository search
result.repository = repositories[0];
} else {
// multi-repository search
MultiSourceReader reader = (MultiSourceReader) searcher.getIndexReader();
int index = reader.getSourceIndex(docId);
result.repository = repositories[index];
}
String content = doc.get(FIELD_CONTENT);
result.fragment = getHighlightedFragment(analyzer, query, content, result);
results.add(result);
}
} catch (Exception e) {
logger.error(MessageFormat.format("Exception while searching for {0}", text), e);
}
return new ArrayList<SearchResult>(results);
}
use of org.apache.lucene.analysis.standard.StandardAnalyzer in project Openfire by igniterealtime.
the class WordMatchRouter method setStemmingEnabled.
/**
* Toggles whether stemming will be applied to keywords. Stemming is a mechanism
* for matching multiple versions of the same word. For example, when stemming is
* enabled the word "cats" will match "cat" and "thrill" will match "thrilling".<p/>
*
* The stemming implementation uses the Porter algorithm, which is only suitable
* for English text. If your content is non-english, stemming should be disabled.
*
* @param stemmingEnabled true if stemming should be enabled.
*/
public void setStemmingEnabled(boolean stemmingEnabled) {
// If not changing the value, do nothing.
if (this.stemmingEnabled == stemmingEnabled) {
return;
}
if (stemmingEnabled) {
// Turn of stemming.
this.stemmingEnabled = true;
analyzer = new StemmingAnalyzer();
} else {
// Turn off stemming.
this.stemmingEnabled = false;
analyzer = new StandardAnalyzer();
}
}
use of org.apache.lucene.analysis.standard.StandardAnalyzer in project Openfire by igniterealtime.
the class ArchiveSearcher method luceneSearch.
/**
* Searches the Lucene index for all archived conversations using the specified search.
*
* @param search the search.
* @return the collection of conversations that match the search.
*/
private Collection<Conversation> luceneSearch(ArchiveSearch search) {
try {
IndexSearcher searcher = archiveIndexer.getSearcher();
final StandardAnalyzer analyzer = new StandardAnalyzer();
// Create the query based on the search terms.
Query query = new QueryParser("text", analyzer).parse(search.getQueryString());
// See if the user wants to sort on something other than relevance. If so, we need
// to tell Lucene to do sorting. Default to a null sort so that it has no
// effect if sorting hasn't been selected.
Sort sort = null;
if (search.getSortField() != ArchiveSearch.SortField.relevance) {
if (search.getSortField() == ArchiveSearch.SortField.date) {
sort = new Sort("date", search.getSortOrder() == ArchiveSearch.SortOrder.descending);
}
}
// See if we need to filter on date. Default to a null filter so that it has
// no effect if date filtering hasn't been selected.
Filter filter = null;
if (search.getDateRangeMin() != null || search.getDateRangeMax() != null) {
String min = null;
if (search.getDateRangeMin() != null) {
min = DateTools.dateToString(search.getDateRangeMin(), DateTools.Resolution.DAY);
}
String max = null;
if (search.getDateRangeMax() != null) {
max = DateTools.dateToString(search.getDateRangeMax(), DateTools.Resolution.DAY);
}
// ENT-271: don't include upper or lower bound if these elements are null
filter = new RangeFilter("date", min, max, min != null, max != null);
}
// See if we need to match external conversations. This will only be true
// when less than two conversation participants are specified and external
// wildcard matching is enabled.
Collection<JID> participants = search.getParticipants();
if (search.getParticipants().size() < 2 && search.isExternalWildcardMode()) {
TermQuery externalQuery = new TermQuery(new Term("external", "true"));
// Add this query to the existing query.
BooleanQuery booleanQuery = new BooleanQuery();
booleanQuery.add(query, BooleanClause.Occur.MUST);
booleanQuery.add(externalQuery, BooleanClause.Occur.MUST);
query = booleanQuery;
}
// See if we need to restrict the search to certain users.
if (!participants.isEmpty()) {
if (participants.size() == 1) {
String jid = participants.iterator().next().toBareJID();
Query participantQuery = new QueryParser("jid", analyzer).parse(jid);
// Add this query to the existing query.
BooleanQuery booleanQuery = new BooleanQuery();
booleanQuery.add(query, BooleanClause.Occur.MUST);
booleanQuery.add(participantQuery, BooleanClause.Occur.MUST);
query = booleanQuery;
} else // Otherwise there are two participants.
{
Iterator<JID> iter = participants.iterator();
String participant1 = iter.next().toBareJID();
String participant2 = iter.next().toBareJID();
BooleanQuery participantQuery = new BooleanQuery();
participantQuery.add(new QueryParser("jid", analyzer).parse(participant1), BooleanClause.Occur.MUST);
participantQuery.add(new QueryParser("jid", analyzer).parse(participant2), BooleanClause.Occur.MUST);
// Add this query to the existing query.
BooleanQuery booleanQuery = new BooleanQuery();
booleanQuery.add(query, BooleanClause.Occur.MUST);
booleanQuery.add(participantQuery, BooleanClause.Occur.MUST);
query = booleanQuery;
}
}
Hits hits = searcher.search(query, filter, sort);
int startIndex = search.getStartIndex();
int endIndex = startIndex + search.getNumResults() - 1;
// The end index can't be after the end of the results.
if (endIndex > hits.length() - 1) {
// endIndex = hits.length() - 1;
// TODO: We need to determine if this is necessary.
}
// If the start index is positioned after the end, return an empty list.
if (((endIndex - startIndex) + 1) <= 0) {
return Collections.emptyList();
} else // Otherwise return the results.
{
return new LuceneQueryResults(hits, startIndex, endIndex);
}
} catch (ParseException pe) {
Log.error(pe.getMessage(), pe);
return Collections.emptySet();
} catch (IOException ioe) {
Log.error(ioe.getMessage(), ioe);
return Collections.emptySet();
}
}
use of org.apache.lucene.analysis.standard.StandardAnalyzer in project languagetool by languagetool-org.
the class EmptyLuceneIndexCreator method main.
public static void main(String[] args) throws IOException {
if (args.length != 1) {
System.out.println("Usage: " + EmptyLuceneIndexCreator.class.getSimpleName() + " <indexPath>");
System.exit(1);
}
Analyzer analyzer = new StandardAnalyzer();
IndexWriterConfig config = new IndexWriterConfig(analyzer);
Directory directory = FSDirectory.open(new File(args[0]).toPath());
try (IndexWriter writer = new IndexWriter(directory, config)) {
FieldType fieldType = new FieldType();
fieldType.setIndexOptions(IndexOptions.DOCS);
fieldType.setStored(true);
Field countField = new Field("totalTokenCount", String.valueOf(0), fieldType);
Document doc = new Document();
doc.add(countField);
writer.addDocument(doc);
}
}
use of org.apache.lucene.analysis.standard.StandardAnalyzer in project languagetool by languagetool-org.
the class TextIndexCreator method index.
private void index(File outputDir, String[] inputFiles) throws IOException {
Analyzer analyzer = new StandardAnalyzer();
IndexWriterConfig config = new IndexWriterConfig(analyzer);
try (FSDirectory directory = FSDirectory.open(outputDir.toPath());
IndexWriter indexWriter = new IndexWriter(directory, config)) {
for (String inputFile : inputFiles) {
indexFile(indexWriter, inputFile);
}
}
}
Aggregations