Use of org.apache.lucene.queryparser.classic.QueryParser in project ansj_seg by NLPchina.
The class IndexTest, method search.
private void search(Analyzer queryAnalyzer, Directory directory, String queryStr) throws CorruptIndexException, IOException, ParseException {
    // open the index for querying
    DirectoryReader directoryReader = DirectoryReader.open(directory);
    IndexSearcher isearcher = new IndexSearcher(directoryReader);
    QueryParser tq = new QueryParser(Version.LUCENE_44, "text", queryAnalyzer);
    Query query = tq.parse(queryStr);
    System.out.println(query);
    TopDocs hits = isearcher.search(query, 5);
    System.out.println(queryStr + ": found " + hits.totalHits + " records in total!");
    for (int i = 0; i < hits.scoreDocs.length; i++) {
        int docId = hits.scoreDocs[i].doc;
        Document document = isearcher.doc(docId);
        System.out.println(toHighlighter(queryAnalyzer, query, document));
    }
}
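The toHighlighter helper referenced above is project code that is not part of this snippet. A minimal sketch of what such a helper might look like, assuming Lucene's highlighter module and a stored "text" field; the em tags and exception handling are illustrative choices, not the project's:

import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;

private String toHighlighter(Analyzer analyzer, Query query, Document document) {
    try {
        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<em>", "</em>");
        Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
        // extract the best-scoring fragment of the "text" field, with matches wrapped in <em> tags
        String fragment = highlighter.getBestFragment(analyzer, "text", document.get("text"));
        return fragment == null ? document.get("text") : fragment;
    } catch (IOException | InvalidTokenOffsetsException e) {
        throw new RuntimeException(e);
    }
}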
Use of org.apache.lucene.queryparser.classic.QueryParser in project titan by thinkaurelius.
The class LuceneIndex, method query.
@Override
public Iterable<RawQuery.Result<String>> query(RawQuery query, KeyInformation.IndexRetriever informations, BaseTransaction tx) throws BackendException {
    Query q;
    try {
        q = new QueryParser("_all", analyzer).parse(query.getQuery());
    } catch (ParseException e) {
        throw new PermanentBackendException("Could not parse raw query: " + query.getQuery(), e);
    }
    try {
        IndexSearcher searcher = ((Transaction) tx).getSearcher(query.getStore());
        // index does not yet exist
        if (searcher == null) {
            return ImmutableList.of();
        }
        long time = System.currentTimeMillis();
        // TODO: can we make offset more efficient in Lucene?
        final int offset = query.getOffset();
        int adjustedLimit = query.hasLimit() ? query.getLimit() : Integer.MAX_VALUE - 1;
        // widen the limit by the offset, guarding against integer overflow
        if (adjustedLimit < Integer.MAX_VALUE - 1 - offset) {
            adjustedLimit += offset;
        } else {
            adjustedLimit = Integer.MAX_VALUE - 1;
        }
        TopDocs docs = searcher.search(q, adjustedLimit);
        log.debug("Executed query [{}] in {} ms", q, System.currentTimeMillis() - time);
        List<RawQuery.Result<String>> result = new ArrayList<RawQuery.Result<String>>(docs.scoreDocs.length);
        // skip the first `offset` hits; this search API has no native offset
        for (int i = offset; i < docs.scoreDocs.length; i++) {
            result.add(new RawQuery.Result<String>(searcher.doc(docs.scoreDocs[i].doc).getField(DOCID).stringValue(), docs.scoreDocs[i].score));
        }
        return result;
    } catch (IOException e) {
        throw new TemporaryBackendException("Could not execute Lucene query", e);
    }
}
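The TODO above asks whether the offset can be made more efficient. One commonly used option in Lucene is IndexSearcher.searchAfter, which resumes collection after the last hit of the previous page instead of collecting and discarding the skipped hits. A minimal pagination sketch under that assumption; pageSize and the per-hit processing are placeholders:

static void pageThrough(IndexSearcher searcher, Query q, int pageSize) throws IOException {
    ScoreDoc last = null;
    while (true) {
        // resume after the previous page's last hit rather than skipping from the top
        TopDocs page = (last == null) ? searcher.search(q, pageSize) : searcher.searchAfter(last, q, pageSize);
        if (page.scoreDocs.length == 0) {
            break;
        }
        for (ScoreDoc sd : page.scoreDocs) {
            Document doc = searcher.doc(sd.doc);
            // process each hit here
        }
        last = page.scoreDocs[page.scoreDocs.length - 1];
    }
}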
Use of org.apache.lucene.queryparser.classic.QueryParser in project gitblit by gitblit.
The class TicketIndexer, method delete.
/**
 * Delete a ticket from the Lucene index.
 *
 * @param repository
 * @param ticketId
 * @param writer
 * @return true if deleted, false if no record was deleted
 * @throws Exception
 */
private boolean delete(String repository, long ticketId, IndexWriter writer) throws Exception {
    StandardAnalyzer analyzer = new StandardAnalyzer();
    QueryParser qp = new QueryParser(Lucene.did.name(), analyzer);
    BooleanQuery query = new BooleanQuery.Builder()
            .add(qp.parse(StringUtils.getSHA1(repository + ticketId)), Occur.MUST)
            .build();
    // compare the document count before and after to detect whether anything was deleted
    int numDocsBefore = writer.numDocs();
    writer.deleteDocuments(query);
    writer.commit();
    closeSearcher();
    int numDocsAfter = writer.numDocs();
    if (numDocsBefore == numDocsAfter) {
        log.debug(MessageFormat.format("no records found to delete in {0}", repository));
        return false;
    } else {
        log.debug(MessageFormat.format("deleted {0} records in {1}", numDocsBefore - numDocsAfter, repository));
        return true;
    }
}
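For the delete-by-query above to match anything, the ticket document must have been indexed with the same digest under Lucene.did. A hypothetical indexing sketch, assuming the digest is stored as a single unanalyzed token (StringField) so the parsed term matches it exactly:

static void indexTicketDigest(IndexWriter writer, String repository, long ticketId) throws IOException {
    Document doc = new Document();
    // one unanalyzed token; assumes getSHA1 returns lowercase hex, so the
    // StandardAnalyzer-parsed term at delete time matches this stored token
    doc.add(new StringField(Lucene.did.name(), StringUtils.getSHA1(repository + ticketId), Field.Store.YES));
    writer.addDocument(doc);
}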
Use of org.apache.lucene.queryparser.classic.QueryParser in project gitblit by gitblit.
The class TicketIndexer, method searchFor.
/**
 * Search for tickets matching the query. The returned tickets are
 * shadows of the real ticket, but suitable for a results list.
 *
 * @param repository
 * @param text
 * @param page
 * @param pageSize
 * @return search results
 */
public List<QueryResult> searchFor(RepositoryModel repository, String text, int page, int pageSize) {
    if (StringUtils.isEmpty(text)) {
        return Collections.emptyList();
    }
    Set<QueryResult> results = new LinkedHashSet<QueryResult>();
    StandardAnalyzer analyzer = new StandardAnalyzer();
    try {
        // search the title, description and content
        BooleanQuery.Builder bldr = new BooleanQuery.Builder();
        QueryParser qp;
        qp = new QueryParser(Lucene.title.name(), analyzer);
        qp.setAllowLeadingWildcard(true);
        bldr.add(qp.parse(text), Occur.SHOULD);
        qp = new QueryParser(Lucene.body.name(), analyzer);
        qp.setAllowLeadingWildcard(true);
        bldr.add(qp.parse(text), Occur.SHOULD);
        qp = new QueryParser(Lucene.content.name(), analyzer);
        qp.setAllowLeadingWildcard(true);
        bldr.add(qp.parse(text), Occur.SHOULD);
        IndexSearcher searcher = getSearcher();
        Query rewrittenQuery = searcher.rewrite(bldr.build());
        log.debug(rewrittenQuery.toString());
        TopScoreDocCollector collector = TopScoreDocCollector.create(5000);
        searcher.search(rewrittenQuery, collector);
        // page through the collected hits
        int offset = Math.max(0, (page - 1) * pageSize);
        ScoreDoc[] hits = collector.topDocs(offset, pageSize).scoreDocs;
        for (int i = 0; i < hits.length; i++) {
            int docId = hits[i].doc;
            Document doc = searcher.doc(docId);
            QueryResult result = docToQueryResult(doc);
            // optionally restrict results to a single repository
            if (repository != null && !result.repository.equalsIgnoreCase(repository.name)) {
                continue;
            }
            results.add(result);
        }
    } catch (Exception e) {
        log.error(MessageFormat.format("Exception while searching for {0}", text), e);
    }
    return new ArrayList<QueryResult>(results);
}
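A hedged usage sketch; indexer and repo are hypothetical variables, and the leading-wildcard query only works because the parsers above call setAllowLeadingWildcard(true):

// hypothetical usage: first page of 25 hits, optionally filtered to one repository
List<QueryResult> hits = indexer.searchFor(repo, "*session*", 1, 25);
for (QueryResult qr : hits) {
    System.out.println(qr.repository);
}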
Use of org.apache.lucene.queryparser.classic.QueryParser in project geode by apache.
The class DistributedScoringJUnitTest, method uniformDistributionProducesComparableScores.
/**
 * The goal of this test is to verify fair scoring if entries are uniformly distributed. It
 * compares ordered results from a single IndexRepository (IR) with merged-ordered results from
 * multiple repositories (ir1, ir2, ir3). The records inserted in IR are the same as the combined
 * records in irX. This simulates merging of results from buckets of a region.
 */
@Test
public void uniformDistributionProducesComparableScores() throws Exception {
    // the strings below have been grouped to be split between three index repositories
    String[] testStrings = { "hello world", "foo bar", "just any string",
        "hello world is usually the first program", "water on mars", "test world",
        "hello", "test hello test", "find the aliens" };
    QueryParser parser = new QueryParser("txt", analyzer);
    Query query = parser.parse("hello world");

    // baseline: all records in a single index repository
    IndexRepositoryImpl singleIndexRepo = createIndexRepo();
    populateIndex(testStrings, singleIndexRepo, 0, testStrings.length);
    TopEntriesCollector collector = new TopEntriesCollector();
    singleIndexRepo.query(query, 100, collector);
    List<EntryScore<String>> singleResult = collector.getEntries().getHits();

    // the same records split uniformly across three repositories
    IndexRepositoryImpl distIR1 = createIndexRepo();
    populateIndex(testStrings, distIR1, 0, testStrings.length / 3);
    IndexRepositoryImpl distIR2 = createIndexRepo();
    populateIndex(testStrings, distIR2, testStrings.length / 3, (testStrings.length * 2) / 3);
    IndexRepositoryImpl distIR3 = createIndexRepo();
    populateIndex(testStrings, distIR3, (testStrings.length * 2) / 3, testStrings.length);

    ArrayList<TopEntriesCollector> collectors = new ArrayList<>();
    TopEntriesCollectorManager manager = new TopEntriesCollectorManager();
    TopEntriesCollector collector1 = manager.newCollector("");
    distIR1.query(query, 100, collector1);
    collectors.add(collector1);
    TopEntriesCollector collector2 = manager.newCollector("");
    distIR2.query(query, 100, collector2);
    collectors.add(collector2);
    TopEntriesCollector collector3 = manager.newCollector("");
    distIR3.query(query, 100, collector3);
    collectors.add(collector3);
    List<EntryScore<String>> distResult = manager.reduce(collectors).getEntries().getHits();

    Assert.assertEquals(singleResult.size(), distResult.size());
    Assert.assertTrue(singleResult.size() > 0);
    // the merged ordering must match the single-repository ordering, key by key
    for (Iterator<EntryScore<String>> single = singleResult.iterator(), dist = distResult.iterator();
         single.hasNext() && dist.hasNext(); ) {
        EntryScore<String> singleScore = single.next();
        EntryScore<String> distScore = dist.next();
        Assert.assertEquals(singleScore.getKey(), distScore.getKey());
    }
}
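The comparability checked above hinges on the reduce step merging independently collected hit lists into one global order. A minimal sketch of that idea, not geode's actual implementation; it assumes EntryScore exposes a getScore() accessor, which is an assumption since only getKey() appears above:

static List<EntryScore<String>> mergeByScore(List<List<EntryScore<String>>> lists, int limit) {
    // max-heap on score; polling yields a single globally ordered result list
    PriorityQueue<EntryScore<String>> heap =
        new PriorityQueue<>((a, b) -> Float.compare(b.getScore(), a.getScore()));
    for (List<EntryScore<String>> l : lists) {
        heap.addAll(l);
    }
    List<EntryScore<String>> merged = new ArrayList<>();
    while (!heap.isEmpty() && merged.size() < limit) {
        merged.add(heap.poll());
    }
    return merged;
}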