
Example 6 with QueryParser

use of org.apache.lucene.queryparser.classic.QueryParser in project ansj_seg by NLPchina.

the class IndexTest method search.

private void search(Analyzer queryAnalyzer, Directory directory, String queryStr) throws CorruptIndexException, IOException, ParseException {
    IndexSearcher isearcher;
    // open a reader over the index
    DirectoryReader directoryReader = DirectoryReader.open(directory);
    // query the index
    isearcher = new IndexSearcher(directoryReader);
    QueryParser tq = new QueryParser(Version.LUCENE_44, "text", queryAnalyzer);
    Query query = tq.parse(queryStr);
    System.out.println(query);
    TopDocs hits = isearcher.search(query, 5);
    System.out.println(queryStr + ": found " + hits.totalHits + " records");
    for (int i = 0; i < hits.scoreDocs.length; i++) {
        int docId = hits.scoreDocs[i].doc;
        Document document = isearcher.doc(docId);
        System.out.println(toHighlighter(queryAnalyzer, query, document));
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TopDocs(org.apache.lucene.search.TopDocs) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) Query(org.apache.lucene.search.Query) DirectoryReader(org.apache.lucene.index.DirectoryReader) Document(org.apache.lucene.document.Document)
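The toHighlighter helper called in the loop above is not shown in this listing. A minimal sketch of what such a helper could look like, assuming the lucene-highlighter module is on the classpath and that the indexed field is named "text" (both are assumptions, not taken from the ansj_seg source):

// Hypothetical helper, not the ansj_seg implementation: returns the best-matching
// fragment of the "text" field wrapped in a simple HTML tag.
private String toHighlighter(Analyzer analyzer, Query query, Document document) {
    try {
        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<em>", "</em>");
        Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
        // getBestFragment re-analyzes the stored field value and returns the
        // highest-scoring fragment, or null if nothing matches
        return highlighter.getBestFragment(analyzer, "text", document.get("text"));
    } catch (Exception e) {
        return null;
    }
}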

Example 7 with QueryParser

use of org.apache.lucene.queryparser.classic.QueryParser in project titan by thinkaurelius.

the class LuceneIndex method query.

@Override
public Iterable<RawQuery.Result<String>> query(RawQuery query, KeyInformation.IndexRetriever informations, BaseTransaction tx) throws BackendException {
    Query q;
    try {
        q = new QueryParser("_all", analyzer).parse(query.getQuery());
    } catch (ParseException e) {
        throw new PermanentBackendException("Could not parse raw query: " + query.getQuery(), e);
    }
    try {
        IndexSearcher searcher = ((Transaction) tx).getSearcher(query.getStore());
        //Index does not yet exist
        if (searcher == null)
            return ImmutableList.of();
        long time = System.currentTimeMillis();
        //TODO: can we make offset more efficient in Lucene?
        final int offset = query.getOffset();
        int adjustedLimit = query.hasLimit() ? query.getLimit() : Integer.MAX_VALUE - 1;
        if (adjustedLimit < Integer.MAX_VALUE - 1 - offset)
            adjustedLimit += offset;
        else
            adjustedLimit = Integer.MAX_VALUE - 1;
        TopDocs docs = searcher.search(q, adjustedLimit);
        log.debug("Executed query [{}] in {} ms", q, System.currentTimeMillis() - time);
        List<RawQuery.Result<String>> result = new ArrayList<RawQuery.Result<String>>(docs.scoreDocs.length);
        for (int i = offset; i < docs.scoreDocs.length; i++) {
            result.add(new RawQuery.Result<String>(searcher.doc(docs.scoreDocs[i].doc).getField(DOCID).stringValue(), docs.scoreDocs[i].score));
        }
        return result;
    } catch (IOException e) {
        throw new TemporaryBackendException("Could not execute Lucene query", e);
    }
}
Also used : IOException(java.io.IOException) Point(com.spatial4j.core.shape.Point) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) ParseException(org.apache.lucene.queryparser.classic.ParseException)
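Note that the raw query string is handed to QueryParser unchanged, so syntactically invalid input (for example an unbalanced parenthesis) ends up on the ParseException path above. A minimal sketch, not from the Titan source, of how such input could be neutralized first with QueryParser's static escape method, reusing the same analyzer field as above:

private Query parseEscaped(String userInput) throws ParseException {
    // QueryParser.escape backslash-escapes Lucene syntax characters such as ( ) " ~ * ? :
    // so the input is parsed as literal terms rather than as query syntax
    QueryParser parser = new QueryParser("_all", analyzer);
    return parser.parse(QueryParser.escape(userInput));
}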

Example 8 with QueryParser

use of org.apache.lucene.queryparser.classic.QueryParser in project gitblit by gitblit.

the class TicketIndexer method delete.

/**
	 * Delete a ticket from the Lucene index.
	 *
	 * @param repository
	 * @param ticketId
	 * @return true if deleted, false if no record was deleted
	 * @throws Exception
	 */
private boolean delete(String repository, long ticketId, IndexWriter writer) throws Exception {
    StandardAnalyzer analyzer = new StandardAnalyzer();
    QueryParser qp = new QueryParser(Lucene.did.name(), analyzer);
    BooleanQuery query = new BooleanQuery.Builder().add(qp.parse(StringUtils.getSHA1(repository + ticketId)), Occur.MUST).build();
    int numDocsBefore = writer.numDocs();
    writer.deleteDocuments(query);
    writer.commit();
    closeSearcher();
    int numDocsAfter = writer.numDocs();
    if (numDocsBefore == numDocsAfter) {
        log.debug(MessageFormat.format("no records found to delete in {0}", repository));
        return false;
    } else {
        log.debug(MessageFormat.format("deleted {0} records in {1}", numDocsBefore - numDocsAfter, repository));
        return true;
    }
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer)
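Because the value stored in the Lucene.did field is a SHA-1 hash, parsing it through StandardAnalyzer only works as long as the analyzer keeps the hash as a single token. A sketch of an alternative, not the gitblit implementation, that deletes by an exact term and assumes the did field is indexed as a single untokenized value:

private void deleteByTerm(String repository, long ticketId, IndexWriter writer) throws Exception {
    // deleteDocuments(Term...) matches the exact indexed token, no query parsing involved
    writer.deleteDocuments(new Term(Lucene.did.name(), StringUtils.getSHA1(repository + ticketId)));
    writer.commit();
}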

Example 9 with QueryParser

use of org.apache.lucene.queryparser.classic.QueryParser in project gitblit by gitblit.

the class TicketIndexer method searchFor.

/**
	 * Search for tickets matching the query.  The returned tickets are
	 * shadows of the real ticket, but suitable for a results list.
	 *
	 * @param repository
	 * @param text
	 * @param page
	 * @param pageSize
	 * @return search results
	 */
public List<QueryResult> searchFor(RepositoryModel repository, String text, int page, int pageSize) {
    if (StringUtils.isEmpty(text)) {
        return Collections.emptyList();
    }
    Set<QueryResult> results = new LinkedHashSet<QueryResult>();
    StandardAnalyzer analyzer = new StandardAnalyzer();
    try {
        // search the title, description and content
        BooleanQuery.Builder bldr = new BooleanQuery.Builder();
        QueryParser qp;
        qp = new QueryParser(Lucene.title.name(), analyzer);
        qp.setAllowLeadingWildcard(true);
        bldr.add(qp.parse(text), Occur.SHOULD);
        qp = new QueryParser(Lucene.body.name(), analyzer);
        qp.setAllowLeadingWildcard(true);
        bldr.add(qp.parse(text), Occur.SHOULD);
        qp = new QueryParser(Lucene.content.name(), analyzer);
        qp.setAllowLeadingWildcard(true);
        bldr.add(qp.parse(text), Occur.SHOULD);
        IndexSearcher searcher = getSearcher();
        Query rewrittenQuery = searcher.rewrite(bldr.build());
        log.debug(rewrittenQuery.toString());
        TopScoreDocCollector collector = TopScoreDocCollector.create(5000);
        searcher.search(rewrittenQuery, collector);
        int offset = Math.max(0, (page - 1) * pageSize);
        ScoreDoc[] hits = collector.topDocs(offset, pageSize).scoreDocs;
        for (int i = 0; i < hits.length; i++) {
            int docId = hits[i].doc;
            Document doc = searcher.doc(docId);
            QueryResult result = docToQueryResult(doc);
            if (repository != null) {
                if (!result.repository.equalsIgnoreCase(repository.name)) {
                    continue;
                }
            }
            results.add(result);
        }
    } catch (Exception e) {
        log.error(MessageFormat.format("Exception while searching for {0}", text), e);
    }
    return new ArrayList<QueryResult>(results);
}
Also used : LinkedHashSet(java.util.LinkedHashSet) IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) BooleanQuery(org.apache.lucene.search.BooleanQuery) TopScoreDocCollector(org.apache.lucene.search.TopScoreDocCollector) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) ParseException(java.text.ParseException) IOException(java.io.IOException) ScoreDoc(org.apache.lucene.search.ScoreDoc) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer)
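The three per-field parsers above could also be replaced by MultiFieldQueryParser from the same classic queryparser package, which combines the per-field clauses with SHOULD by default. A sketch, not taken from the gitblit code:

String[] fields = { Lucene.title.name(), Lucene.body.name(), Lucene.content.name() };
MultiFieldQueryParser mfqp = new MultiFieldQueryParser(fields, analyzer);
mfqp.setAllowLeadingWildcard(true);
// produces a query equivalent to OR-ing the three single-field parses above
Query multiFieldQuery = mfqp.parse(text);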

Example 10 with QueryParser

use of org.apache.lucene.queryparser.classic.QueryParser in project geode by apache.

the class DistributedScoringJUnitTest method uniformDistributionProducesComparableScores.

/**
   * The goal of this test is to verify fair scoring if entries are uniformly distributed. It
   * compares ordered results from a single IndexRepository (IR) with merged-ordered results from
   * multiple repositories (ir1, ir2, ir3). The records inserted into IR are the same as the combined
   * records in irX. This simulates merging results from the buckets of a region.
   */
@Test
public void uniformDistributionProducesComparableScores() throws Exception {
    // the strings below have been grouped to be split between three index repositories
    String[] testStrings = { "hello world", "foo bar", "just any string", "hello world is usually the first program", "water on mars", "test world", "hello", "test hello test", "find the aliens" };
    QueryParser parser = new QueryParser("txt", analyzer);
    Query query = parser.parse("hello world");
    IndexRepositoryImpl singleIndexRepo = createIndexRepo();
    populateIndex(testStrings, singleIndexRepo, 0, testStrings.length);
    TopEntriesCollector collector = new TopEntriesCollector();
    singleIndexRepo.query(query, 100, collector);
    List<EntryScore<String>> singleResult = collector.getEntries().getHits();
    IndexRepositoryImpl distIR1 = createIndexRepo();
    populateIndex(testStrings, distIR1, 0, testStrings.length / 3);
    IndexRepositoryImpl distIR2 = createIndexRepo();
    populateIndex(testStrings, distIR2, testStrings.length / 3, (testStrings.length * 2) / 3);
    IndexRepositoryImpl distIR3 = createIndexRepo();
    populateIndex(testStrings, distIR3, (testStrings.length * 2) / 3, testStrings.length);
    ArrayList<TopEntriesCollector> collectors = new ArrayList<>();
    TopEntriesCollectorManager manager = new TopEntriesCollectorManager();
    TopEntriesCollector collector1 = manager.newCollector("");
    distIR1.query(query, 100, collector1);
    collectors.add(collector1);
    TopEntriesCollector collector2 = manager.newCollector("");
    distIR2.query(query, 100, collector2);
    collectors.add(collector2);
    TopEntriesCollector collector3 = manager.newCollector("");
    distIR3.query(query, 100, collector3);
    collectors.add(collector3);
    List<EntryScore<String>> distResult = manager.reduce(collectors).getEntries().getHits();
    Assert.assertEquals(singleResult.size(), distResult.size());
    Assert.assertTrue(singleResult.size() > 0);
    for (Iterator<EntryScore<String>> single = singleResult.iterator(), dist = distResult.iterator(); single.hasNext() && dist.hasNext(); ) {
        EntryScore<String> singleScore = single.next();
        EntryScore<String> distScore = dist.next();
        Assert.assertEquals(singleScore.getKey(), distScore.getKey());
    }
}
Also used : Query(org.apache.lucene.search.Query) ArrayList(java.util.ArrayList) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) Iterator(java.util.Iterator) IndexRepositoryImpl(org.apache.geode.cache.lucene.internal.repository.IndexRepositoryImpl) Test(org.junit.Test) UnitTest(org.apache.geode.test.junit.categories.UnitTest)
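The loop above only asserts that the keys come back in the same order. If EntryScore also exposes the score value (an assumption; the accessor is not shown in this listing), the loop body could additionally compare the scores within a small tolerance:

// hypothetical extra assertion, assuming EntryScore exposes a getScore() accessor
Assert.assertEquals(singleScore.getScore(), distScore.getScore(), 0.001f);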

Aggregations

QueryParser (org.apache.lucene.queryparser.classic.QueryParser): 67
Query (org.apache.lucene.search.Query): 46
IndexSearcher (org.apache.lucene.search.IndexSearcher): 30
Document (org.apache.lucene.document.Document): 25
IOException (java.io.IOException): 19
Analyzer (org.apache.lucene.analysis.Analyzer): 19
IndexReader (org.apache.lucene.index.IndexReader): 18
TopDocs (org.apache.lucene.search.TopDocs): 18
ScoreDoc (org.apache.lucene.search.ScoreDoc): 17
ArrayList (java.util.ArrayList): 14
BooleanQuery (org.apache.lucene.search.BooleanQuery): 14
StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer): 13
ParseException (org.apache.lucene.queryparser.classic.ParseException): 12
TermQuery (org.apache.lucene.search.TermQuery): 11
Term (org.apache.lucene.index.Term): 6
MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery): 6
WildcardQuery (org.apache.lucene.search.WildcardQuery): 6
EnglishAnalyzer (org.apache.lucene.analysis.en.EnglishAnalyzer): 5
IndexWriter (org.apache.lucene.index.IndexWriter): 5
ScoredDocuments (io.anserini.rerank.ScoredDocuments): 4