Search in sources :

Example 1 with ScoreDoc

use of org.apache.lucene.search.ScoreDoc in project camel by apache.

the class LuceneSearcher method search.

/**
 * Runs a search for the given phrase and packages the results as a {@link Hits} container.
 *
 * <p>The total hit count comes from {@code doSearch}; the individual results are read from the
 * {@code hits} member (presumably populated by {@code doSearch} - confirm against the rest of
 * the class).
 *
 * @param searchPhrase     phrase handed to {@code doSearch}
 * @param maxNumberOfHits  upper bound handed to {@code doSearch}
 * @param luceneVersion    Lucene version handed to {@code doSearch}
 * @param returnLuceneDocs when {@code true}, each {@link Hit} also carries the Lucene document
 * @return the populated result container
 * @throws Exception if the search or the document lookup fails
 */
public Hits search(String searchPhrase, int maxNumberOfHits, Version luceneVersion, boolean returnLuceneDocs) throws Exception {
    Hits result = new Hits();
    result.setNumberOfHits(doSearch(searchPhrase, maxNumberOfHits, luceneVersion));

    for (ScoreDoc scored : hits) {
        // Resolve the stored document once; it feeds both the optional payload and the data field.
        Document stored = indexSearcher.doc(scored.doc);

        Hit entry = new Hit();
        if (returnLuceneDocs) {
            entry.setDocument(stored);
        }
        entry.setHitLocation(scored.doc);
        entry.setScore(scored.score);
        entry.setData(stored.get("contents"));

        result.getHit().add(entry);
    }

    return result;
}
Also used : Hits(org.apache.camel.processor.lucene.support.Hits) Hit(org.apache.camel.processor.lucene.support.Hit) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc)

Example 2 with ScoreDoc

use of org.apache.lucene.search.ScoreDoc in project zeppelin by apache.

the class LuceneSearch method doSearch.

/**
 * Executes {@code query} against {@code searcher} and converts the top 20 hits into result maps.
 *
 * <p>Each result map has the keys {@code id}, {@code name}, {@code snippet}, {@code text} and
 * {@code header}. Documents without an {@code ID_FIELD} value are logged and skipped.
 *
 * <p>{@code ImmutableMap.of} rejects null values, so a missing title or text is stored as an
 * empty string instead of aborting the whole search with a {@code NullPointerException}.
 *
 * @param searcher    index searcher used for the query and for document lookup
 * @param query       query to execute; also used in log messages
 * @param analyzer    analyzer used to build the token streams for highlighting
 * @param highlighter highlighter that produces the snippet and header fragments
 * @return the list of result maps; contains whatever was collected before an
 *         {@link IOException} or {@code InvalidTokenOffsetsException} was logged
 */
private List<Map<String, String>> doSearch(IndexSearcher searcher, Query query, Analyzer analyzer, Highlighter highlighter) {
    List<Map<String, String>> matchingParagraphs = Lists.newArrayList();
    try {
        ScoreDoc[] hits = searcher.search(query, 20).scoreDocs;
        for (int i = 0; i < hits.length; i++) {
            LOG.debug("doc={} score={}", hits[i].doc, hits[i].score);
            int id = hits[i].doc;
            Document doc = searcher.doc(id);
            String path = doc.get(ID_FIELD);
            if (path == null) {
                LOG.info("{}. No {} for this document", i + 1, ID_FIELD);
                continue;
            }
            LOG.debug("{}. {}", i + 1, path);

            String title = doc.get("title");
            if (title != null) {
                LOG.debug("   Title: {}", title);
            }

            String text = doc.get(SEARCH_FIELD_TEXT);
            String header = doc.get(SEARCH_FIELD_TITLE);

            String fragment = "";
            if (text != null) {
                TokenStream tokenStream = TokenSources.getTokenStream(searcher.getIndexReader(), id, SEARCH_FIELD_TEXT, analyzer);
                TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, true, 3);
                // Check for null before any dereference; the original only checked after using frag.length.
                if (frag != null) {
                    LOG.debug("    {} fragments found for query '{}'", frag.length, query);
                    for (int j = 0; j < frag.length; j++) {
                        if ((frag[j] != null) && (frag[j].getScore() > 0)) {
                            LOG.debug("    Fragment: {}", frag[j].toString());
                        }
                    }
                    if (frag.length > 0 && frag[0] != null) {
                        fragment = frag[0].toString();
                    }
                }
            }

            if (header != null) {
                TokenStream tokenTitle = TokenSources.getTokenStream(searcher.getIndexReader(), id, SEARCH_FIELD_TITLE, analyzer);
                TextFragment[] frgTitle = highlighter.getBestTextFragments(tokenTitle, header, true, 3);
                header = (frgTitle != null && frgTitle.length > 0 && frgTitle[0] != null) ? frgTitle[0].toString() : "";
            } else {
                header = "";
            }

            // ImmutableMap.of throws NullPointerException on null values; fall back to "".
            String name = (title != null) ? title : "";
            String fullText = (text != null) ? text : "";

            // "id" holds <noteId>/paragraph/<paragraphId>
            matchingParagraphs.add(
                ImmutableMap.of("id", path, "name", name, "snippet", fragment, "text", fullText, "header", header));
        }
    } catch (IOException | InvalidTokenOffsetsException e) {
        LOG.error("Exception on searching for {}", query, e);
    }
    return matchingParagraphs;
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) TextFragment(org.apache.lucene.search.highlight.TextFragment) ScoreDoc(org.apache.lucene.search.ScoreDoc) InvalidTokenOffsetsException(org.apache.lucene.search.highlight.InvalidTokenOffsetsException) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap)

Example 3 with ScoreDoc

use of org.apache.lucene.search.ScoreDoc in project crate by crate.

the class BooleanColumnReferenceTest method testBooleanExpression.

/**
 * Walks the hits of a match-all query (up to 20 requested) and checks that the boolean column
 * yields {@code true} for even positions and {@code false} for odd positions.
 */
@Test
public void testBooleanExpression() throws Exception {
    BooleanColumnReference reference = new BooleanColumnReference(column);
    reference.startCollect(ctx);
    reference.setNextReader(readerContext);

    IndexSearcher indexSearcher = new IndexSearcher(readerContext.reader());
    ScoreDoc[] scoreDocs = indexSearcher.search(new MatchAllDocsQuery(), 20).scoreDocs;

    for (int position = 0; position < scoreDocs.length; position++) {
        reference.setNextDocId(scoreDocs[position].doc);
        boolean expectEven = (position % 2 == 0);
        assertThat(reference.value(), is(expectEven));
    }
}
Also used : BooleanColumnReference(io.crate.operation.reference.doc.lucene.BooleanColumnReference) IndexSearcher(org.apache.lucene.search.IndexSearcher) TopDocs(org.apache.lucene.search.TopDocs) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) ScoreDoc(org.apache.lucene.search.ScoreDoc) Test(org.junit.Test)

Example 4 with ScoreDoc

use of org.apache.lucene.search.ScoreDoc in project crate by crate.

the class ByteColumnReferenceTest method testByteExpression.

/**
 * Walks the hits of a match-all query (up to 20 requested) and checks that the byte column
 * yields consecutive values starting at -10.
 */
@Test
public void testByteExpression() throws Exception {
    ByteColumnReference reference = new ByteColumnReference(column);
    reference.startCollect(ctx);
    reference.setNextReader(readerContext);

    IndexSearcher indexSearcher = new IndexSearcher(readerContext.reader());
    ScoreDoc[] scoreDocs = indexSearcher.search(new MatchAllDocsQuery(), 20).scoreDocs;

    for (int position = 0; position < scoreDocs.length; position++) {
        reference.setNextDocId(scoreDocs[position].doc);
        // Same sequence as a byte counter initialised to -10 and incremented per hit.
        byte expected = (byte) (position - 10);
        assertThat(reference.value(), is(expected));
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TopDocs(org.apache.lucene.search.TopDocs) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) ByteColumnReference(io.crate.operation.reference.doc.lucene.ByteColumnReference) ScoreDoc(org.apache.lucene.search.ScoreDoc) Test(org.junit.Test)

Example 5 with ScoreDoc

use of org.apache.lucene.search.ScoreDoc in project crate by crate.

the class ShortColumnReferenceTest method testShortExpression.

/**
 * Walks the hits of a match-all query (up to 20 requested) and checks that the short column
 * yields consecutive values starting at -10.
 */
@Test
public void testShortExpression() throws Exception {
    ShortColumnReference reference = new ShortColumnReference(column);
    reference.startCollect(ctx);
    reference.setNextReader(readerContext);

    IndexSearcher indexSearcher = new IndexSearcher(readerContext.reader());
    ScoreDoc[] scoreDocs = indexSearcher.search(new MatchAllDocsQuery(), 20).scoreDocs;

    for (int position = 0; position < scoreDocs.length; position++) {
        reference.setNextDocId(scoreDocs[position].doc);
        // Same sequence as a short counter initialised to -10 and incremented per hit.
        short expected = (short) (position - 10);
        assertThat(reference.value(), is(expected));
    }
}
Also used : ShortColumnReference(io.crate.operation.reference.doc.lucene.ShortColumnReference) IndexSearcher(org.apache.lucene.search.IndexSearcher) TopDocs(org.apache.lucene.search.TopDocs) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) ScoreDoc(org.apache.lucene.search.ScoreDoc) Test(org.junit.Test)

Aggregations

ScoreDoc (org.apache.lucene.search.ScoreDoc): 211, TopDocs (org.apache.lucene.search.TopDocs): 119, IndexSearcher (org.apache.lucene.search.IndexSearcher): 94, Document (org.apache.lucene.document.Document): 89, Query (org.apache.lucene.search.Query): 65, TermQuery (org.apache.lucene.search.TermQuery): 49, ArrayList (java.util.ArrayList): 46, IOException (java.io.IOException): 44, IndexReader (org.apache.lucene.index.IndexReader): 42, Term (org.apache.lucene.index.Term): 38, Directory (org.apache.lucene.store.Directory): 37, BooleanQuery (org.apache.lucene.search.BooleanQuery): 26, MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery): 23, Sort (org.apache.lucene.search.Sort): 22, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 21, Test (org.junit.Test): 21, RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 20, FieldDoc (org.apache.lucene.search.FieldDoc): 20, HashMap (java.util.HashMap): 18, HashSet (java.util.HashSet): 17