Examples with RandomIndexWriter - org.apache.lucene.index.RandomIndexWriter

Example 71 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.

the class TestUnifiedHighlighter method testCustomFieldValueSource.

public void testCustomFieldValueSource() throws Exception {
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Document doc = new Document();
    final String text = "This is a test.  Just highlighting from postings. This is also a much sillier test.  Feel free to test test test test test test test.";
    Field body = new Field("body", text, fieldType);
    doc.add(body);
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();
    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {

        @Override
        protected List<CharSequence[]> loadFieldValues(String[] fields, DocIdSetIterator docIter, int cacheCharsThreshold) throws IOException {
            assert fields.length == 1;
            assert docIter.cost() == 1;
            docIter.nextDoc();
            return Collections.singletonList(new CharSequence[] { text });
        }

        @Override
        protected BreakIterator getBreakIterator(String field) {
            return new WholeBreakIterator();
        }
    };
    Query query = new TermQuery(new Term("body", "test"));
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(1, topDocs.totalHits);
    String[] snippets = highlighter.highlight("body", query, topDocs, 2);
    assertEquals(1, snippets.length);
    assertEquals("This is a <b>test</b>.  Just highlighting from postings. This is also a much sillier <b>test</b>.  Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[0]);
    ir.close();
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) IndexReader(org.apache.lucene.index.IndexReader) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 72 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.

the class TestUnifiedHighlighter method testBooleanMustNot.

public void testBooleanMustNot() throws Exception {
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Field body = new Field("body", "This sentence has both terms.  This sentence has only terms.", fieldType);
    Document document = new Document();
    document.add(body);
    iw.addDocument(document);
    IndexReader ir = iw.getReader();
    iw.close();
    IndexSearcher searcher = newSearcher(ir);
    BooleanQuery query2 = new BooleanQuery.Builder().add(new TermQuery(new Term("body", "both")), BooleanClause.Occur.MUST_NOT).build();
    BooleanQuery query = new BooleanQuery.Builder().add(new TermQuery(new Term("body", "terms")), BooleanClause.Occur.SHOULD).add(query2, BooleanClause.Occur.SHOULD).build();
    TopDocs topDocs = searcher.search(query, 10);
    assertEquals(1, topDocs.totalHits);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
    highlighter.setMaxLength(Integer.MAX_VALUE - 1);
    String[] snippets = highlighter.highlight("body", query, topDocs, 2);
    assertEquals(1, snippets.length);
    assertFalse(snippets[0].contains("<b>both</b>"));
    ir.close();
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 73 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.

the class TestUnifiedHighlighter method testEncode.

public void testEncode() throws Exception {
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Field body = new Field("body", "", fieldType);
    Document doc = new Document();
    doc.add(body);
    body.setStringValue("This is a test. Just a test highlighting from <i>postings</i>. Feel free to ignore.");
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();
    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {

        @Override
        protected PassageFormatter getFormatter(String field) {
            return new DefaultPassageFormatter("<b>", "</b>", "... ", true);
        }
    };
    Query query = new TermQuery(new Term("body", "highlighting"));
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(1, topDocs.totalHits);
    String[] snippets = highlighter.highlight("body", query, topDocs);
    assertEquals(1, snippets.length);
    assertEquals("Just&#32;a&#32;test&#32;<b>highlighting</b>&#32;from&#32;&lt;i&gt;postings&lt;&#x2F;i&gt;&#46;&#32;", snippets[0]);
    ir.close();
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 74 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.

the class TestUnifiedHighlighter method testCambridgeMA.

public void testCambridgeMA() throws Exception {
    BufferedReader r = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream("CambridgeMA.utf8"), StandardCharsets.UTF_8));
    String text = r.readLine();
    r.close();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Field body = new Field("body", text, fieldType);
    Document document = new Document();
    document.add(body);
    iw.addDocument(document);
    IndexReader ir = iw.getReader();
    iw.close();
    IndexSearcher searcher = newSearcher(ir);
    BooleanQuery query = new BooleanQuery.Builder().add(new TermQuery(new Term("body", "porter")), BooleanClause.Occur.SHOULD).add(new TermQuery(new Term("body", "square")), BooleanClause.Occur.SHOULD).add(new TermQuery(new Term("body", "massachusetts")), BooleanClause.Occur.SHOULD).build();
    TopDocs topDocs = searcher.search(query, 10);
    assertEquals(1, topDocs.totalHits);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
    highlighter.setMaxLength(Integer.MAX_VALUE - 1);
    String[] snippets = highlighter.highlight("body", query, topDocs, 2);
    assertEquals(1, snippets.length);
    assertTrue(snippets[0].contains("<b>Square</b>"));
    assertTrue(snippets[0].contains("<b>Porter</b>"));
    ir.close();
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) InputStreamReader(java.io.InputStreamReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) BufferedReader(java.io.BufferedReader) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 75 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project lucene-solr by apache.

the class TestUnifiedHighlighter method testCuriousGeorge.

public void testCuriousGeorge() throws Exception {
    String text = "It’s the formula for success for preschoolers—Curious George and fire trucks! " + "Curious George and the Firefighters is a story based on H. A. and Margret Rey’s " + "popular primate and painted in the original watercolor and charcoal style. " + "Firefighters are a famously brave lot, but can they withstand a visit from one curious monkey?";
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Field body = new Field("body", text, fieldType);
    Document document = new Document();
    document.add(body);
    iw.addDocument(document);
    IndexReader ir = iw.getReader();
    iw.close();
    IndexSearcher searcher = newSearcher(ir);
    PhraseQuery query = new PhraseQuery.Builder().add(new Term("body", "curious")).add(new Term("body", "george")).build();
    TopDocs topDocs = searcher.search(query, 10);
    assertEquals(1, topDocs.totalHits);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
    highlighter.setHighlightPhrasesStrictly(false);
    String[] snippets = highlighter.highlight("body", query, topDocs, 2);
    assertEquals(1, snippets.length);
    assertFalse(snippets[0].contains("<b>Curious</b>Curious"));
    ir.close();
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) PhraseQuery(org.apache.lucene.search.PhraseQuery) IndexReader(org.apache.lucene.index.IndexReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Aggregations

RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)779 Document (org.apache.lucene.document.Document)679 Directory (org.apache.lucene.store.Directory)588 IndexReader (org.apache.lucene.index.IndexReader)510 Term (org.apache.lucene.index.Term)325 IndexSearcher (org.apache.lucene.search.IndexSearcher)294 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)220 BytesRef (org.apache.lucene.util.BytesRef)142 Field (org.apache.lucene.document.Field)141 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)136 TopDocs (org.apache.lucene.search.TopDocs)134 TermQuery (org.apache.lucene.search.TermQuery)121 DirectoryReader (org.apache.lucene.index.DirectoryReader)120 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)110 ArrayList (java.util.ArrayList)95 StringField (org.apache.lucene.document.StringField)93 Analyzer (org.apache.lucene.analysis.Analyzer)88 BooleanQuery (org.apache.lucene.search.BooleanQuery)88 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)77 Test (org.junit.Test)75