Search in sources :

Example 41 with IndexWriterConfig

use of org.apache.lucene.index.IndexWriterConfig in project elasticsearch by elastic.

the class CustomUnifiedHighlighterTests method assertHighlightOneDoc.

private void assertHighlightOneDoc(String fieldName, String[] inputs, Analyzer analyzer, Query query, Locale locale, BreakIterator breakIterator, int noMatchSize, String[] expectedPassages) throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newTieredMergePolicy(random()));
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
    FieldType ft = new FieldType(TextField.TYPE_STORED);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    ft.freeze();
    Document doc = new Document();
    for (String input : inputs) {
        Field field = new Field(fieldName, "", ft);
        field.setStringValue(input);
        doc.add(field);
    }
    iw.addDocument(doc);
    DirectoryReader reader = iw.getReader();
    IndexSearcher searcher = newSearcher(reader);
    iw.close();
    TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 1, Sort.INDEXORDER);
    assertThat(topDocs.totalHits, equalTo(1));
    String rawValue = Strings.arrayToDelimitedString(inputs, String.valueOf(MULTIVAL_SEP_CHAR));
    CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, analyzer, new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder()), locale, breakIterator, rawValue, noMatchSize);
    highlighter.setFieldMatcher((name) -> "text".equals(name));
    final Snippet[] snippets = highlighter.highlightField("text", query, topDocs.scoreDocs[0].doc, expectedPassages.length);
    assertEquals(snippets.length, expectedPassages.length);
    for (int i = 0; i < snippets.length; i++) {
        assertEquals(snippets[i].getText(), expectedPassages[i]);
    }
    reader.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) DirectoryReader(org.apache.lucene.index.DirectoryReader) Snippet(org.apache.lucene.search.highlight.Snippet) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) DefaultEncoder(org.apache.lucene.search.highlight.DefaultEncoder) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 42 with IndexWriterConfig

use of org.apache.lucene.index.IndexWriterConfig in project elasticsearch by elastic.

the class CustomPostingsHighlighterTests method testCustomPostingsHighlighter.

public void testCustomPostingsHighlighter() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    //good position but only one match
    final String firstValue = "This is a test. Just a test1 highlighting from postings highlighter.";
    Field body = new Field("body", "", offsetsType);
    Document doc = new Document();
    doc.add(body);
    body.setStringValue(firstValue);
    //two matches, not the best snippet due to its length though
    final String secondValue = "This is the second highlighting value to perform highlighting on a longer text that gets scored lower.";
    Field body2 = new Field("body", "", offsetsType);
    doc.add(body2);
    body2.setStringValue(secondValue);
    //two matches and short, will be scored highest
    final String thirdValue = "This is highlighting the third short highlighting value.";
    Field body3 = new Field("body", "", offsetsType);
    doc.add(body3);
    body3.setStringValue(thirdValue);
    //one match, same as first but at the end, will be scored lower due to its position
    final String fourthValue = "Just a test4 highlighting from postings highlighter.";
    Field body4 = new Field("body", "", offsetsType);
    doc.add(body4);
    body4.setStringValue(fourthValue);
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();
    String firstHlValue = "Just a test1 <b>highlighting</b> from postings highlighter.";
    String secondHlValue = "This is the second <b>highlighting</b> value to perform <b>highlighting</b> on a longer text that gets scored lower.";
    String thirdHlValue = "This is <b>highlighting</b> the third short <b>highlighting</b> value.";
    String fourthHlValue = "Just a test4 <b>highlighting</b> from postings highlighter.";
    IndexSearcher searcher = newSearcher(ir);
    Query query = new TermQuery(new Term("body", "highlighting"));
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertThat(topDocs.totalHits, equalTo(1));
    int docId = topDocs.scoreDocs[0].doc;
    String fieldValue = firstValue + HighlightUtils.PARAGRAPH_SEPARATOR + secondValue + HighlightUtils.PARAGRAPH_SEPARATOR + thirdValue + HighlightUtils.PARAGRAPH_SEPARATOR + fourthValue;
    CustomPostingsHighlighter highlighter = new CustomPostingsHighlighter(null, new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder()), fieldValue, false);
    Snippet[] snippets = highlighter.highlightField("body", query, searcher, docId, 5);
    assertThat(snippets.length, equalTo(4));
    assertThat(snippets[0].getText(), equalTo(firstHlValue));
    assertThat(snippets[1].getText(), equalTo(secondHlValue));
    assertThat(snippets[2].getText(), equalTo(thirdHlValue));
    assertThat(snippets[3].getText(), equalTo(fourthHlValue));
    ir.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) Snippet(org.apache.lucene.search.highlight.Snippet) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) DefaultEncoder(org.apache.lucene.search.highlight.DefaultEncoder) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 43 with IndexWriterConfig

use of org.apache.lucene.index.IndexWriterConfig in project ansj_seg by NLPchina.

the class HeightLightTest method index.

private static void index(Analyzer analysis, String content) throws CorruptIndexException, IOException {
    Document doc = new Document();
    IndexWriter iwriter = new IndexWriter(directory, new IndexWriterConfig(analysis));
    doc.add(new TextField("text", content, Field.Store.YES));
    iwriter.addDocument(doc);
    iwriter.commit();
    iwriter.close();
}
Also used : IndexWriter(org.apache.lucene.index.IndexWriter) TextField(org.apache.lucene.document.TextField) Document(org.apache.lucene.document.Document) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 44 with IndexWriterConfig

use of org.apache.lucene.index.IndexWriterConfig in project ansj_seg by NLPchina.

the class IndexAndTest method test.

@Test
public void test() throws Exception {
    DicLibrary.put(DicLibrary.DEFAULT, "../../library/default.dic");
    PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new AnsjAnalyzer(TYPE.index_ansj));
    Directory directory = null;
    IndexWriter iwriter = null;
    IndexWriterConfig ic = new IndexWriterConfig(analyzer);
    String text = "旅游和服务是最好的";
    System.out.println(IndexAnalysis.parse(text));
    // 建立内存索引对象
    directory = new RAMDirectory();
    iwriter = new IndexWriter(directory, ic);
    addContent(iwriter, text);
    iwriter.commit();
    iwriter.close();
    System.out.println("索引建立完毕");
    Analyzer queryAnalyzer = new AnsjAnalyzer(AnsjAnalyzer.TYPE.index_ansj);
    System.out.println("index ok to search!");
    for (Term t : IndexAnalysis.parse(text)) {
        System.out.println(t.getName());
        search(queryAnalyzer, directory, "\"" + t.getName() + "\"");
    }
}
Also used : AnsjAnalyzer(org.ansj.lucene6.AnsjAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) Term(org.ansj.domain.Term) AnsjAnalyzer(org.ansj.lucene6.AnsjAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) RAMDirectory(org.apache.lucene.store.RAMDirectory) PerFieldAnalyzerWrapper(org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Example 45 with IndexWriterConfig

use of org.apache.lucene.index.IndexWriterConfig in project ansj_seg by NLPchina.

the class IndexTest method indexTest.

@Test
public void indexTest() throws CorruptIndexException, LockObtainFailedException, IOException, ParseException {
    PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new AnsjAnalyzer(TYPE.index_ansj));
    Directory directory = null;
    IndexWriter iwriter = null;
    IndexWriterConfig ic = new IndexWriterConfig(analyzer);
    // 建立内存索引对象
    directory = new RAMDirectory();
    iwriter = new IndexWriter(directory, ic);
    addContent(iwriter, "助推企业转型升级提供强有力的技术支持和服保障。中心的建成将使青岛的服务器承载能力突破10万台,达到世界一流水平。");
    addContent(iwriter, "涉及民生的部分商品和服务成本监审政策");
    addContent(iwriter, "我穿着和服");
    iwriter.commit();
    iwriter.close();
    System.out.println("索引建立完毕");
    Analyzer queryAnalyzer = new AnsjAnalyzer(AnsjAnalyzer.TYPE.dic_ansj);
    System.out.println("index ok to search!");
    search(queryAnalyzer, directory, "\"和服\"");
}
Also used : AnsjAnalyzer(org.ansj.lucene6.AnsjAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) AnsjAnalyzer(org.ansj.lucene6.AnsjAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) RAMDirectory(org.apache.lucene.store.RAMDirectory) PerFieldAnalyzerWrapper(org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Aggregations

IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)427 IndexWriter (org.apache.lucene.index.IndexWriter)291 Document (org.apache.lucene.document.Document)277 Directory (org.apache.lucene.store.Directory)267 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)162 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)139 IndexReader (org.apache.lucene.index.IndexReader)133 Term (org.apache.lucene.index.Term)102 IndexSearcher (org.apache.lucene.search.IndexSearcher)86 DirectoryReader (org.apache.lucene.index.DirectoryReader)85 RAMDirectory (org.apache.lucene.store.RAMDirectory)79 TextField (org.apache.lucene.document.TextField)77 Field (org.apache.lucene.document.Field)71 BytesRef (org.apache.lucene.util.BytesRef)68 IOException (java.io.IOException)58 Analyzer (org.apache.lucene.analysis.Analyzer)57 Test (org.junit.Test)52 StringField (org.apache.lucene.document.StringField)47 TermQuery (org.apache.lucene.search.TermQuery)41 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)38