Search in sources :

Example 1 with IndexWriterConfig

use of org.apache.lucene.index.IndexWriterConfig in project elasticsearch by elastic.

the class AbstractTermVectorsTestCase method indexDocsWithLucene.

protected DirectoryReader indexDocsWithLucene(TestDoc[] testDocs) throws IOException {
    Map<String, Analyzer> mapping = new HashMap<>();
    for (TestFieldSetting field : testDocs[0].fieldSettings) {
        if (field.storedPayloads) {
            mapping.put(field.name, new Analyzer() {

                @Override
                protected TokenStreamComponents createComponents(String fieldName) {
                    Tokenizer tokenizer = new StandardTokenizer();
                    TokenFilter filter = new LowerCaseFilter(tokenizer);
                    filter = new TypeAsPayloadTokenFilter(filter);
                    return new TokenStreamComponents(tokenizer, filter);
                }
            });
        }
    }
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(CharArraySet.EMPTY_SET), mapping);
    Directory dir = new RAMDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(wrapper);
    conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(dir, conf);
    for (TestDoc doc : testDocs) {
        Document d = new Document();
        d.add(new Field("id", doc.id, StringField.TYPE_STORED));
        for (int i = 0; i < doc.fieldContent.length; i++) {
            FieldType type = new FieldType(TextField.TYPE_STORED);
            TestFieldSetting fieldSetting = doc.fieldSettings[i];
            type.setStoreTermVectorOffsets(fieldSetting.storedOffset);
            type.setStoreTermVectorPayloads(fieldSetting.storedPayloads);
            type.setStoreTermVectorPositions(fieldSetting.storedPositions || fieldSetting.storedPayloads || fieldSetting.storedOffset);
            type.setStoreTermVectors(true);
            type.freeze();
            d.add(new Field(fieldSetting.name, doc.fieldContent[i], type));
        }
        writer.updateDocument(new Term("id", doc.id), d);
        writer.commit();
    }
    writer.close();
    return DirectoryReader.open(dir);
}
Also used : HashMap(java.util.HashMap) TypeAsPayloadTokenFilter(org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilter) Term(org.apache.lucene.index.Term) Analyzer(org.apache.lucene.analysis.Analyzer) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) PerFieldAnalyzerWrapper(org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper) FieldType(org.apache.lucene.document.FieldType) StringField(org.apache.lucene.document.StringField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) IndexWriter(org.apache.lucene.index.IndexWriter) StandardTokenizer(org.apache.lucene.analysis.standard.StandardTokenizer) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Tokenizer(org.apache.lucene.analysis.Tokenizer) StandardTokenizer(org.apache.lucene.analysis.standard.StandardTokenizer) LowerCaseFilter(org.apache.lucene.analysis.LowerCaseFilter) TypeAsPayloadTokenFilter(org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilter) TokenFilter(org.apache.lucene.analysis.TokenFilter) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 2 with IndexWriterConfig

use of org.apache.lucene.index.IndexWriterConfig in project elasticsearch by elastic.

the class TermVectorsUnitTests method writeEmptyTermVector.

private void writeEmptyTermVector(TermVectorsResponse outResponse) throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer());
    conf.setOpenMode(OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(dir, conf);
    FieldType type = new FieldType(TextField.TYPE_STORED);
    type.setStoreTermVectorOffsets(true);
    type.setStoreTermVectorPayloads(false);
    type.setStoreTermVectorPositions(true);
    type.setStoreTermVectors(true);
    type.freeze();
    Document d = new Document();
    d.add(new Field("id", "abc", StringField.TYPE_STORED));
    writer.updateDocument(new Term("id", "abc"), d);
    writer.commit();
    writer.close();
    DirectoryReader dr = DirectoryReader.open(dir);
    IndexSearcher s = new IndexSearcher(dr);
    TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1);
    ScoreDoc[] scoreDocs = search.scoreDocs;
    int doc = scoreDocs[0].doc;
    Fields fields = dr.getTermVectors(doc);
    EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets);
    outResponse.setFields(fields, null, flags, fields);
    outResponse.setExists(true);
    dr.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) Flag(org.elasticsearch.action.termvectors.TermVectorsRequest.Flag) FieldType(org.apache.lucene.document.FieldType) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) StringField(org.apache.lucene.document.StringField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) Fields(org.apache.lucene.index.Fields) IndexWriter(org.apache.lucene.index.IndexWriter) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 3 with IndexWriterConfig

use of org.apache.lucene.index.IndexWriterConfig in project elasticsearch by elastic.

the class TermVectorsUnitTests method writeStandardTermVector.

private void writeStandardTermVector(TermVectorsResponse outResponse) throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer());
    conf.setOpenMode(OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(dir, conf);
    FieldType type = new FieldType(TextField.TYPE_STORED);
    type.setStoreTermVectorOffsets(true);
    type.setStoreTermVectorPayloads(false);
    type.setStoreTermVectorPositions(true);
    type.setStoreTermVectors(true);
    type.freeze();
    Document d = new Document();
    d.add(new Field("id", "abc", StringField.TYPE_STORED));
    d.add(new Field("title", "the1 quick brown fox jumps over  the1 lazy dog", type));
    d.add(new Field("desc", "the1 quick brown fox jumps over  the1 lazy dog", type));
    writer.updateDocument(new Term("id", "abc"), d);
    writer.commit();
    writer.close();
    DirectoryReader dr = DirectoryReader.open(dir);
    IndexSearcher s = new IndexSearcher(dr);
    TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1);
    ScoreDoc[] scoreDocs = search.scoreDocs;
    int doc = scoreDocs[0].doc;
    Fields termVectors = dr.getTermVectors(doc);
    EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets);
    outResponse.setFields(termVectors, null, flags, termVectors);
    dr.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) Flag(org.elasticsearch.action.termvectors.TermVectorsRequest.Flag) FieldType(org.apache.lucene.document.FieldType) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) StringField(org.apache.lucene.document.StringField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) Fields(org.apache.lucene.index.Fields) IndexWriter(org.apache.lucene.index.IndexWriter) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 4 with IndexWriterConfig

use of org.apache.lucene.index.IndexWriterConfig in project elasticsearch by elastic.

the class StoreRecovery method addIndices.

void addIndices(RecoveryState.Index indexRecoveryStats, Directory target, Directory... sources) throws IOException {
    target = new org.apache.lucene.store.HardlinkCopyDirectoryWrapper(target);
    try (IndexWriter writer = new IndexWriter(new StatsDirectoryWrapper(target, indexRecoveryStats), new IndexWriterConfig(null).setCommitOnClose(false).setMergePolicy(NoMergePolicy.INSTANCE).setOpenMode(IndexWriterConfig.OpenMode.CREATE))) {
        writer.addIndexes(sources);
        writer.commit();
    }
}
Also used : IndexWriter(org.apache.lucene.index.IndexWriter) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 5 with IndexWriterConfig

use of org.apache.lucene.index.IndexWriterConfig in project elasticsearch by elastic.

the class CustomUnifiedHighlighterTests method assertHighlightOneDoc.

private void assertHighlightOneDoc(String fieldName, String[] inputs, Analyzer analyzer, Query query, Locale locale, BreakIterator breakIterator, int noMatchSize, String[] expectedPassages) throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newTieredMergePolicy(random()));
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
    FieldType ft = new FieldType(TextField.TYPE_STORED);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    ft.freeze();
    Document doc = new Document();
    for (String input : inputs) {
        Field field = new Field(fieldName, "", ft);
        field.setStringValue(input);
        doc.add(field);
    }
    iw.addDocument(doc);
    DirectoryReader reader = iw.getReader();
    IndexSearcher searcher = newSearcher(reader);
    iw.close();
    TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 1, Sort.INDEXORDER);
    assertThat(topDocs.totalHits, equalTo(1));
    String rawValue = Strings.arrayToDelimitedString(inputs, String.valueOf(MULTIVAL_SEP_CHAR));
    CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, analyzer, new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder()), locale, breakIterator, rawValue, noMatchSize);
    highlighter.setFieldMatcher((name) -> "text".equals(name));
    final Snippet[] snippets = highlighter.highlightField("text", query, topDocs.scoreDocs[0].doc, expectedPassages.length);
    assertEquals(snippets.length, expectedPassages.length);
    for (int i = 0; i < snippets.length; i++) {
        assertEquals(snippets[i].getText(), expectedPassages[i]);
    }
    reader.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) DirectoryReader(org.apache.lucene.index.DirectoryReader) Snippet(org.apache.lucene.search.highlight.Snippet) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) DefaultEncoder(org.apache.lucene.search.highlight.DefaultEncoder) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)538 IndexWriter (org.apache.lucene.index.IndexWriter)387 Document (org.apache.lucene.document.Document)324 Directory (org.apache.lucene.store.Directory)303 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)162 IndexReader (org.apache.lucene.index.IndexReader)144 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)143 Term (org.apache.lucene.index.Term)119 IndexSearcher (org.apache.lucene.search.IndexSearcher)109 TextField (org.apache.lucene.document.TextField)102 DirectoryReader (org.apache.lucene.index.DirectoryReader)96 IOException (java.io.IOException)95 RAMDirectory (org.apache.lucene.store.RAMDirectory)86 BytesRef (org.apache.lucene.util.BytesRef)84 Field (org.apache.lucene.document.Field)82 Analyzer (org.apache.lucene.analysis.Analyzer)81 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)67 Test (org.junit.Test)65 StringField (org.apache.lucene.document.StringField)63 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)49