Examples with IndexWriter - org.apache.lucene.index.IndexWriter

Example 46 with IndexWriter

use of org.apache.lucene.index.IndexWriter in project elasticsearch by elastic.

the class IndicesQueryCacheTests method testBasics.

public void testBasics() throws IOException {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
    w.addDocument(new Document());
    DirectoryReader r = DirectoryReader.open(w);
    w.close();
    ShardId shard = new ShardId("index", "_na_", 0);
    r = ElasticsearchDirectoryReader.wrap(r, shard);
    IndexSearcher s = new IndexSearcher(r);
    s.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE);
    Settings settings = Settings.builder().put(IndicesQueryCache.INDICES_CACHE_QUERY_COUNT_SETTING.getKey(), 10).put(IndicesQueryCache.INDICES_QUERIES_CACHE_ALL_SEGMENTS_SETTING.getKey(), true).build();
    IndicesQueryCache cache = new IndicesQueryCache(settings);
    s.setQueryCache(cache);
    QueryCacheStats stats = cache.getStats(shard);
    assertEquals(0L, stats.getCacheSize());
    assertEquals(0L, stats.getCacheCount());
    assertEquals(0L, stats.getHitCount());
    assertEquals(0L, stats.getMissCount());
    assertEquals(1, s.count(new DummyQuery(0)));
    stats = cache.getStats(shard);
    assertEquals(1L, stats.getCacheSize());
    assertEquals(1L, stats.getCacheCount());
    assertEquals(0L, stats.getHitCount());
    assertEquals(1L, stats.getMissCount());
    for (int i = 1; i < 20; ++i) {
        assertEquals(1, s.count(new DummyQuery(i)));
    }
    stats = cache.getStats(shard);
    assertEquals(10L, stats.getCacheSize());
    assertEquals(20L, stats.getCacheCount());
    assertEquals(0L, stats.getHitCount());
    assertEquals(20L, stats.getMissCount());
    s.count(new DummyQuery(10));
    stats = cache.getStats(shard);
    assertEquals(10L, stats.getCacheSize());
    assertEquals(20L, stats.getCacheCount());
    assertEquals(1L, stats.getHitCount());
    assertEquals(20L, stats.getMissCount());
    IOUtils.close(r, dir);
    // got emptied, but no changes to other metrics
    stats = cache.getStats(shard);
    assertEquals(0L, stats.getCacheSize());
    assertEquals(20L, stats.getCacheCount());
    assertEquals(1L, stats.getHitCount());
    assertEquals(20L, stats.getMissCount());
    cache.onClose(shard);
    // forgot everything
    stats = cache.getStats(shard);
    assertEquals(0L, stats.getCacheSize());
    assertEquals(0L, stats.getCacheCount());
    assertEquals(0L, stats.getHitCount());
    assertEquals(0L, stats.getMissCount());
    // this triggers some assertions
    cache.close();
}

Also used : ShardId(org.elasticsearch.index.shard.ShardId) IndexSearcher(org.apache.lucene.search.IndexSearcher) IndicesQueryCache(org.elasticsearch.indices.IndicesQueryCache) IndexWriter(org.apache.lucene.index.IndexWriter) DirectoryReader(org.apache.lucene.index.DirectoryReader) ElasticsearchDirectoryReader(org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader) QueryCacheStats(org.elasticsearch.index.cache.query.QueryCacheStats) Document(org.apache.lucene.document.Document) Settings(org.elasticsearch.common.settings.Settings) Directory(org.apache.lucene.store.Directory)

Example 47 with IndexWriter

use of org.apache.lucene.index.IndexWriter in project elasticsearch by elastic.

the class AbstractTermVectorsTestCase method indexDocsWithLucene.

protected DirectoryReader indexDocsWithLucene(TestDoc[] testDocs) throws IOException {
    Map<String, Analyzer> mapping = new HashMap<>();
    for (TestFieldSetting field : testDocs[0].fieldSettings) {
        if (field.storedPayloads) {
            mapping.put(field.name, new Analyzer() {

                @Override
                protected TokenStreamComponents createComponents(String fieldName) {
                    Tokenizer tokenizer = new StandardTokenizer();
                    TokenFilter filter = new LowerCaseFilter(tokenizer);
                    filter = new TypeAsPayloadTokenFilter(filter);
                    return new TokenStreamComponents(tokenizer, filter);
                }
            });
        }
    }
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(CharArraySet.EMPTY_SET), mapping);
    Directory dir = new RAMDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(wrapper);
    conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(dir, conf);
    for (TestDoc doc : testDocs) {
        Document d = new Document();
        d.add(new Field("id", doc.id, StringField.TYPE_STORED));
        for (int i = 0; i < doc.fieldContent.length; i++) {
            FieldType type = new FieldType(TextField.TYPE_STORED);
            TestFieldSetting fieldSetting = doc.fieldSettings[i];
            type.setStoreTermVectorOffsets(fieldSetting.storedOffset);
            type.setStoreTermVectorPayloads(fieldSetting.storedPayloads);
            type.setStoreTermVectorPositions(fieldSetting.storedPositions || fieldSetting.storedPayloads || fieldSetting.storedOffset);
            type.setStoreTermVectors(true);
            type.freeze();
            d.add(new Field(fieldSetting.name, doc.fieldContent[i], type));
        }
        writer.updateDocument(new Term("id", doc.id), d);
        writer.commit();
    }
    writer.close();
    return DirectoryReader.open(dir);
}

Also used : HashMap(java.util.HashMap) TypeAsPayloadTokenFilter(org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilter) Term(org.apache.lucene.index.Term) Analyzer(org.apache.lucene.analysis.Analyzer) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) PerFieldAnalyzerWrapper(org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper) FieldType(org.apache.lucene.document.FieldType) StringField(org.apache.lucene.document.StringField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) IndexWriter(org.apache.lucene.index.IndexWriter) StandardTokenizer(org.apache.lucene.analysis.standard.StandardTokenizer) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Tokenizer(org.apache.lucene.analysis.Tokenizer) StandardTokenizer(org.apache.lucene.analysis.standard.StandardTokenizer) LowerCaseFilter(org.apache.lucene.analysis.LowerCaseFilter) TypeAsPayloadTokenFilter(org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilter) TokenFilter(org.apache.lucene.analysis.TokenFilter) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 48 with IndexWriter

use of org.apache.lucene.index.IndexWriter in project elasticsearch by elastic.

the class TermVectorsUnitTests method writeEmptyTermVector.

private void writeEmptyTermVector(TermVectorsResponse outResponse) throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer());
    conf.setOpenMode(OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(dir, conf);
    FieldType type = new FieldType(TextField.TYPE_STORED);
    type.setStoreTermVectorOffsets(true);
    type.setStoreTermVectorPayloads(false);
    type.setStoreTermVectorPositions(true);
    type.setStoreTermVectors(true);
    type.freeze();
    Document d = new Document();
    d.add(new Field("id", "abc", StringField.TYPE_STORED));
    writer.updateDocument(new Term("id", "abc"), d);
    writer.commit();
    writer.close();
    DirectoryReader dr = DirectoryReader.open(dir);
    IndexSearcher s = new IndexSearcher(dr);
    TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1);
    ScoreDoc[] scoreDocs = search.scoreDocs;
    int doc = scoreDocs[0].doc;
    Fields fields = dr.getTermVectors(doc);
    EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets);
    outResponse.setFields(fields, null, flags, fields);
    outResponse.setExists(true);
    dr.close();
    dir.close();
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) Flag(org.elasticsearch.action.termvectors.TermVectorsRequest.Flag) FieldType(org.apache.lucene.document.FieldType) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) StringField(org.apache.lucene.document.StringField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) Fields(org.apache.lucene.index.Fields) IndexWriter(org.apache.lucene.index.IndexWriter) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 49 with IndexWriter

use of org.apache.lucene.index.IndexWriter in project elasticsearch by elastic.

the class TermVectorsUnitTests method writeStandardTermVector.

private void writeStandardTermVector(TermVectorsResponse outResponse) throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer());
    conf.setOpenMode(OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(dir, conf);
    FieldType type = new FieldType(TextField.TYPE_STORED);
    type.setStoreTermVectorOffsets(true);
    type.setStoreTermVectorPayloads(false);
    type.setStoreTermVectorPositions(true);
    type.setStoreTermVectors(true);
    type.freeze();
    Document d = new Document();
    d.add(new Field("id", "abc", StringField.TYPE_STORED));
    d.add(new Field("title", "the1 quick brown fox jumps over  the1 lazy dog", type));
    d.add(new Field("desc", "the1 quick brown fox jumps over  the1 lazy dog", type));
    writer.updateDocument(new Term("id", "abc"), d);
    writer.commit();
    writer.close();
    DirectoryReader dr = DirectoryReader.open(dir);
    IndexSearcher s = new IndexSearcher(dr);
    TopDocs search = s.search(new TermQuery(new Term("id", "abc")), 1);
    ScoreDoc[] scoreDocs = search.scoreDocs;
    int doc = scoreDocs[0].doc;
    Fields termVectors = dr.getTermVectors(doc);
    EnumSet<Flag> flags = EnumSet.of(Flag.Positions, Flag.Offsets);
    outResponse.setFields(termVectors, null, flags, termVectors);
    dr.close();
    dir.close();
}

Example 50 with IndexWriter

use of org.apache.lucene.index.IndexWriter in project elasticsearch by elastic.

the class StoreRecovery method addIndices.

void addIndices(RecoveryState.Index indexRecoveryStats, Directory target, Directory... sources) throws IOException {
    target = new org.apache.lucene.store.HardlinkCopyDirectoryWrapper(target);
    try (IndexWriter writer = new IndexWriter(new StatsDirectoryWrapper(target, indexRecoveryStats), new IndexWriterConfig(null).setCommitOnClose(false).setMergePolicy(NoMergePolicy.INSTANCE).setOpenMode(IndexWriterConfig.OpenMode.CREATE))) {
        writer.addIndexes(sources);
        writer.commit();
    }
}

Also used : IndexWriter(org.apache.lucene.index.IndexWriter) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

IndexWriter (org.apache.lucene.index.IndexWriter)529 Document (org.apache.lucene.document.Document)311 Directory (org.apache.lucene.store.Directory)306 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)293 IndexReader (org.apache.lucene.index.IndexReader)144 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)136 DirectoryReader (org.apache.lucene.index.DirectoryReader)127 Term (org.apache.lucene.index.Term)125 IndexSearcher (org.apache.lucene.search.IndexSearcher)110 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)107 TextField (org.apache.lucene.document.TextField)104 RAMDirectory (org.apache.lucene.store.RAMDirectory)88 IOException (java.io.IOException)86 Field (org.apache.lucene.document.Field)86 TermQuery (org.apache.lucene.search.TermQuery)56 StringField (org.apache.lucene.document.StringField)52 BytesRef (org.apache.lucene.util.BytesRef)52 FieldType (org.apache.lucene.document.FieldType)50 Test (org.junit.Test)49 Query (org.apache.lucene.search.Query)45