Search in sources :

Example 21 with RAMDirectory

use of org.apache.lucene.store.RAMDirectory in project elasticsearch by elastic.

the class BooleanFieldMapperTests method testDefaults.

public void testDefaults() throws IOException {
    String mapping = XContentFactory.jsonBuilder().startObject().startObject("type").startObject("properties").startObject("field").field("type", "boolean").endObject().endObject().endObject().endObject().string();
    DocumentMapper defaultMapper = parser.parse("type", new CompressedXContent(mapping));
    ParsedDocument doc = defaultMapper.parse("test", "type", "1", XContentFactory.jsonBuilder().startObject().field("field", true).endObject().bytes());
    try (Directory dir = new RAMDirectory();
        IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())))) {
        w.addDocuments(doc.docs());
        try (DirectoryReader reader = DirectoryReader.open(w)) {
            final LeafReader leaf = reader.leaves().get(0).reader();
            // boolean fields are indexed and have doc values by default
            assertEquals(new BytesRef("T"), leaf.terms("field").iterator().next());
            SortedNumericDocValues values = leaf.getSortedNumericDocValues("field");
            assertNotNull(values);
            values.setDocument(0);
            assertEquals(1, values.count());
            assertEquals(1, values.valueAt(0));
        }
    }
}
Also used : SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) LeafReader(org.apache.lucene.index.LeafReader) DirectoryReader(org.apache.lucene.index.DirectoryReader) Matchers.containsString(org.hamcrest.Matchers.containsString) RAMDirectory(org.apache.lucene.store.RAMDirectory) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) CompressedXContent(org.elasticsearch.common.compress.CompressedXContent) BytesRef(org.apache.lucene.util.BytesRef) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 22 with RAMDirectory

use of org.apache.lucene.store.RAMDirectory in project elasticsearch by elastic.

the class StoreTests method testCanReadOldCorruptionMarker.

public void testCanReadOldCorruptionMarker() throws IOException {
    final ShardId shardId = new ShardId("index", "_na_", 1);
    // I use ram dir to prevent that virusscanner being a PITA
    final Directory dir = new RAMDirectory();
    DirectoryService directoryService = new DirectoryService(shardId, INDEX_SETTINGS) {

        @Override
        public Directory newDirectory() throws IOException {
            return dir;
        }
    };
    Store store = new Store(shardId, INDEX_SETTINGS, directoryService, new DummyShardLock(shardId));
    CorruptIndexException exception = new CorruptIndexException("foo", "bar");
    String uuid = Store.CORRUPTED + UUIDs.randomBase64UUID();
    try (IndexOutput output = dir.createOutput(uuid, IOContext.DEFAULT)) {
        CodecUtil.writeHeader(output, Store.CODEC, Store.VERSION_STACK_TRACE);
        output.writeString(ExceptionsHelper.detailedMessage(exception));
        output.writeString(ExceptionsHelper.stackTrace(exception));
        CodecUtil.writeFooter(output);
    }
    try {
        store.failIfCorrupted();
        fail("should be corrupted");
    } catch (CorruptIndexException e) {
        assertTrue(e.getMessage().startsWith("[index][1] Preexisting corrupted index [" + uuid + "] caused by: CorruptIndexException[foo (resource=bar)]"));
        assertTrue(e.getMessage().contains(ExceptionsHelper.stackTrace(exception)));
    }
    store.removeCorruptionMarker();
    try (IndexOutput output = dir.createOutput(uuid, IOContext.DEFAULT)) {
        CodecUtil.writeHeader(output, Store.CODEC, Store.VERSION_START);
        output.writeString(ExceptionsHelper.detailedMessage(exception));
        CodecUtil.writeFooter(output);
    }
    try {
        store.failIfCorrupted();
        fail("should be corrupted");
    } catch (CorruptIndexException e) {
        assertTrue(e.getMessage().startsWith("[index][1] Preexisting corrupted index [" + uuid + "] caused by: CorruptIndexException[foo (resource=bar)]"));
        assertFalse(e.getMessage().contains(ExceptionsHelper.stackTrace(exception)));
    }
    store.removeCorruptionMarker();
    try (IndexOutput output = dir.createOutput(uuid, IOContext.DEFAULT)) {
        // corrupted header
        CodecUtil.writeHeader(output, Store.CODEC, Store.VERSION_START - 1);
        CodecUtil.writeFooter(output);
    }
    try {
        store.failIfCorrupted();
        fail("should be too old");
    } catch (IndexFormatTooOldException e) {
    }
    store.removeCorruptionMarker();
    try (IndexOutput output = dir.createOutput(uuid, IOContext.DEFAULT)) {
        // corrupted header
        CodecUtil.writeHeader(output, Store.CODEC, Store.VERSION + 1);
        CodecUtil.writeFooter(output);
    }
    try {
        store.failIfCorrupted();
        fail("should be too new");
    } catch (IndexFormatTooNewException e) {
    }
    store.close();
}
Also used : ShardId(org.elasticsearch.index.shard.ShardId) IndexFormatTooOldException(org.apache.lucene.index.IndexFormatTooOldException) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) IndexOutput(org.apache.lucene.store.IndexOutput) DummyShardLock(org.elasticsearch.test.DummyShardLock) IndexFormatTooNewException(org.apache.lucene.index.IndexFormatTooNewException) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) RAMDirectory(org.apache.lucene.store.RAMDirectory)

Example 23 with RAMDirectory

use of org.apache.lucene.store.RAMDirectory in project elasticsearch by elastic.

the class AbstractTermVectorsTestCase method indexDocsWithLucene.

protected DirectoryReader indexDocsWithLucene(TestDoc[] testDocs) throws IOException {
    Map<String, Analyzer> mapping = new HashMap<>();
    for (TestFieldSetting field : testDocs[0].fieldSettings) {
        if (field.storedPayloads) {
            mapping.put(field.name, new Analyzer() {

                @Override
                protected TokenStreamComponents createComponents(String fieldName) {
                    Tokenizer tokenizer = new StandardTokenizer();
                    TokenFilter filter = new LowerCaseFilter(tokenizer);
                    filter = new TypeAsPayloadTokenFilter(filter);
                    return new TokenStreamComponents(tokenizer, filter);
                }
            });
        }
    }
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(CharArraySet.EMPTY_SET), mapping);
    Directory dir = new RAMDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(wrapper);
    conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(dir, conf);
    for (TestDoc doc : testDocs) {
        Document d = new Document();
        d.add(new Field("id", doc.id, StringField.TYPE_STORED));
        for (int i = 0; i < doc.fieldContent.length; i++) {
            FieldType type = new FieldType(TextField.TYPE_STORED);
            TestFieldSetting fieldSetting = doc.fieldSettings[i];
            type.setStoreTermVectorOffsets(fieldSetting.storedOffset);
            type.setStoreTermVectorPayloads(fieldSetting.storedPayloads);
            type.setStoreTermVectorPositions(fieldSetting.storedPositions || fieldSetting.storedPayloads || fieldSetting.storedOffset);
            type.setStoreTermVectors(true);
            type.freeze();
            d.add(new Field(fieldSetting.name, doc.fieldContent[i], type));
        }
        writer.updateDocument(new Term("id", doc.id), d);
        writer.commit();
    }
    writer.close();
    return DirectoryReader.open(dir);
}
Also used : HashMap(java.util.HashMap) TypeAsPayloadTokenFilter(org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilter) Term(org.apache.lucene.index.Term) Analyzer(org.apache.lucene.analysis.Analyzer) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) PerFieldAnalyzerWrapper(org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper) FieldType(org.apache.lucene.document.FieldType) StringField(org.apache.lucene.document.StringField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) IndexWriter(org.apache.lucene.index.IndexWriter) StandardTokenizer(org.apache.lucene.analysis.standard.StandardTokenizer) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Tokenizer(org.apache.lucene.analysis.Tokenizer) StandardTokenizer(org.apache.lucene.analysis.standard.StandardTokenizer) LowerCaseFilter(org.apache.lucene.analysis.LowerCaseFilter) TypeAsPayloadTokenFilter(org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilter) TokenFilter(org.apache.lucene.analysis.TokenFilter) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 24 with RAMDirectory

use of org.apache.lucene.store.RAMDirectory in project ansj_seg by NLPchina.

the class IndexAndTest method test.

@Test
public void test() throws Exception {
    DicLibrary.put(DicLibrary.DEFAULT, "../../library/default.dic");
    PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new AnsjAnalyzer(TYPE.index_ansj));
    Directory directory = null;
    IndexWriter iwriter = null;
    IndexWriterConfig ic = new IndexWriterConfig(analyzer);
    String text = "旅游和服务是最好的";
    System.out.println(IndexAnalysis.parse(text));
    // 建立内存索引对象
    directory = new RAMDirectory();
    iwriter = new IndexWriter(directory, ic);
    addContent(iwriter, text);
    iwriter.commit();
    iwriter.close();
    System.out.println("索引建立完毕");
    Analyzer queryAnalyzer = new AnsjAnalyzer(AnsjAnalyzer.TYPE.index_ansj);
    System.out.println("index ok to search!");
    for (Term t : IndexAnalysis.parse(text)) {
        System.out.println(t.getName());
        search(queryAnalyzer, directory, "\"" + t.getName() + "\"");
    }
}
Also used : AnsjAnalyzer(org.ansj.lucene6.AnsjAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) Term(org.ansj.domain.Term) AnsjAnalyzer(org.ansj.lucene6.AnsjAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) RAMDirectory(org.apache.lucene.store.RAMDirectory) PerFieldAnalyzerWrapper(org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Example 25 with RAMDirectory

use of org.apache.lucene.store.RAMDirectory in project ansj_seg by NLPchina.

the class IndexTest method indexTest.

@Test
public void indexTest() throws CorruptIndexException, LockObtainFailedException, IOException, ParseException {
    PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new AnsjAnalyzer(TYPE.index_ansj));
    Directory directory = null;
    IndexWriter iwriter = null;
    IndexWriterConfig ic = new IndexWriterConfig(analyzer);
    // 建立内存索引对象
    directory = new RAMDirectory();
    iwriter = new IndexWriter(directory, ic);
    addContent(iwriter, "助推企业转型升级提供强有力的技术支持和服保障。中心的建成将使青岛的服务器承载能力突破10万台,达到世界一流水平。");
    addContent(iwriter, "涉及民生的部分商品和服务成本监审政策");
    addContent(iwriter, "我穿着和服");
    iwriter.commit();
    iwriter.close();
    System.out.println("索引建立完毕");
    Analyzer queryAnalyzer = new AnsjAnalyzer(AnsjAnalyzer.TYPE.dic_ansj);
    System.out.println("index ok to search!");
    search(queryAnalyzer, directory, "\"和服\"");
}
Also used : AnsjAnalyzer(org.ansj.lucene6.AnsjAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) AnsjAnalyzer(org.ansj.lucene6.AnsjAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) RAMDirectory(org.apache.lucene.store.RAMDirectory) PerFieldAnalyzerWrapper(org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Aggregations

RAMDirectory (org.apache.lucene.store.RAMDirectory)183 Directory (org.apache.lucene.store.Directory)101 IndexWriter (org.apache.lucene.index.IndexWriter)82 Document (org.apache.lucene.document.Document)75 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)73 IndexSearcher (org.apache.lucene.search.IndexSearcher)43 IndexReader (org.apache.lucene.index.IndexReader)41 Test (org.junit.Test)35 TextField (org.apache.lucene.document.TextField)33 Field (org.apache.lucene.document.Field)29 Term (org.apache.lucene.index.Term)25 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)21 Before (org.junit.Before)21 IOException (java.io.IOException)19 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)19 Analyzer (org.apache.lucene.analysis.Analyzer)18 TopDocs (org.apache.lucene.search.TopDocs)16 DirectoryReader (org.apache.lucene.index.DirectoryReader)15 FilterDirectory (org.apache.lucene.store.FilterDirectory)15 FieldType (org.apache.lucene.document.FieldType)13