Search in sources :

Example 1 with RAMDirectory

use of org.apache.lucene.store.RAMDirectory in project crate by crate.

the class OrderedLuceneBatchIteratorBenchmark method createLuceneBatchIterator.

@Setup
public void createLuceneBatchIterator() throws Exception {
    IndexWriter iw = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(new StandardAnalyzer()));
    dummyShardId = new ShardId("dummy", 1);
    columnName = "x";
    for (int i = 0; i < 10_000_000; i++) {
        Document doc = new Document();
        doc.add(new NumericDocValuesField(columnName, i));
        iw.addDocument(doc);
    }
    iw.commit();
    iw.forceMerge(1, true);
    indexSearcher = new IndexSearcher(DirectoryReader.open(iw, true));
    collectorContext = new CollectorContext(mock(IndexFieldDataService.class), new CollectorFieldsVisitor(0));
    fieldTypeLookup = column -> {
        IntegerFieldMapper.IntegerFieldType integerFieldType = new IntegerFieldMapper.IntegerFieldType();
        integerFieldType.setNames(new MappedFieldType.Names(column));
        return integerFieldType;
    };
    reference = new Reference(new ReferenceIdent(new TableIdent(null, "dummyTable"), columnName), RowGranularity.DOC, DataTypes.INTEGER);
    orderBy = new OrderBy(Collections.singletonList(reference), reverseFlags, nullsFirst);
}
Also used : OrderBy(io.crate.analyze.OrderBy) Reference(io.crate.metadata.Reference) TableIdent(io.crate.metadata.TableIdent) Document(org.apache.lucene.document.Document) IntegerFieldMapper(org.elasticsearch.index.mapper.core.IntegerFieldMapper) RAMDirectory(org.apache.lucene.store.RAMDirectory) ReferenceIdent(io.crate.metadata.ReferenceIdent) ShardId(org.elasticsearch.index.shard.ShardId) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) IndexWriter(org.apache.lucene.index.IndexWriter) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) MappedFieldType(org.elasticsearch.index.mapper.MappedFieldType) CollectorContext(io.crate.operation.reference.doc.lucene.CollectorContext) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 2 with RAMDirectory

use of org.apache.lucene.store.RAMDirectory in project crate by crate.

the class DocLevelExpressionsTest method prepare.

@Before
public void prepare() throws Exception {
    Settings settings = Settings.builder().put("index.fielddata.cache", "none").build();
    IndexService indexService = createIndex("test", settings);
    ifd = indexService.fieldData();
    writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(new StandardAnalyzer()).setMergePolicy(new LogByteSizeMergePolicy()));
    insertValues(writer);
    DirectoryReader directoryReader = DirectoryReader.open(writer, true);
    readerContext = directoryReader.leaves().get(0);
    ctx = new CollectorContext(ifd, null);
}
Also used : IndexService(org.elasticsearch.index.IndexService) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) CollectorContext(io.crate.operation.reference.doc.lucene.CollectorContext) Settings(org.elasticsearch.common.settings.Settings) RAMDirectory(org.apache.lucene.store.RAMDirectory) Before(org.junit.Before)

Example 3 with RAMDirectory

use of org.apache.lucene.store.RAMDirectory in project elasticsearch by elastic.

the class NoisyChannelSpellCheckerTests method testMultiGenerator.

public void testMultiGenerator() throws IOException {
    RAMDirectory dir = new RAMDirectory();
    Map<String, Analyzer> mapping = new HashMap<>();
    mapping.put("body_ngram", new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer t = new StandardTokenizer();
            ShingleFilter tf = new ShingleFilter(t, 2, 3);
            tf.setOutputUnigrams(false);
            return new TokenStreamComponents(t, new LowerCaseFilter(tf));
        }
    });
    mapping.put("body", new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer t = new StandardTokenizer();
            return new TokenStreamComponents(t, new LowerCaseFilter(t));
        }
    });
    mapping.put("body_reverse", new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer t = new StandardTokenizer();
            return new TokenStreamComponents(t, new ReverseStringFilter(new LowerCaseFilter(t)));
        }
    });
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(), mapping);
    IndexWriterConfig conf = new IndexWriterConfig(wrapper);
    IndexWriter writer = new IndexWriter(dir, conf);
    String[] strings = new String[] { "Xorr the God-Jewel", "Grog the God-Crusher", "Xorn", "Walter Newell", "Wanda Maximoff", "Captain America", "American Ace", "Wundarr the Aquarian", "Will o' the Wisp", "Xemnu the Titan", "Fantastic Four", "Quasar", "Quasar II" };
    for (String line : strings) {
        Document doc = new Document();
        doc.add(new Field("body", line, TextField.TYPE_NOT_STORED));
        doc.add(new Field("body_reverse", line, TextField.TYPE_NOT_STORED));
        doc.add(new Field("body_ngram", line, TextField.TYPE_NOT_STORED));
        writer.addDocument(doc);
    }
    DirectoryReader ir = DirectoryReader.open(writer);
    LaplaceScorer wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.95d, new BytesRef(" "), 0.5f);
    NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
    DirectSpellChecker spellchecker = new DirectSpellChecker();
    spellchecker.setMinQueryLength(1);
    DirectCandidateGenerator forward = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_ALWAYS, ir, 0.95, 10);
    DirectCandidateGenerator reverse = new DirectCandidateGenerator(spellchecker, "body_reverse", SuggestMode.SUGGEST_ALWAYS, ir, 0.95, 10, wrapper, wrapper, MultiFields.getTerms(ir, "body_reverse"));
    CandidateGenerator generator = new MultiCandidateGeneratorWrapper(10, forward, reverse);
    Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), generator, 1, 1, ir, "body", wordScorer, 1, 2).corrections;
    assertThat(corrections.length, equalTo(1));
    assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));
    generator = new MultiCandidateGeneratorWrapper(5, forward, reverse);
    corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 1, 2).corrections;
    assertThat(corrections.length, equalTo(1));
    assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));
    corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), forward, 1, 1, ir, "body", wordScorer, 1, 2).corrections;
    // only use forward with constant prefix
    assertThat(corrections.length, equalTo(0));
    corrections = suggester.getCorrections(wrapper, new BytesRef("america cae"), generator, 2, 1, ir, "body", wordScorer, 1, 2).corrections;
    assertThat(corrections.length, equalTo(1));
    assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));
    corrections = suggester.getCorrections(wrapper, new BytesRef("Zorr the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 2).corrections;
    assertThat(corrections.length, equalTo(4));
    assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
    assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("zorr the god jewel"));
    assertThat(corrections[2].join(new BytesRef(" ")).utf8ToString(), equalTo("four the god jewel"));
    corrections = suggester.getCorrections(wrapper, new BytesRef("Zorr the Got-Jewel"), generator, 0.5f, 1, ir, "body", wordScorer, 1.5f, 2).corrections;
    assertThat(corrections.length, equalTo(1));
    assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
    corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 1, ir, "body", wordScorer, 1.5f, 2).corrections;
    assertThat(corrections.length, equalTo(1));
    assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
    // Test a special case where one of the suggest term is unchanged by the postFilter, 'II' here is unchanged by the reverse analyzer.
    corrections = suggester.getCorrections(wrapper, new BytesRef("Quazar II"), generator, 1, 1, ir, "body", wordScorer, 1, 2).corrections;
    assertThat(corrections.length, equalTo(1));
    assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("quasar ii"));
}
Also used : HashMap(java.util.HashMap) WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) Document(org.apache.lucene.document.Document) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) ShingleFilter(org.apache.lucene.analysis.shingle.ShingleFilter) ReverseStringFilter(org.apache.lucene.analysis.reverse.ReverseStringFilter) Tokenizer(org.apache.lucene.analysis.Tokenizer) StandardTokenizer(org.apache.lucene.analysis.standard.StandardTokenizer) DirectSpellChecker(org.apache.lucene.search.spell.DirectSpellChecker) BytesRef(org.apache.lucene.util.BytesRef) WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) DirectoryReader(org.apache.lucene.index.DirectoryReader) RAMDirectory(org.apache.lucene.store.RAMDirectory) PerFieldAnalyzerWrapper(org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper) IndexWriter(org.apache.lucene.index.IndexWriter) StandardTokenizer(org.apache.lucene.analysis.standard.StandardTokenizer) LowerCaseFilter(org.apache.lucene.analysis.LowerCaseFilter) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 4 with RAMDirectory

use of org.apache.lucene.store.RAMDirectory in project elasticsearch by elastic.

the class SmoothingModelTestCase method testBuildWordScorer.

/**
     * Test the WordScorer emitted by the smoothing model
     */
public void testBuildWordScorer() throws IOException {
    SmoothingModel testModel = createTestModel();
    Map<String, Analyzer> mapping = new HashMap<>();
    mapping.put("field", new WhitespaceAnalyzer());
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(), mapping);
    IndexWriter writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(wrapper));
    Document doc = new Document();
    doc.add(new Field("field", "someText", TextField.TYPE_NOT_STORED));
    writer.addDocument(doc);
    DirectoryReader ir = DirectoryReader.open(writer);
    WordScorer wordScorer = testModel.buildWordScorerFactory().newScorer(ir, MultiFields.getTerms(ir, "field"), "field", 0.9d, BytesRefs.toBytesRef(" "));
    assertWordScorer(wordScorer, testModel);
}
Also used : WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) HashMap(java.util.HashMap) DirectoryReader(org.apache.lucene.index.DirectoryReader) WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) PerFieldAnalyzerWrapper(org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) IndexWriter(org.apache.lucene.index.IndexWriter) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 5 with RAMDirectory

use of org.apache.lucene.store.RAMDirectory in project elasticsearch by elastic.

the class PercolateQueryBuilder method createMultiDocumentSearcher.

static IndexSearcher createMultiDocumentSearcher(Analyzer analyzer, ParsedDocument doc) {
    RAMDirectory ramDirectory = new RAMDirectory();
    try (IndexWriter indexWriter = new IndexWriter(ramDirectory, new IndexWriterConfig(analyzer))) {
        indexWriter.addDocuments(doc.docs());
        indexWriter.commit();
        DirectoryReader directoryReader = DirectoryReader.open(ramDirectory);
        assert directoryReader.leaves().size() == 1 : "Expected single leaf, but got [" + directoryReader.leaves().size() + "]";
        final IndexSearcher slowSearcher = new IndexSearcher(directoryReader) {

            @Override
            public Weight createNormalizedWeight(Query query, boolean needsScores) throws IOException {
                BooleanQuery.Builder bq = new BooleanQuery.Builder();
                bq.add(query, BooleanClause.Occur.MUST);
                bq.add(Queries.newNestedFilter(), BooleanClause.Occur.MUST_NOT);
                return super.createNormalizedWeight(bq.build(), needsScores);
            }
        };
        slowSearcher.setQueryCache(null);
        return slowSearcher;
    } catch (IOException e) {
        throw new ElasticsearchException("Failed to create index for percolator with nested document ", e);
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) PercolatorFieldMapper.parseQuery(org.elasticsearch.percolator.PercolatorFieldMapper.parseQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) IndexWriter(org.apache.lucene.index.IndexWriter) DirectoryReader(org.apache.lucene.index.DirectoryReader) XContentBuilder(org.elasticsearch.common.xcontent.XContentBuilder) QueryBuilder(org.elasticsearch.index.query.QueryBuilder) AbstractQueryBuilder(org.elasticsearch.index.query.AbstractQueryBuilder) IOException(java.io.IOException) ElasticsearchException(org.elasticsearch.ElasticsearchException) RAMDirectory(org.apache.lucene.store.RAMDirectory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

RAMDirectory (org.apache.lucene.store.RAMDirectory)183 Directory (org.apache.lucene.store.Directory)101 IndexWriter (org.apache.lucene.index.IndexWriter)82 Document (org.apache.lucene.document.Document)75 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)73 IndexSearcher (org.apache.lucene.search.IndexSearcher)43 IndexReader (org.apache.lucene.index.IndexReader)41 Test (org.junit.Test)35 TextField (org.apache.lucene.document.TextField)33 Field (org.apache.lucene.document.Field)29 Term (org.apache.lucene.index.Term)25 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)21 Before (org.junit.Before)21 IOException (java.io.IOException)19 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)19 Analyzer (org.apache.lucene.analysis.Analyzer)18 TopDocs (org.apache.lucene.search.TopDocs)16 DirectoryReader (org.apache.lucene.index.DirectoryReader)15 FilterDirectory (org.apache.lucene.store.FilterDirectory)15 FieldType (org.apache.lucene.document.FieldType)13