Search in sources :

Example 6 with Directory

use of org.apache.lucene.store.Directory in project elasticsearch by elastic.

the class DiversifiedSamplerTests method testDiversifiedSampler.

public void testDiversifiedSampler() throws Exception {
    String[] data = { // "id,cat,name,price,inStock,author_t,series_t,sequence_i,genre_s,genre_id",
    "0553573403,book,A Game of Thrones,7.99,true,George R.R. Martin,A Song of Ice and Fire,1,fantasy,0", "0553579908,book,A Clash of Kings,7.99,true,George R.R. Martin,A Song of Ice and Fire,2,fantasy,0", "055357342X,book,A Storm of Swords,7.99,true,George R.R. Martin,A Song of Ice and Fire,3,fantasy,0", "0553293354,book,Foundation,17.99,true,Isaac Asimov,Foundation Novels,1,scifi,1", "0812521390,book,The Black Company,6.99,false,Glen Cook,The Chronicles of The Black Company,1,fantasy,0", "0812550706,book,Ender's Game,6.99,true,Orson Scott Card,Ender,1,scifi,1", "0441385532,book,Jhereg,7.95,false,Steven Brust,Vlad Taltos,1,fantasy,0", "0380014300,book,Nine Princes In Amber,6.99,true,Roger Zelazny,the Chronicles of Amber,1,fantasy,0", "0805080481,book,The Book of Three,5.99,true,Lloyd Alexander,The Chronicles of Prydain,1,fantasy,0", "080508049X,book,The Black Cauldron,5.99,true,Lloyd Alexander,The Chronicles of Prydain,2,fantasy,0" };
    Directory directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
    for (String entry : data) {
        String[] parts = entry.split(",");
        Document document = new Document();
        document.add(new SortedDocValuesField("id", new BytesRef(parts[0])));
        document.add(new StringField("cat", parts[1], Field.Store.NO));
        document.add(new TextField("name", parts[2], Field.Store.NO));
        document.add(new DoubleDocValuesField("price", Double.valueOf(parts[3])));
        document.add(new StringField("inStock", parts[4], Field.Store.NO));
        document.add(new StringField("author", parts[5], Field.Store.NO));
        document.add(new StringField("series", parts[6], Field.Store.NO));
        document.add(new StringField("sequence", parts[7], Field.Store.NO));
        document.add(new SortedDocValuesField("genre", new BytesRef(parts[8])));
        document.add(new NumericDocValuesField("genre_id", Long.valueOf(parts[9])));
        indexWriter.addDocument(document);
    }
    indexWriter.close();
    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    MappedFieldType genreFieldType = new KeywordFieldMapper.KeywordFieldType();
    genreFieldType.setName("genre");
    genreFieldType.setHasDocValues(true);
    Consumer<InternalSampler> verify = result -> {
        Terms terms = result.getAggregations().get("terms");
        assertEquals(2, terms.getBuckets().size());
        assertEquals("0805080481", terms.getBuckets().get(0).getKeyAsString());
        assertEquals("0812550706", terms.getBuckets().get(1).getKeyAsString());
    };
    testCase(indexSearcher, genreFieldType, "map", verify);
    testCase(indexSearcher, genreFieldType, "global_ordinals", verify);
    testCase(indexSearcher, genreFieldType, "bytes_hash", verify);
    genreFieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG);
    genreFieldType.setName("genre_id");
    testCase(indexSearcher, genreFieldType, null, verify);
    // wrong field:
    genreFieldType = new KeywordFieldMapper.KeywordFieldType();
    genreFieldType.setName("wrong_field");
    genreFieldType.setHasDocValues(true);
    testCase(indexSearcher, genreFieldType, null, result -> {
        Terms terms = result.getAggregations().get("terms");
        assertEquals(1, terms.getBuckets().size());
        assertEquals("0805080481", terms.getBuckets().get(0).getKeyAsString());
    });
    indexReader.close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) StringField(org.apache.lucene.document.StringField) Index(org.elasticsearch.index.Index) Document(org.apache.lucene.document.Document) SortedNumericDVIndexFieldData(org.elasticsearch.index.fielddata.plain.SortedNumericDVIndexFieldData) MappedFieldType(org.elasticsearch.index.mapper.MappedFieldType) Directory(org.apache.lucene.store.Directory) TermsAggregationBuilder(org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder) FunctionScoreQuery(org.elasticsearch.common.lucene.search.function.FunctionScoreQuery) DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) FieldValueFactorFunction(org.elasticsearch.common.lucene.search.function.FieldValueFactorFunction) Terms(org.elasticsearch.search.aggregations.bucket.terms.Terms) DirectoryReader(org.apache.lucene.index.DirectoryReader) IOException(java.io.IOException) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) Consumer(java.util.function.Consumer) AggregatorTestCase(org.elasticsearch.search.aggregations.AggregatorTestCase) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) KeywordFieldMapper(org.elasticsearch.index.mapper.KeywordFieldMapper) Field(org.apache.lucene.document.Field) IndexNumericFieldData(org.elasticsearch.index.fielddata.IndexNumericFieldData) NumberFieldMapper(org.elasticsearch.index.mapper.NumberFieldMapper) TextField(org.apache.lucene.document.TextField) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IndexReader(org.apache.lucene.index.IndexReader) IndexSearcher(org.apache.lucene.search.IndexSearcher) NumberFieldMapper(org.elasticsearch.index.mapper.NumberFieldMapper) Terms(org.elasticsearch.search.aggregations.bucket.terms.Terms) Document(org.apache.lucene.document.Document) KeywordFieldMapper(org.elasticsearch.index.mapper.KeywordFieldMapper) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) StringField(org.apache.lucene.document.StringField) DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) IndexReader(org.apache.lucene.index.IndexReader) MappedFieldType(org.elasticsearch.index.mapper.MappedFieldType) TextField(org.apache.lucene.document.TextField) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 7 with Directory

use of org.apache.lucene.store.Directory in project elasticsearch by elastic.

the class HistogramAggregatorTests method testMinDocCount.

public void testMinDocCount() throws Exception {
    try (Directory dir = newDirectory();
        RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
        for (long value : new long[] { 7, 3, -10, -6, 5, 50 }) {
            Document doc = new Document();
            doc.add(new SortedNumericDocValuesField("field", value));
            w.addDocument(doc);
        }
        HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg").field("field").interval(10).minDocCount(2);
        MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG);
        fieldType.setName("field");
        try (IndexReader reader = w.getReader()) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Histogram histogram = searchAndReduce(searcher, new MatchAllDocsQuery(), aggBuilder, fieldType);
            assertEquals(2, histogram.getBuckets().size());
            assertEquals(-10d, histogram.getBuckets().get(0).getKey());
            assertEquals(2, histogram.getBuckets().get(0).getDocCount());
            assertEquals(0d, histogram.getBuckets().get(1).getKey());
            assertEquals(3, histogram.getBuckets().get(1).getDocCount());
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) MappedFieldType(org.elasticsearch.index.mapper.MappedFieldType) IndexReader(org.apache.lucene.index.IndexReader) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 8 with Directory

use of org.apache.lucene.store.Directory in project elasticsearch by elastic.

the class HistogramAggregatorTests method testDoubles.

public void testDoubles() throws Exception {
    try (Directory dir = newDirectory();
        RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
        for (double value : new double[] { 9.3, 3.2, -10, -6.5, 5.3, 50.1 }) {
            Document doc = new Document();
            doc.add(new SortedNumericDocValuesField("field", NumericUtils.doubleToSortableLong(value)));
            w.addDocument(doc);
        }
        HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg").field("field").interval(5);
        MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.DOUBLE);
        fieldType.setName("field");
        try (IndexReader reader = w.getReader()) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Histogram histogram = search(searcher, new MatchAllDocsQuery(), aggBuilder, fieldType);
            assertEquals(4, histogram.getBuckets().size());
            assertEquals(-10d, histogram.getBuckets().get(0).getKey());
            assertEquals(2, histogram.getBuckets().get(0).getDocCount());
            assertEquals(0d, histogram.getBuckets().get(1).getKey());
            assertEquals(1, histogram.getBuckets().get(1).getDocCount());
            assertEquals(5d, histogram.getBuckets().get(2).getKey());
            assertEquals(2, histogram.getBuckets().get(2).getDocCount());
            assertEquals(50d, histogram.getBuckets().get(3).getKey());
            assertEquals(1, histogram.getBuckets().get(3).getDocCount());
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) MappedFieldType(org.elasticsearch.index.mapper.MappedFieldType) IndexReader(org.apache.lucene.index.IndexReader) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 9 with Directory

use of org.apache.lucene.store.Directory in project elasticsearch by elastic.

the class NestedAggregatorTests method testResetRootDocId.

public void testResetRootDocId() throws Exception {
    Directory directory = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(null);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, iwc);
    List<Document> documents = new ArrayList<>();
    // 1 segment with, 1 root document, with 3 nested sub docs
    Document document = new Document();
    document.add(new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
    document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
    documents.add(document);
    document = new Document();
    document.add(new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
    document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
    documents.add(document);
    document = new Document();
    document.add(new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
    document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
    documents.add(document);
    document = new Document();
    document.add(new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.FIELD_TYPE));
    document.add(new Field(TypeFieldMapper.NAME, "test", TypeFieldMapper.Defaults.FIELD_TYPE));
    documents.add(document);
    indexWriter.addDocuments(documents);
    indexWriter.commit();
    documents.clear();
    // 1 segment with:
    // 1 document, with 1 nested subdoc
    document = new Document();
    document.add(new Field(UidFieldMapper.NAME, "type#2", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
    document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
    documents.add(document);
    document = new Document();
    document.add(new Field(UidFieldMapper.NAME, "type#2", UidFieldMapper.Defaults.FIELD_TYPE));
    document.add(new Field(TypeFieldMapper.NAME, "test", TypeFieldMapper.Defaults.FIELD_TYPE));
    documents.add(document);
    indexWriter.addDocuments(documents);
    documents.clear();
    // and 1 document, with 1 nested subdoc
    document = new Document();
    document.add(new Field(UidFieldMapper.NAME, "type#3", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
    document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
    documents.add(document);
    document = new Document();
    document.add(new Field(UidFieldMapper.NAME, "type#3", UidFieldMapper.Defaults.FIELD_TYPE));
    document.add(new Field(TypeFieldMapper.NAME, "test", TypeFieldMapper.Defaults.FIELD_TYPE));
    documents.add(document);
    indexWriter.addDocuments(documents);
    indexWriter.commit();
    indexWriter.close();
    IndexService indexService = createIndex("test");
    DirectoryReader directoryReader = DirectoryReader.open(directory);
    directoryReader = ElasticsearchDirectoryReader.wrap(directoryReader, new ShardId(indexService.index(), 0));
    IndexSearcher searcher = new IndexSearcher(directoryReader);
    indexService.mapperService().merge("test", new CompressedXContent(PutMappingRequest.buildFromSimplifiedDef("test", "nested_field", "type=nested").string()), MapperService.MergeReason.MAPPING_UPDATE, false);
    SearchContext context = createSearchContext(indexService);
    AggregatorFactories.Builder builder = AggregatorFactories.builder();
    NestedAggregationBuilder factory = new NestedAggregationBuilder("test", "nested_field");
    builder.addAggregator(factory);
    AggregatorFactories factories = builder.build(context, null);
    context.aggregations(new SearchContextAggregations(factories));
    Aggregator[] aggs = factories.createTopLevelAggregators();
    BucketCollector collector = BucketCollector.wrap(Arrays.asList(aggs));
    collector.preCollection();
    // A regular search always exclude nested docs, so we use NonNestedDocsFilter.INSTANCE here (otherwise MatchAllDocsQuery would be sufficient)
    // We exclude root doc with uid type#2, this will trigger the bug if we don't reset the root doc when we process a new segment, because
    // root doc type#3 and root doc type#1 have the same segment docid
    BooleanQuery.Builder bq = new BooleanQuery.Builder();
    bq.add(Queries.newNonNestedFilter(), Occur.MUST);
    bq.add(new TermQuery(new Term(UidFieldMapper.NAME, "type#2")), Occur.MUST_NOT);
    searcher.search(new ConstantScoreQuery(bq.build()), collector);
    collector.postCollection();
    Nested nested = (Nested) aggs[0].buildAggregation(0);
    // The bug manifests if 6 docs are returned, because currentRootDoc isn't reset the previous child docs from the first segment are emitted as hits.
    assertThat(nested.getDocCount(), equalTo(4L));
    directoryReader.close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) IndexService(org.elasticsearch.index.IndexService) ArrayList(java.util.ArrayList) SearchContext(org.elasticsearch.search.internal.SearchContext) Document(org.apache.lucene.document.Document) ShardId(org.elasticsearch.index.shard.ShardId) Field(org.apache.lucene.document.Field) CompressedXContent(org.elasticsearch.common.compress.CompressedXContent) AggregatorFactories(org.elasticsearch.search.aggregations.AggregatorFactories) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) Directory(org.apache.lucene.store.Directory) TermQuery(org.apache.lucene.search.TermQuery) ElasticsearchDirectoryReader(org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader) DirectoryReader(org.apache.lucene.index.DirectoryReader) SearchContextAggregations(org.elasticsearch.search.aggregations.SearchContextAggregations) Aggregator(org.elasticsearch.search.aggregations.Aggregator) Term(org.apache.lucene.index.Term) BucketCollector(org.elasticsearch.search.aggregations.BucketCollector) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 10 with Directory

use of org.apache.lucene.store.Directory in project elasticsearch by elastic.

the class ParentToChildrenAggregatorTests method testParentChild.

public void testParentChild() throws IOException {
    Directory directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
    final Map<String, Tuple<Integer, Integer>> expectedParentChildRelations = setupIndex(indexWriter);
    indexWriter.close();
    IndexReader indexReader = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(directory), new ShardId(new Index("foo", "_na_"), 1));
    // TODO set "maybeWrap" to true for IndexSearcher once #23338 is resolved
    IndexSearcher indexSearcher = newSearcher(indexReader, false, true);
    testCase(new MatchAllDocsQuery(), indexSearcher, child -> {
        int expectedTotalChildren = 0;
        int expectedMinValue = Integer.MAX_VALUE;
        for (Tuple<Integer, Integer> expectedValues : expectedParentChildRelations.values()) {
            expectedTotalChildren += expectedValues.v1();
            expectedMinValue = Math.min(expectedMinValue, expectedValues.v2());
        }
        assertEquals(expectedTotalChildren, child.getDocCount());
        assertEquals(expectedMinValue, ((InternalMin) child.getAggregations().get("in_child")).getValue(), Double.MIN_VALUE);
    });
    for (String parent : expectedParentChildRelations.keySet()) {
        testCase(new TermInSetQuery(UidFieldMapper.NAME, new BytesRef(Uid.createUid(PARENT_TYPE, parent))), indexSearcher, child -> {
            assertEquals((long) expectedParentChildRelations.get(parent).v1(), child.getDocCount());
            assertEquals(expectedParentChildRelations.get(parent).v2(), ((InternalMin) child.getAggregations().get("in_child")).getValue(), Double.MIN_VALUE);
        });
    }
    indexReader.close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Index(org.elasticsearch.index.Index) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) ShardId(org.elasticsearch.index.shard.ShardId) TermInSetQuery(org.apache.lucene.search.TermInSetQuery) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Tuple(org.elasticsearch.common.collect.Tuple) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Aggregations

Directory (org.apache.lucene.store.Directory)2188 Document (org.apache.lucene.document.Document)1374 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)816 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)669 IndexReader (org.apache.lucene.index.IndexReader)590 IndexSearcher (org.apache.lucene.search.IndexSearcher)381 BytesRef (org.apache.lucene.util.BytesRef)376 RAMDirectory (org.apache.lucene.store.RAMDirectory)360 Term (org.apache.lucene.index.Term)325 StringField (org.apache.lucene.document.StringField)313 IndexWriter (org.apache.lucene.index.IndexWriter)309 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)271 TextField (org.apache.lucene.document.TextField)259 Field (org.apache.lucene.document.Field)257 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)257 Test (org.junit.Test)236 FSDirectory (org.apache.lucene.store.FSDirectory)215 DirectoryReader (org.apache.lucene.index.DirectoryReader)193 Analyzer (org.apache.lucene.analysis.Analyzer)192 FieldType (org.apache.lucene.document.FieldType)173