Examples with RandomIndexWriter - org.apache.lucene.index.RandomIndexWriter

Example 6 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project elasticsearch by elastic.

The class NestedAggregatorTests, method testResetRootDocId.

/**
 * Checks that the nested aggregator resets its current root document when it
 * moves on to a new segment.
 *
 * The index is written as two commits with merging disabled. The first commit
 * holds one root document (uid {@code type#1}) preceded by three nested
 * sub-documents. The second commit holds two root documents (uids
 * {@code type#2} and {@code type#3}), each preceded by one nested
 * sub-document. The search excludes root {@code type#2}, so the expected
 * nested doc count is 3 + 1 = 4.
 */
public void testResetRootDocId() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig writerConfig = new IndexWriterConfig(null);
    // With NoMergePolicy the writer never merges the segments it produces.
    writerConfig.setMergePolicy(NoMergePolicy.INSTANCE);
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, writerConfig);

    // First commit: 1 root document with 3 nested sub docs (nested docs come first, root last).
    List<Document> firstBlock = new ArrayList<>();
    for (int i = 0; i < 3; i++) {
        Document nestedDoc = new Document();
        nestedDoc.add(new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
        nestedDoc.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
        firstBlock.add(nestedDoc);
    }
    Document firstRoot = new Document();
    firstRoot.add(new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.FIELD_TYPE));
    firstRoot.add(new Field(TypeFieldMapper.NAME, "test", TypeFieldMapper.Defaults.FIELD_TYPE));
    firstBlock.add(firstRoot);
    writer.addDocuments(firstBlock);
    writer.commit();

    // Second commit: two blocks, each 1 root document with 1 nested sub doc.
    for (String uid : new String[] { "type#2", "type#3" }) {
        List<Document> block = new ArrayList<>();
        Document nestedDoc = new Document();
        nestedDoc.add(new Field(UidFieldMapper.NAME, uid, UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
        nestedDoc.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
        block.add(nestedDoc);
        Document rootDoc = new Document();
        rootDoc.add(new Field(UidFieldMapper.NAME, uid, UidFieldMapper.Defaults.FIELD_TYPE));
        rootDoc.add(new Field(TypeFieldMapper.NAME, "test", TypeFieldMapper.Defaults.FIELD_TYPE));
        block.add(rootDoc);
        writer.addDocuments(block);
    }
    writer.commit();
    writer.close();

    IndexService indexService = createIndex("test");
    DirectoryReader reader = ElasticsearchDirectoryReader.wrap(
        DirectoryReader.open(dir),
        new ShardId(indexService.index(), 0)
    );
    IndexSearcher searcher = new IndexSearcher(reader);

    // Register "nested_field" as a nested field on type "test".
    CompressedXContent nestedMapping = new CompressedXContent(
        PutMappingRequest.buildFromSimplifiedDef("test", "nested_field", "type=nested").string()
    );
    indexService.mapperService().merge("test", nestedMapping, MapperService.MergeReason.MAPPING_UPDATE, false);

    SearchContext context = createSearchContext(indexService);
    AggregatorFactories.Builder factoriesBuilder = AggregatorFactories.builder();
    factoriesBuilder.addAggregator(new NestedAggregationBuilder("test", "nested_field"));
    AggregatorFactories factories = factoriesBuilder.build(context, null);
    context.aggregations(new SearchContextAggregations(factories));
    Aggregator[] aggs = factories.createTopLevelAggregators();
    BucketCollector collector = BucketCollector.wrap(Arrays.asList(aggs));
    collector.preCollection();

    // A regular search always excludes nested docs, hence the non-nested filter
    // (otherwise MatchAllDocsQuery would be sufficient).
    // Root doc type#2 is excluded: this triggers the bug if the root doc is not reset when a
    // new segment is processed, because root doc type#3 and root doc type#1 have the same
    // segment docid.
    BooleanQuery.Builder rootsExceptType2 = new BooleanQuery.Builder();
    rootsExceptType2.add(Queries.newNonNestedFilter(), Occur.MUST);
    rootsExceptType2.add(new TermQuery(new Term(UidFieldMapper.NAME, "type#2")), Occur.MUST_NOT);
    searcher.search(new ConstantScoreQuery(rootsExceptType2.build()), collector);
    collector.postCollection();

    Nested nested = (Nested) aggs[0].buildAggregation(0);
    // The bug manifests as 6 docs being returned: when currentRootDoc isn't reset, the child
    // docs of the first segment are emitted as hits a second time.
    assertThat(nested.getDocCount(), equalTo(4L));

    reader.close();
    dir.close();
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) IndexService(org.elasticsearch.index.IndexService) ArrayList(java.util.ArrayList) SearchContext(org.elasticsearch.search.internal.SearchContext) Document(org.apache.lucene.document.Document) ShardId(org.elasticsearch.index.shard.ShardId) Field(org.apache.lucene.document.Field) CompressedXContent(org.elasticsearch.common.compress.CompressedXContent) AggregatorFactories(org.elasticsearch.search.aggregations.AggregatorFactories) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) Directory(org.apache.lucene.store.Directory) TermQuery(org.apache.lucene.search.TermQuery) ElasticsearchDirectoryReader(org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader) DirectoryReader(org.apache.lucene.index.DirectoryReader) SearchContextAggregations(org.elasticsearch.search.aggregations.SearchContextAggregations) Aggregator(org.elasticsearch.search.aggregations.Aggregator) Term(org.apache.lucene.index.Term) BucketCollector(org.elasticsearch.search.aggregations.BucketCollector) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 7 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project elasticsearch by elastic.

The class ParentToChildrenAggregatorTests, method testParentChild.

/**
 * Indexes parent/child documents via {@code setupIndex} and verifies the
 * aggregation result twice: once over all documents, and once per parent.
 *
 * Each value of the map returned by {@code setupIndex} is compared as
 * follows: {@code v1} against the doc count, and {@code v2} against the
 * value of the {@code "in_child"} min sub-aggregation.
 */
public void testParentChild() throws IOException {
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    final Map<String, Tuple<Integer, Integer>> expectedByParent = setupIndex(writer);
    writer.close();

    DirectoryReader unwrapped = DirectoryReader.open(dir);
    IndexReader reader = ElasticsearchDirectoryReader.wrap(unwrapped, new ShardId(new Index("foo", "_na_"), 1));
    // TODO set "maybeWrap" to true for IndexSearcher once #23338 is resolved
    IndexSearcher searcher = newSearcher(reader, false, true);

    // Across all parents: doc count is the sum of v1, min is the smallest v2.
    testCase(new MatchAllDocsQuery(), searcher, child -> {
        int totalChildren = expectedByParent.values().stream().mapToInt(Tuple::v1).sum();
        int smallestValue = expectedByParent.values().stream().mapToInt(Tuple::v2).min().orElse(Integer.MAX_VALUE);
        assertEquals(totalChildren, child.getDocCount());
        InternalMin min = (InternalMin) child.getAggregations().get("in_child");
        assertEquals(smallestValue, min.getValue(), Double.MIN_VALUE);
    });

    // Restricted to one parent at a time: only that parent's expectations apply.
    for (Map.Entry<String, Tuple<Integer, Integer>> entry : expectedByParent.entrySet()) {
        Tuple<Integer, Integer> expected = entry.getValue();
        BytesRef parentUid = new BytesRef(Uid.createUid(PARENT_TYPE, entry.getKey()));
        testCase(new TermInSetQuery(UidFieldMapper.NAME, parentUid), searcher, child -> {
            assertEquals((long) expected.v1(), child.getDocCount());
            InternalMin min = (InternalMin) child.getAggregations().get("in_child");
            assertEquals(expected.v2(), min.getValue(), Double.MIN_VALUE);
        });
    }

    reader.close();
    dir.close();
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Index(org.elasticsearch.index.Index) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) ShardId(org.elasticsearch.index.shard.ShardId) TermInSetQuery(org.apache.lucene.search.TermInSetQuery) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Tuple(org.elasticsearch.common.collect.Tuple) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 8 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project elasticsearch by elastic.

The class TermsAggregatorTests, method createIndexWithLongs.

/**
 * Builds a three-document index in which every document carries two values
 * for the sorted-numeric doc-values field {@code "number"}:
 * {10, 100}, {1, 100} and {10, 1000}.
 *
 * @return a reader opened on the new directory; neither the reader nor the
 *         directory is closed by this method
 */
private IndexReader createIndexWithLongs() throws IOException {
    // One row per document, in insertion order.
    final long[][] valuesPerDocument = { { 10, 100 }, { 1, 100 }, { 10, 1000 } };

    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    for (long[] values : valuesPerDocument) {
        Document doc = new Document();
        for (long value : values) {
            doc.add(new SortedNumericDocValuesField("number", value));
        }
        writer.addDocument(doc);
    }
    writer.close();
    return DirectoryReader.open(dir);
}

Also used : SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 9 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project elasticsearch by elastic.

The class TermsAggregatorTests, method createIndexWithDoubles.

/**
 * Builds a three-document index in which every document carries two values
 * for the sorted-numeric doc-values field {@code "number"}:
 * {10.0, 100.0}, {1.0, 100.0} and {10.0, 1000.0}. Each double is stored as
 * the long produced by {@code NumericUtils.doubleToSortableLong}.
 *
 * @return a reader opened on the new directory; neither the reader nor the
 *         directory is closed by this method
 */
private IndexReader createIndexWithDoubles() throws IOException {
    // One row per document, in insertion order.
    final double[][] valuesPerDocument = { { 10.0d, 100.0d }, { 1.0d, 100.0d }, { 10.0d, 1000.0d } };

    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    for (double[] values : valuesPerDocument) {
        Document doc = new Document();
        for (double value : values) {
            doc.add(new SortedNumericDocValuesField("number", NumericUtils.doubleToSortableLong(value)));
        }
        writer.addDocument(doc);
    }
    writer.close();
    return DirectoryReader.open(dir);
}

Example 10 with RandomIndexWriter

use of org.apache.lucene.index.RandomIndexWriter in project elasticsearch by elastic.

The class MaxAggregatorTests, method testCase.

/**
 * Runs a max aggregation on the {@code "number"} field (mapped as a long)
 * over an index populated by {@code buildIndex}, then hands the result to
 * {@code verify}.
 *
 * The directory, writer, reader and aggregator are all managed with
 * try-with-resources. They are therefore released even when
 * {@code buildIndex}, the search, or an assertion inside {@code verify}
 * throws, instead of leaking and obscuring the original failure.
 *
 * @param query      query whose matching documents are fed to the aggregator
 * @param buildIndex callback that adds the test documents to the writer
 * @param verify     callback that asserts on the aggregation result
 * @throws IOException if indexing, searching or closing a resource fails
 */
private void testCase(Query query, CheckedConsumer<RandomIndexWriter, IOException> buildIndex, Consumer<InternalMax> verify) throws IOException {
    try (Directory directory = newDirectory()) {
        // The writer must be closed before the reader is opened on the directory.
        try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
            buildIndex.accept(indexWriter);
        }
        try (IndexReader indexReader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
            MaxAggregationBuilder aggregationBuilder = new MaxAggregationBuilder("_name").field("number");
            MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG);
            fieldType.setName("number");
            try (MaxAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType)) {
                aggregator.preCollection();
                indexSearcher.search(query, aggregator);
                aggregator.postCollection();
                verify.accept((InternalMax) aggregator.buildAggregation(0L));
            }
        }
    }
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) MaxAggregator(org.elasticsearch.search.aggregations.metrics.max.MaxAggregator) MaxAggregationBuilder(org.elasticsearch.search.aggregations.metrics.max.MaxAggregationBuilder) IndexReader(org.apache.lucene.index.IndexReader) MappedFieldType(org.elasticsearch.index.mapper.MappedFieldType) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Aggregations

RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)775 Document (org.apache.lucene.document.Document)675 Directory (org.apache.lucene.store.Directory)584 IndexReader (org.apache.lucene.index.IndexReader)508 Term (org.apache.lucene.index.Term)324 IndexSearcher (org.apache.lucene.search.IndexSearcher)294 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)220 BytesRef (org.apache.lucene.util.BytesRef)142 Field (org.apache.lucene.document.Field)140 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)136 TopDocs (org.apache.lucene.search.TopDocs)134 TermQuery (org.apache.lucene.search.TermQuery)121 DirectoryReader (org.apache.lucene.index.DirectoryReader)119 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)110 ArrayList (java.util.ArrayList)91 StringField (org.apache.lucene.document.StringField)89 Analyzer (org.apache.lucene.analysis.Analyzer)88 BooleanQuery (org.apache.lucene.search.BooleanQuery)88 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)76 Query (org.apache.lucene.search.Query)73