Search in sources :

Example 31 with IndexReader

use of org.apache.lucene.index.IndexReader in project elasticsearch by elastic.

the class FiltersAggregatorTests method testKeyedFilter.

public void testKeyedFilter() throws Exception {
    Directory directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
    Document document = new Document();
    document.add(new Field("field", "foo", fieldType));
    indexWriter.addDocument(document);
    document.clear();
    document.add(new Field("field", "else", fieldType));
    indexWriter.addDocument(document);
    // make sure we have more than one segment to test the merge
    indexWriter.commit();
    document.add(new Field("field", "foo", fieldType));
    indexWriter.addDocument(document);
    document.clear();
    document.add(new Field("field", "bar", fieldType));
    indexWriter.addDocument(document);
    document.clear();
    document.add(new Field("field", "foobar", fieldType));
    indexWriter.addDocument(document);
    indexWriter.commit();
    document.clear();
    document.add(new Field("field", "something", fieldType));
    indexWriter.addDocument(document);
    indexWriter.commit();
    document.clear();
    document.add(new Field("field", "foobar", fieldType));
    indexWriter.addDocument(document);
    indexWriter.close();
    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
    FiltersAggregator.KeyedFilter[] keys = new FiltersAggregator.KeyedFilter[6];
    keys[0] = new FiltersAggregator.KeyedFilter("foobar", QueryBuilders.termQuery("field", "foobar"));
    keys[1] = new FiltersAggregator.KeyedFilter("bar", QueryBuilders.termQuery("field", "bar"));
    keys[2] = new FiltersAggregator.KeyedFilter("foo", QueryBuilders.termQuery("field", "foo"));
    keys[3] = new FiltersAggregator.KeyedFilter("foo2", QueryBuilders.termQuery("field", "foo"));
    keys[4] = new FiltersAggregator.KeyedFilter("same", QueryBuilders.termQuery("field", "foo"));
    // filter name already present so it should be merge with the previous one ?
    keys[5] = new FiltersAggregator.KeyedFilter("same", QueryBuilders.termQuery("field", "bar"));
    FiltersAggregationBuilder builder = new FiltersAggregationBuilder("test", keys);
    builder.otherBucket(true);
    builder.otherBucketKey("other");
    for (boolean doReduce : new boolean[] { true, false }) {
        final InternalFilters filters;
        if (doReduce) {
            filters = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
        } else {
            filters = search(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
        }
        assertEquals(filters.getBuckets().size(), 7);
        assertEquals(filters.getBucketByKey("foobar").getDocCount(), 2);
        assertEquals(filters.getBucketByKey("foo").getDocCount(), 2);
        assertEquals(filters.getBucketByKey("foo2").getDocCount(), 2);
        assertEquals(filters.getBucketByKey("bar").getDocCount(), 1);
        assertEquals(filters.getBucketByKey("same").getDocCount(), 1);
        assertEquals(filters.getBucketByKey("other").getDocCount(), 2);
    }
    indexReader.close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) FiltersAggregator(org.elasticsearch.search.aggregations.bucket.filters.FiltersAggregator) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) Field(org.apache.lucene.document.Field) InternalFilters(org.elasticsearch.search.aggregations.bucket.filters.InternalFilters) IndexReader(org.apache.lucene.index.IndexReader) FiltersAggregationBuilder(org.elasticsearch.search.aggregations.bucket.filters.FiltersAggregationBuilder) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 32 with IndexReader

use of org.apache.lucene.index.IndexReader in project elasticsearch by elastic.

the class FiltersAggregatorTests method testRandom.

public void testRandom() throws Exception {
    Directory directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
    int numDocs = randomIntBetween(100, 200);
    int maxTerm = randomIntBetween(10, 50);
    int[] expectedBucketCount = new int[maxTerm];
    Document document = new Document();
    for (int i = 0; i < numDocs; i++) {
        if (frequently()) {
            // make sure we have more than one segment to test the merge
            indexWriter.commit();
        }
        int value = randomInt(maxTerm - 1);
        expectedBucketCount[value] += 1;
        document.add(new Field("field", Integer.toString(value), fieldType));
        indexWriter.addDocument(document);
        document.clear();
    }
    indexWriter.close();
    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
    int numFilters = randomIntBetween(1, 10);
    QueryBuilder[] filters = new QueryBuilder[numFilters];
    int[] filterTerms = new int[numFilters];
    int expectedOtherCount = numDocs;
    Set<Integer> filterSet = new HashSet<>();
    for (int i = 0; i < filters.length; i++) {
        int value = randomInt(maxTerm - 1);
        filters[i] = QueryBuilders.termQuery("field", Integer.toString(value));
        filterTerms[i] = value;
        if (filterSet.contains(value) == false) {
            expectedOtherCount -= expectedBucketCount[value];
            filterSet.add(value);
        }
    }
    FiltersAggregationBuilder builder = new FiltersAggregationBuilder("test", filters);
    builder.otherBucket(true);
    builder.otherBucketKey("other");
    for (boolean doReduce : new boolean[] { true, false }) {
        final InternalFilters response;
        if (doReduce) {
            response = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
        } else {
            response = search(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
        }
        List<InternalFilters.InternalBucket> buckets = response.getBuckets();
        assertEquals(buckets.size(), filters.length + 1);
        for (InternalFilters.InternalBucket bucket : buckets) {
            if ("other".equals(bucket.getKey())) {
                assertEquals(bucket.getDocCount(), expectedOtherCount);
            } else {
                int index = Integer.parseInt(bucket.getKey());
                assertEquals(bucket.getDocCount(), (long) expectedBucketCount[filterTerms[index]]);
            }
        }
    }
    indexReader.close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) QueryBuilder(org.elasticsearch.index.query.QueryBuilder) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) Field(org.apache.lucene.document.Field) InternalFilters(org.elasticsearch.search.aggregations.bucket.filters.InternalFilters) IndexReader(org.apache.lucene.index.IndexReader) FiltersAggregationBuilder(org.elasticsearch.search.aggregations.bucket.filters.FiltersAggregationBuilder) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet)

Example 33 with IndexReader

use of org.apache.lucene.index.IndexReader in project elasticsearch by elastic.

the class DiversifiedSamplerTests method testDiversifiedSampler.

public void testDiversifiedSampler() throws Exception {
    String[] data = { // "id,cat,name,price,inStock,author_t,series_t,sequence_i,genre_s,genre_id",
    "0553573403,book,A Game of Thrones,7.99,true,George R.R. Martin,A Song of Ice and Fire,1,fantasy,0", "0553579908,book,A Clash of Kings,7.99,true,George R.R. Martin,A Song of Ice and Fire,2,fantasy,0", "055357342X,book,A Storm of Swords,7.99,true,George R.R. Martin,A Song of Ice and Fire,3,fantasy,0", "0553293354,book,Foundation,17.99,true,Isaac Asimov,Foundation Novels,1,scifi,1", "0812521390,book,The Black Company,6.99,false,Glen Cook,The Chronicles of The Black Company,1,fantasy,0", "0812550706,book,Ender's Game,6.99,true,Orson Scott Card,Ender,1,scifi,1", "0441385532,book,Jhereg,7.95,false,Steven Brust,Vlad Taltos,1,fantasy,0", "0380014300,book,Nine Princes In Amber,6.99,true,Roger Zelazny,the Chronicles of Amber,1,fantasy,0", "0805080481,book,The Book of Three,5.99,true,Lloyd Alexander,The Chronicles of Prydain,1,fantasy,0", "080508049X,book,The Black Cauldron,5.99,true,Lloyd Alexander,The Chronicles of Prydain,2,fantasy,0" };
    Directory directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
    for (String entry : data) {
        String[] parts = entry.split(",");
        Document document = new Document();
        document.add(new SortedDocValuesField("id", new BytesRef(parts[0])));
        document.add(new StringField("cat", parts[1], Field.Store.NO));
        document.add(new TextField("name", parts[2], Field.Store.NO));
        document.add(new DoubleDocValuesField("price", Double.valueOf(parts[3])));
        document.add(new StringField("inStock", parts[4], Field.Store.NO));
        document.add(new StringField("author", parts[5], Field.Store.NO));
        document.add(new StringField("series", parts[6], Field.Store.NO));
        document.add(new StringField("sequence", parts[7], Field.Store.NO));
        document.add(new SortedDocValuesField("genre", new BytesRef(parts[8])));
        document.add(new NumericDocValuesField("genre_id", Long.valueOf(parts[9])));
        indexWriter.addDocument(document);
    }
    indexWriter.close();
    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    MappedFieldType genreFieldType = new KeywordFieldMapper.KeywordFieldType();
    genreFieldType.setName("genre");
    genreFieldType.setHasDocValues(true);
    Consumer<InternalSampler> verify = result -> {
        Terms terms = result.getAggregations().get("terms");
        assertEquals(2, terms.getBuckets().size());
        assertEquals("0805080481", terms.getBuckets().get(0).getKeyAsString());
        assertEquals("0812550706", terms.getBuckets().get(1).getKeyAsString());
    };
    testCase(indexSearcher, genreFieldType, "map", verify);
    testCase(indexSearcher, genreFieldType, "global_ordinals", verify);
    testCase(indexSearcher, genreFieldType, "bytes_hash", verify);
    genreFieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG);
    genreFieldType.setName("genre_id");
    testCase(indexSearcher, genreFieldType, null, verify);
    // wrong field:
    genreFieldType = new KeywordFieldMapper.KeywordFieldType();
    genreFieldType.setName("wrong_field");
    genreFieldType.setHasDocValues(true);
    testCase(indexSearcher, genreFieldType, null, result -> {
        Terms terms = result.getAggregations().get("terms");
        assertEquals(1, terms.getBuckets().size());
        assertEquals("0805080481", terms.getBuckets().get(0).getKeyAsString());
    });
    indexReader.close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) StringField(org.apache.lucene.document.StringField) Index(org.elasticsearch.index.Index) Document(org.apache.lucene.document.Document) SortedNumericDVIndexFieldData(org.elasticsearch.index.fielddata.plain.SortedNumericDVIndexFieldData) MappedFieldType(org.elasticsearch.index.mapper.MappedFieldType) Directory(org.apache.lucene.store.Directory) TermsAggregationBuilder(org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder) FunctionScoreQuery(org.elasticsearch.common.lucene.search.function.FunctionScoreQuery) DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) FieldValueFactorFunction(org.elasticsearch.common.lucene.search.function.FieldValueFactorFunction) Terms(org.elasticsearch.search.aggregations.bucket.terms.Terms) DirectoryReader(org.apache.lucene.index.DirectoryReader) IOException(java.io.IOException) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) Consumer(java.util.function.Consumer) AggregatorTestCase(org.elasticsearch.search.aggregations.AggregatorTestCase) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) KeywordFieldMapper(org.elasticsearch.index.mapper.KeywordFieldMapper) Field(org.apache.lucene.document.Field) IndexNumericFieldData(org.elasticsearch.index.fielddata.IndexNumericFieldData) NumberFieldMapper(org.elasticsearch.index.mapper.NumberFieldMapper) TextField(org.apache.lucene.document.TextField) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IndexReader(org.apache.lucene.index.IndexReader) IndexSearcher(org.apache.lucene.search.IndexSearcher) NumberFieldMapper(org.elasticsearch.index.mapper.NumberFieldMapper) Terms(org.elasticsearch.search.aggregations.bucket.terms.Terms) Document(org.apache.lucene.document.Document) KeywordFieldMapper(org.elasticsearch.index.mapper.KeywordFieldMapper) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) StringField(org.apache.lucene.document.StringField) DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) IndexReader(org.apache.lucene.index.IndexReader) MappedFieldType(org.elasticsearch.index.mapper.MappedFieldType) TextField(org.apache.lucene.document.TextField) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 34 with IndexReader

use of org.apache.lucene.index.IndexReader in project elasticsearch by elastic.

the class HistogramAggregatorTests method testMinDocCount.

public void testMinDocCount() throws Exception {
    try (Directory dir = newDirectory();
        RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
        for (long value : new long[] { 7, 3, -10, -6, 5, 50 }) {
            Document doc = new Document();
            doc.add(new SortedNumericDocValuesField("field", value));
            w.addDocument(doc);
        }
        HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg").field("field").interval(10).minDocCount(2);
        MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG);
        fieldType.setName("field");
        try (IndexReader reader = w.getReader()) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Histogram histogram = searchAndReduce(searcher, new MatchAllDocsQuery(), aggBuilder, fieldType);
            assertEquals(2, histogram.getBuckets().size());
            assertEquals(-10d, histogram.getBuckets().get(0).getKey());
            assertEquals(2, histogram.getBuckets().get(0).getDocCount());
            assertEquals(0d, histogram.getBuckets().get(1).getKey());
            assertEquals(3, histogram.getBuckets().get(1).getDocCount());
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) MappedFieldType(org.elasticsearch.index.mapper.MappedFieldType) IndexReader(org.apache.lucene.index.IndexReader) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 35 with IndexReader

use of org.apache.lucene.index.IndexReader in project elasticsearch by elastic.

the class HistogramAggregatorTests method testDoubles.

public void testDoubles() throws Exception {
    try (Directory dir = newDirectory();
        RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
        for (double value : new double[] { 9.3, 3.2, -10, -6.5, 5.3, 50.1 }) {
            Document doc = new Document();
            doc.add(new SortedNumericDocValuesField("field", NumericUtils.doubleToSortableLong(value)));
            w.addDocument(doc);
        }
        HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg").field("field").interval(5);
        MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.DOUBLE);
        fieldType.setName("field");
        try (IndexReader reader = w.getReader()) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Histogram histogram = search(searcher, new MatchAllDocsQuery(), aggBuilder, fieldType);
            assertEquals(4, histogram.getBuckets().size());
            assertEquals(-10d, histogram.getBuckets().get(0).getKey());
            assertEquals(2, histogram.getBuckets().get(0).getDocCount());
            assertEquals(0d, histogram.getBuckets().get(1).getKey());
            assertEquals(1, histogram.getBuckets().get(1).getDocCount());
            assertEquals(5d, histogram.getBuckets().get(2).getKey());
            assertEquals(2, histogram.getBuckets().get(2).getDocCount());
            assertEquals(50d, histogram.getBuckets().get(3).getKey());
            assertEquals(1, histogram.getBuckets().get(3).getDocCount());
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) MappedFieldType(org.elasticsearch.index.mapper.MappedFieldType) IndexReader(org.apache.lucene.index.IndexReader) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Aggregations

IndexReader (org.apache.lucene.index.IndexReader)962 Document (org.apache.lucene.document.Document)610 Directory (org.apache.lucene.store.Directory)603 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)549 IndexSearcher (org.apache.lucene.search.IndexSearcher)410 Term (org.apache.lucene.index.Term)332 TopDocs (org.apache.lucene.search.TopDocs)204 TermQuery (org.apache.lucene.search.TermQuery)160 Query (org.apache.lucene.search.Query)158 IndexWriter (org.apache.lucene.index.IndexWriter)150 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)144 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)143 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)142 Field (org.apache.lucene.document.Field)135 BytesRef (org.apache.lucene.util.BytesRef)134 IOException (java.io.IOException)133 BooleanQuery (org.apache.lucene.search.BooleanQuery)122 ArrayList (java.util.ArrayList)108 TextField (org.apache.lucene.document.TextField)81 Test (org.junit.Test)81