Search in sources :

Example 76 with Directory

use of org.apache.lucene.store.Directory in project elasticsearch by elastic.

the class FiltersAggregatorTests method testKeyedFilter.

public void testKeyedFilter() throws Exception {
    Directory directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
    Document document = new Document();
    document.add(new Field("field", "foo", fieldType));
    indexWriter.addDocument(document);
    document.clear();
    document.add(new Field("field", "else", fieldType));
    indexWriter.addDocument(document);
    // make sure we have more than one segment to test the merge
    indexWriter.commit();
    document.add(new Field("field", "foo", fieldType));
    indexWriter.addDocument(document);
    document.clear();
    document.add(new Field("field", "bar", fieldType));
    indexWriter.addDocument(document);
    document.clear();
    document.add(new Field("field", "foobar", fieldType));
    indexWriter.addDocument(document);
    indexWriter.commit();
    document.clear();
    document.add(new Field("field", "something", fieldType));
    indexWriter.addDocument(document);
    indexWriter.commit();
    document.clear();
    document.add(new Field("field", "foobar", fieldType));
    indexWriter.addDocument(document);
    indexWriter.close();
    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
    FiltersAggregator.KeyedFilter[] keys = new FiltersAggregator.KeyedFilter[6];
    keys[0] = new FiltersAggregator.KeyedFilter("foobar", QueryBuilders.termQuery("field", "foobar"));
    keys[1] = new FiltersAggregator.KeyedFilter("bar", QueryBuilders.termQuery("field", "bar"));
    keys[2] = new FiltersAggregator.KeyedFilter("foo", QueryBuilders.termQuery("field", "foo"));
    keys[3] = new FiltersAggregator.KeyedFilter("foo2", QueryBuilders.termQuery("field", "foo"));
    keys[4] = new FiltersAggregator.KeyedFilter("same", QueryBuilders.termQuery("field", "foo"));
    // filter name already present so it should be merge with the previous one ?
    keys[5] = new FiltersAggregator.KeyedFilter("same", QueryBuilders.termQuery("field", "bar"));
    FiltersAggregationBuilder builder = new FiltersAggregationBuilder("test", keys);
    builder.otherBucket(true);
    builder.otherBucketKey("other");
    for (boolean doReduce : new boolean[] { true, false }) {
        final InternalFilters filters;
        if (doReduce) {
            filters = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
        } else {
            filters = search(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
        }
        assertEquals(filters.getBuckets().size(), 7);
        assertEquals(filters.getBucketByKey("foobar").getDocCount(), 2);
        assertEquals(filters.getBucketByKey("foo").getDocCount(), 2);
        assertEquals(filters.getBucketByKey("foo2").getDocCount(), 2);
        assertEquals(filters.getBucketByKey("bar").getDocCount(), 1);
        assertEquals(filters.getBucketByKey("same").getDocCount(), 1);
        assertEquals(filters.getBucketByKey("other").getDocCount(), 2);
    }
    indexReader.close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) FiltersAggregator(org.elasticsearch.search.aggregations.bucket.filters.FiltersAggregator) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) Field(org.apache.lucene.document.Field) InternalFilters(org.elasticsearch.search.aggregations.bucket.filters.InternalFilters) IndexReader(org.apache.lucene.index.IndexReader) FiltersAggregationBuilder(org.elasticsearch.search.aggregations.bucket.filters.FiltersAggregationBuilder) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 77 with Directory

use of org.apache.lucene.store.Directory in project elasticsearch by elastic.

the class FiltersAggregatorTests method testRandom.

public void testRandom() throws Exception {
    Directory directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
    int numDocs = randomIntBetween(100, 200);
    int maxTerm = randomIntBetween(10, 50);
    int[] expectedBucketCount = new int[maxTerm];
    Document document = new Document();
    for (int i = 0; i < numDocs; i++) {
        if (frequently()) {
            // make sure we have more than one segment to test the merge
            indexWriter.commit();
        }
        int value = randomInt(maxTerm - 1);
        expectedBucketCount[value] += 1;
        document.add(new Field("field", Integer.toString(value), fieldType));
        indexWriter.addDocument(document);
        document.clear();
    }
    indexWriter.close();
    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
    int numFilters = randomIntBetween(1, 10);
    QueryBuilder[] filters = new QueryBuilder[numFilters];
    int[] filterTerms = new int[numFilters];
    int expectedOtherCount = numDocs;
    Set<Integer> filterSet = new HashSet<>();
    for (int i = 0; i < filters.length; i++) {
        int value = randomInt(maxTerm - 1);
        filters[i] = QueryBuilders.termQuery("field", Integer.toString(value));
        filterTerms[i] = value;
        if (filterSet.contains(value) == false) {
            expectedOtherCount -= expectedBucketCount[value];
            filterSet.add(value);
        }
    }
    FiltersAggregationBuilder builder = new FiltersAggregationBuilder("test", filters);
    builder.otherBucket(true);
    builder.otherBucketKey("other");
    for (boolean doReduce : new boolean[] { true, false }) {
        final InternalFilters response;
        if (doReduce) {
            response = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
        } else {
            response = search(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
        }
        List<InternalFilters.InternalBucket> buckets = response.getBuckets();
        assertEquals(buckets.size(), filters.length + 1);
        for (InternalFilters.InternalBucket bucket : buckets) {
            if ("other".equals(bucket.getKey())) {
                assertEquals(bucket.getDocCount(), expectedOtherCount);
            } else {
                int index = Integer.parseInt(bucket.getKey());
                assertEquals(bucket.getDocCount(), (long) expectedBucketCount[filterTerms[index]]);
            }
        }
    }
    indexReader.close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) QueryBuilder(org.elasticsearch.index.query.QueryBuilder) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) Field(org.apache.lucene.document.Field) InternalFilters(org.elasticsearch.search.aggregations.bucket.filters.InternalFilters) IndexReader(org.apache.lucene.index.IndexReader) FiltersAggregationBuilder(org.elasticsearch.search.aggregations.bucket.filters.FiltersAggregationBuilder) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet)

Example 78 with Directory

use of org.apache.lucene.store.Directory in project elasticsearch by elastic.

the class DiversifiedSamplerTests method testDiversifiedSampler.

public void testDiversifiedSampler() throws Exception {
    String[] data = { // "id,cat,name,price,inStock,author_t,series_t,sequence_i,genre_s,genre_id",
    "0553573403,book,A Game of Thrones,7.99,true,George R.R. Martin,A Song of Ice and Fire,1,fantasy,0", "0553579908,book,A Clash of Kings,7.99,true,George R.R. Martin,A Song of Ice and Fire,2,fantasy,0", "055357342X,book,A Storm of Swords,7.99,true,George R.R. Martin,A Song of Ice and Fire,3,fantasy,0", "0553293354,book,Foundation,17.99,true,Isaac Asimov,Foundation Novels,1,scifi,1", "0812521390,book,The Black Company,6.99,false,Glen Cook,The Chronicles of The Black Company,1,fantasy,0", "0812550706,book,Ender's Game,6.99,true,Orson Scott Card,Ender,1,scifi,1", "0441385532,book,Jhereg,7.95,false,Steven Brust,Vlad Taltos,1,fantasy,0", "0380014300,book,Nine Princes In Amber,6.99,true,Roger Zelazny,the Chronicles of Amber,1,fantasy,0", "0805080481,book,The Book of Three,5.99,true,Lloyd Alexander,The Chronicles of Prydain,1,fantasy,0", "080508049X,book,The Black Cauldron,5.99,true,Lloyd Alexander,The Chronicles of Prydain,2,fantasy,0" };
    Directory directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
    for (String entry : data) {
        String[] parts = entry.split(",");
        Document document = new Document();
        document.add(new SortedDocValuesField("id", new BytesRef(parts[0])));
        document.add(new StringField("cat", parts[1], Field.Store.NO));
        document.add(new TextField("name", parts[2], Field.Store.NO));
        document.add(new DoubleDocValuesField("price", Double.valueOf(parts[3])));
        document.add(new StringField("inStock", parts[4], Field.Store.NO));
        document.add(new StringField("author", parts[5], Field.Store.NO));
        document.add(new StringField("series", parts[6], Field.Store.NO));
        document.add(new StringField("sequence", parts[7], Field.Store.NO));
        document.add(new SortedDocValuesField("genre", new BytesRef(parts[8])));
        document.add(new NumericDocValuesField("genre_id", Long.valueOf(parts[9])));
        indexWriter.addDocument(document);
    }
    indexWriter.close();
    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    MappedFieldType genreFieldType = new KeywordFieldMapper.KeywordFieldType();
    genreFieldType.setName("genre");
    genreFieldType.setHasDocValues(true);
    Consumer<InternalSampler> verify = result -> {
        Terms terms = result.getAggregations().get("terms");
        assertEquals(2, terms.getBuckets().size());
        assertEquals("0805080481", terms.getBuckets().get(0).getKeyAsString());
        assertEquals("0812550706", terms.getBuckets().get(1).getKeyAsString());
    };
    testCase(indexSearcher, genreFieldType, "map", verify);
    testCase(indexSearcher, genreFieldType, "global_ordinals", verify);
    testCase(indexSearcher, genreFieldType, "bytes_hash", verify);
    genreFieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG);
    genreFieldType.setName("genre_id");
    testCase(indexSearcher, genreFieldType, null, verify);
    // wrong field:
    genreFieldType = new KeywordFieldMapper.KeywordFieldType();
    genreFieldType.setName("wrong_field");
    genreFieldType.setHasDocValues(true);
    testCase(indexSearcher, genreFieldType, null, result -> {
        Terms terms = result.getAggregations().get("terms");
        assertEquals(1, terms.getBuckets().size());
        assertEquals("0805080481", terms.getBuckets().get(0).getKeyAsString());
    });
    indexReader.close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) StringField(org.apache.lucene.document.StringField) Index(org.elasticsearch.index.Index) Document(org.apache.lucene.document.Document) SortedNumericDVIndexFieldData(org.elasticsearch.index.fielddata.plain.SortedNumericDVIndexFieldData) MappedFieldType(org.elasticsearch.index.mapper.MappedFieldType) Directory(org.apache.lucene.store.Directory) TermsAggregationBuilder(org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder) FunctionScoreQuery(org.elasticsearch.common.lucene.search.function.FunctionScoreQuery) DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) FieldValueFactorFunction(org.elasticsearch.common.lucene.search.function.FieldValueFactorFunction) Terms(org.elasticsearch.search.aggregations.bucket.terms.Terms) DirectoryReader(org.apache.lucene.index.DirectoryReader) IOException(java.io.IOException) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) Consumer(java.util.function.Consumer) AggregatorTestCase(org.elasticsearch.search.aggregations.AggregatorTestCase) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) KeywordFieldMapper(org.elasticsearch.index.mapper.KeywordFieldMapper) Field(org.apache.lucene.document.Field) IndexNumericFieldData(org.elasticsearch.index.fielddata.IndexNumericFieldData) NumberFieldMapper(org.elasticsearch.index.mapper.NumberFieldMapper) TextField(org.apache.lucene.document.TextField) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IndexReader(org.apache.lucene.index.IndexReader) IndexSearcher(org.apache.lucene.search.IndexSearcher) NumberFieldMapper(org.elasticsearch.index.mapper.NumberFieldMapper) Terms(org.elasticsearch.search.aggregations.bucket.terms.Terms) Document(org.apache.lucene.document.Document) KeywordFieldMapper(org.elasticsearch.index.mapper.KeywordFieldMapper) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) StringField(org.apache.lucene.document.StringField) DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) IndexReader(org.apache.lucene.index.IndexReader) MappedFieldType(org.elasticsearch.index.mapper.MappedFieldType) TextField(org.apache.lucene.document.TextField) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 79 with Directory

use of org.apache.lucene.store.Directory in project elasticsearch by elastic.

the class HistogramAggregatorTests method testMinDocCount.

public void testMinDocCount() throws Exception {
    try (Directory dir = newDirectory();
        RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
        for (long value : new long[] { 7, 3, -10, -6, 5, 50 }) {
            Document doc = new Document();
            doc.add(new SortedNumericDocValuesField("field", value));
            w.addDocument(doc);
        }
        HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg").field("field").interval(10).minDocCount(2);
        MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG);
        fieldType.setName("field");
        try (IndexReader reader = w.getReader()) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Histogram histogram = searchAndReduce(searcher, new MatchAllDocsQuery(), aggBuilder, fieldType);
            assertEquals(2, histogram.getBuckets().size());
            assertEquals(-10d, histogram.getBuckets().get(0).getKey());
            assertEquals(2, histogram.getBuckets().get(0).getDocCount());
            assertEquals(0d, histogram.getBuckets().get(1).getKey());
            assertEquals(3, histogram.getBuckets().get(1).getDocCount());
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) MappedFieldType(org.elasticsearch.index.mapper.MappedFieldType) IndexReader(org.apache.lucene.index.IndexReader) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 80 with Directory

use of org.apache.lucene.store.Directory in project elasticsearch by elastic.

the class HistogramAggregatorTests method testDoubles.

public void testDoubles() throws Exception {
    try (Directory dir = newDirectory();
        RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
        for (double value : new double[] { 9.3, 3.2, -10, -6.5, 5.3, 50.1 }) {
            Document doc = new Document();
            doc.add(new SortedNumericDocValuesField("field", NumericUtils.doubleToSortableLong(value)));
            w.addDocument(doc);
        }
        HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg").field("field").interval(5);
        MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.DOUBLE);
        fieldType.setName("field");
        try (IndexReader reader = w.getReader()) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Histogram histogram = search(searcher, new MatchAllDocsQuery(), aggBuilder, fieldType);
            assertEquals(4, histogram.getBuckets().size());
            assertEquals(-10d, histogram.getBuckets().get(0).getKey());
            assertEquals(2, histogram.getBuckets().get(0).getDocCount());
            assertEquals(0d, histogram.getBuckets().get(1).getKey());
            assertEquals(1, histogram.getBuckets().get(1).getDocCount());
            assertEquals(5d, histogram.getBuckets().get(2).getKey());
            assertEquals(2, histogram.getBuckets().get(2).getDocCount());
            assertEquals(50d, histogram.getBuckets().get(3).getKey());
            assertEquals(1, histogram.getBuckets().get(3).getDocCount());
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) MappedFieldType(org.elasticsearch.index.mapper.MappedFieldType) IndexReader(org.apache.lucene.index.IndexReader) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Aggregations

Directory (org.apache.lucene.store.Directory)2194 Document (org.apache.lucene.document.Document)1374 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)816 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)669 IndexReader (org.apache.lucene.index.IndexReader)590 IndexSearcher (org.apache.lucene.search.IndexSearcher)383 BytesRef (org.apache.lucene.util.BytesRef)376 RAMDirectory (org.apache.lucene.store.RAMDirectory)360 Term (org.apache.lucene.index.Term)325 StringField (org.apache.lucene.document.StringField)313 IndexWriter (org.apache.lucene.index.IndexWriter)312 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)274 TextField (org.apache.lucene.document.TextField)259 Field (org.apache.lucene.document.Field)257 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)257 Test (org.junit.Test)237 FSDirectory (org.apache.lucene.store.FSDirectory)221 Analyzer (org.apache.lucene.analysis.Analyzer)193 DirectoryReader (org.apache.lucene.index.DirectoryReader)193 TopDocs (org.apache.lucene.search.TopDocs)174