Use of org.apache.lucene.store.Directory in project elasticsearch by elastic.
Class FiltersAggregatorTests, method testKeyedFilter:
public void testKeyedFilter() throws Exception {
    Directory directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
    Document document = new Document();
    document.add(new Field("field", "foo", fieldType));
    indexWriter.addDocument(document);
    document.clear();
    document.add(new Field("field", "else", fieldType));
    indexWriter.addDocument(document);
    // make sure we have more than one segment to test the merge
    indexWriter.commit();
    // note: the document is not cleared here, so this third document carries both
    // the previous "else" value and the new "foo" value; it still matches the
    // "foo" filter, so the assertions below are unaffected
    document.add(new Field("field", "foo", fieldType));
    indexWriter.addDocument(document);
    document.clear();
    document.add(new Field("field", "bar", fieldType));
    indexWriter.addDocument(document);
    document.clear();
    document.add(new Field("field", "foobar", fieldType));
    indexWriter.addDocument(document);
    indexWriter.commit();
    document.clear();
    document.add(new Field("field", "something", fieldType));
    indexWriter.addDocument(document);
    indexWriter.commit();
    document.clear();
    document.add(new Field("field", "foobar", fieldType));
    indexWriter.addDocument(document);
    indexWriter.close();
    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
    FiltersAggregator.KeyedFilter[] keys = new FiltersAggregator.KeyedFilter[6];
    keys[0] = new FiltersAggregator.KeyedFilter("foobar", QueryBuilders.termQuery("field", "foobar"));
    keys[1] = new FiltersAggregator.KeyedFilter("bar", QueryBuilders.termQuery("field", "bar"));
    keys[2] = new FiltersAggregator.KeyedFilter("foo", QueryBuilders.termQuery("field", "foo"));
    keys[3] = new FiltersAggregator.KeyedFilter("foo2", QueryBuilders.termQuery("field", "foo"));
    keys[4] = new FiltersAggregator.KeyedFilter("same", QueryBuilders.termQuery("field", "foo"));
    // the key "same" is used twice; judging by the assertions below, both filters
    // are kept as separate buckets (7 in total) and getBucketByKey returns the
    // bucket of the last filter with that key (doc count 1, i.e. the "bar" filter)
    keys[5] = new FiltersAggregator.KeyedFilter("same", QueryBuilders.termQuery("field", "bar"));
    FiltersAggregationBuilder builder = new FiltersAggregationBuilder("test", keys);
    builder.otherBucket(true);
    builder.otherBucketKey("other");
    // run the aggregation both with and without a final reduce
    for (boolean doReduce : new boolean[] { true, false }) {
        final InternalFilters filters;
        if (doReduce) {
            filters = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
        } else {
            filters = search(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
        }
        assertEquals(7, filters.getBuckets().size());
        assertEquals(2, filters.getBucketByKey("foobar").getDocCount());
        assertEquals(2, filters.getBucketByKey("foo").getDocCount());
        assertEquals(2, filters.getBucketByKey("foo2").getDocCount());
        assertEquals(1, filters.getBucketByKey("bar").getDocCount());
        assertEquals(1, filters.getBucketByKey("same").getDocCount());
        assertEquals(2, filters.getBucketByKey("other").getDocCount());
    }
    indexReader.close();
    directory.close();
}
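The expected counts follow directly from the seven indexed documents: "foo" appears in two documents, "bar" in one, "foobar" in two, and the remaining two documents ("else" and "something") match no filter and land in the "other" bucket. As a sanity-check sketch (not part of the original test), the raw term frequencies could be verified against the same searcher with plain Lucene queries, assuming org.apache.lucene.index.Term and org.apache.lucene.search.TermQuery are imported:

    // Hypothetical sanity check of the term frequencies the assertions rely on;
    // IndexSearcher.count(Query) returns the number of matching documents.
    assertEquals(2, indexSearcher.count(new TermQuery(new Term("field", "foo"))));
    assertEquals(1, indexSearcher.count(new TermQuery(new Term("field", "bar"))));
    assertEquals(2, indexSearcher.count(new TermQuery(new Term("field", "foobar"))));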
Use of org.apache.lucene.store.Directory in project elasticsearch by elastic.
Class FiltersAggregatorTests, method testRandom:
public void testRandom() throws Exception {
    Directory directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
    int numDocs = randomIntBetween(100, 200);
    int maxTerm = randomIntBetween(10, 50);
    int[] expectedBucketCount = new int[maxTerm];
    Document document = new Document();
    for (int i = 0; i < numDocs; i++) {
        if (frequently()) {
            // make sure we have more than one segment to test the merge
            indexWriter.commit();
        }
        int value = randomInt(maxTerm - 1);
        expectedBucketCount[value] += 1;
        document.add(new Field("field", Integer.toString(value), fieldType));
        indexWriter.addDocument(document);
        document.clear();
    }
    indexWriter.close();
    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
    int numFilters = randomIntBetween(1, 10);
    QueryBuilder[] filters = new QueryBuilder[numFilters];
    int[] filterTerms = new int[numFilters];
    int expectedOtherCount = numDocs;
    Set<Integer> filterSet = new HashSet<>();
    for (int i = 0; i < filters.length; i++) {
        int value = randomInt(maxTerm - 1);
        filters[i] = QueryBuilders.termQuery("field", Integer.toString(value));
        filterTerms[i] = value;
        // subtract each filtered term's doc count only once, even if two
        // randomly chosen filters target the same term
        if (filterSet.contains(value) == false) {
            expectedOtherCount -= expectedBucketCount[value];
            filterSet.add(value);
        }
    }
    FiltersAggregationBuilder builder = new FiltersAggregationBuilder("test", filters);
    builder.otherBucket(true);
    builder.otherBucketKey("other");
    for (boolean doReduce : new boolean[] { true, false }) {
        final InternalFilters response;
        if (doReduce) {
            response = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
        } else {
            response = search(indexSearcher, new MatchAllDocsQuery(), builder, fieldType);
        }
        List<InternalFilters.InternalBucket> buckets = response.getBuckets();
        assertEquals(filters.length + 1, buckets.size());
        for (InternalFilters.InternalBucket bucket : buckets) {
            if ("other".equals(bucket.getKey())) {
                assertEquals(expectedOtherCount, bucket.getDocCount());
            } else {
                // unkeyed filters get their array index as bucket key
                int index = Integer.parseInt(bucket.getKey());
                assertEquals((long) expectedBucketCount[filterTerms[index]], bucket.getDocCount());
            }
        }
    }
    indexReader.close();
    directory.close();
}
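The one subtle piece of bookkeeping is expectedOtherCount: because two randomly chosen filters may pick the same term, each term's doc count must be subtracted from numDocs exactly once. A self-contained sketch with made-up numbers (not from the test; needs java.util.HashSet and java.util.Set) illustrates the invariant:

    // Made-up numbers, only to illustrate the expectedOtherCount bookkeeping.
    int numDocs = 10;
    int[] expectedBucketCount = { 4, 3, 2, 1 }; // docs per term 0..3
    int[] filterTerms = { 1, 1, 3 };            // two filters picked the same term
    int expectedOtherCount = numDocs;
    Set<Integer> seen = new HashSet<>();
    for (int term : filterTerms) {
        if (seen.add(term)) { // add() returns false on duplicates, so each term counts once
            expectedOtherCount -= expectedBucketCount[term];
        }
    }
    // expectedOtherCount == 10 - 3 - 1 == 6: the docs holding terms 0 or 2.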
Use of org.apache.lucene.store.Directory in project elasticsearch by elastic.
Class DiversifiedSamplerTests, method testDiversifiedSampler:
public void testDiversifiedSampler() throws Exception {
    // "id,cat,name,price,inStock,author_t,series_t,sequence_i,genre_s,genre_id"
    String[] data = {
        "0553573403,book,A Game of Thrones,7.99,true,George R.R. Martin,A Song of Ice and Fire,1,fantasy,0",
        "0553579908,book,A Clash of Kings,7.99,true,George R.R. Martin,A Song of Ice and Fire,2,fantasy,0",
        "055357342X,book,A Storm of Swords,7.99,true,George R.R. Martin,A Song of Ice and Fire,3,fantasy,0",
        "0553293354,book,Foundation,17.99,true,Isaac Asimov,Foundation Novels,1,scifi,1",
        "0812521390,book,The Black Company,6.99,false,Glen Cook,The Chronicles of The Black Company,1,fantasy,0",
        "0812550706,book,Ender's Game,6.99,true,Orson Scott Card,Ender,1,scifi,1",
        "0441385532,book,Jhereg,7.95,false,Steven Brust,Vlad Taltos,1,fantasy,0",
        "0380014300,book,Nine Princes In Amber,6.99,true,Roger Zelazny,the Chronicles of Amber,1,fantasy,0",
        "0805080481,book,The Book of Three,5.99,true,Lloyd Alexander,The Chronicles of Prydain,1,fantasy,0",
        "080508049X,book,The Black Cauldron,5.99,true,Lloyd Alexander,The Chronicles of Prydain,2,fantasy,0"
    };
    Directory directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
    for (String entry : data) {
        String[] parts = entry.split(",");
        Document document = new Document();
        document.add(new SortedDocValuesField("id", new BytesRef(parts[0])));
        document.add(new StringField("cat", parts[1], Field.Store.NO));
        document.add(new TextField("name", parts[2], Field.Store.NO));
        document.add(new DoubleDocValuesField("price", Double.valueOf(parts[3])));
        document.add(new StringField("inStock", parts[4], Field.Store.NO));
        document.add(new StringField("author", parts[5], Field.Store.NO));
        document.add(new StringField("series", parts[6], Field.Store.NO));
        document.add(new StringField("sequence", parts[7], Field.Store.NO));
        document.add(new SortedDocValuesField("genre", new BytesRef(parts[8])));
        document.add(new NumericDocValuesField("genre_id", Long.valueOf(parts[9])));
        indexWriter.addDocument(document);
    }
    indexWriter.close();
    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    MappedFieldType genreFieldType = new KeywordFieldMapper.KeywordFieldType();
    genreFieldType.setName("genre");
    genreFieldType.setHasDocValues(true);
    Consumer<InternalSampler> verify = result -> {
        Terms terms = result.getAggregations().get("terms");
        assertEquals(2, terms.getBuckets().size());
        assertEquals("0805080481", terms.getBuckets().get(0).getKeyAsString());
        assertEquals("0812550706", terms.getBuckets().get(1).getKeyAsString());
    };
    testCase(indexSearcher, genreFieldType, "map", verify);
    testCase(indexSearcher, genreFieldType, "global_ordinals", verify);
    testCase(indexSearcher, genreFieldType, "bytes_hash", verify);
    genreFieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG);
    genreFieldType.setName("genre_id");
    testCase(indexSearcher, genreFieldType, null, verify);
    // wrong field:
    genreFieldType = new KeywordFieldMapper.KeywordFieldType();
    genreFieldType.setName("wrong_field");
    genreFieldType.setHasDocValues(true);
    testCase(indexSearcher, genreFieldType, null, result -> {
        Terms terms = result.getAggregations().get("terms");
        assertEquals(1, terms.getBuckets().size());
        assertEquals("0805080481", terms.getBuckets().get(0).getKeyAsString());
    });
    indexReader.close();
    directory.close();
}
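The testCase helper itself is not part of this excerpt. As a rough sketch of what it plausibly builds (names and parameters here are assumptions, and the real helper presumably also scores documents, e.g. by reciprocal price, so that the two cheapest books, one per genre, survive the diversified sample: "0805080481" at 5.99 for fantasy and "0812550706" at 6.99 for scifi):

    // Sketch only, not the actual helper: a diversified sampler that keeps at
    // most one document per distinct value of the diversity field, with a terms
    // sub-aggregation on "id" to make the sampled documents observable.
    DiversifiedAggregationBuilder builder = new DiversifiedAggregationBuilder("sampler")
            .field("genre")                 // diversity field ("genre_id" in the numeric variant)
            .maxDocsPerValue(1)             // assumption: one sampled doc per genre
            .executionHint(executionHint)   // "map", "global_ordinals", "bytes_hash", or null
            .subAggregation(new TermsAggregationBuilder("terms", ValueType.STRING).field("id"));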
Use of org.apache.lucene.store.Directory in project elasticsearch by elastic.
Class HistogramAggregatorTests, method testMinDocCount:
public void testMinDocCount() throws Exception {
    try (Directory dir = newDirectory();
            RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
        for (long value : new long[] { 7, 3, -10, -6, 5, 50 }) {
            Document doc = new Document();
            doc.add(new SortedNumericDocValuesField("field", value));
            w.addDocument(doc);
        }
        HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg")
                .field("field").interval(10).minDocCount(2);
        MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG);
        fieldType.setName("field");
        try (IndexReader reader = w.getReader()) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Histogram histogram = searchAndReduce(searcher, new MatchAllDocsQuery(), aggBuilder, fieldType);
            assertEquals(2, histogram.getBuckets().size());
            assertEquals(-10d, histogram.getBuckets().get(0).getKey());
            assertEquals(2, histogram.getBuckets().get(0).getDocCount());
            assertEquals(0d, histogram.getBuckets().get(1).getKey());
            assertEquals(3, histogram.getBuckets().get(1).getDocCount());
        }
    }
}
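Why those two buckets: a histogram bucket key is Math.floor(value / interval) * interval (ignoring the optional offset), so the six values collapse into buckets -10, 0, and 50, and minDocCount(2) drops the single-document bucket at 50. A standalone illustration of the arithmetic:

    // Bucket-key arithmetic for interval(10) over the values indexed above.
    double interval = 10;
    for (long value : new long[] { 7, 3, -10, -6, 5, 50 }) {
        double key = Math.floor(value / interval) * interval;
        System.out.println(value + " -> bucket " + key);
    }
    // 7, 3, 5  -> bucket 0.0   (3 docs, kept)
    // -10, -6  -> bucket -10.0 (2 docs, kept)
    // 50       -> bucket 50.0  (1 doc, dropped by minDocCount(2))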
Use of org.apache.lucene.store.Directory in project elasticsearch by elastic.
Class HistogramAggregatorTests, method testDoubles:
public void testDoubles() throws Exception {
    try (Directory dir = newDirectory();
            RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
        for (double value : new double[] { 9.3, 3.2, -10, -6.5, 5.3, 50.1 }) {
            Document doc = new Document();
            doc.add(new SortedNumericDocValuesField("field", NumericUtils.doubleToSortableLong(value)));
            w.addDocument(doc);
        }
        HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg")
                .field("field").interval(5);
        MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.DOUBLE);
        fieldType.setName("field");
        try (IndexReader reader = w.getReader()) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Histogram histogram = search(searcher, new MatchAllDocsQuery(), aggBuilder, fieldType);
            assertEquals(4, histogram.getBuckets().size());
            assertEquals(-10d, histogram.getBuckets().get(0).getKey());
            assertEquals(2, histogram.getBuckets().get(0).getDocCount());
            assertEquals(0d, histogram.getBuckets().get(1).getKey());
            assertEquals(1, histogram.getBuckets().get(1).getDocCount());
            assertEquals(5d, histogram.getBuckets().get(2).getKey());
            assertEquals(2, histogram.getBuckets().get(2).getDocCount());
            assertEquals(50d, histogram.getBuckets().get(3).getKey());
            assertEquals(1, histogram.getBuckets().get(3).getDocCount());
        }
    }
}
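The only non-obvious step is NumericUtils.doubleToSortableLong: SortedNumericDocValuesField stores longs, so each double is encoded into a long whose sort order matches the double order, and the DOUBLE field type tells the aggregator to decode on read. A quick round-trip check using the standard Lucene API (org.apache.lucene.util.NumericUtils):

    // doubleToSortableLong/sortableLongToDouble are inverse, order-preserving encodings.
    long a = NumericUtils.doubleToSortableLong(-6.5);
    long b = NumericUtils.doubleToSortableLong(5.3);
    assert a < b;                                        // order preserved
    assert NumericUtils.sortableLongToDouble(b) == 5.3;  // exact round trip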