Use of org.apache.lucene.document.Document in project elasticsearch by elastic.
In the class DiversifiedSamplerTests, the method testDiversifiedSampler:
public void testDiversifiedSampler() throws Exception {
    String[] data = {
        // "id,cat,name,price,inStock,author_t,series_t,sequence_i,genre_s,genre_id"
        "0553573403,book,A Game of Thrones,7.99,true,George R.R. Martin,A Song of Ice and Fire,1,fantasy,0",
        "0553579908,book,A Clash of Kings,7.99,true,George R.R. Martin,A Song of Ice and Fire,2,fantasy,0",
        "055357342X,book,A Storm of Swords,7.99,true,George R.R. Martin,A Song of Ice and Fire,3,fantasy,0",
        "0553293354,book,Foundation,17.99,true,Isaac Asimov,Foundation Novels,1,scifi,1",
        "0812521390,book,The Black Company,6.99,false,Glen Cook,The Chronicles of The Black Company,1,fantasy,0",
        "0812550706,book,Ender's Game,6.99,true,Orson Scott Card,Ender,1,scifi,1",
        "0441385532,book,Jhereg,7.95,false,Steven Brust,Vlad Taltos,1,fantasy,0",
        "0380014300,book,Nine Princes In Amber,6.99,true,Roger Zelazny,the Chronicles of Amber,1,fantasy,0",
        "0805080481,book,The Book of Three,5.99,true,Lloyd Alexander,The Chronicles of Prydain,1,fantasy,0",
        "080508049X,book,The Black Cauldron,5.99,true,Lloyd Alexander,The Chronicles of Prydain,2,fantasy,0" };
    Directory directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
    for (String entry : data) {
        String[] parts = entry.split(",");
        Document document = new Document();
        document.add(new SortedDocValuesField("id", new BytesRef(parts[0])));
        document.add(new StringField("cat", parts[1], Field.Store.NO));
        document.add(new TextField("name", parts[2], Field.Store.NO));
        document.add(new DoubleDocValuesField("price", Double.valueOf(parts[3])));
        document.add(new StringField("inStock", parts[4], Field.Store.NO));
        document.add(new StringField("author", parts[5], Field.Store.NO));
        document.add(new StringField("series", parts[6], Field.Store.NO));
        document.add(new StringField("sequence", parts[7], Field.Store.NO));
        document.add(new SortedDocValuesField("genre", new BytesRef(parts[8])));
        document.add(new NumericDocValuesField("genre_id", Long.valueOf(parts[9])));
        indexWriter.addDocument(document);
    }
    indexWriter.close();
    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    MappedFieldType genreFieldType = new KeywordFieldMapper.KeywordFieldType();
    genreFieldType.setName("genre");
    genreFieldType.setHasDocValues(true);
    // The sampler keeps one top-scoring document per genre, so the terms
    // sub-aggregation on "id" sees exactly two documents.
    Consumer<InternalSampler> verify = result -> {
        Terms terms = result.getAggregations().get("terms");
        assertEquals(2, terms.getBuckets().size());
        assertEquals("0805080481", terms.getBuckets().get(0).getKeyAsString());
        assertEquals("0812550706", terms.getBuckets().get(1).getKeyAsString());
    };
    testCase(indexSearcher, genreFieldType, "map", verify);
    testCase(indexSearcher, genreFieldType, "global_ordinals", verify);
    testCase(indexSearcher, genreFieldType, "bytes_hash", verify);
    // Diversifying on the numeric "genre_id" field gives the same two buckets.
    genreFieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG);
    genreFieldType.setName("genre_id");
    testCase(indexSearcher, genreFieldType, null, verify);
    // Wrong field: with no values to diversify on, only a single document is kept.
    genreFieldType = new KeywordFieldMapper.KeywordFieldType();
    genreFieldType.setName("wrong_field");
    genreFieldType.setHasDocValues(true);
    testCase(indexSearcher, genreFieldType, null, result -> {
        Terms terms = result.getAggregations().get("terms");
        assertEquals(1, terms.getBuckets().size());
        assertEquals("0805080481", terms.getBuckets().get(0).getKeyAsString());
    });
    indexReader.close();
    directory.close();
}
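The testCase helper referenced above is not shown on this page. A minimal sketch of what it plausibly looks like, under two assumptions: the suite's search(...) utility from AggregatorTestCase is available, and the real helper additionally scores documents (e.g. by reciprocal price) so that the cheapest book per genre wins, which would explain the expected ids; that scoring part is elided here:

private void testCase(IndexSearcher indexSearcher, MappedFieldType genreFieldType,
                      String executionHint, Consumer<InternalSampler> verify) throws IOException {
    // The terms sub-aggregation needs a field type for "id" as well.
    MappedFieldType idFieldType = new KeywordFieldMapper.KeywordFieldType();
    idFieldType.setName("id");
    idFieldType.setHasDocValues(true);
    // Diversified sampler on the genre field; the terms sub-aggregation collects
    // the ids of whichever documents survive the sampling.
    DiversifiedAggregationBuilder builder = new DiversifiedAggregationBuilder("_name")
        .field(genreFieldType.name())
        .executionHint(executionHint)
        .subAggregation(new TermsAggregationBuilder("terms", ValueType.STRING).field("id"));
    // MatchAllDocsQuery stands in for the price-aware query assumed above.
    InternalSampler result = search(indexSearcher, new MatchAllDocsQuery(), builder,
        genreFieldType, idFieldType);
    verify.accept(result);
}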
Use of org.apache.lucene.document.Document in project elasticsearch by elastic.
In the class HistogramAggregatorTests, the method testMinDocCount:
public void testMinDocCount() throws Exception {
    try (Directory dir = newDirectory();
         RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
        for (long value : new long[] { 7, 3, -10, -6, 5, 50 }) {
            Document doc = new Document();
            doc.add(new SortedNumericDocValuesField("field", value));
            w.addDocument(doc);
        }
        HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg")
            .field("field").interval(10).minDocCount(2);
        MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG);
        fieldType.setName("field");
        try (IndexReader reader = w.getReader()) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Histogram histogram = searchAndReduce(searcher, new MatchAllDocsQuery(), aggBuilder, fieldType);
            // Bucket 50 holds a single document and is filtered out by minDocCount(2).
            assertEquals(2, histogram.getBuckets().size());
            assertEquals(-10d, histogram.getBuckets().get(0).getKey());
            assertEquals(2, histogram.getBuckets().get(0).getDocCount());
            assertEquals(0d, histogram.getBuckets().get(1).getKey());
            assertEquals(3, histogram.getBuckets().get(1).getDocCount());
        }
    }
}
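Why those buckets: with interval 10, each value is rounded down to a multiple of the interval, so { -10, -6 } land in bucket -10, { 7, 3, 5 } in bucket 0, and { 50 } in bucket 50, which minDocCount(2) then filters out. A one-line sketch of the rounding rule (ignoring the builder's offset parameter, which is 0 here):

// Fixed-interval histogram key: round the value down to a multiple of the interval.
static double bucketKey(double value, double interval) {
    return Math.floor(value / interval) * interval;
}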
Use of org.apache.lucene.document.Document in project elasticsearch by elastic.
In the class HistogramAggregatorTests, the method testDoubles:
public void testDoubles() throws Exception {
    try (Directory dir = newDirectory();
         RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
        for (double value : new double[] { 9.3, 3.2, -10, -6.5, 5.3, 50.1 }) {
            Document doc = new Document();
            // Doubles go into a long-valued doc-values field via Lucene's sortable-long encoding.
            doc.add(new SortedNumericDocValuesField("field", NumericUtils.doubleToSortableLong(value)));
            w.addDocument(doc);
        }
        HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg")
            .field("field").interval(5);
        MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.DOUBLE);
        fieldType.setName("field");
        try (IndexReader reader = w.getReader()) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Histogram histogram = search(searcher, new MatchAllDocsQuery(), aggBuilder, fieldType);
            assertEquals(4, histogram.getBuckets().size());
            assertEquals(-10d, histogram.getBuckets().get(0).getKey());
            assertEquals(2, histogram.getBuckets().get(0).getDocCount());
            assertEquals(0d, histogram.getBuckets().get(1).getKey());
            assertEquals(1, histogram.getBuckets().get(1).getDocCount());
            assertEquals(5d, histogram.getBuckets().get(2).getKey());
            assertEquals(2, histogram.getBuckets().get(2).getDocCount());
            assertEquals(50d, histogram.getBuckets().get(3).getKey());
            assertEquals(1, histogram.getBuckets().get(3).getDocCount());
        }
    }
}
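The doubles are stored through NumericUtils.doubleToSortableLong (org.apache.lucene.util.NumericUtils), the standard Lucene trick for keeping doubles in a numeric doc-values field; the encoding is order-preserving and reversible:

long sortable = NumericUtils.doubleToSortableLong(9.3);
double restored = NumericUtils.sortableLongToDouble(sortable); // lossless: restored == 9.3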
Use of org.apache.lucene.document.Document in project elasticsearch by elastic.
In the class NestedAggregatorTests, the method testResetRootDocId:
public void testResetRootDocId() throws Exception {
    Directory directory = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(null);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, iwc);
    List<Document> documents = new ArrayList<>();
    // 1 segment with 1 root document and 3 nested sub docs
    Document document = new Document();
    document.add(new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
    document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
    documents.add(document);
    document = new Document();
    document.add(new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
    document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
    documents.add(document);
    document = new Document();
    document.add(new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
    document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
    documents.add(document);
    // The root document goes last in the block.
    document = new Document();
    document.add(new Field(UidFieldMapper.NAME, "type#1", UidFieldMapper.Defaults.FIELD_TYPE));
    document.add(new Field(TypeFieldMapper.NAME, "test", TypeFieldMapper.Defaults.FIELD_TYPE));
    documents.add(document);
    indexWriter.addDocuments(documents);
    indexWriter.commit();
    documents.clear();
    // 1 segment with:
    // 1 document, with 1 nested subdoc
    document = new Document();
    document.add(new Field(UidFieldMapper.NAME, "type#2", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
    document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
    documents.add(document);
    document = new Document();
    document.add(new Field(UidFieldMapper.NAME, "type#2", UidFieldMapper.Defaults.FIELD_TYPE));
    document.add(new Field(TypeFieldMapper.NAME, "test", TypeFieldMapper.Defaults.FIELD_TYPE));
    documents.add(document);
    indexWriter.addDocuments(documents);
    documents.clear();
    // and 1 document, with 1 nested subdoc
    document = new Document();
    document.add(new Field(UidFieldMapper.NAME, "type#3", UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
    document.add(new Field(TypeFieldMapper.NAME, "__nested_field", TypeFieldMapper.Defaults.FIELD_TYPE));
    documents.add(document);
    document = new Document();
    document.add(new Field(UidFieldMapper.NAME, "type#3", UidFieldMapper.Defaults.FIELD_TYPE));
    document.add(new Field(TypeFieldMapper.NAME, "test", TypeFieldMapper.Defaults.FIELD_TYPE));
    documents.add(document);
    indexWriter.addDocuments(documents);
    indexWriter.commit();
    indexWriter.close();
    IndexService indexService = createIndex("test");
    DirectoryReader directoryReader = DirectoryReader.open(directory);
    directoryReader = ElasticsearchDirectoryReader.wrap(directoryReader, new ShardId(indexService.index(), 0));
    IndexSearcher searcher = new IndexSearcher(directoryReader);
    indexService.mapperService().merge("test",
        new CompressedXContent(PutMappingRequest.buildFromSimplifiedDef("test", "nested_field", "type=nested").string()),
        MapperService.MergeReason.MAPPING_UPDATE, false);
    SearchContext context = createSearchContext(indexService);
    AggregatorFactories.Builder builder = AggregatorFactories.builder();
    NestedAggregationBuilder factory = new NestedAggregationBuilder("test", "nested_field");
    builder.addAggregator(factory);
    AggregatorFactories factories = builder.build(context, null);
    context.aggregations(new SearchContextAggregations(factories));
    Aggregator[] aggs = factories.createTopLevelAggregators();
    BucketCollector collector = BucketCollector.wrap(Arrays.asList(aggs));
    collector.preCollection();
    // A regular search always excludes nested docs, so we use Queries.newNonNestedFilter()
    // here (otherwise MatchAllDocsQuery would be sufficient). We exclude the root doc with
    // uid type#2; this triggers the bug if the root doc isn't reset when a new segment is
    // processed, because root doc type#3 and root doc type#1 have the same segment docid.
    BooleanQuery.Builder bq = new BooleanQuery.Builder();
    bq.add(Queries.newNonNestedFilter(), Occur.MUST);
    bq.add(new TermQuery(new Term(UidFieldMapper.NAME, "type#2")), Occur.MUST_NOT);
    searcher.search(new ConstantScoreQuery(bq.build()), collector);
    collector.postCollection();
    Nested nested = (Nested) aggs[0].buildAggregation(0);
    // If currentRootDoc isn't reset per segment, the child docs from the first segment
    // are emitted again and 6 docs are returned instead of 4.
    assertThat(nested.getDocCount(), equalTo(4L));
    directoryReader.close();
    directory.close();
}
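For reference, the per-segment doc IDs the two commits are meant to produce (assuming the random writer flushes only at the commit points):

segment 1: docs 0-2 = nested children of type#1, doc 3 = root doc type#1
segment 2: doc 0 = child of type#2, doc 1 = root type#2, doc 2 = child of type#3, doc 3 = root type#3

Roots type#1 and type#3 both sit at doc 3 of their segment, which is exactly the collision the test relies on; with type#2 excluded, the nested aggregator should see the three children of type#1 plus the single child of type#3, hence the expected count of 4.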
Use of org.apache.lucene.document.Document in project elasticsearch by elastic.
In the class TermsAggregatorTests, the method createIndexWithLongs:
private IndexReader createIndexWithLongs() throws IOException {
    Directory directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
    Document document = new Document();
    document.add(new SortedNumericDocValuesField("number", 10));
    document.add(new SortedNumericDocValuesField("number", 100));
    indexWriter.addDocument(document);
    document = new Document();
    document.add(new SortedNumericDocValuesField("number", 1));
    document.add(new SortedNumericDocValuesField("number", 100));
    indexWriter.addDocument(document);
    document = new Document();
    document.add(new SortedNumericDocValuesField("number", 10));
    document.add(new SortedNumericDocValuesField("number", 1000));
    indexWriter.addDocument(document);
    indexWriter.close();
    return DirectoryReader.open(directory);
}
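A sketch of how such a reader is typically consumed in this suite, mirroring the histogram tests above (the search(...) utility and the specific counts in the comment are assumptions, not code from TermsAggregatorTests):

try (IndexReader reader = createIndexWithLongs()) {
    IndexSearcher searcher = new IndexSearcher(reader);
    TermsAggregationBuilder aggBuilder = new TermsAggregationBuilder("terms", ValueType.LONG).field("number");
    MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG);
    fieldType.setName("number");
    Terms terms = search(searcher, new MatchAllDocsQuery(), aggBuilder, fieldType);
    // Each document contributes one count per distinct value:
    // 1 -> 1 doc, 10 -> 2 docs, 100 -> 2 docs, 1000 -> 1 doc.
}
// (Directory cleanup elided; the real tests close it explicitly.)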