use of org.apache.lucene.tests.index.RandomIndexWriter in project OpenSearch by opensearch-project.
the class NestedAggregatorTests method testNestedOrdering.
/**
 * Indexes nine books and checks that a terms aggregation on {@code author} is ordered by the
 * {@code max(num_pages)} metric nested under the {@code chapters} aggregation, first in
 * ascending and then in descending order.
 */
public void testNestedOrdering() throws IOException {
    // Expected buckets when ordering ascending by chapters>num_pages.value.
    // The descending run expects exactly the mirror image of this table.
    final String[] authorsAscending = { "d", "f", "g", "e", "c", "b", "a" };
    final int[] maxPagesAscending = { 3, 14, 18, 23, 39, 50, 70 };

    try (Directory dir = newDirectory()) {
        try (RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
            writer.addDocuments(generateBook("1", new String[] { "a" }, new int[] { 12, 13, 14 }));
            writer.addDocuments(generateBook("2", new String[] { "b" }, new int[] { 5, 50 }));
            writer.addDocuments(generateBook("3", new String[] { "c" }, new int[] { 39, 19 }));
            writer.addDocuments(generateBook("4", new String[] { "d" }, new int[] { 2, 1, 3 }));
            writer.addDocuments(generateBook("5", new String[] { "a" }, new int[] { 70, 10 }));
            writer.addDocuments(generateBook("6", new String[] { "e" }, new int[] { 23, 21 }));
            writer.addDocuments(generateBook("7", new String[] { "e", "a" }, new int[] { 8, 8 }));
            writer.addDocuments(generateBook("8", new String[] { "f" }, new int[] { 12, 14 }));
            writer.addDocuments(generateBook("9", new String[] { "g", "c", "e" }, new int[] { 18, 8 }));
        }
        try (IndexReader reader = wrapInMockESDirectoryReader(DirectoryReader.open(dir))) {
            MappedFieldType numPagesType = new NumberFieldMapper.NumberFieldType("num_pages", NumberFieldMapper.NumberType.LONG);
            MappedFieldType authorType = new KeywordFieldMapper.KeywordFieldType("author");

            // First pass: ascending order; second pass: reverse order.
            for (boolean ascending : new boolean[] { true, false }) {
                TermsAggregationBuilder authorsAgg = new TermsAggregationBuilder("authors").userValueTypeHint(ValueType.STRING)
                    .field("author")
                    .order(BucketOrder.aggregation("chapters>num_pages.value", ascending));
                NestedAggregationBuilder chaptersAgg = new NestedAggregationBuilder("chapters", "nested_chapters");
                MaxAggregationBuilder maxPagesAgg = new MaxAggregationBuilder("num_pages").field("num_pages");
                chaptersAgg.subAggregation(maxPagesAgg);
                authorsAgg.subAggregation(chaptersAgg);

                Terms result = searchAndReduce(
                    newSearcher(reader, false, true),
                    new MatchAllDocsQuery(),
                    authorsAgg,
                    numPagesType,
                    authorType
                );
                assertEquals(7, result.getBuckets().size());
                assertEquals("authors", result.getName());

                final int last = authorsAscending.length - 1;
                for (int pos = 0; pos <= last; pos++) {
                    // Walk the expectation table forwards for ascending, backwards for descending.
                    int expectedIdx = ascending ? pos : last - pos;
                    Terms.Bucket bucket = result.getBuckets().get(pos);
                    assertEquals(authorsAscending[expectedIdx], bucket.getKeyAsString());
                    Nested chapters = (Nested) bucket.getAggregations().get("chapters");
                    Max maxPages = chapters.getAggregations().get("num_pages");
                    assertEquals(maxPagesAscending[expectedIdx], (int) maxPages.getValue());
                }
            }
        }
    }
}
use of org.apache.lucene.tests.index.RandomIndexWriter in project OpenSearch by opensearch-project.
the class NestedAggregatorTests method testNestedOrdering_random.
/**
 * Randomized variant of the nested-ordering test: indexes a random number of books, each with
 * its own author key and a random set of chapter page counts, then checks that the terms
 * buckets come back ordered by the nested {@code min(num_pages)} value with ties broken by key.
 */
public void testNestedOrdering_random() throws IOException {
    final int bookCount = randomIntBetween(32, 512);
    List<Tuple<String, int[]>> library = new ArrayList<>();
    for (int bookIdx = 0; bookIdx < bookCount; bookIdx++) {
        int[] pagesPerChapter = new int[randomIntBetween(1, 8)];
        for (int chapter = 0; chapter < pagesPerChapter.length; chapter++) {
            pagesPerChapter[chapter] = randomIntBetween(2, 64);
        }
        library.add(Tuple.tuple(String.format(Locale.ROOT, "%03d", bookIdx), pagesPerChapter));
    }

    try (Directory dir = newDirectory()) {
        try (RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
            for (int id = 0; id < library.size(); id++) {
                Tuple<String, int[]> entry = library.get(id);
                String bookId = String.format(Locale.ROOT, "%03d", id);
                writer.addDocuments(generateBook(bookId, new String[] { entry.v1() }, entry.v2()));
            }
        }

        // Build the expected bucket order: after sorting each page array, element 0 is the
        // book's minimum; books are then ordered by that minimum and, on ties, by author key.
        library.forEach(entry -> Arrays.sort(entry.v2()));
        library.sort((left, right) -> {
            int byMinPages = Integer.compare(left.v2()[0], right.v2()[0]);
            return byMinPages != 0 ? byMinPages : left.v1().compareTo(right.v1());
        });

        try (IndexReader reader = wrapInMockESDirectoryReader(DirectoryReader.open(dir))) {
            MappedFieldType numPagesType = new NumberFieldMapper.NumberFieldType("num_pages", NumberFieldMapper.NumberType.LONG);
            MappedFieldType authorType = new KeywordFieldMapper.KeywordFieldType("author");

            TermsAggregationBuilder authorsAgg = new TermsAggregationBuilder("authors").userValueTypeHint(ValueType.STRING)
                .size(library.size())
                .field("author")
                .order(BucketOrder.compound(BucketOrder.aggregation("chapters>num_pages.value", true), BucketOrder.key(true)));
            NestedAggregationBuilder chaptersAgg = new NestedAggregationBuilder("chapters", "nested_chapters");
            MinAggregationBuilder minPagesAgg = new MinAggregationBuilder("num_pages").field("num_pages");
            chaptersAgg.subAggregation(minPagesAgg);
            authorsAgg.subAggregation(chaptersAgg);

            Terms result = searchAndReduce(
                newSearcher(reader, false, true),
                new MatchAllDocsQuery(),
                authorsAgg,
                numPagesType,
                authorType
            );
            assertEquals(library.size(), result.getBuckets().size());
            assertEquals("authors", result.getName());

            int pos = 0;
            for (Tuple<String, int[]> expected : library) {
                Terms.Bucket bucket = result.getBuckets().get(pos++);
                assertEquals(expected.v1(), bucket.getKeyAsString());
                Nested chapters = (Nested) bucket.getAggregations().get("chapters");
                Min minPages = chapters.getAggregations().get("num_pages");
                assertEquals(expected.v2()[0], (int) minPages.getValue());
            }
        }
    }
}
use of org.apache.lucene.tests.index.RandomIndexWriter in project OpenSearch by opensearch-project.
the class ReverseNestedAggregatorTests method testFieldAlias.
/**
 * Runs the same nested / reverse_nested / max aggregation twice, once against the value field
 * and once against that field name with the {@code "-alias"} suffix, and expects equal
 * reverse_nested results. Also checks that the reverse_nested doc count equals the number of
 * parent documents that were indexed with at least one nested document.
 * NOTE(review): how the {@code "-alias"} name is resolved is not visible in this method;
 * presumably the test class registers the alias elsewhere — confirm.
 */
public void testFieldAlias() throws IOException {
    final int parentCount = randomIntBetween(1, 20);
    int parentsWithChildren = 0;
    MappedFieldType valueFieldType = new NumberFieldMapper.NumberFieldType(VALUE_FIELD_NAME, NumberFieldMapper.NumberType.LONG);

    try (Directory dir = newDirectory()) {
        try (RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
            for (int parent = 0; parent < parentCount; parent++) {
                final String id = Integer.toString(parent);
                List<Document> block = new ArrayList<>();

                final int childCount = randomIntBetween(0, 20);
                parentsWithChildren += childCount > 0 ? 1 : 0;

                // Nested documents go first in the block and carry the parent's id.
                for (int child = 0; child < childCount; child++) {
                    Document nestedDoc = new Document();
                    nestedDoc.add(new Field(IdFieldMapper.NAME, Uid.encodeId(id), IdFieldMapper.Defaults.NESTED_FIELD_TYPE));
                    nestedDoc.add(new Field(NestedPathFieldMapper.NAME, NESTED_OBJECT, NestedPathFieldMapper.Defaults.FIELD_TYPE));
                    block.add(nestedDoc);
                }

                // The parent document is appended last, after all of its nested documents.
                Document parentDoc = new Document();
                parentDoc.add(new Field(IdFieldMapper.NAME, Uid.encodeId(id), IdFieldMapper.Defaults.FIELD_TYPE));
                long value = randomNonNegativeLong() % 10000;
                parentDoc.add(new SortedNumericDocValuesField(VALUE_FIELD_NAME, value));
                parentDoc.add(SeqNoFieldMapper.SequenceIDFields.emptySeqID().primaryTerm);
                block.add(parentDoc);

                writer.addDocuments(block);
            }
            writer.commit();
        }

        try (IndexReader reader = wrapInMockESDirectoryReader(DirectoryReader.open(dir))) {
            NestedAggregationBuilder directAgg = nested(NESTED_AGG, NESTED_OBJECT).subAggregation(
                reverseNested(REVERSE_AGG_NAME).subAggregation(max(MAX_AGG_NAME).field(VALUE_FIELD_NAME))
            );
            NestedAggregationBuilder viaAliasAgg = nested(NESTED_AGG, NESTED_OBJECT).subAggregation(
                reverseNested(REVERSE_AGG_NAME).subAggregation(max(MAX_AGG_NAME).field(VALUE_FIELD_NAME + "-alias"))
            );

            Nested directResult = searchAndReduce(newSearcher(reader, false, true), new MatchAllDocsQuery(), directAgg, valueFieldType);
            Nested aliasResult = searchAndReduce(newSearcher(reader, false, true), new MatchAllDocsQuery(), viaAliasAgg, valueFieldType);

            ReverseNested directReverse = directResult.getAggregations().get(REVERSE_AGG_NAME);
            ReverseNested aliasReverse = aliasResult.getAggregations().get(REVERSE_AGG_NAME);

            assertEquals(directReverse, aliasReverse);
            assertEquals(parentsWithChildren, directReverse.getDocCount());
        }
    }
}
use of org.apache.lucene.tests.index.RandomIndexWriter in project OpenSearch by opensearch-project.
the class ReverseNestedAggregatorTests method testNoDocs.
/**
 * Runs a nested / reverse_nested / max aggregation against an index that contains no
 * documents and expects a reverse_nested doc count of 0 and a max of negative infinity.
 */
public void testNoDocs() throws IOException {
    try (Directory dir = newDirectory()) {
        // Open and immediately close a writer: the index is created but deliberately left empty.
        new RandomIndexWriter(random(), dir).close();

        try (IndexReader reader = wrapInMockESDirectoryReader(DirectoryReader.open(dir))) {
            // Wire the aggregation tree bottom-up: max -> reverse_nested -> nested.
            ReverseNestedAggregationBuilder reverseAgg = new ReverseNestedAggregationBuilder(REVERSE_AGG_NAME);
            reverseAgg.subAggregation(new MaxAggregationBuilder(MAX_AGG_NAME).field(VALUE_FIELD_NAME));
            NestedAggregationBuilder nestedAgg = new NestedAggregationBuilder(NESTED_AGG, NESTED_OBJECT);
            nestedAgg.subAggregation(reverseAgg);

            MappedFieldType valueFieldType = new NumberFieldMapper.NumberFieldType(VALUE_FIELD_NAME, NumberFieldMapper.NumberType.LONG);
            Nested nestedResult = searchAndReduce(
                newSearcher(reader, false, true),
                new MatchAllDocsQuery(),
                nestedAgg,
                valueFieldType
            );

            ReverseNested reverseResult = (ReverseNested) ((InternalAggregation) nestedResult).getProperty(REVERSE_AGG_NAME);
            assertEquals(REVERSE_AGG_NAME, reverseResult.getName());
            assertEquals(0, reverseResult.getDocCount());

            InternalMax maxResult = (InternalMax) ((InternalAggregation) reverseResult).getProperty(MAX_AGG_NAME);
            assertEquals(MAX_AGG_NAME, maxResult.getName());
            assertEquals(Double.NEGATIVE_INFINITY, maxResult.getValue(), Double.MIN_VALUE);
        }
    }
}
use of org.apache.lucene.tests.index.RandomIndexWriter in project OpenSearch by opensearch-project.
the class DiversifiedSamplerTests method testDiversifiedSampler_noDocs.
/**
 * Runs a diversified sampler aggregation (diversified on {@code genre}, with a terms
 * sub-aggregation on {@code id}) against an index that contains no documents and expects the
 * terms sub-aggregation to produce no buckets.
 *
 * <p>The directory, writer and reader are managed with try-with-resources so they are released
 * even when the search or an assertion throws; previously they were only closed on the
 * success path.
 *
 * @throws Exception if indexing or searching fails
 */
public void testDiversifiedSampler_noDocs() throws Exception {
    try (Directory directory = newDirectory()) {
        try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
            // intentionally not writing any docs
        }
        try (IndexReader indexReader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            MappedFieldType idFieldType = new KeywordFieldMapper.KeywordFieldType("id");
            MappedFieldType genreFieldType = new KeywordFieldMapper.KeywordFieldType("genre");

            DiversifiedAggregationBuilder builder = new DiversifiedAggregationBuilder("_name").field(genreFieldType.name())
                .subAggregation(new TermsAggregationBuilder("terms").field("id"));

            InternalSampler result = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), builder, genreFieldType, idFieldType);
            Terms terms = result.getAggregations().get("terms");
            assertEquals(0, terms.getBuckets().size());
        }
    }
}
Aggregations