Use of org.opensearch.search.aggregations.LeafBucketCollector in project OpenSearch by opensearch-project.
From the class MergingBucketsDeferringCollectorTests, method testBucketMergeAndDelete.
public void testBucketMergeAndDelete() throws Exception {
    testCase((deferringCollector, delegate) -> new LeafBucketCollector() {
        @Override
        public void collect(int doc, long owningBucketOrd) throws IOException {
            // Only collects at the top level
            assert owningBucketOrd == 0;
            // Route each document into a bucket whose ordinal equals its doc id
            delegate.collect(doc, doc);
            if (doc == 7) {
                // Merge buckets with ordinal > 3 into a single bucket 0; buckets 0-3 are dropped
                deferringCollector.mergeBuckets(oldBucket -> oldBucket > 3 ? 0 : -1);
            }
        }
    }, (deferringCollector, finalCollector) -> {
        deferringCollector.prepareSelectedBuckets(0, 8, 9);
        assertThat(
            finalCollector.collection,
            equalTo(
                org.opensearch.common.collect.Map.of(
                    0L, org.opensearch.common.collect.List.of(4, 5, 6, 7),
                    1L, org.opensearch.common.collect.List.of(8),
                    2L, org.opensearch.common.collect.List.of(9)
                )
            )
        );
    });
}
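The collector above illustrates the core LeafBucketCollector contract: collect(doc, owningBucketOrd) is invoked once per matching document with a segment-local doc id and the ordinal of the bucket that owns it. As a point of reference, the following is a minimal, self-contained sketch (not part of the OpenSearch test suite; the CountingCollector name and bucketCounts map are made up for illustration) of a collector that simply counts documents per owning bucket ordinal.

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.opensearch.search.aggregations.LeafBucketCollector;

// Illustrative sketch only: count collected documents per owning bucket ordinal.
public class CountingCollector extends LeafBucketCollector {
    private final Map<Long, Integer> bucketCounts = new HashMap<>();

    @Override
    public void collect(int doc, long owningBucketOrd) throws IOException {
        // doc is a segment-local document id; owningBucketOrd identifies the
        // bucket on whose behalf the document is being collected.
        bucketCounts.merge(owningBucketOrd, 1, Integer::sum);
    }

    public Map<Long, Integer> counts() {
        return bucketCounts;
    }
}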
Use of org.opensearch.search.aggregations.LeafBucketCollector in project OpenSearch by opensearch-project.
From the class CompositeValuesCollectorQueueTests, method testRandomCase.
private void testRandomCase(boolean forceMerge, boolean missingBucket, int indexSortSourcePrefix, ClassAndName... types) throws IOException {
    final BigArrays bigArrays = BigArrays.NON_RECYCLING_INSTANCE;
    int numDocs = randomIntBetween(50, 100);
    List<Comparable<?>[]> possibleValues = new ArrayList<>();
    SortField[] indexSortFields = indexSortSourcePrefix == 0 ? null : new SortField[indexSortSourcePrefix];
    for (int i = 0; i < types.length; i++) {
        ClassAndName type = types[i];
        final Comparable<?>[] values;
        int numValues = randomIntBetween(1, numDocs * 2);
        values = new Comparable[numValues];
        if (type.clazz == Long.class) {
            if (i < indexSortSourcePrefix) {
                indexSortFields[i] = new SortedNumericSortField(type.fieldType.name(), SortField.Type.LONG);
            }
            for (int j = 0; j < numValues; j++) {
                values[j] = randomLong();
            }
        } else if (type.clazz == Double.class) {
            if (i < indexSortSourcePrefix) {
                indexSortFields[i] = new SortedNumericSortField(type.fieldType.name(), SortField.Type.DOUBLE);
            }
            for (int j = 0; j < numValues; j++) {
                values[j] = randomDouble();
            }
        } else if (type.clazz == BytesRef.class) {
            if (i < indexSortSourcePrefix) {
                indexSortFields[i] = new SortedSetSortField(type.fieldType.name(), false);
            }
            for (int j = 0; j < numValues; j++) {
                values[j] = new BytesRef(randomAlphaOfLengthBetween(5, 50));
            }
        } else {
            assert (false);
        }
        possibleValues.add(values);
    }
    Set<CompositeKey> keys = new HashSet<>();
    try (Directory directory = newDirectory()) {
        final IndexWriterConfig writerConfig = newIndexWriterConfig();
        if (indexSortFields != null) {
            writerConfig.setIndexSort(new Sort(indexSortFields));
        }
        try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, new KeywordAnalyzer())) {
            for (int i = 0; i < numDocs; i++) {
                Document document = new Document();
                List<List<Comparable<?>>> docValues = new ArrayList<>();
                boolean hasAllField = true;
                for (int j = 0; j < types.length; j++) {
                    int numValues = indexSortSourcePrefix - 1 >= j ? 1 : randomIntBetween(0, 5);
                    List<Comparable<?>> values = new ArrayList<>();
                    if (numValues == 0) {
                        hasAllField = false;
                        if (missingBucket) {
                            values.add(null);
                        }
                    } else {
                        for (int k = 0; k < numValues; k++) {
                            values.add(possibleValues.get(j)[randomIntBetween(0, possibleValues.get(j).length - 1)]);
                            if (types[j].clazz == Long.class) {
                                long value = (Long) values.get(k);
                                document.add(new SortedNumericDocValuesField(types[j].fieldType.name(), value));
                                document.add(new LongPoint(types[j].fieldType.name(), value));
                            } else if (types[j].clazz == Double.class) {
                                document.add(new SortedNumericDocValuesField(types[j].fieldType.name(), NumericUtils.doubleToSortableLong((Double) values.get(k))));
                            } else if (types[j].clazz == BytesRef.class) {
                                BytesRef value = (BytesRef) values.get(k);
                                document.add(new SortedSetDocValuesField(types[j].fieldType.name(), value));
                                document.add(new TextField(types[j].fieldType.name(), value.utf8ToString(), Field.Store.NO));
                            } else {
                                assert (false);
                            }
                        }
                    }
                    docValues.add(values);
                }
                if (hasAllField || missingBucket) {
                    List<CompositeKey> comb = createListCombinations(docValues);
                    keys.addAll(comb);
                }
                indexWriter.addDocument(document);
            }
            if (forceMerge) {
                indexWriter.forceMerge(1);
            }
        }
        IndexReader reader = DirectoryReader.open(directory);
        int size = keys.size() > 1 ? randomIntBetween(1, keys.size()) : 1;
        SingleDimensionValuesSource<?>[] sources = new SingleDimensionValuesSource[types.length];
        for (int i = 0; i < types.length; i++) {
            final MappedFieldType fieldType = types[i].fieldType;
            if (types[i].clazz == Long.class) {
                sources[i] = new LongValuesSource(
                    bigArrays, fieldType,
                    context -> DocValues.getSortedNumeric(context.reader(), fieldType.name()),
                    value -> value, DocValueFormat.RAW, missingBucket, MissingOrder.DEFAULT, size, 1
                );
            } else if (types[i].clazz == Double.class) {
                sources[i] = new DoubleValuesSource(
                    bigArrays, fieldType,
                    context -> FieldData.sortableLongBitsToDoubles(DocValues.getSortedNumeric(context.reader(), fieldType.name())),
                    DocValueFormat.RAW, missingBucket, MissingOrder.DEFAULT, size, 1
                );
            } else if (types[i].clazz == BytesRef.class) {
                if (forceMerge) {
                    // we don't create global ordinals but we test this mode when the reader has a single segment
                    // since ordinals are global in this case.
                    sources[i] = new GlobalOrdinalValuesSource(
                        bigArrays, fieldType,
                        context -> DocValues.getSortedSet(context.reader(), fieldType.name()),
                        DocValueFormat.RAW, missingBucket, MissingOrder.DEFAULT, size, 1
                    );
                } else {
                    sources[i] = new BinaryValuesSource(
                        bigArrays, (b) -> {}, fieldType,
                        context -> FieldData.toString(DocValues.getSortedSet(context.reader(), fieldType.name())),
                        DocValueFormat.RAW, missingBucket, MissingOrder.DEFAULT, size, 1
                    );
                }
            } else {
                assert (false);
            }
        }
        CompositeKey[] expected = keys.toArray(new CompositeKey[0]);
        Arrays.sort(expected, (a, b) -> compareKey(a, b));
        for (boolean withProducer : new boolean[] { true, false }) {
            int pos = 0;
            CompositeKey last = null;
            while (pos < size) {
                final CompositeValuesCollectorQueue queue = new CompositeValuesCollectorQueue(
                    BigArrays.NON_RECYCLING_INSTANCE, sources, size, last
                );
                final SortedDocsProducer docsProducer = sources[0].createSortedDocsProducerOrNull(reader, new MatchAllDocsQuery());
                for (LeafReaderContext leafReaderContext : reader.leaves()) {
                    if (docsProducer != null && withProducer) {
                        assertEquals(DocIdSet.EMPTY, docsProducer.processLeaf(new MatchAllDocsQuery(), queue, leafReaderContext, false));
                    } else {
                        final LeafBucketCollector leafCollector = new LeafBucketCollector() {
                            @Override
                            public void collect(int doc, long bucket) throws IOException {
                                queue.addIfCompetitive(indexSortSourcePrefix);
                            }
                        };
                        final LeafBucketCollector queueCollector = queue.getLeafCollector(leafReaderContext, leafCollector);
                        final Bits liveDocs = leafReaderContext.reader().getLiveDocs();
                        for (int i = 0; i < leafReaderContext.reader().maxDoc(); i++) {
                            if (liveDocs == null || liveDocs.get(i)) {
                                try {
                                    queueCollector.collect(i);
                                } catch (CollectionTerminatedException exc) {
                                    assertThat(indexSortSourcePrefix, greaterThan(0));
                                }
                            }
                        }
                    }
                }
                assertEquals(size, Math.min(queue.size(), expected.length - pos));
                int ptr = pos + (queue.size() - 1);
                pos += queue.size();
                last = null;
                // Drain the queue and check the popped keys against the expected keys in
                // reverse order; the first popped key is remembered in `last` for the next round.
                while (queue.size() > 0) {
                    CompositeKey key = queue.toCompositeKey(queue.pop());
                    if (last == null) {
                        last = key;
                    }
                    assertThat(key, equalTo(expected[ptr--]));
                }
            }
        }
        reader.close();
    }
}
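The excerpt relies on a createListCombinations helper (not shown) to enumerate the expected composite keys for each indexed document. Purely to illustrate the idea, here is a rough sketch of the underlying cartesian-product step over per-field value lists; the Combinations class and cartesianProduct method are hypothetical names, and the real helper produces CompositeKey instances rather than plain lists.

import java.util.ArrayList;
import java.util.List;

// Illustrative sketch only: build every combination that picks one value per field
// from the given per-field value lists.
public final class Combinations {
    public static List<List<Comparable<?>>> cartesianProduct(List<List<Comparable<?>>> perField) {
        List<List<Comparable<?>>> result = new ArrayList<>();
        result.add(new ArrayList<>());
        for (List<Comparable<?>> fieldValues : perField) {
            List<List<Comparable<?>>> next = new ArrayList<>();
            for (List<Comparable<?>> prefix : result) {
                for (Comparable<?> value : fieldValues) {
                    // Extend each partial combination with one value of the current field.
                    List<Comparable<?>> extended = new ArrayList<>(prefix);
                    extended.add(value);
                    next.add(extended);
                }
            }
            result = next;
        }
        return result;
    }
}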
Use of org.opensearch.search.aggregations.LeafBucketCollector in project OpenSearch by opensearch-project.
From the class BinaryRangeAggregatorTests, method doTestSortedBinaryRangeLeafCollector.
private void doTestSortedBinaryRangeLeafCollector(int maxNumValuesPerDoc) throws Exception {
    final Set<BytesRef> termSet = new HashSet<>();
    final int numTerms = TestUtil.nextInt(random(), maxNumValuesPerDoc, 100);
    while (termSet.size() < numTerms) {
        termSet.add(new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2))));
    }
    final BytesRef[] terms = termSet.toArray(new BytesRef[0]);
    Arrays.sort(terms);
    final int numRanges = randomIntBetween(1, 10);
    BinaryRangeAggregator.Range[] ranges = new BinaryRangeAggregator.Range[numRanges];
    for (int i = 0; i < numRanges; ++i) {
        ranges[i] = new BinaryRangeAggregator.Range(
            Integer.toString(i),
            randomBoolean() ? null : new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2))),
            randomBoolean() ? null : new BytesRef(TestUtil.randomSimpleString(random(), randomInt(2)))
        );
    }
    Arrays.sort(ranges, BinaryRangeAggregator.RANGE_COMPARATOR);
    FakeSortedBinaryDocValues values = new FakeSortedBinaryDocValues(terms);
    final int[] counts = new int[ranges.length];
    SortedBinaryRangeLeafCollector collector = new SortedBinaryRangeLeafCollector(values, ranges, null) {
        @Override
        protected void doCollect(LeafBucketCollector sub, int doc, long bucket) throws IOException {
            counts[(int) bucket]++;
        }
    };
    final int[] expectedCounts = new int[ranges.length];
    final int maxDoc = randomIntBetween(5, 10);
    for (int doc = 0; doc < maxDoc; ++doc) {
        LongHashSet ordinalSet = new LongHashSet();
        final int numValues = randomInt(maxNumValuesPerDoc);
        while (ordinalSet.size() < numValues) {
            ordinalSet.add(random().nextInt(terms.length));
        }
        final long[] ords = ordinalSet.toArray();
        Arrays.sort(ords);
        values.ords = ords;
        // simulate aggregation
        collector.collect(doc);
        // now do it the naive way
        for (int i = 0; i < ranges.length; ++i) {
            for (long ord : ords) {
                BytesRef term = terms[(int) ord];
                if ((ranges[i].from == null || ranges[i].from.compareTo(term) <= 0)
                    && (ranges[i].to == null || ranges[i].to.compareTo(term) > 0)) {
                    expectedCounts[i]++;
                    break;
                }
            }
        }
    }
    assertArrayEquals(expectedCounts, counts);
}
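The naive verification loop above encodes the range-membership rule the test checks against: the lower bound is inclusive, the upper bound is exclusive, and a null bound leaves that side of the range open. A small sketch of that check in isolation, with the RangeCheck class and inRange method named purely for illustration:

import org.apache.lucene.util.BytesRef;

// Illustrative sketch only: the membership rule used by the naive check above.
public final class RangeCheck {
    public static boolean inRange(BytesRef term, BytesRef from, BytesRef to) {
        // from is inclusive, to is exclusive; a null bound means the range is open on that side.
        return (from == null || from.compareTo(term) <= 0) && (to == null || to.compareTo(term) > 0);
    }
}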