Use of org.apache.lucene.index.CompositeReaderContext in project elasticsearch by elastic.
From the class ShardUtilsTests, method testExtractShardId:
public void testExtractShardId() throws IOException {
    BaseDirectoryWrapper dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig());
    writer.commit();
    ShardId id = new ShardId("foo", "_na_", random().nextInt());
    // even an empty index must expose the shard id through the wrapper
    try (DirectoryReader reader = DirectoryReader.open(writer)) {
        ElasticsearchDirectoryReader wrap = ElasticsearchDirectoryReader.wrap(reader, id);
        assertEquals(id, ShardUtils.extractShardId(wrap));
    }
    final int numDocs = 1 + random().nextInt(5);
    for (int i = 0; i < numDocs; i++) {
        Document d = new Document();
        d.add(newField("name", "foobar", StringField.TYPE_STORED));
        writer.addDocument(d);
        if (random().nextBoolean()) {
            writer.commit();
        }
    }
    try (DirectoryReader reader = DirectoryReader.open(writer)) {
        ElasticsearchDirectoryReader wrap = ElasticsearchDirectoryReader.wrap(reader, id);
        assertEquals(id, ShardUtils.extractShardId(wrap));
        // the shard id must also be extractable from every leaf of the composite context
        CompositeReaderContext context = wrap.getContext;
        for (LeafReaderContext leaf : context.leaves()) {
            assertEquals(id, ShardUtils.extractShardId(leaf.reader()));
        }
    }
    IOUtils.close(writer, dir);
}
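The traversal above relies only on plain Lucene plumbing: a DirectoryReader's getContext() returns a CompositeReaderContext, and leaves() flattens it into per-segment LeafReaderContexts. A minimal sketch of that traversal on its own (the printLeaves helper is our illustration, not part of either project):

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReaderContext;

// Walk a reader's context tree and report each segment's slice of the doc-id space.
static void printLeaves(DirectoryReader reader) {
    // getContext() on a DirectoryReader returns a CompositeReaderContext
    for (LeafReaderContext leaf : reader.getContext().leaves()) {
        System.out.println("segment ord=" + leaf.ord
            + " docBase=" + leaf.docBase
            + " maxDoc=" + leaf.reader().maxDoc());
    }
}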
Use of org.apache.lucene.index.CompositeReaderContext in project elasticsearch by elastic.
From the class CollapsingTopDocsCollectorTests, method assertSearchCollapse:
private <T extends Comparable> void assertSearchCollapse(CollapsingDocValuesProducer<T> dvProducers, boolean numeric, boolean multivalued) throws IOException {
    final int numDocs = randomIntBetween(1000, 2000);
    int maxGroup = randomIntBetween(2, 500);
    final Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    Set<T> values = new HashSet<>();
    int totalHits = 0;
    for (int i = 0; i < numDocs; i++) {
        final T value = dvProducers.randomGroup(maxGroup);
        values.add(value);
        Document doc = new Document();
        dvProducers.add(doc, value, multivalued);
        doc.add(new NumericDocValuesField("sort1", randomIntBetween(0, 10)));
        doc.add(new NumericDocValuesField("sort2", randomLong()));
        w.addDocument(doc);
        totalHits++;
    }
    List<T> valueList = new ArrayList<>(values);
    Collections.sort(valueList);
    final IndexReader reader = w.getReader();
    final IndexSearcher searcher = newSearcher(reader);
    final SortField collapseField = dvProducers.sortField(multivalued);
    final SortField sort1 = new SortField("sort1", SortField.Type.INT);
    final SortField sort2 = new SortField("sort2", SortField.Type.LONG);
    Sort sort = new Sort(sort1, sort2, collapseField);
    int expectedNumGroups = values.size();
    final CollapsingTopDocsCollector collapsingCollector;
    if (numeric) {
        collapsingCollector = CollapsingTopDocsCollector.createNumeric(collapseField.getField(), sort, expectedNumGroups, false);
    } else {
        collapsingCollector = CollapsingTopDocsCollector.createKeyword(collapseField.getField(), sort, expectedNumGroups, false);
    }
    TopFieldCollector topFieldCollector = TopFieldCollector.create(sort, totalHits, true, false, false);
    searcher.search(new MatchAllDocsQuery(), collapsingCollector);
    searcher.search(new MatchAllDocsQuery(), topFieldCollector);
    CollapseTopFieldDocs collapseTopFieldDocs = collapsingCollector.getTopDocs();
    TopFieldDocs topDocs = topFieldCollector.topDocs();
    assertEquals(collapseField.getField(), collapseTopFieldDocs.field);
    assertEquals(expectedNumGroups, collapseTopFieldDocs.scoreDocs.length);
    assertEquals(totalHits, collapseTopFieldDocs.totalHits);
    assertEquals(totalHits, topDocs.scoreDocs.length);
    assertEquals(totalHits, topDocs.totalHits);
    Set<Object> seen = new HashSet<>();
    // collapse field is the last sort
    int collapseIndex = sort.getSort().length - 1;
    int topDocsIndex = 0;
    for (int i = 0; i < expectedNumGroups; i++) {
        FieldDoc fieldDoc = null;
        // advance past hits whose collapse key was already seen in an earlier group
        for (; topDocsIndex < totalHits; topDocsIndex++) {
            fieldDoc = (FieldDoc) topDocs.scoreDocs[topDocsIndex];
            if (seen.contains(fieldDoc.fields[collapseIndex]) == false) {
                break;
            }
        }
        FieldDoc collapseFieldDoc = (FieldDoc) collapseTopFieldDocs.scoreDocs[i];
        assertNotNull(fieldDoc);
        assertEquals(collapseFieldDoc.doc, fieldDoc.doc);
        assertArrayEquals(collapseFieldDoc.fields, fieldDoc.fields);
        seen.add(fieldDoc.fields[fieldDoc.fields.length - 1]);
    }
    for (; topDocsIndex < totalHits; topDocsIndex++) {
        FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[topDocsIndex];
        assertTrue(seen.contains(fieldDoc.fields[collapseIndex]));
    }
    // check merge
    final IndexReaderContext ctx = searcher.getTopReaderContext();
    final SegmentSearcher[] subSearchers;
    final int[] docStarts;
    if (ctx instanceof LeafReaderContext) {
        subSearchers = new SegmentSearcher[1];
        docStarts = new int[1];
        subSearchers[0] = new SegmentSearcher((LeafReaderContext) ctx, ctx);
        docStarts[0] = 0;
    } else {
        final CompositeReaderContext compCTX = (CompositeReaderContext) ctx;
        final int size = compCTX.leaves().size();
        subSearchers = new SegmentSearcher[size];
        docStarts = new int[size];
        int docBase = 0;
        for (int searcherIDX = 0; searcherIDX < subSearchers.length; searcherIDX++) {
            final LeafReaderContext leave = compCTX.leaves().get(searcherIDX);
            subSearchers[searcherIDX] = new SegmentSearcher(leave, compCTX);
            docStarts[searcherIDX] = docBase;
            docBase += leave.reader().maxDoc();
        }
    }
    final CollapseTopFieldDocs[] shardHits = new CollapseTopFieldDocs[subSearchers.length];
    final Weight weight = searcher.createNormalizedWeight(new MatchAllDocsQuery(), false);
    for (int shardIDX = 0; shardIDX < subSearchers.length; shardIDX++) {
        final SegmentSearcher subSearcher = subSearchers[shardIDX];
        final CollapsingTopDocsCollector c;
        if (numeric) {
            c = CollapsingTopDocsCollector.createNumeric(collapseField.getField(), sort, expectedNumGroups, false);
        } else {
            c = CollapsingTopDocsCollector.createKeyword(collapseField.getField(), sort, expectedNumGroups, false);
        }
        subSearcher.search(weight, c);
        shardHits[shardIDX] = c.getTopDocs();
    }
    CollapseTopFieldDocs mergedFieldDocs = CollapseTopFieldDocs.merge(sort, 0, expectedNumGroups, shardHits);
    assertTopDocsEquals(mergedFieldDocs, collapseTopFieldDocs);
    w.close();
    reader.close();
    dir.close();
}
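The SegmentSearcher used above is a helper from the same test class and is not shown in the snippet. The usual shape of such a helper, modeled on Lucene's own ShardSearcher in TestTopDocsMerge (the sketch below is our reconstruction, not the project's exact code), is an IndexSearcher pinned to a single leaf while keeping the parent context, so index-wide statistics stay consistent across the simulated shards:

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Weight;

// Sketch: an IndexSearcher restricted to one segment of a composite reader.
static class SegmentSearcher extends IndexSearcher {
    private final List<LeafReaderContext> ctx;

    SegmentSearcher(LeafReaderContext ctx, IndexReaderContext parent) {
        super(parent); // the parent context keeps global term/field statistics
        this.ctx = Collections.singletonList(ctx);
    }

    public void search(Weight weight, Collector collector) throws IOException {
        // delegates to the protected IndexSearcher#search over an explicit leaf list
        search(ctx, weight, collector);
    }
}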
Use of org.apache.lucene.index.CompositeReaderContext in project lucene-solr by apache.
From the class TestTopDocsMerge, method testSort:
void testSort(boolean useFrom) throws Exception {
    IndexReader reader = null;
    Directory dir = null;
    final int numDocs = TEST_NIGHTLY ? atLeast(1000) : atLeast(100);
    final String[] tokens = new String[] { "a", "b", "c", "d", "e" };
    if (VERBOSE) {
        System.out.println("TEST: make index");
    }
    {
        dir = newDirectory();
        final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
        // w.setDoRandomForceMerge(false);
        // w.w.getConfig().setMaxBufferedDocs(atLeast(100));
        final String[] content = new String[atLeast(20)];
        for (int contentIDX = 0; contentIDX < content.length; contentIDX++) {
            final StringBuilder sb = new StringBuilder();
            final int numTokens = TestUtil.nextInt(random(), 1, 10);
            for (int tokenIDX = 0; tokenIDX < numTokens; tokenIDX++) {
                sb.append(tokens[random().nextInt(tokens.length)]).append(' ');
            }
            content[contentIDX] = sb.toString();
        }
        for (int docIDX = 0; docIDX < numDocs; docIDX++) {
            final Document doc = new Document();
            doc.add(new SortedDocValuesField("string", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
            doc.add(newTextField("text", content[random().nextInt(content.length)], Field.Store.NO));
            doc.add(new FloatDocValuesField("float", random().nextFloat()));
            final int intValue;
            if (random().nextInt(100) == 17) {
                intValue = Integer.MIN_VALUE;
            } else if (random().nextInt(100) == 17) {
                intValue = Integer.MAX_VALUE;
            } else {
                intValue = random().nextInt();
            }
            doc.add(new NumericDocValuesField("int", intValue));
            if (VERBOSE) {
                System.out.println(" doc=" + doc);
            }
            w.addDocument(doc);
        }
        reader = w.getReader();
        w.close();
    }
    // NOTE: sometimes reader has just one segment, which is
    // important to test
    final IndexSearcher searcher = newSearcher(reader);
    final IndexReaderContext ctx = searcher.getTopReaderContext();
    final ShardSearcher[] subSearchers;
    final int[] docStarts;
    if (ctx instanceof LeafReaderContext) {
        subSearchers = new ShardSearcher[1];
        docStarts = new int[1];
        subSearchers[0] = new ShardSearcher((LeafReaderContext) ctx, ctx);
        docStarts[0] = 0;
    } else {
        final CompositeReaderContext compCTX = (CompositeReaderContext) ctx;
        final int size = compCTX.leaves().size();
        subSearchers = new ShardSearcher[size];
        docStarts = new int[size];
        int docBase = 0;
        for (int searcherIDX = 0; searcherIDX < subSearchers.length; searcherIDX++) {
            final LeafReaderContext leave = compCTX.leaves().get(searcherIDX);
            subSearchers[searcherIDX] = new ShardSearcher(leave, compCTX);
            docStarts[searcherIDX] = docBase;
            docBase += leave.reader().maxDoc();
        }
    }
    final List<SortField> sortFields = new ArrayList<>();
    sortFields.add(new SortField("string", SortField.Type.STRING, true));
    sortFields.add(new SortField("string", SortField.Type.STRING, false));
    sortFields.add(new SortField("int", SortField.Type.INT, true));
    sortFields.add(new SortField("int", SortField.Type.INT, false));
    sortFields.add(new SortField("float", SortField.Type.FLOAT, true));
    sortFields.add(new SortField("float", SortField.Type.FLOAT, false));
    sortFields.add(new SortField(null, SortField.Type.SCORE, true));
    sortFields.add(new SortField(null, SortField.Type.SCORE, false));
    sortFields.add(new SortField(null, SortField.Type.DOC, true));
    sortFields.add(new SortField(null, SortField.Type.DOC, false));
    int numIters = atLeast(300);
    for (int iter = 0; iter < numIters; iter++) {
        // TODO: custom FieldComp...
        final Query query = new TermQuery(new Term("text", tokens[random().nextInt(tokens.length)]));
        final Sort sort;
        if (random().nextInt(10) == 4) {
            // Sort by score
            sort = null;
        } else {
            final SortField[] randomSortFields = new SortField[TestUtil.nextInt(random(), 1, 3)];
            for (int sortIDX = 0; sortIDX < randomSortFields.length; sortIDX++) {
                randomSortFields[sortIDX] = sortFields.get(random().nextInt(sortFields.size()));
            }
            sort = new Sort(randomSortFields);
        }
        final int numHits = TestUtil.nextInt(random(), 1, numDocs + 5);
        if (VERBOSE) {
            System.out.println("TEST: search query=" + query + " sort=" + sort + " numHits=" + numHits);
        }
        int from = -1;
        int size = -1;
        // First search on whole index:
        final TopDocs topHits;
        if (sort == null) {
            if (useFrom) {
                TopScoreDocCollector c = TopScoreDocCollector.create(numHits);
                searcher.search(query, c);
                from = TestUtil.nextInt(random(), 0, numHits - 1);
                size = numHits - from;
                TopDocs tempTopHits = c.topDocs();
                if (from < tempTopHits.scoreDocs.length) {
                    // Can't use TopDocs#topDocs(start, howMany), since it has different behaviour when start >= hitCount
                    // than TopDocs#merge currently has
                    ScoreDoc[] newScoreDocs = new ScoreDoc[Math.min(size, tempTopHits.scoreDocs.length - from)];
                    System.arraycopy(tempTopHits.scoreDocs, from, newScoreDocs, 0, newScoreDocs.length);
                    tempTopHits.scoreDocs = newScoreDocs;
                    topHits = tempTopHits;
                } else {
                    topHits = new TopDocs(tempTopHits.totalHits, new ScoreDoc[0], tempTopHits.getMaxScore());
                }
            } else {
                topHits = searcher.search(query, numHits);
            }
        } else {
            final TopFieldCollector c = TopFieldCollector.create(sort, numHits, true, true, true);
            searcher.search(query, c);
            if (useFrom) {
                from = TestUtil.nextInt(random(), 0, numHits - 1);
                size = numHits - from;
                TopDocs tempTopHits = c.topDocs();
                if (from < tempTopHits.scoreDocs.length) {
                    // Can't use TopDocs#topDocs(start, howMany), since it has different behaviour when start >= hitCount
                    // than TopDocs#merge currently has
                    ScoreDoc[] newScoreDocs = new ScoreDoc[Math.min(size, tempTopHits.scoreDocs.length - from)];
                    System.arraycopy(tempTopHits.scoreDocs, from, newScoreDocs, 0, newScoreDocs.length);
                    tempTopHits.scoreDocs = newScoreDocs;
                    topHits = tempTopHits;
                } else {
                    topHits = new TopDocs(tempTopHits.totalHits, new ScoreDoc[0], tempTopHits.getMaxScore());
                }
            } else {
                topHits = c.topDocs(0, numHits);
            }
        }
        if (VERBOSE) {
            if (useFrom) {
                System.out.println("from=" + from + " size=" + size);
            }
            System.out.println(" top search: " + topHits.totalHits + " totalHits; hits=" + (topHits.scoreDocs == null ? "null" : topHits.scoreDocs.length + " maxScore=" + topHits.getMaxScore()));
            if (topHits.scoreDocs != null) {
                for (int hitIDX = 0; hitIDX < topHits.scoreDocs.length; hitIDX++) {
                    final ScoreDoc sd = topHits.scoreDocs[hitIDX];
                    System.out.println(" doc=" + sd.doc + " score=" + sd.score);
                }
            }
        }
        // ... then all shards:
        final Weight w = searcher.createNormalizedWeight(query, true);
        final TopDocs[] shardHits;
        if (sort == null) {
            shardHits = new TopDocs[subSearchers.length];
        } else {
            shardHits = new TopFieldDocs[subSearchers.length];
        }
        for (int shardIDX = 0; shardIDX < subSearchers.length; shardIDX++) {
            final TopDocs subHits;
            final ShardSearcher subSearcher = subSearchers[shardIDX];
            if (sort == null) {
                subHits = subSearcher.search(w, numHits);
            } else {
                final TopFieldCollector c = TopFieldCollector.create(sort, numHits, true, true, true);
                subSearcher.search(w, c);
                subHits = c.topDocs(0, numHits);
            }
            shardHits[shardIDX] = subHits;
            if (VERBOSE) {
                System.out.println(" shard=" + shardIDX + " " + subHits.totalHits + " totalHits hits=" + (subHits.scoreDocs == null ? "null" : subHits.scoreDocs.length));
                if (subHits.scoreDocs != null) {
                    for (ScoreDoc sd : subHits.scoreDocs) {
                        System.out.println(" doc=" + sd.doc + " score=" + sd.score);
                    }
                }
            }
        }
        // Merge:
        final TopDocs mergedHits;
        if (useFrom) {
            if (sort == null) {
                mergedHits = TopDocs.merge(from, size, shardHits, true);
            } else {
                mergedHits = TopDocs.merge(sort, from, size, (TopFieldDocs[]) shardHits, true);
            }
        } else {
            if (sort == null) {
                mergedHits = TopDocs.merge(numHits, shardHits);
            } else {
                mergedHits = TopDocs.merge(sort, numHits, (TopFieldDocs[]) shardHits);
            }
        }
        if (mergedHits.scoreDocs != null) {
            // Make sure the returned shards are correct:
            for (int hitIDX = 0; hitIDX < mergedHits.scoreDocs.length; hitIDX++) {
                final ScoreDoc sd = mergedHits.scoreDocs[hitIDX];
                assertEquals("doc=" + sd.doc + " wrong shard", ReaderUtil.subIndex(sd.doc, docStarts), sd.shardIndex);
            }
        }
        TestUtil.assertEquals(topHits, mergedHits);
    }
    reader.close();
    dir.close();
}
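The merge step at the end is the same API an application would use to combine results from real shards. A minimal sketch of the two overloads exercised above (MergeSketch and the method names are ours; exact signatures vary slightly across Lucene versions): both require that every shard ran the same query, and the sorted overload additionally requires the same Sort.

import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldDocs;

// Sketch: combine per-shard results into the global top-N. With the
// overloads used in the test above, merge also fills ScoreDoc.shardIndex,
// which is what the ReaderUtil.subIndex assertion checks.
final class MergeSketch {
    static TopDocs mergeByScore(int topN, TopDocs[] shardHits) {
        return TopDocs.merge(topN, shardHits); // relevance-ranked shards
    }

    static TopDocs mergeBySort(Sort sort, int topN, TopFieldDocs[] shardHits) {
        return TopDocs.merge(sort, topN, shardHits); // field-sorted shards
    }
}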
Use of org.apache.lucene.index.CompositeReaderContext in project elasticsearch by elastic.
From the class AggregatorTestCase, method searchAndReduce:
/**
 * Divides the provided {@link IndexSearcher} into sub-searchers, one per segment,
 * builds an aggregator for each sub-searcher filtered by the provided {@link Query}, and
 * returns the reduced {@link InternalAggregation}.
 */
protected <A extends InternalAggregation, C extends Aggregator> A searchAndReduce(IndexSearcher searcher, Query query, AggregationBuilder builder, MappedFieldType... fieldTypes) throws IOException {
    final IndexReaderContext ctx = searcher.getTopReaderContext();
    final ShardSearcher[] subSearchers;
    if (ctx instanceof LeafReaderContext) {
        subSearchers = new ShardSearcher[1];
        subSearchers[0] = new ShardSearcher((LeafReaderContext) ctx, ctx);
    } else {
        final CompositeReaderContext compCTX = (CompositeReaderContext) ctx;
        final int size = compCTX.leaves().size();
        subSearchers = new ShardSearcher[size];
        for (int searcherIDX = 0; searcherIDX < subSearchers.length; searcherIDX++) {
            final LeafReaderContext leave = compCTX.leaves().get(searcherIDX);
            subSearchers[searcherIDX] = new ShardSearcher(leave, compCTX);
        }
    }
    List<InternalAggregation> aggs = new ArrayList<>();
    Query rewritten = searcher.rewrite(query);
    Weight weight = searcher.createWeight(rewritten, true);
    C root = createAggregator(builder, searcher, fieldTypes);
    try {
        for (ShardSearcher subSearcher : subSearchers) {
            C a = createAggregator(builder, subSearcher, fieldTypes);
            a.preCollection();
            subSearcher.search(weight, a);
            a.postCollection();
            aggs.add(a.buildAggregation(0L));
        }
        if (aggs.isEmpty()) {
            return null;
        } else {
            if (randomBoolean()) {
                // sometimes do an incremental reduce
                List<InternalAggregation> internalAggregations = randomSubsetOf(randomIntBetween(1, aggs.size()), aggs);
                A internalAgg = (A) aggs.get(0).doReduce(internalAggregations, new InternalAggregation.ReduceContext(root.context().bigArrays(), null, false));
                aggs.removeAll(internalAggregations);
                aggs.add(internalAgg);
            }
            // now do the final reduce
            @SuppressWarnings("unchecked")
            A internalAgg = (A) aggs.get(0).doReduce(aggs, new InternalAggregation.ReduceContext(root.context().bigArrays(), null, true));
            return internalAgg;
        }
    } finally {
        Releasables.close(releasables);
        releasables.clear();
    }
}
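A typical caller indexes a few documents, forces more than one segment, and asserts on the reduced result. A hedged usage sketch, assuming the 5.x-era test APIs this class targets (MinAggregationBuilder, NumberFieldMapper.NumberFieldType); the test name, field names, and values are our own:

// Sketch: searchAndReduce searches each segment as a simulated shard,
// then reduces the partial min aggregations into one result.
public void testMinAcrossSegments() throws IOException {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    doc.add(new NumericDocValuesField("number", 7));
    w.addDocument(doc);
    w.commit(); // encourage a second segment
    doc = new Document();
    doc.add(new NumericDocValuesField("number", 3));
    w.addDocument(doc);
    IndexReader reader = w.getReader();
    w.close();
    IndexSearcher searcher = newSearcher(reader);
    MinAggregationBuilder builder = new MinAggregationBuilder("min").field("number");
    MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG);
    fieldType.setName("number");
    InternalMin min = searchAndReduce(searcher, new MatchAllDocsQuery(), builder, fieldType);
    assertEquals(3.0, min.getValue(), 0d); // the global minimum survives the reduce
    IOUtils.close(reader, dir);
}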