Example use of org.apache.lucene.index.LeafReaderContext in the elasticsearch project by elastic.
Taken from the class FetchPhase, method execute.
/**
 * Loads every document listed in {@code context.docIdsToLoad()} as a {@link SearchHit}, runs each
 * fetch sub-phase on every hit and then once over all hits, and stores the hits on
 * {@code context.fetchResult()}.
 *
 * <p>The stored-fields context decides which visitor is used: no context means a plain
 * {@link FieldsVisitor}; a context with {@code fetchFields() == false} means no visitor at all;
 * otherwise the requested names are split into exact names and simple-match patterns.
 *
 * @param context the search context being fetched
 * @throws IllegalArgumentException if a requested stored field has no field type but resolves to an object mapper
 * @throws TaskCancelledException if the context reports cancellation before a hit is loaded
 */
@Override
public void execute(SearchContext context) {
    Set<String> fieldNames = null;
    List<String> fieldNamePatterns = null;
    final FieldsVisitor fieldsVisitor;
    final StoredFieldsContext storedFields = context.storedFieldsContext();
    if (storedFields == null) {
        // Nothing requested: default to returning the source unless something says otherwise.
        if (context.hasScriptFields() == false && context.hasFetchSourceContext() == false) {
            context.fetchSourceContext(new FetchSourceContext(true));
        }
        fieldsVisitor = new FieldsVisitor(context.sourceRequested());
    } else if (storedFields.fetchFields()) {
        for (String requested : storedFields.fieldNames()) {
            if (requested.equals(SourceFieldMapper.NAME)) {
                // Asking for _source as a stored field turns source fetching on, keeping any includes/excludes.
                final FetchSourceContext current = context.hasFetchSourceContext()
                    ? context.fetchSourceContext()
                    : FetchSourceContext.FETCH_SOURCE;
                context.fetchSourceContext(new FetchSourceContext(true, current.includes(), current.excludes()));
            } else if (Regex.isSimpleMatchPattern(requested)) {
                if (fieldNamePatterns == null) {
                    fieldNamePatterns = new ArrayList<>();
                }
                fieldNamePatterns.add(requested);
            } else {
                // Missing paths / fields are tolerated; only a known object field is an error.
                final MappedFieldType type = context.smartNameFieldType(requested);
                if (type == null && context.getObjectMapper(requested) != null) {
                    throw new IllegalArgumentException("field [" + requested + "] isn't a leaf field");
                }
                if (fieldNames == null) {
                    fieldNames = new HashSet<>();
                }
                fieldNames.add(requested);
            }
        }
        final boolean loadSource = context.sourceRequested();
        if (fieldNames == null && fieldNamePatterns == null) {
            // Empty list specified: source is loaded only if it was explicitly requested.
            fieldsVisitor = new FieldsVisitor(loadSource);
        } else {
            final Set<String> exactNames = fieldNames == null ? Collections.emptySet() : fieldNames;
            final List<String> patterns = fieldNamePatterns == null ? Collections.emptyList() : fieldNamePatterns;
            fieldsVisitor = new CustomFieldsVisitor(exactNames, patterns, loadSource);
        }
    } else {
        // Stored fields are disabled entirely.
        fieldsVisitor = null;
    }

    final SearchHit[] loadedHits = new SearchHit[context.docIdsToLoadSize()];
    final FetchSubPhase.HitContext perHitContext = new FetchSubPhase.HitContext();
    for (int slot = 0; slot < context.docIdsToLoadSize(); slot++) {
        if (context.isCancelled()) {
            throw new TaskCancelledException("cancelled");
        }
        final int globalDocId = context.docIdsToLoad()[context.docIdsToLoadFrom() + slot];
        // Map the top-level doc id to its segment and the segment-relative doc id.
        final List<LeafReaderContext> leaves = context.searcher().getIndexReader().leaves();
        final LeafReaderContext leaf = leaves.get(ReaderUtil.subIndex(globalDocId, leaves));
        final int leafDocId = globalDocId - leaf.docBase;
        final SearchHit hit;
        try {
            final int rootDocId = findRootDocumentIfNested(context, leaf, leafDocId);
            hit = rootDocId == -1
                ? createSearchHit(context, fieldsVisitor, globalDocId, leafDocId, leaf)
                : createNestedSearchHit(context, globalDocId, leafDocId, rootDocId, fieldNames, fieldNamePatterns, leaf);
        } catch (IOException e) {
            throw ExceptionsHelper.convertToElastic(e);
        }
        loadedHits[slot] = hit;
        perHitContext.reset(hit, leaf, leafDocId, context.searcher());
        for (FetchSubPhase subPhase : fetchSubPhases) {
            subPhase.hitExecute(context, perHitContext);
        }
    }
    for (FetchSubPhase subPhase : fetchSubPhases) {
        subPhase.hitsExecute(context, loadedHits);
    }
    context.fetchResult().hits(
        new SearchHits(loadedHits, context.queryResult().topDocs().totalHits, context.queryResult().topDocs().getMaxScore()));
}
Example use of org.apache.lucene.index.LeafReaderContext in the elasticsearch project by elastic.
Taken from the class LongTermsAggregator, method buildAggregation.
/**
 * Builds the {@link LongTerms} result from the collected bucket ordinals.
 *
 * <p>When the minimum doc count is zero and the current buckets may not be enough, every value
 * of every document in every segment is first added to {@code bucketOrds} so that empty buckets
 * exist too. The buckets are then pushed through a bounded priority queue; the doc counts of
 * buckets that do not survive are reported as {@code otherDocCount}.
 *
 * @param owningBucketOrdinal asserted to be {@code 0}
 * @return the terms aggregation holding the surviving buckets
 * @throws IOException declared by the method; the visible body does not catch anything
 */
@Override
public InternalAggregation buildAggregation(long owningBucketOrdinal) throws IOException {
    assert owningBucketOrdinal == 0;
    final boolean wantsEmptyBuckets = bucketCountThresholds.getMinDocCount() == 0;
    if (wantsEmptyBuckets
        && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < bucketCountThresholds.getRequiredSize())) {
        // Fill in the blanks: register every term that passes the filter, even with no collected docs.
        for (LeafReaderContext leaf : context.searcher().getTopReaderContext().leaves()) {
            final SortedNumericDocValues docValues = getValues(valuesSource, leaf);
            final int maxDoc = leaf.reader().maxDoc();
            for (int doc = 0; doc < maxDoc; ++doc) {
                docValues.setDocument(doc);
                final int count = docValues.count();
                for (int pos = 0; pos < count; ++pos) {
                    final long term = docValues.valueAt(pos);
                    if (longFilter != null && longFilter.accept(term) == false) {
                        continue;
                    }
                    bucketOrds.add(term);
                }
            }
        }
    }

    final int queueSize = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize());
    final BucketPriorityQueue<LongTerms.Bucket> queue = new BucketPriorityQueue<>(queueSize, order.comparator(this));
    long otherDocCount = 0;
    // The candidate instance is recycled whenever the queue hands one back (or rejects it).
    LongTerms.Bucket candidate = null;
    for (long ord = 0; ord < bucketOrds.size(); ord++) {
        if (candidate == null) {
            candidate = new LongTerms.Bucket(0, 0, null, showTermDocCountError, 0, format);
        }
        candidate.term = bucketOrds.get(ord);
        candidate.docCount = bucketDocCount(ord);
        candidate.bucketOrd = ord;
        // Count everything first; the survivors are subtracted again below.
        otherDocCount += candidate.docCount;
        if (bucketCountThresholds.getShardMinDocCount() <= candidate.docCount) {
            candidate = (LongTerms.Bucket) queue.insertWithOverflow(candidate);
        }
    }

    // Drain the queue back to front into the result array.
    final LongTerms.Bucket[] topBuckets = new LongTerms.Bucket[queue.size()];
    final long[] survivingBucketOrds = new long[queue.size()];
    for (int slot = topBuckets.length - 1; slot >= 0; --slot) {
        final LongTerms.Bucket popped = (LongTerms.Bucket) queue.pop();
        survivingBucketOrds[slot] = popped.bucketOrd;
        topBuckets[slot] = popped;
        otherDocCount -= popped.docCount;
    }
    runDeferredCollections(survivingBucketOrds);

    // Attach the sub-aggregations to each surviving bucket.
    for (LongTerms.Bucket bucket : topBuckets) {
        bucket.aggregations = bucketAggregations(bucket.bucketOrd);
        bucket.docCountError = 0;
    }
    return new LongTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(),
        pipelineAggregators(), metaData(), format, bucketCountThresholds.getShardSize(), showTermDocCountError,
        otherDocCount, Arrays.asList(topBuckets), 0);
}
Example use of org.apache.lucene.index.LeafReaderContext in the elasticsearch project by elastic.
Taken from the class CompletionFieldStats, method completionStats.
/**
 * Sums the in-heap bytes used by the suggesters of all completion fields across every segment of
 * the reader. This method has CPU cost <code>O(numIndexedFields)</code>.
 *
 * @param indexReader the reader whose leaves are inspected
 * @param fieldNamePatterns if non-null and non-empty, any completion field name matching any of these patterns
 *                          also has its in-heap bytes reported separately in the returned {@link CompletionStats}
 * @return the total size, plus the per-field breakdown or {@code null} when no patterns were given
 * @throws ElasticsearchException wrapping any {@link IOException} raised while reading a segment
 */
public static CompletionStats completionStats(IndexReader indexReader, String... fieldNamePatterns) {
    final boolean breakOutPerField = fieldNamePatterns != null && fieldNamePatterns.length > 0;
    final ObjectLongHashMap<String> perFieldBytes;
    if (breakOutPerField) {
        perFieldBytes = new ObjectLongHashMap<>(fieldNamePatterns.length);
    } else {
        perFieldBytes = null;
    }

    long totalBytes = 0;
    for (LeafReaderContext leaf : indexReader.leaves()) {
        final LeafReader segmentReader = leaf.reader();
        try {
            final Fields segmentFields = segmentReader.fields();
            for (String field : segmentFields) {
                final Terms terms = segmentFields.terms(field);
                if ((terms instanceof CompletionTerms) == false) {
                    continue;
                }
                // TODO: currently we load up the suggester for reporting its size
                final long fstBytes = ((CompletionTerms) terms).suggester().ramBytesUsed();
                if (breakOutPerField && Regex.simpleMatch(fieldNamePatterns, field)) {
                    perFieldBytes.addTo(field, fstBytes);
                }
                totalBytes += fstBytes;
            }
        } catch (IOException ioe) {
            throw new ElasticsearchException(ioe);
        }
    }

    final FieldMemoryStats fieldStats = perFieldBytes == null ? null : new FieldMemoryStats(perFieldBytes);
    return new CompletionStats(totalBytes, fieldStats);
}
Example use of org.apache.lucene.index.LeafReaderContext in the elasticsearch project by elastic.
Taken from the class CompletionSuggester, method suggest.
/**
 * Rewrites the completion query against the searcher's reader and scores it segment by segment
 * into the given collector.
 *
 * @param searcher the searcher supplying the index reader and used to create the weight
 * @param query the completion query to run
 * @param collector receives the suggestions; also decides whether scores are needed
 * @throws IOException if rewriting, weight creation or scoring fails
 */
private static void suggest(IndexSearcher searcher, CompletionQuery query, TopSuggestDocsCollector collector) throws IOException {
    final CompletionQuery rewritten = (CompletionQuery) query.rewrite(searcher.getIndexReader());
    final Weight weight = rewritten.createWeight(searcher, collector.needsScores());
    for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) {
        final BulkScorer leafScorer = weight.bulkScorer(leaf);
        if (leafScorer == null) {
            // No scorer was produced for this segment.
            continue;
        }
        try {
            leafScorer.score(collector.getLeafCollector(leaf), leaf.reader().getLiveDocs());
        } catch (CollectionTerminatedException ignored) {
            // collection was terminated prematurely
            // continue with the following leaf
        }
    }
}
Example use of org.apache.lucene.index.LeafReaderContext in the elasticsearch project by elastic.
Taken from the class CollapsingTopDocsCollectorTests, method assertSearchCollapse.
/**
 * Indexes a random number of documents with a random collapse value and two numeric sort fields,
 * then verifies the collapsing collector in two ways:
 * <ol>
 *   <li>its top docs equal the first hit per distinct collapse value taken from a plain,
 *       fully sorted search over the same documents;</li>
 *   <li>running the collector per segment and merging the per-segment results yields the same
 *       top docs as the single search.</li>
 * </ol>
 *
 * @param dvProducers produces the random collapse values, adds them to a document and supplies the collapse sort field
 * @param numeric {@code true} to use the numeric collapsing collector, {@code false} for the keyword one
 * @param multivalued forwarded to {@code dvProducers.add} and {@code dvProducers.sortField}
 * @throws IOException if indexing or searching fails
 */
private <T extends Comparable> void assertSearchCollapse(CollapsingDocValuesProducer<T> dvProducers, boolean numeric, boolean multivalued) throws IOException {
    final int numDocs = randomIntBetween(1000, 2000);
    int maxGroup = randomIntBetween(2, 500);
    final Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    // Distinct collapse values; its size is the number of groups the collector must return.
    Set<T> values = new HashSet<>();
    int totalHits = 0;
    for (int i = 0; i < numDocs; i++) {
        final T value = dvProducers.randomGroup(maxGroup);
        values.add(value);
        Document doc = new Document();
        dvProducers.add(doc, value, multivalued);
        doc.add(new NumericDocValuesField("sort1", randomIntBetween(0, 10)));
        doc.add(new NumericDocValuesField("sort2", randomLong()));
        w.addDocument(doc);
        totalHits++;
    }
    final IndexReader reader = w.getReader();
    final IndexSearcher searcher = newSearcher(reader);
    final SortField collapseField = dvProducers.sortField(multivalued);
    final SortField sort1 = new SortField("sort1", SortField.Type.INT);
    final SortField sort2 = new SortField("sort2", SortField.Type.LONG);
    Sort sort = new Sort(sort1, sort2, collapseField);
    int expectedNumGroups = values.size();

    final CollapsingTopDocsCollector collapsingCollector;
    if (numeric) {
        collapsingCollector = CollapsingTopDocsCollector.createNumeric(collapseField.getField(), sort, expectedNumGroups, false);
    } else {
        collapsingCollector = CollapsingTopDocsCollector.createKeyword(collapseField.getField(), sort, expectedNumGroups, false);
    }
    TopFieldCollector topFieldCollector = TopFieldCollector.create(sort, totalHits, true, false, false);
    searcher.search(new MatchAllDocsQuery(), collapsingCollector);
    searcher.search(new MatchAllDocsQuery(), topFieldCollector);
    CollapseTopFieldDocs collapseTopFieldDocs = collapsingCollector.getTopDocs();
    TopFieldDocs topDocs = topFieldCollector.topDocs();
    assertEquals(collapseField.getField(), collapseTopFieldDocs.field);
    assertEquals(expectedNumGroups, collapseTopFieldDocs.scoreDocs.length);
    assertEquals(totalHits, collapseTopFieldDocs.totalHits);
    assertEquals(totalHits, topDocs.scoreDocs.length);
    assertEquals(totalHits, topDocs.totalHits);

    Set<Object> seen = new HashSet<>();
    // collapse field is the last sort
    int collapseIndex = sort.getSort().length - 1;
    int topDocsIndex = 0;
    for (int i = 0; i < expectedNumGroups; i++) {
        // Advance to the next hit whose collapse value has not been seen yet.
        FieldDoc fieldDoc = null;
        for (; topDocsIndex < totalHits; topDocsIndex++) {
            fieldDoc = (FieldDoc) topDocs.scoreDocs[topDocsIndex];
            if (seen.contains(fieldDoc.fields[collapseIndex]) == false) {
                break;
            }
        }
        FieldDoc collapseFieldDoc = (FieldDoc) collapseTopFieldDocs.scoreDocs[i];
        assertNotNull(fieldDoc);
        assertEquals(collapseFieldDoc.doc, fieldDoc.doc);
        assertArrayEquals(collapseFieldDoc.fields, fieldDoc.fields);
        seen.add(fieldDoc.fields[collapseIndex]);
    }
    // Every remaining hit must belong to a group that was already returned.
    for (; topDocsIndex < totalHits; topDocsIndex++) {
        FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[topDocsIndex];
        assertTrue(seen.contains(fieldDoc.fields[collapseIndex]));
    }

    // check merge
    final IndexReaderContext ctx = searcher.getTopReaderContext();
    final SegmentSearcher[] subSearchers;
    if (ctx instanceof LeafReaderContext) {
        subSearchers = new SegmentSearcher[1];
        subSearchers[0] = new SegmentSearcher((LeafReaderContext) ctx, ctx);
    } else {
        final CompositeReaderContext compCTX = (CompositeReaderContext) ctx;
        final int size = compCTX.leaves().size();
        subSearchers = new SegmentSearcher[size];
        for (int searcherIDX = 0; searcherIDX < subSearchers.length; searcherIDX++) {
            final LeafReaderContext leaf = compCTX.leaves().get(searcherIDX);
            subSearchers[searcherIDX] = new SegmentSearcher(leaf, compCTX);
        }
    }
    final CollapseTopFieldDocs[] shardHits = new CollapseTopFieldDocs[subSearchers.length];
    final Weight weight = searcher.createNormalizedWeight(new MatchAllDocsQuery(), false);
    for (int shardIDX = 0; shardIDX < subSearchers.length; shardIDX++) {
        final SegmentSearcher subSearcher = subSearchers[shardIDX];
        final CollapsingTopDocsCollector c;
        if (numeric) {
            c = CollapsingTopDocsCollector.createNumeric(collapseField.getField(), sort, expectedNumGroups, false);
        } else {
            c = CollapsingTopDocsCollector.createKeyword(collapseField.getField(), sort, expectedNumGroups, false);
        }
        subSearcher.search(weight, c);
        shardHits[shardIDX] = c.getTopDocs();
    }
    CollapseTopFieldDocs mergedFieldDocs = CollapseTopFieldDocs.merge(sort, 0, expectedNumGroups, shardHits);
    assertTopDocsEquals(mergedFieldDocs, collapseTopFieldDocs);
    w.close();
    reader.close();
    dir.close();
}
Aggregations