Example 6 with LeafReaderContext

use of org.apache.lucene.index.LeafReaderContext in project elasticsearch by elastic.

From the class FetchPhase, the execute method:

@Override
public void execute(SearchContext context) {
    final FieldsVisitor fieldsVisitor;
    Set<String> fieldNames = null;
    List<String> fieldNamePatterns = null;
    StoredFieldsContext storedFieldsContext = context.storedFieldsContext();
    if (storedFieldsContext == null) {
        // no fields specified, default to return source if no explicit indication
        if (!context.hasScriptFields() && !context.hasFetchSourceContext()) {
            context.fetchSourceContext(new FetchSourceContext(true));
        }
        fieldsVisitor = new FieldsVisitor(context.sourceRequested());
    } else if (storedFieldsContext.fetchFields() == false) {
        // disable stored fields entirely
        fieldsVisitor = null;
    } else {
        for (String fieldName : context.storedFieldsContext().fieldNames()) {
            if (fieldName.equals(SourceFieldMapper.NAME)) {
                FetchSourceContext fetchSourceContext = context.hasFetchSourceContext() ? context.fetchSourceContext() : FetchSourceContext.FETCH_SOURCE;
                context.fetchSourceContext(new FetchSourceContext(true, fetchSourceContext.includes(), fetchSourceContext.excludes()));
                continue;
            }
            if (Regex.isSimpleMatchPattern(fieldName)) {
                if (fieldNamePatterns == null) {
                    fieldNamePatterns = new ArrayList<>();
                }
                fieldNamePatterns.add(fieldName);
            } else {
                MappedFieldType fieldType = context.smartNameFieldType(fieldName);
                if (fieldType == null) {
                    // Only fail if we know it is an object field; missing paths / fields shouldn't fail.
                    if (context.getObjectMapper(fieldName) != null) {
                        throw new IllegalArgumentException("field [" + fieldName + "] isn't a leaf field");
                    }
                }
                if (fieldNames == null) {
                    fieldNames = new HashSet<>();
                }
                fieldNames.add(fieldName);
            }
        }
        boolean loadSource = context.sourceRequested();
        if (fieldNames == null && fieldNamePatterns == null) {
            // empty list specified, default to disable _source if no explicit indication
            fieldsVisitor = new FieldsVisitor(loadSource);
        } else {
            fieldsVisitor = new CustomFieldsVisitor(fieldNames == null ? Collections.emptySet() : fieldNames, fieldNamePatterns == null ? Collections.emptyList() : fieldNamePatterns, loadSource);
        }
    }
    SearchHit[] hits = new SearchHit[context.docIdsToLoadSize()];
    FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext();
    for (int index = 0; index < context.docIdsToLoadSize(); index++) {
        if (context.isCancelled()) {
            throw new TaskCancelledException("cancelled");
        }
        int docId = context.docIdsToLoad()[context.docIdsToLoadFrom() + index];
        // locate the segment (leaf) that contains this top-level doc id
        int readerIndex = ReaderUtil.subIndex(docId, context.searcher().getIndexReader().leaves());
        LeafReaderContext subReaderContext = context.searcher().getIndexReader().leaves().get(readerIndex);
        // convert the top-level doc id into a segment-local doc id
        int subDocId = docId - subReaderContext.docBase;
        final SearchHit searchHit;
        try {
            int rootDocId = findRootDocumentIfNested(context, subReaderContext, subDocId);
            if (rootDocId != -1) {
                searchHit = createNestedSearchHit(context, docId, subDocId, rootDocId, fieldNames, fieldNamePatterns, subReaderContext);
            } else {
                searchHit = createSearchHit(context, fieldsVisitor, docId, subDocId, subReaderContext);
            }
        } catch (IOException e) {
            throw ExceptionsHelper.convertToElastic(e);
        }
        hits[index] = searchHit;
        hitContext.reset(searchHit, subReaderContext, subDocId, context.searcher());
        for (FetchSubPhase fetchSubPhase : fetchSubPhases) {
            fetchSubPhase.hitExecute(context, hitContext);
        }
    }
    for (FetchSubPhase fetchSubPhase : fetchSubPhases) {
        fetchSubPhase.hitsExecute(context, hits);
    }
    context.fetchResult().hits(new SearchHits(hits, context.queryResult().topDocs().totalHits, context.queryResult().topDocs().getMaxScore()));
}
Also used: FieldsVisitor(org.elasticsearch.index.fieldvisitor.FieldsVisitor) CustomFieldsVisitor(org.elasticsearch.index.fieldvisitor.CustomFieldsVisitor) SearchHit(org.elasticsearch.search.SearchHit) ArrayList(java.util.ArrayList) IOException(java.io.IOException) FetchSourceContext(org.elasticsearch.search.fetch.subphase.FetchSourceContext) MappedFieldType(org.elasticsearch.index.mapper.MappedFieldType) InnerHitsFetchSubPhase(org.elasticsearch.search.fetch.subphase.InnerHitsFetchSubPhase) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) TaskCancelledException(org.elasticsearch.tasks.TaskCancelledException) SearchHits(org.elasticsearch.search.SearchHits) HashSet(java.util.HashSet)
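
The core LeafReaderContext work in this example is the translation of a top-level doc id into a segment and a segment-local doc id (ReaderUtil.subIndex plus docBase). A minimal, self-contained sketch of that mapping; the helper class and method names are hypothetical, not Elasticsearch code:

import java.util.List;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;

final class LeafDocIdExample {

    /** Resolves a top-level doc id to its segment index and segment-local doc id. */
    static int[] resolve(IndexReader reader, int topLevelDocId) {
        List<LeafReaderContext> leaves = reader.leaves();
        // binary-search for the leaf whose doc id range contains topLevelDocId
        int readerIndex = ReaderUtil.subIndex(topLevelDocId, leaves);
        LeafReaderContext leaf = leaves.get(readerIndex);
        // subtract the leaf's docBase to get the doc id within that segment
        int segmentDocId = topLevelDocId - leaf.docBase;
        return new int[] { readerIndex, segmentDocId };
    }
}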

Example 7 with LeafReaderContext

use of org.apache.lucene.index.LeafReaderContext in project elasticsearch by elastic.

From the class LongTermsAggregator, the buildAggregation method:

@Override
public InternalAggregation buildAggregation(long owningBucketOrdinal) throws IOException {
    assert owningBucketOrdinal == 0;
    if (bucketCountThresholds.getMinDocCount() == 0 && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < bucketCountThresholds.getRequiredSize())) {
        // we need to fill in the blanks: visit every doc in every segment so terms with zero matching docs still get a bucket
        for (LeafReaderContext ctx : context.searcher().getTopReaderContext().leaves()) {
            final SortedNumericDocValues values = getValues(valuesSource, ctx);
            for (int docId = 0; docId < ctx.reader().maxDoc(); ++docId) {
                values.setDocument(docId);
                final int valueCount = values.count();
                for (int i = 0; i < valueCount; ++i) {
                    long value = values.valueAt(i);
                    if (longFilter == null || longFilter.accept(value)) {
                        bucketOrds.add(value);
                    }
                }
            }
        }
    }
    final int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize());
    long otherDocCount = 0;
    BucketPriorityQueue<LongTerms.Bucket> ordered = new BucketPriorityQueue<>(size, order.comparator(this));
    LongTerms.Bucket spare = null;
    for (long i = 0; i < bucketOrds.size(); i++) {
        if (spare == null) {
            spare = new LongTerms.Bucket(0, 0, null, showTermDocCountError, 0, format);
        }
        spare.term = bucketOrds.get(i);
        spare.docCount = bucketDocCount(i);
        otherDocCount += spare.docCount;
        spare.bucketOrd = i;
        if (bucketCountThresholds.getShardMinDocCount() <= spare.docCount) {
            spare = (LongTerms.Bucket) ordered.insertWithOverflow(spare);
        }
    }
    // Get the top buckets
    final LongTerms.Bucket[] list = new LongTerms.Bucket[ordered.size()];
    long[] survivingBucketOrds = new long[ordered.size()];
    for (int i = ordered.size() - 1; i >= 0; --i) {
        final LongTerms.Bucket bucket = (LongTerms.Bucket) ordered.pop();
        survivingBucketOrds[i] = bucket.bucketOrd;
        list[i] = bucket;
        otherDocCount -= bucket.docCount;
    }
    runDeferredCollections(survivingBucketOrds);
    // Now build the aggs
    for (int i = 0; i < list.length; i++) {
        list[i].aggregations = bucketAggregations(list[i].bucketOrd);
        list[i].docCountError = 0;
    }
    return new LongTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), pipelineAggregators(), metaData(), format, bucketCountThresholds.getShardSize(), showTermDocCountError, otherDocCount, Arrays.asList(list), 0);
}
Also used: BucketPriorityQueue(org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) LeafReaderContext(org.apache.lucene.index.LeafReaderContext)
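
The fill-in loop above visits every document of every segment through its LeafReaderContext. A minimal sketch of that per-leaf doc-values walk using the Lucene 6.x-era random-access API the example relies on; the class name and the field name are illustrative, not part of Elasticsearch:

import java.io.IOException;

import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;

final class LeafDocValuesExample {

    /** Visits every value of a sorted-numeric doc-values field, one leaf at a time. */
    static void visitAll(IndexReader reader, String field) throws IOException {
        for (LeafReaderContext ctx : reader.leaves()) {
            SortedNumericDocValues values = DocValues.getSortedNumeric(ctx.reader(), field);
            for (int doc = 0; doc < ctx.reader().maxDoc(); ++doc) {
                values.setDocument(doc);
                for (int i = 0; i < values.count(); ++i) {
                    long value = values.valueAt(i);
                    // e.g. register the value in a bucket structure, as the aggregator does
                }
            }
        }
    }
}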

Example 8 with LeafReaderContext

use of org.apache.lucene.index.LeafReaderContext in project elasticsearch by elastic.

From the class CompletionFieldStats, the completionStats method:

/**
 * Returns total in-heap bytes used by all suggesters. This method has CPU cost <code>O(numIndexedFields)</code>.
 *
 * @param fieldNamePatterns if non-null, any completion field name matching any of these patterns will break out its in-heap bytes
 *                          separately in the returned {@link CompletionStats}
 */
public static CompletionStats completionStats(IndexReader indexReader, String... fieldNamePatterns) {
    long sizeInBytes = 0;
    ObjectLongHashMap<String> completionFields = null;
    if (fieldNamePatterns != null && fieldNamePatterns.length > 0) {
        completionFields = new ObjectLongHashMap<>(fieldNamePatterns.length);
    }
    for (LeafReaderContext atomicReaderContext : indexReader.leaves()) {
        LeafReader atomicReader = atomicReaderContext.reader();
        try {
            Fields fields = atomicReader.fields();
            for (String fieldName : fields) {
                Terms terms = fields.terms(fieldName);
                if (terms instanceof CompletionTerms) {
                    // TODO: currently we load up the suggester for reporting its size
                    long fstSize = ((CompletionTerms) terms).suggester().ramBytesUsed();
                    if (fieldNamePatterns != null && fieldNamePatterns.length > 0 && Regex.simpleMatch(fieldNamePatterns, fieldName)) {
                        completionFields.addTo(fieldName, fstSize);
                    }
                    sizeInBytes += fstSize;
                }
            }
        } catch (IOException ioe) {
            throw new ElasticsearchException(ioe);
        }
    }
    return new CompletionStats(sizeInBytes, completionFields == null ? null : new FieldMemoryStats(completionFields));
}
Also used: LeafReader(org.apache.lucene.index.LeafReader) Terms(org.apache.lucene.index.Terms) CompletionTerms(org.apache.lucene.search.suggest.document.CompletionTerms) IOException(java.io.IOException) ElasticsearchException(org.elasticsearch.ElasticsearchException) Fields(org.apache.lucene.index.Fields) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) FieldMemoryStats(org.elasticsearch.common.FieldMemoryStats)
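
A brief usage sketch: the reader variable, the "suggest*" pattern, and the getSizeInBytes() accessor are assumptions for illustration, not taken from the example above:

// assumes an already-open reader, e.g. IndexReader reader = DirectoryReader.open(directory);
CompletionStats stats = CompletionFieldStats.completionStats(reader, "suggest*");
long totalBytes = stats.getSizeInBytes(); // in-heap bytes across all completion fields; per-field numbers come back as FieldMemoryStats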

Example 9 with LeafReaderContext

use of org.apache.lucene.index.LeafReaderContext in project elasticsearch by elastic.

From the class CompletionSuggester, the suggest method:

private static void suggest(IndexSearcher searcher, CompletionQuery query, TopSuggestDocsCollector collector) throws IOException {
    query = (CompletionQuery) query.rewrite(searcher.getIndexReader());
    Weight weight = query.createWeight(searcher, collector.needsScores());
    for (LeafReaderContext context : searcher.getIndexReader().leaves()) {
        BulkScorer scorer = weight.bulkScorer(context);
        if (scorer != null) {
            try {
                scorer.score(collector.getLeafCollector(context), context.reader().getLiveDocs());
            } catch (CollectionTerminatedException e) {
                // collection was terminated prematurely
                // continue with the following leaf
            }
        }
    }
}
Also used: CollectionTerminatedException(org.apache.lucene.search.CollectionTerminatedException) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) BulkScorer(org.apache.lucene.search.BulkScorer) Weight(org.apache.lucene.search.Weight)
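
The same per-leaf pattern (rewrite, build a Weight, then ask for a BulkScorer per LeafReaderContext) works for any query. A minimal sketch using the Lucene 6.x-era APIs the example already uses; the class and method names are illustrative:

import java.io.IOException;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.BulkScorer;
import org.apache.lucene.search.CollectionTerminatedException;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Weight;

final class PerLeafSearchExample {

    /** Drives a query one segment at a time, mirroring the suggest method above. */
    static void searchPerLeaf(IndexSearcher searcher, Query query, Collector collector) throws IOException {
        Query rewritten = searcher.rewrite(query);
        // Lucene 6.x-era call; newer versions build the Weight via searcher.createWeight(...)
        Weight weight = searcher.createNormalizedWeight(rewritten, collector.needsScores());
        for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) {
            BulkScorer scorer = weight.bulkScorer(leaf);
            if (scorer == null) {
                // no matching docs in this segment
                continue;
            }
            try {
                scorer.score(collector.getLeafCollector(leaf), leaf.reader().getLiveDocs());
            } catch (CollectionTerminatedException e) {
                // the leaf collector asked to stop early; move on to the next segment
            }
        }
    }
}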

Example 10 with LeafReaderContext

use of org.apache.lucene.index.LeafReaderContext in project elasticsearch by elastic.

From the class CollapsingTopDocsCollectorTests, the assertSearchCollapse method:

private <T extends Comparable> void assertSearchCollapse(CollapsingDocValuesProducer<T> dvProducers, boolean numeric, boolean multivalued) throws IOException {
    final int numDocs = randomIntBetween(1000, 2000);
    int maxGroup = randomIntBetween(2, 500);
    final Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    Set<T> values = new HashSet<>();
    int totalHits = 0;
    for (int i = 0; i < numDocs; i++) {
        final T value = dvProducers.randomGroup(maxGroup);
        values.add(value);
        Document doc = new Document();
        dvProducers.add(doc, value, multivalued);
        doc.add(new NumericDocValuesField("sort1", randomIntBetween(0, 10)));
        doc.add(new NumericDocValuesField("sort2", randomLong()));
        w.addDocument(doc);
        totalHits++;
    }
    List<T> valueList = new ArrayList<>(values);
    Collections.sort(valueList);
    final IndexReader reader = w.getReader();
    final IndexSearcher searcher = newSearcher(reader);
    final SortField collapseField = dvProducers.sortField(multivalued);
    final SortField sort1 = new SortField("sort1", SortField.Type.INT);
    final SortField sort2 = new SortField("sort2", SortField.Type.LONG);
    Sort sort = new Sort(sort1, sort2, collapseField);
    int expectedNumGroups = values.size();
    final CollapsingTopDocsCollector collapsingCollector;
    if (numeric) {
        collapsingCollector = CollapsingTopDocsCollector.createNumeric(collapseField.getField(), sort, expectedNumGroups, false);
    } else {
        collapsingCollector = CollapsingTopDocsCollector.createKeyword(collapseField.getField(), sort, expectedNumGroups, false);
    }
    TopFieldCollector topFieldCollector = TopFieldCollector.create(sort, totalHits, true, false, false);
    searcher.search(new MatchAllDocsQuery(), collapsingCollector);
    searcher.search(new MatchAllDocsQuery(), topFieldCollector);
    CollapseTopFieldDocs collapseTopFieldDocs = collapsingCollector.getTopDocs();
    TopFieldDocs topDocs = topFieldCollector.topDocs();
    assertEquals(collapseField.getField(), collapseTopFieldDocs.field);
    assertEquals(expectedNumGroups, collapseTopFieldDocs.scoreDocs.length);
    assertEquals(totalHits, collapseTopFieldDocs.totalHits);
    assertEquals(totalHits, topDocs.scoreDocs.length);
    assertEquals(totalHits, topDocs.totalHits);
    Set<Object> seen = new HashSet<>();
    // collapse field is the last sort
    int collapseIndex = sort.getSort().length - 1;
    int topDocsIndex = 0;
    for (int i = 0; i < expectedNumGroups; i++) {
        FieldDoc fieldDoc = null;
        for (; topDocsIndex < totalHits; topDocsIndex++) {
            fieldDoc = (FieldDoc) topDocs.scoreDocs[topDocsIndex];
            if (seen.contains(fieldDoc.fields[collapseIndex]) == false) {
                break;
            }
        }
        FieldDoc collapseFieldDoc = (FieldDoc) collapseTopFieldDocs.scoreDocs[i];
        assertNotNull(fieldDoc);
        assertEquals(collapseFieldDoc.doc, fieldDoc.doc);
        assertArrayEquals(collapseFieldDoc.fields, fieldDoc.fields);
        seen.add(fieldDoc.fields[fieldDoc.fields.length - 1]);
    }
    for (; topDocsIndex < totalHits; topDocsIndex++) {
        FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[topDocsIndex];
        assertTrue(seen.contains(fieldDoc.fields[collapseIndex]));
    }
    // check merge
    final IndexReaderContext ctx = searcher.getTopReaderContext();
    final SegmentSearcher[] subSearchers;
    final int[] docStarts;
    if (ctx instanceof LeafReaderContext) {
        subSearchers = new SegmentSearcher[1];
        docStarts = new int[1];
        subSearchers[0] = new SegmentSearcher((LeafReaderContext) ctx, ctx);
        docStarts[0] = 0;
    } else {
        final CompositeReaderContext compCTX = (CompositeReaderContext) ctx;
        final int size = compCTX.leaves().size();
        subSearchers = new SegmentSearcher[size];
        docStarts = new int[size];
        int docBase = 0;
        for (int searcherIDX = 0; searcherIDX < subSearchers.length; searcherIDX++) {
            final LeafReaderContext leave = compCTX.leaves().get(searcherIDX);
            subSearchers[searcherIDX] = new SegmentSearcher(leave, compCTX);
            docStarts[searcherIDX] = docBase;
            docBase += leave.reader().maxDoc();
        }
    }
    final CollapseTopFieldDocs[] shardHits = new CollapseTopFieldDocs[subSearchers.length];
    final Weight weight = searcher.createNormalizedWeight(new MatchAllDocsQuery(), false);
    for (int shardIDX = 0; shardIDX < subSearchers.length; shardIDX++) {
        final SegmentSearcher subSearcher = subSearchers[shardIDX];
        final CollapsingTopDocsCollector c;
        if (numeric) {
            c = CollapsingTopDocsCollector.createNumeric(collapseField.getField(), sort, expectedNumGroups, false);
        } else {
            c = CollapsingTopDocsCollector.createKeyword(collapseField.getField(), sort, expectedNumGroups, false);
        }
        subSearcher.search(weight, c);
        shardHits[shardIDX] = c.getTopDocs();
    }
    CollapseTopFieldDocs mergedFieldDocs = CollapseTopFieldDocs.merge(sort, 0, expectedNumGroups, shardHits);
    assertTopDocsEquals(mergedFieldDocs, collapseTopFieldDocs);
    w.close();
    reader.close();
    dir.close();
}
Also used: IndexSearcher(org.apache.lucene.search.IndexSearcher) FieldDoc(org.apache.lucene.search.FieldDoc) ArrayList(java.util.ArrayList) CollapseTopFieldDocs(org.apache.lucene.search.grouping.CollapseTopFieldDocs) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) SortField(org.apache.lucene.search.SortField) SortedSetSortField(org.apache.lucene.search.SortedSetSortField) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) Document(org.apache.lucene.document.Document) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) Sort(org.apache.lucene.search.Sort) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) TopFieldCollector(org.apache.lucene.search.TopFieldCollector) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) IndexReaderContext(org.apache.lucene.index.IndexReaderContext) Weight(org.apache.lucene.search.Weight) CompositeReaderContext(org.apache.lucene.index.CompositeReaderContext) IndexReader(org.apache.lucene.index.IndexReader) CollapsingTopDocsCollector(org.apache.lucene.search.grouping.CollapsingTopDocsCollector) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)
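
Outside the Lucene test framework (SegmentSearcher is test-only), the per-segment split in the "check merge" section can be sketched with plain IndexSearcher instances. This is an illustrative sketch under that substitution, not the test's actual setup:

import java.util.List;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.IndexSearcher;

final class PerSegmentSearchersExample {

    /** Builds one IndexSearcher per segment and records each segment's doc base. */
    static IndexSearcher[] split(IndexReader reader, int[] docStartsOut) {
        List<LeafReaderContext> leaves = reader.leaves();
        IndexSearcher[] subSearchers = new IndexSearcher[leaves.size()];
        for (int i = 0; i < leaves.size(); i++) {
            LeafReaderContext leaf = leaves.get(i);
            subSearchers[i] = new IndexSearcher(leaf.reader());
            // offset of this segment in the top-level doc id space
            docStartsOut[i] = leaf.docBase;
        }
        return subSearchers;
    }
}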

Aggregations

LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 335
LeafReader (org.apache.lucene.index.LeafReader): 73
Document (org.apache.lucene.document.Document): 71
IOException (java.io.IOException): 69
BytesRef (org.apache.lucene.util.BytesRef): 67
Directory (org.apache.lucene.store.Directory): 61
Term (org.apache.lucene.index.Term): 52
IndexSearcher (org.apache.lucene.search.IndexSearcher): 49
IndexReader (org.apache.lucene.index.IndexReader): 48
RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 45
DirectoryReader (org.apache.lucene.index.DirectoryReader): 44
Bits (org.apache.lucene.util.Bits): 44
NumericDocValues (org.apache.lucene.index.NumericDocValues): 43
ArrayList (java.util.ArrayList): 41
Weight (org.apache.lucene.search.Weight): 37
Terms (org.apache.lucene.index.Terms): 36
DocIdSetIterator (org.apache.lucene.search.DocIdSetIterator): 36
Scorer (org.apache.lucene.search.Scorer): 36
IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 34
Query (org.apache.lucene.search.Query): 34