Use of org.apache.lucene.search.Collector in project elasticsearch (by elastic): class AggregationPhase, method preProcess.
@Override
public void preProcess(SearchContext context) {
    if (context.aggregations() != null) {
        List<Aggregator> collectors = new ArrayList<>();
        Aggregator[] aggregators;
        try {
            AggregatorFactories factories = context.aggregations().factories();
            aggregators = factories.createTopLevelAggregators();
            for (int i = 0; i < aggregators.length; i++) {
                if (aggregators[i] instanceof GlobalAggregator == false) {
                    collectors.add(aggregators[i]);
                }
            }
            context.aggregations().aggregators(aggregators);
            if (!collectors.isEmpty()) {
                Collector collector = BucketCollector.wrap(collectors);
                ((BucketCollector) collector).preCollection();
                if (context.getProfilers() != null) {
                    collector = new InternalProfileCollector(collector, CollectorResult.REASON_AGGREGATION, // TODO: report on child aggs as well
                        Collections.emptyList());
                }
                context.queryCollectors().put(AggregationPhase.class, collector);
            }
        } catch (IOException e) {
            throw new AggregationInitializationException("Could not initialize aggregators", e);
        }
    }
}
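BucketCollector and BucketCollector.wrap are Elasticsearch-internal classes, but the underlying idea is plain Lucene collector composition: several collectors are presented to the searcher as one, so the query runs in a single pass. Below is a minimal sketch of that idea using only stock Lucene classes (MultiCollector and TotalHitCountCollector); the class name, the countTwice helper, and the assumption that dir already holds an index are illustrative, not part of the Elasticsearch code above.

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.store.Directory;

public class CollectorCompositionSketch {
    // Runs two collectors in a single search pass, the same composition idea
    // that BucketCollector.wrap applies to the non-global aggregators above.
    // "dir" is assumed to contain an already-built index.
    public static int countTwice(Directory dir) throws Exception {
        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TotalHitCountCollector first = new TotalHitCountCollector();
            TotalHitCountCollector second = new TotalHitCountCollector();
            // MultiCollector.wrap forwards every collected document to each delegate.
            Collector both = MultiCollector.wrap(first, second);
            searcher.search(new MatchAllDocsQuery(), both);
            return first.getTotalHits() + second.getTotalHits();
        }
    }
}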
Use of org.apache.lucene.search.Collector in project elasticsearch (by elastic): class AggregationPhase, method execute.
@Override
public void execute(SearchContext context) {
    if (context.aggregations() == null) {
        context.queryResult().aggregations(null);
        return;
    }
    if (context.queryResult().hasAggs()) {
        // no need to compute the aggs twice, they should be computed on a per context basis
        return;
    }
    Aggregator[] aggregators = context.aggregations().aggregators();
    List<Aggregator> globals = new ArrayList<>();
    for (int i = 0; i < aggregators.length; i++) {
        if (aggregators[i] instanceof GlobalAggregator) {
            globals.add(aggregators[i]);
        }
    }
    // optimize the global collector based execution
    if (!globals.isEmpty()) {
        BucketCollector globalsCollector = BucketCollector.wrap(globals);
        Query query = context.buildFilteredQuery(Queries.newMatchAllQuery());
        try {
            final Collector collector;
            if (context.getProfilers() == null) {
                collector = globalsCollector;
            } else {
                InternalProfileCollector profileCollector = new InternalProfileCollector(globalsCollector, CollectorResult.REASON_AGGREGATION_GLOBAL, // TODO: report on sub collectors
                    Collections.emptyList());
                collector = profileCollector;
                // start a new profile with this collector
                context.getProfilers().addQueryProfiler().setCollector(profileCollector);
            }
            globalsCollector.preCollection();
            context.searcher().search(query, collector);
        } catch (Exception e) {
            throw new QueryPhaseExecutionException(context, "Failed to execute global aggregators", e);
        } finally {
            context.clearReleasables(SearchContext.Lifetime.COLLECTION);
        }
    }
    List<InternalAggregation> aggregations = new ArrayList<>(aggregators.length);
    for (Aggregator aggregator : context.aggregations().aggregators()) {
        try {
            aggregator.postCollection();
            aggregations.add(aggregator.buildAggregation(0));
        } catch (IOException e) {
            throw new AggregationExecutionException("Failed to build aggregation [" + aggregator.name() + "]", e);
        }
    }
    context.queryResult().aggregations(new InternalAggregations(aggregations));
    try {
        List<PipelineAggregator> pipelineAggregators = context.aggregations().factories().createPipelineAggregators();
        List<SiblingPipelineAggregator> siblingPipelineAggregators = new ArrayList<>(pipelineAggregators.size());
        for (PipelineAggregator pipelineAggregator : pipelineAggregators) {
            if (pipelineAggregator instanceof SiblingPipelineAggregator) {
                siblingPipelineAggregators.add((SiblingPipelineAggregator) pipelineAggregator);
            } else {
                throw new AggregationExecutionException("Invalid pipeline aggregation named [" + pipelineAggregator.name() + "] of type ["
                    + pipelineAggregator.getWriteableName() + "]. Only sibling pipeline aggregations are allowed at the top level");
            }
        }
        context.queryResult().pipelineAggregators(siblingPipelineAggregators);
    } catch (IOException e) {
        throw new AggregationExecutionException("Failed to build top level pipeline aggregators", e);
    }
    // disable aggregations so that they don't run on next pages in case of scrolling
    context.aggregations(null);
    context.queryCollectors().remove(AggregationPhase.class);
}
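The aggregators above follow a collect-then-build lifecycle: preCollection() before the search, postCollection() and buildAggregation() after it. The same lifecycle can be sketched with a plain Lucene collector. The following is a hedged sketch against the Lucene 6.x-era SimpleCollector API used by these snippets (the needsScores() method; later releases use scoreMode() instead), and the class and buildResult names are illustrative assumptions.

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.SimpleCollector;

public class CountingCollector extends SimpleCollector {
    private int docBase;       // doc id base of the segment currently being collected
    private long count;        // hits seen so far across all segments
    private int lastGlobalDoc; // example of mapping a segment-relative id to a global one

    @Override
    protected void doSetNextReader(LeafReaderContext context) {
        docBase = context.docBase; // called once per segment, before its docs are collected
    }

    @Override
    public void collect(int doc) {
        count++;
        lastGlobalDoc = docBase + doc; // "doc" is segment-relative
    }

    @Override
    public boolean needsScores() {
        return false; // Lucene 6.x-era signature; newer versions use scoreMode()
    }

    public long buildResult() {
        return count; // the "build" step, run only after the search has finished
    }
}

Usage would mirror the aggregation phase: pass the collector to searcher.search(query, collector), then call buildResult() once collection is complete.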
Use of org.apache.lucene.search.Collector in project lucene-solr (by apache): class TestSort, method testSort.
public void testSort() throws Exception {
  Directory dir = new RAMDirectory();
  Field f = new StringField("f", "0", Field.Store.NO);
  Field f2 = new StringField("f2", "0", Field.Store.NO);
  for (int iterCnt = 0; iterCnt < iter; iterCnt++) {
    IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(new SimpleAnalyzer()).setOpenMode(IndexWriterConfig.OpenMode.CREATE));
    final MyDoc[] mydocs = new MyDoc[ndocs];
    int v1EmptyPercent = 50;
    int v2EmptyPercent = 50;
    int commitCountdown = commitCount;
    for (int i = 0; i < ndocs; i++) {
      MyDoc mydoc = new MyDoc();
      mydoc.doc = i;
      mydocs[i] = mydoc;
      Document document = new Document();
      if (r.nextInt(100) < v1EmptyPercent) {
        mydoc.val = Integer.toString(r.nextInt(maxval));
        f.setStringValue(mydoc.val);
        document.add(f);
      }
      if (r.nextInt(100) < v2EmptyPercent) {
        mydoc.val2 = Integer.toString(r.nextInt(maxval));
        f2.setStringValue(mydoc.val2);
        document.add(f2);
      }
      iw.addDocument(document);
      if (--commitCountdown <= 0) {
        commitCountdown = commitCount;
        iw.commit();
      }
    }
    iw.close();
    Map<String, UninvertingReader.Type> mapping = new HashMap<>();
    mapping.put("f", UninvertingReader.Type.SORTED);
    mapping.put("f2", UninvertingReader.Type.SORTED);
    DirectoryReader reader = UninvertingReader.wrap(DirectoryReader.open(dir), mapping);
    IndexSearcher searcher = new IndexSearcher(reader);
    // System.out.println("segments="+searcher.getIndexReader().getSequentialSubReaders().length);
    assertTrue(reader.leaves().size() > 1);
    for (int i = 0; i < qiter; i++) {
      Filter filt = new Filter() {
        @Override
        public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) {
          return BitsFilteredDocIdSet.wrap(randSet(context.reader().maxDoc()), acceptDocs);
        }

        @Override
        public String toString(String field) {
          return "TestSortFilter";
        }

        @Override
        public boolean equals(Object other) {
          return other == this;
        }

        @Override
        public int hashCode() {
          return System.identityHashCode(this);
        }
      };
      int top = r.nextInt((ndocs >> 3) + 1) + 1;
      final boolean luceneSort = r.nextBoolean();
      final boolean sortMissingLast = !luceneSort && r.nextBoolean();
      final boolean sortMissingFirst = !luceneSort && !sortMissingLast;
      final boolean reverse = r.nextBoolean();
      List<SortField> sfields = new ArrayList<>();
      final boolean secondary = r.nextBoolean();
      final boolean luceneSort2 = r.nextBoolean();
      final boolean sortMissingLast2 = !luceneSort2 && r.nextBoolean();
      final boolean sortMissingFirst2 = !luceneSort2 && !sortMissingLast2;
      final boolean reverse2 = r.nextBoolean();
      if (r.nextBoolean())
        sfields.add(new SortField(null, SortField.Type.SCORE));
      // hit both use-cases of sort-missing-last
      sfields.add(Sorting.getStringSortField("f", reverse, sortMissingLast, sortMissingFirst));
      if (secondary) {
        sfields.add(Sorting.getStringSortField("f2", reverse2, sortMissingLast2, sortMissingFirst2));
      }
      if (r.nextBoolean())
        sfields.add(new SortField(null, SortField.Type.SCORE));
      Sort sort = new Sort(sfields.toArray(new SortField[sfields.size()]));
      final String nullRep = luceneSort || sortMissingFirst && !reverse || sortMissingLast && reverse ? "" : "zzz";
      final String nullRep2 = luceneSort2 || sortMissingFirst2 && !reverse2 || sortMissingLast2 && reverse2 ? "" : "zzz";
      boolean trackScores = r.nextBoolean();
      boolean trackMaxScores = r.nextBoolean();
      boolean scoreInOrder = r.nextBoolean();
      final TopFieldCollector topCollector = TopFieldCollector.create(sort, top, true, trackScores, trackMaxScores);
      final List<MyDoc> collectedDocs = new ArrayList<>();
      // delegate and collect docs ourselves
      Collector myCollector = new FilterCollector(topCollector) {
        @Override
        public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
          final int docBase = context.docBase;
          return new FilterLeafCollector(super.getLeafCollector(context)) {
            @Override
            public void collect(int doc) throws IOException {
              super.collect(doc);
              collectedDocs.add(mydocs[docBase + doc]);
            }
          };
        }
      };
      searcher.search(filt, myCollector);
      Collections.sort(collectedDocs, (o1, o2) -> {
        String v1 = o1.val == null ? nullRep : o1.val;
        String v2 = o2.val == null ? nullRep : o2.val;
        int cmp = v1.compareTo(v2);
        if (reverse)
          cmp = -cmp;
        if (cmp != 0)
          return cmp;
        if (secondary) {
          v1 = o1.val2 == null ? nullRep2 : o1.val2;
          v2 = o2.val2 == null ? nullRep2 : o2.val2;
          cmp = v1.compareTo(v2);
          if (reverse2)
            cmp = -cmp;
        }
        cmp = cmp == 0 ? o1.doc - o2.doc : cmp;
        return cmp;
      });
      TopDocs topDocs = topCollector.topDocs();
      ScoreDoc[] sdocs = topDocs.scoreDocs;
      for (int j = 0; j < sdocs.length; j++) {
        int id = sdocs[j].doc;
        if (id != collectedDocs.get(j).doc) {
          log.error("Error at pos " + j + "\n\tsortMissingFirst=" + sortMissingFirst + " sortMissingLast=" + sortMissingLast + " reverse=" + reverse + "\n\tEXPECTED=" + collectedDocs);
        }
        assertEquals(id, collectedDocs.get(j).doc);
      }
    }
    reader.close();
  }
  dir.close();
}
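The interesting Collector usage here is the anonymous FilterCollector/FilterLeafCollector pair: the TopFieldCollector still does the real work, while the wrapper observes every hit (with its segment docBase) so the test can record collection order. The same delegation pattern, reduced to its essentials, might look like the sketch below; the counting helper and the int[] counter (used only as a mutable box visible to the anonymous classes) are illustrative assumptions, not part of the test above.

import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.FilterCollector;
import org.apache.lucene.search.FilterLeafCollector;
import org.apache.lucene.search.LeafCollector;

public class CountingCollectorSketch {
  // Wraps any delegate so every hit is still collected by it, while the wrapper
  // also gets to observe the (segment-relative) doc id as it passes through.
  public static Collector counting(Collector delegate, int[] counter) {
    return new FilterCollector(delegate) {
      @Override
      public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
        return new FilterLeafCollector(super.getLeafCollector(context)) {
          @Override
          public void collect(int doc) throws IOException {
            super.collect(doc); // let the delegate (e.g. a TopFieldCollector) see the hit
            counter[0]++;       // then record that it was seen
          }
        };
      }
    };
  }
}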
Use of org.apache.lucene.search.Collector in project lucene-solr (by apache): class ReadTask, method doLogic.
@Override
public int doLogic() throws Exception {
  int res = 0;
  // open reader or use existing one
  // (will incRef the reader)
  IndexSearcher searcher = getRunData().getIndexSearcher();
  IndexReader reader;
  final boolean closeSearcher;
  if (searcher == null) {
    // open our own reader
    Directory dir = getRunData().getDirectory();
    reader = DirectoryReader.open(dir);
    searcher = new IndexSearcher(reader);
    closeSearcher = true;
  } else {
    // use existing one; this passes +1 ref to us
    reader = searcher.getIndexReader();
    closeSearcher = false;
  }
  // optionally warm and add num docs traversed to count
  if (withWarm()) {
    Document doc = null;
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    for (int m = 0; m < reader.maxDoc(); m++) {
      if (null == liveDocs || liveDocs.get(m)) {
        doc = reader.document(m);
        res += (doc == null ? 0 : 1);
      }
    }
  }
  if (withSearch()) {
    res++;
    Query q = queryMaker.makeQuery();
    Sort sort = getSort();
    TopDocs hits = null;
    final int numHits = numHits();
    if (numHits > 0) {
      if (withCollector() == false) {
        if (sort != null) {
          // TODO: instead of always passing false we
          // should detect based on the query; if we make
          // the IndexSearcher search methods that take
          // Weight public again, we can go back to
          // pulling the Weight ourselves:
          TopFieldCollector collector = TopFieldCollector.create(sort, numHits, true, withScore(), withMaxScore());
          searcher.search(q, collector);
          hits = collector.topDocs();
        } else {
          hits = searcher.search(q, numHits);
        }
      } else {
        Collector collector = createCollector();
        searcher.search(q, collector);
        //hits = collector.topDocs();
      }
      if (hits != null) {
        final String printHitsField = getRunData().getConfig().get("print.hits.field", null);
        if (printHitsField != null && printHitsField.length() > 0) {
          System.out.println("totalHits = " + hits.totalHits);
          System.out.println("maxDoc() = " + reader.maxDoc());
          System.out.println("numDocs() = " + reader.numDocs());
          for (int i = 0; i < hits.scoreDocs.length; i++) {
            final int docID = hits.scoreDocs[i].doc;
            final Document doc = reader.document(docID);
            System.out.println("  " + i + ": doc=" + docID + " score=" + hits.scoreDocs[i].score + " " + printHitsField + " =" + doc.get(printHitsField));
          }
        }
        res += withTopDocs(searcher, q, hits);
      }
    }
  }
  if (closeSearcher) {
    reader.close();
  } else {
    // Release our +1 ref from above
    reader.decRef();
  }
  return res;
}
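The sorted branch builds a TopFieldCollector by hand with the five-argument create(...) of this Lucene version (fillFields, trackDocScores, trackMaxScore); that signature has changed in later releases. For comparison, a minimal sketch that lets IndexSearcher build the collector internally via search(Query, int, Sort); the class name, the topNSorted helper, and the sortable string field "f" are illustrative assumptions.

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;

public class SortedSearchSketch {
  // Equivalent of the sorted branch above without constructing the collector
  // ourselves: search(Query, int, Sort) creates a TopFieldCollector internally.
  public static TopDocs topNSorted(Directory dir, Query q, int numHits) throws Exception {
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      Sort sort = new Sort(new SortField("f", SortField.Type.STRING));
      return searcher.search(q, numHits, sort);
    }
  }
}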
Use of org.apache.lucene.search.Collector in project lucene-solr (by apache): class Grouping, method execute.
public void execute() throws IOException {
  if (commands.isEmpty()) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Specify at least one field, function or query to group by.");
  }
  DocListAndSet out = new DocListAndSet();
  qr.setDocListAndSet(out);
  SolrIndexSearcher.ProcessedFilter pf = searcher.getProcessedFilter(cmd.getFilter(), cmd.getFilterList());
  final Filter luceneFilter = pf.filter;
  maxDoc = searcher.maxDoc();
  needScores = (cmd.getFlags() & SolrIndexSearcher.GET_SCORES) != 0;
  boolean cacheScores = false;
  // NOTE: Change this when withinGroupSort can be specified per group
  if (!needScores && !commands.isEmpty()) {
    Sort withinGroupSort = commands.get(0).withinGroupSort;
    cacheScores = withinGroupSort == null || withinGroupSort.needsScores();
  } else if (needScores) {
    cacheScores = needScores;
  }
  getDocSet = (cmd.getFlags() & SolrIndexSearcher.GET_DOCSET) != 0;
  getDocList = (cmd.getFlags() & SolrIndexSearcher.GET_DOCLIST) != 0;
  query = QueryUtils.makeQueryable(cmd.getQuery());
  for (Command cmd : commands) {
    cmd.prepare();
  }
  AllGroupHeadsCollector<?> allGroupHeadsCollector = null;
  List<Collector> collectors = new ArrayList<>(commands.size());
  for (Command cmd : commands) {
    Collector collector = cmd.createFirstPassCollector();
    if (collector != null) {
      collectors.add(collector);
    }
    if (getGroupedDocSet && allGroupHeadsCollector == null) {
      collectors.add(allGroupHeadsCollector = cmd.createAllGroupCollector());
    }
  }
  DocSetCollector setCollector = null;
  if (getDocSet && allGroupHeadsCollector == null) {
    setCollector = new DocSetCollector(maxDoc);
    collectors.add(setCollector);
  }
  Collector allCollectors = MultiCollector.wrap(collectors);
  CachingCollector cachedCollector = null;
  if (cacheSecondPassSearch && allCollectors != null) {
    int maxDocsToCache = (int) Math.round(maxDoc * (maxDocsPercentageToCache / 100.0d));
    // Maybe we should have a minimum and a maximum, that defines the window we would like caching for.
    if (maxDocsToCache > 0) {
      allCollectors = cachedCollector = CachingCollector.create(allCollectors, cacheScores, maxDocsToCache);
    }
  }
  if (pf.postFilter != null) {
    pf.postFilter.setLastDelegate(allCollectors);
    allCollectors = pf.postFilter;
  }
  if (allCollectors != null) {
    searchWithTimeLimiter(luceneFilter, allCollectors);
    if (allCollectors instanceof DelegatingCollector) {
      ((DelegatingCollector) allCollectors).finish();
    }
  }
  if (getGroupedDocSet && allGroupHeadsCollector != null) {
    qr.setDocSet(new BitDocSet(allGroupHeadsCollector.retrieveGroupHeads(maxDoc)));
  } else if (getDocSet) {
    qr.setDocSet(setCollector.getDocSet());
  }
  collectors.clear();
  for (Command cmd : commands) {
    Collector collector = cmd.createSecondPassCollector();
    if (collector != null)
      collectors.add(collector);
  }
  if (!collectors.isEmpty()) {
    Collector secondPhaseCollectors = MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()]));
    if (collectors.size() > 0) {
      if (cachedCollector != null) {
        if (cachedCollector.isCached()) {
          cachedCollector.replay(secondPhaseCollectors);
        } else {
          signalCacheWarning = true;
          logger.warn(String.format(Locale.ROOT, "The grouping cache is active, but not used because it exceeded the max cache limit of %d percent", maxDocsPercentageToCache));
          logger.warn("Please increase cache size or disable group caching.");
          searchWithTimeLimiter(luceneFilter, secondPhaseCollectors);
        }
      } else {
        if (pf.postFilter != null) {
          pf.postFilter.setLastDelegate(secondPhaseCollectors);
          secondPhaseCollectors = pf.postFilter;
        }
        searchWithTimeLimiter(luceneFilter, secondPhaseCollectors);
      }
      if (secondPhaseCollectors instanceof DelegatingCollector) {
        ((DelegatingCollector) secondPhaseCollectors).finish();
      }
    }
  }
  for (Command cmd : commands) {
    cmd.finish();
  }
  qr.groupedResults = grouped;
  if (getDocList) {
    int sz = idSet.size();
    int[] ids = new int[sz];
    int idx = 0;
    for (int val : idSet) {
      ids[idx++] = val;
    }
    qr.setDocList(new DocSlice(0, sz, ids, null, maxMatches, maxScore));
  }
}
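The central Collector trick in this grouping code is CachingCollector: the first-pass collectors are wrapped so that the matching documents are remembered, and if the cache did not overflow (isCached()), the second pass is replayed from the cache instead of re-running the query. A stripped-down sketch of that cache/replay pattern follows; the class and method names are hypothetical, and TotalHitCountCollector merely stands in for the real first- and second-pass grouping collectors.

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.CachingCollector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.store.Directory;

public class TwoPassCachingSketch {
  // First pass: collect into firstPass while CachingCollector records the hits.
  // Second pass: replay the cached hits, or fall back to a fresh search if the
  // cache limit was exceeded.
  public static void twoPasses(Directory dir, Query q, int maxDocsToCache) throws Exception {
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      TotalHitCountCollector firstPass = new TotalHitCountCollector();
      CachingCollector cached = CachingCollector.create(firstPass, /*cacheScores=*/ false, maxDocsToCache);
      searcher.search(q, cached);

      TotalHitCountCollector secondPass = new TotalHitCountCollector();
      if (cached.isCached()) {
        cached.replay(secondPass);      // feed the cached hits to the second pass
      } else {
        searcher.search(q, secondPass); // cache overflowed: run the query again
      }
    }
  }
}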