use of org.apache.lucene.search.TopFieldDocs in project lucene-solr by apache.
the class DrillSideways method search.
/**
* Search, sorting by {@link Sort}, and computing
* drill down and sideways counts.
*/
public DrillSidewaysResult search(DrillDownQuery query, Query filter, FieldDoc after, int topN, Sort sort, boolean doDocScores, boolean doMaxScore) throws IOException {
  if (filter != null) {
    query = new DrillDownQuery(config, filter, query);
  }
  if (sort != null) {
    int limit = searcher.getIndexReader().maxDoc();
    if (limit == 0) {
      // the collector does not allow numHits = 0
      limit = 1;
    }
    final int fTopN = Math.min(topN, limit);
    if (executor != null) {
      // We have an executor, so let's use the multi-threaded version
      final CollectorManager<TopFieldCollector, TopFieldDocs> collectorManager = new CollectorManager<TopFieldCollector, TopFieldDocs>() {

        @Override
        public TopFieldCollector newCollector() throws IOException {
          return TopFieldCollector.create(sort, fTopN, after, true, doDocScores, doMaxScore);
        }

        @Override
        public TopFieldDocs reduce(Collection<TopFieldCollector> collectors) throws IOException {
          final TopFieldDocs[] topFieldDocs = new TopFieldDocs[collectors.size()];
          int pos = 0;
          for (TopFieldCollector collector : collectors) topFieldDocs[pos++] = collector.topDocs();
          return TopDocs.merge(sort, topN, topFieldDocs);
        }
      };
      ConcurrentDrillSidewaysResult<TopFieldDocs> r = search(query, collectorManager);
      return new DrillSidewaysResult(r.facets, r.collectorResult);
    } else {
      final TopFieldCollector hitCollector = TopFieldCollector.create(sort, fTopN, after, true, doDocScores, doMaxScore);
      DrillSidewaysResult r = search(query, hitCollector);
      return new DrillSidewaysResult(r.facets, hitCollector.topDocs());
    }
  } else {
    return search(after, query, topN);
  }
}
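A minimal sketch of how this sort-based overload might be invoked; the directory variables, taxonomy reader, and the "Author" and "timestamp" fields are assumptions for illustration, not part of the snippet above:

// Assumes: an index and taxonomy built with FacetsConfig (hypothetical setup).
IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(indexDir));
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
FacetsConfig config = new FacetsConfig();
DrillSideways ds = new DrillSideways(searcher, config, taxoReader);

// Drill down on one dimension; DrillSideways also computes the "sideways" counts for it.
DrillDownQuery ddq = new DrillDownQuery(config, new MatchAllDocsQuery());
ddq.add("Author", "Lisa");

// Sort by a numeric field, descending; no extra filter, no paging (after == null).
Sort sort = new Sort(new SortField("timestamp", SortField.Type.LONG, true));
DrillSideways.DrillSidewaysResult result = ds.search(ddq, null, null, 10, sort, false, false);
TopFieldDocs hits = (TopFieldDocs) result.hits; // hits were collected by a TopFieldCollector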
use of org.apache.lucene.search.TopFieldDocs in project lucene-solr by apache.
the class LatLonPoint method nearest.
/**
* Finds the {@code n} nearest indexed points to the provided point, according to Haversine distance.
* <p>
* This is functionally equivalent to running {@link MatchAllDocsQuery} with a {@link LatLonDocValuesField#newDistanceSort},
* but is far more efficient since it takes advantage of properties of the indexed BKD tree. Currently this
* only works with {@link Lucene60PointsFormat} (used by the default codec). Multi-valued fields are
* currently not de-duplicated, so if a document has multiple instances of the specified field that
* make it into the top n, that document will appear more than once.
* <p>
* Documents are ordered by ascending distance from the location. The value returned in {@link FieldDoc} for
* the hits contains a Double instance with the distance in meters.
*
* @param searcher IndexSearcher to find nearest points from.
* @param field field name. must not be null.
* @param latitude latitude at the center: must be within standard +/-90 coordinate bounds.
* @param longitude longitude at the center: must be within standard +/-180 coordinate bounds.
* @param n the number of nearest neighbors to retrieve.
* @return TopFieldDocs containing documents ordered by distance, where the field value for each {@link FieldDoc} is the distance in meters
* @throws IllegalArgumentException if the underlying PointValues is not a {@code Lucene60PointsReader} (this is a current limitation), or
* if {@code field} or {@code searcher} is null, or if {@code latitude}, {@code longitude} or {@code n} are out-of-bounds
* @throws IOException if an IOException occurs while finding the points.
*/
// TODO: what about multi-valued documents? what happens?
public static TopFieldDocs nearest(IndexSearcher searcher, String field, double latitude, double longitude, int n) throws IOException {
  GeoUtils.checkLatitude(latitude);
  GeoUtils.checkLongitude(longitude);
  if (n < 1) {
    throw new IllegalArgumentException("n must be at least 1; got " + n);
  }
  if (field == null) {
    throw new IllegalArgumentException("field must not be null");
  }
  if (searcher == null) {
    throw new IllegalArgumentException("searcher must not be null");
  }
  List<BKDReader> readers = new ArrayList<>();
  List<Integer> docBases = new ArrayList<>();
  List<Bits> liveDocs = new ArrayList<>();
  int totalHits = 0;
  for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) {
    PointValues points = leaf.reader().getPointValues(field);
    if (points != null) {
      if (points instanceof BKDReader == false) {
        throw new IllegalArgumentException("can only run on Lucene60PointsReader points implementation, but got " + points);
      }
      totalHits += points.getDocCount();
      BKDReader reader = (BKDReader) points;
      if (reader != null) {
        readers.add(reader);
        docBases.add(leaf.docBase);
        liveDocs.add(leaf.reader().getLiveDocs());
      }
    }
  }
  NearestNeighbor.NearestHit[] hits = NearestNeighbor.nearest(latitude, longitude, readers, liveDocs, docBases, n);
  // Convert to TopFieldDocs:
  ScoreDoc[] scoreDocs = new ScoreDoc[hits.length];
  for (int i = 0; i < hits.length; i++) {
    NearestNeighbor.NearestHit hit = hits[i];
    scoreDocs[i] = new FieldDoc(hit.docID, 0.0f, new Object[] { Double.valueOf(hit.distanceMeters) });
  }
  return new TopFieldDocs(totalHits, scoreDocs, null, 0.0f);
}
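A brief usage sketch; the "location" field name and the coordinates are illustrative, assuming documents were indexed with a LatLonPoint field of that name:

// Assumes: IndexSearcher searcher over an index containing LatLonPoint "location" fields.
TopFieldDocs nearest = LatLonPoint.nearest(searcher, "location", 40.7128, -74.0060, 10);
for (ScoreDoc sd : nearest.scoreDocs) {
  FieldDoc fd = (FieldDoc) sd;
  // Per the javadoc above, fields[0] holds the Haversine distance in meters.
  double distanceMeters = (Double) fd.fields[0];
  System.out.println("doc=" + fd.doc + " distance=" + distanceMeters + " m");
}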
use of org.apache.lucene.search.TopFieldDocs in project lucene-solr by apache.
the class AnalyzingInfixSuggester method lookup.
/**
* This is an advanced method providing the capability to send down to the suggester any
* arbitrary Lucene query to be used to filter the results of the suggester
*
* @param key the keyword being looked for
* @param contextQuery an arbitrary Lucene query to be used to filter the results of the suggester. {@link #addContextToQuery} could be used to build this contextQuery.
* @param num number of items to return
* @param allTermsRequired if true, all terms in the key must match; if false, matching any term is sufficient
* @param doHighlight if true, the matching term will be highlighted in the search results
* @return the results of the suggester
* @throws IOException if there is an IO exception while reading data from the index
*/
public List<LookupResult> lookup(CharSequence key, BooleanQuery contextQuery, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
  if (searcherMgr == null) {
    throw new IllegalStateException("suggester was not built");
  }
  final BooleanClause.Occur occur;
  if (allTermsRequired) {
    occur = BooleanClause.Occur.MUST;
  } else {
    occur = BooleanClause.Occur.SHOULD;
  }
  BooleanQuery.Builder query;
  Set<String> matchedTokens;
  String prefixToken = null;
  try (TokenStream ts = queryAnalyzer.tokenStream("", new StringReader(key.toString()))) {
    //long t0 = System.currentTimeMillis();
    ts.reset();
    final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    final OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
    String lastToken = null;
    query = new BooleanQuery.Builder();
    int maxEndOffset = -1;
    matchedTokens = new HashSet<>();
    while (ts.incrementToken()) {
      if (lastToken != null) {
        matchedTokens.add(lastToken);
        query.add(new TermQuery(new Term(TEXT_FIELD_NAME, lastToken)), occur);
      }
      lastToken = termAtt.toString();
      if (lastToken != null) {
        maxEndOffset = Math.max(maxEndOffset, offsetAtt.endOffset());
      }
    }
    ts.end();
    if (lastToken != null) {
      Query lastQuery;
      if (maxEndOffset == offsetAtt.endOffset()) {
        // Use PrefixQuery (or the ngram equivalent) when
        // there were no trailing discarded chars in the
        // string (e.g. whitespace), so that if the query does
        // not end with a space we show prefix matches for
        // that token:
        lastQuery = getLastTokenQuery(lastToken);
        prefixToken = lastToken;
      } else {
        // Use TermQuery for an exact match if there were
        // trailing discarded chars (e.g. whitespace), so
        // that if the query ends with a space we only show
        // exact matches for that term:
        matchedTokens.add(lastToken);
        lastQuery = new TermQuery(new Term(TEXT_FIELD_NAME, lastToken));
      }
      if (lastQuery != null) {
        query.add(lastQuery, occur);
      }
    }
    if (contextQuery != null) {
      boolean allMustNot = true;
      for (BooleanClause clause : contextQuery.clauses()) {
        if (clause.getOccur() != BooleanClause.Occur.MUST_NOT) {
          allMustNot = false;
          break;
        }
      }
      if (allMustNot) {
        // All are MUST_NOT: add the contextQuery to the main query instead (not as sub-query)
        for (BooleanClause clause : contextQuery.clauses()) {
          query.add(clause);
        }
      } else if (allTermsRequired == false) {
        // We must carefully upgrade the query clauses to MUST:
        BooleanQuery.Builder newQuery = new BooleanQuery.Builder();
        newQuery.add(query.build(), BooleanClause.Occur.MUST);
        newQuery.add(contextQuery, BooleanClause.Occur.MUST);
        query = newQuery;
      } else {
        // Add contextQuery as sub-query
        query.add(contextQuery, BooleanClause.Occur.MUST);
      }
    }
  }
  // TODO: we could allow blended sort here, combining
  // weight w/ score. Now we ignore score and sort only
  // by weight:
  Query finalQuery = finishQuery(query, allTermsRequired);
  //System.out.println("finalQuery=" + finalQuery);
  // Sort by weight, descending:
  TopFieldCollector c = TopFieldCollector.create(SORT, num, true, false, false);
  // We sorted postings by weight during indexing, so we
  // only retrieve the first num hits now:
  Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num);
  List<LookupResult> results = null;
  SearcherManager mgr;
  IndexSearcher searcher;
  synchronized (searcherMgrLock) {
    // acquire & release on the same SearcherManager, via a local reference
    mgr = searcherMgr;
    searcher = mgr.acquire();
  }
  try {
    //System.out.println("got searcher=" + searcher);
    searcher.search(finalQuery, c2);
    TopFieldDocs hits = c.topDocs();
    // Slower way if postings are not pre-sorted by weight:
    // hits = searcher.search(query, null, num, SORT);
    results = createResults(searcher, hits, num, key, doHighlight, matchedTokens, prefixToken);
  } finally {
    mgr.release(searcher);
  }
  return results;
}
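A hedged usage sketch, assuming a suggester already built over entries indexed with an "en" context (the context value and lookup key are illustrative):

// Build a context filter; addContextToQuery is the helper the javadoc points to.
BooleanQuery.Builder contextFilter = new BooleanQuery.Builder();
suggester.addContextToQuery(contextFilter, new BytesRef("en"), BooleanClause.Occur.MUST);

// Require all typed terms to match, and highlight the matched portions.
List<Lookup.LookupResult> results = suggester.lookup("netw", contextFilter.build(), 5, true, true);
for (Lookup.LookupResult r : results) {
  System.out.println(r.key + " (weight=" + r.value + ")");
}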
use of org.apache.lucene.search.TopFieldDocs in project lucene-solr by apache.
the class BlobHandler method handleRequestBody.
@Override
public void handleRequestBody(final SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
  String httpMethod = req.getHttpMethod();
  String path = (String) req.getContext().get("path");
  SolrConfigHandler.setWt(req, JSON);
  List<String> pieces = StrUtils.splitSmart(path, '/');
  String blobName = null;
  if (pieces.size() >= 3)
    blobName = pieces.get(2);
  if ("POST".equals(httpMethod)) {
    if (blobName == null || blobName.isEmpty()) {
      rsp.add("error", "Name not found");
      return;
    }
    String err = SolrConfigHandler.validateName(blobName);
    if (err != null) {
      log.warn("invalid blob name");
      rsp.add("error", err);
      return;
    }
    if (req.getContentStreams() == null) {
      log.warn("no content stream");
      rsp.add("error", "No stream");
      return;
    }
    for (ContentStream stream : req.getContentStreams()) {
      ByteBuffer payload = SimplePostTool.inputStreamToByteArray(stream.getStream(), maxSize);
      MessageDigest m = MessageDigest.getInstance("MD5");
      m.update(payload.array(), payload.position(), payload.limit());
      String md5 = new BigInteger(1, m.digest()).toString(16);
      TopDocs duplicate = req.getSearcher().search(new TermQuery(new Term("md5", md5)), 1);
      if (duplicate.totalHits > 0) {
        rsp.add("error", "duplicate entry");
        forward(req, null, new MapSolrParams((Map) makeMap("q", "md5:" + md5, "fl", "id,size,version,timestamp,blobName")), rsp);
        log.warn("duplicate entry for blob: " + blobName);
        return;
      }
      TopFieldDocs docs = req.getSearcher().search(new TermQuery(new Term("blobName", blobName)), 1, new Sort(new SortField("version", SortField.Type.LONG, true)));
      long version = 0;
      if (docs.totalHits > 0) {
        Document doc = req.getSearcher().doc(docs.scoreDocs[0].doc);
        Number n = doc.getField("version").numericValue();
        version = n.longValue();
      }
      version++;
      String id = blobName + "/" + version;
      Map<String, Object> doc = makeMap(ID, id, "md5", md5, "blobName", blobName, VERSION, version, "timestamp", new Date(), "size", payload.limit(), "blob", payload);
      verifyWithRealtimeGet(blobName, version, req, doc);
      log.info(StrUtils.formatString("inserting new blob {0}, size {1}, md5 {2}", doc.get(ID), String.valueOf(payload.limit()), md5));
      indexMap(req, rsp, doc);
      log.info("Successfully added and committed a blob with id {} and size {}", id, payload.limit());
      break;
    }
  } else {
    int version = -1;
    if (pieces.size() > 3) {
      try {
        version = Integer.parseInt(pieces.get(3));
      } catch (NumberFormatException e) {
        rsp.add("error", "Invalid version: " + pieces.get(3));
        return;
      }
    }
    if (ReplicationHandler.FILE_STREAM.equals(req.getParams().get(CommonParams.WT))) {
      if (blobName == null) {
        throw new SolrException(SolrException.ErrorCode.NOT_FOUND, "Please send the request in the format /blob/<blobName>/<version>");
      } else {
        String q = "blobName:{0}";
        if (version != -1)
          q = "id:{0}/{1}";
        QParser qparser = QParser.getParser(StrUtils.formatString(q, blobName, version), req);
        final TopDocs docs = req.getSearcher().search(qparser.parse(), 1, new Sort(new SortField("version", SortField.Type.LONG, true)));
        if (docs.totalHits > 0) {
          rsp.add(ReplicationHandler.FILE_STREAM, new SolrCore.RawWriter() {

            @Override
            public void write(OutputStream os) throws IOException {
              Document doc = req.getSearcher().doc(docs.scoreDocs[0].doc);
              IndexableField sf = doc.getField("blob");
              FieldType fieldType = req.getSchema().getField("blob").getType();
              ByteBuffer buf = (ByteBuffer) fieldType.toObject(sf);
              if (buf == null) {
                // should never happen unless a user wrote this document directly
                throw new SolrException(SolrException.ErrorCode.NOT_FOUND, "Invalid document. No field called blob");
              } else {
                os.write(buf.array(), 0, buf.limit());
              }
            }
          });
        } else {
          throw new SolrException(SolrException.ErrorCode.NOT_FOUND, StrUtils.formatString("Invalid combination of blobName {0} and version {1}", blobName, version));
        }
      }
    } else {
      String q = "*:*";
      if (blobName != null) {
        q = "blobName:{0}";
        if (version != -1) {
          q = "id:{0}/{1}";
        }
      }
      forward(req, null, new MapSolrParams((Map) makeMap("q", StrUtils.formatString(q, blobName, version), "fl", "id,size,version,timestamp,blobName,md5", SORT, "version desc")), rsp);
    }
  }
}
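For orientation, a hypothetical client-side sketch (plain JDK HTTP, no SolrJ) of POSTing a blob to this handler through the .system collection; the host, path, and blob name are assumptions, not derived from the handler code itself:

import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Paths;

public class BlobUploadSketch {
  public static void main(String[] args) throws Exception {
    byte[] payload = Files.readAllBytes(Paths.get("myplugin.jar"));
    // POST /solr/.system/blob/<blobName>: the handler computes the MD5,
    // rejects duplicates, and assigns the next version number.
    URL url = new URL("http://localhost:8983/solr/.system/blob/myplugin");
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    conn.setRequestMethod("POST");
    conn.setDoOutput(true);
    conn.setRequestProperty("Content-Type", "application/octet-stream");
    try (OutputStream os = conn.getOutputStream()) {
      os.write(payload);
    }
    System.out.println("HTTP " + conn.getResponseCode());
  }
}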
use of org.apache.lucene.search.TopFieldDocs in project lucene-solr by apache.
the class TestDemoExpressions method testDollarVariable.
/** Uses variables with $ */
public void testDollarVariable() throws Exception {
  Expression expr = JavascriptCompiler.compile("$0+$score");
  SimpleBindings bindings = new SimpleBindings();
  bindings.add(new SortField("$0", SortField.Type.SCORE));
  bindings.add(new SortField("$score", SortField.Type.SCORE));
  Sort sort = new Sort(expr.getSortField(bindings, true));
  Query query = new TermQuery(new Term("body", "contents"));
  TopFieldDocs td = searcher.search(query, 3, sort, true, true);
  for (int i = 0; i < 3; i++) {
    FieldDoc d = (FieldDoc) td.scoreDocs[i];
    float expected = 2 * d.score;
    float actual = ((Double) d.fields[0]).floatValue();
    assertEquals(expected, actual, CheckHits.explainToleranceDelta(expected, actual));
  }
}
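Because both $0 and $score are bound to the relevance score, each sort value resolves to twice the hit's score, which is what the assertion verifies. The same binding mechanism works with real fields; a hedged variant, assuming a numeric doc-values field named "popularity":

// Assumes: IndexSearcher searcher, and documents with a LONG doc-values field "popularity".
Expression expr = JavascriptCompiler.compile("_score + ln(popularity + 1)");
SimpleBindings bindings = new SimpleBindings();
bindings.add(new SortField("_score", SortField.Type.SCORE));
bindings.add(new SortField("popularity", SortField.Type.LONG));
Sort sort = new Sort(expr.getSortField(bindings, true));
TopFieldDocs td = searcher.search(new TermQuery(new Term("body", "contents")), 10, sort, true, true);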