Search in sources :

Example 11 with TopFieldDocs

use of org.apache.lucene.search.TopFieldDocs in project lucene-solr by apache.

the class DrillSideways method search.

/**
 * Runs the drill-down query with hits ordered by the supplied {@link Sort}, and
 * computes drill-down and drill-sideways counts along the way. When {@code sort}
 * is {@code null} the call is handed to the {@code search(after, query, topN)}
 * overload instead.
 */
public DrillSidewaysResult search(DrillDownQuery query, Query filter, FieldDoc after, int topN, Sort sort, boolean doDocScores, boolean doMaxScore) throws IOException {
    if (filter != null) {
        query = new DrillDownQuery(config, filter, query);
    }
    if (sort == null) {
        return search(after, query, topN);
    }
    // The collector does not allow numHits = 0, so an empty index is treated as holding one doc.
    final int maxDoc = searcher.getIndexReader().maxDoc();
    final int fTopN = Math.min(topN, maxDoc == 0 ? 1 : maxDoc);
    if (executor == null) {
        // No executor: collect the sorted hits with a single collector.
        final TopFieldCollector sortedHits = TopFieldCollector.create(sort, fTopN, after, true, doDocScores, doMaxScore);
        final DrillSidewaysResult facetResult = search(query, sortedHits);
        return new DrillSidewaysResult(facetResult.facets, sortedHits.topDocs());
    }
    // We have an executor, so use the multi-threaded version.
    final CollectorManager<TopFieldCollector, TopFieldDocs> sortedManager = new CollectorManager<TopFieldCollector, TopFieldDocs>() {

        @Override
        public TopFieldCollector newCollector() throws IOException {
            return TopFieldCollector.create(sort, fTopN, after, true, doDocScores, doMaxScore);
        }

        @Override
        public TopFieldDocs reduce(Collection<TopFieldCollector> collectors) throws IOException {
            // Gather the hits of every collector, then merge them under the same sort.
            final TopFieldDocs[] perCollector = new TopFieldDocs[collectors.size()];
            int slot = 0;
            for (TopFieldCollector each : collectors) {
                perCollector[slot] = each.topDocs();
                slot++;
            }
            return TopDocs.merge(sort, topN, perCollector);
        }
    };
    final ConcurrentDrillSidewaysResult<TopFieldDocs> concurrentResult = search(query, sortedManager);
    return new DrillSidewaysResult(concurrentResult.facets, concurrentResult.collectorResult);
}
Also used : Collection(java.util.Collection) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) MultiCollectorManager(org.apache.lucene.search.MultiCollectorManager) CollectorManager(org.apache.lucene.search.CollectorManager) TopFieldCollector(org.apache.lucene.search.TopFieldCollector)

Example 12 with TopFieldDocs

use of org.apache.lucene.search.TopFieldDocs in project lucene-solr by apache.

the class LatLonPoint method nearest.

/**
 * Returns the {@code n} indexed points closest to the given location, measured by Haversine distance.
 * <p>
 * The result is functionally the same as running {@link MatchAllDocsQuery} with a
 * {@link LatLonDocValuesField#newDistanceSort}, but it is far more efficient because it takes advantage
 * of properties of the indexed BKD tree.  Currently this only works with {@link Lucene60PointsFormat}
 * (used by the default codec).  Multi-valued fields are currently not de-duplicated: a document with
 * several instances of the field that all make it into the top n will appear more than once.
 * <p>
 * Hits are ordered by ascending distance from the location. The value carried by each {@link FieldDoc}
 * is a Double instance holding the distance in meters.
 * 
 * @param searcher IndexSearcher to find nearest points from.
 * @param field field name. must not be null.
 * @param latitude latitude at the center: must be within standard +/-90 coordinate bounds.
 * @param longitude longitude at the center: must be within standard +/-180 coordinate bounds.
 * @param n the number of nearest neighbors to retrieve.
 * @return TopFieldDocs containing documents ordered by distance, where the field value for each {@link FieldDoc} is the distance in meters
 * @throws IllegalArgumentException if the underlying PointValues is not a {@code Lucene60PointsReader} (this is a current limitation), or
 *         if {@code field} or {@code searcher} is null, or if {@code latitude}, {@code longitude} or {@code n} are out-of-bounds
 * @throws IOException if an IOException occurs while finding the points.
 */
// TODO: what about multi-valued documents? what happens?
public static TopFieldDocs nearest(IndexSearcher searcher, String field, double latitude, double longitude, int n) throws IOException {
    // Argument checks, in a fixed order so the first violated one decides the error reported.
    GeoUtils.checkLatitude(latitude);
    GeoUtils.checkLongitude(longitude);
    if (n < 1) {
        throw new IllegalArgumentException("n must be at least 1; got " + n);
    }
    if (field == null) {
        throw new IllegalArgumentException("field must not be null");
    }
    if (searcher == null) {
        throw new IllegalArgumentException("searcher must not be null");
    }
    // Parallel lists: entry i of each describes the i-th segment that has points for the field.
    final List<BKDReader> readers = new ArrayList<>();
    final List<Integer> docBases = new ArrayList<>();
    final List<Bits> liveDocs = new ArrayList<>();
    int totalHits = 0;
    for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) {
        final PointValues points = leaf.reader().getPointValues(field);
        if (points == null) {
            // This segment has no points for the field.
            continue;
        }
        if (!(points instanceof BKDReader)) {
            throw new IllegalArgumentException("can only run on Lucene60PointsReader points implementation, but got " + points);
        }
        totalHits += points.getDocCount();
        // points is a non-null BKDReader at this point, so the cast result needs no null check.
        readers.add((BKDReader) points);
        docBases.add(leaf.docBase);
        liveDocs.add(leaf.reader().getLiveDocs());
    }
    final NearestNeighbor.NearestHit[] hits = NearestNeighbor.nearest(latitude, longitude, readers, liveDocs, docBases, n);
    // Convert to TopFieldDocs: score is fixed at 0 and the single sort value is the distance.
    final ScoreDoc[] scoreDocs = new ScoreDoc[hits.length];
    int slot = 0;
    for (NearestNeighbor.NearestHit hit : hits) {
        scoreDocs[slot++] = new FieldDoc(hit.docID, 0.0f, new Object[] { Double.valueOf(hit.distanceMeters) });
    }
    return new TopFieldDocs(totalHits, scoreDocs, null, 0.0f);
}
Also used : FieldDoc(org.apache.lucene.search.FieldDoc) ArrayList(java.util.ArrayList) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) ScoreDoc(org.apache.lucene.search.ScoreDoc) BKDReader(org.apache.lucene.util.bkd.BKDReader) PointValues(org.apache.lucene.index.PointValues) Bits(org.apache.lucene.util.Bits) LeafReaderContext(org.apache.lucene.index.LeafReaderContext)

Example 13 with TopFieldDocs

use of org.apache.lucene.search.TopFieldDocs in project lucene-solr by apache.

the class AnalyzingInfixSuggester method lookup.

/**
   * This is an advanced method providing the capability to send down to the suggester any 
   * arbitrary lucene query to be used to filter the result of the suggester.
   * <p>
   * The key is analyzed with the query analyzer. Every token except the last becomes an exact
   * term clause; the last token becomes a prefix-style clause unless the key ends with
   * discarded characters (e.g. whitespace), in which case it is matched exactly as well.
   * 
   * @param key the keyword being looked for
   * @param contextQuery an arbitrary Lucene query to be used to filter the result of the suggester. {@link #addContextToQuery} could be used to build this contextQuery.
   * @param num number of items to return
   * @param allTermsRequired all searched terms must match or not
   * @param doHighlight if true, the matching term will be highlighted in the search result
   * @return the result of the suggester
   * @throws IOException if there is an IO exception while reading data from the index
   * @throws IllegalStateException if the suggester was not built (no searcher manager is set)
   */
public List<LookupResult> lookup(CharSequence key, BooleanQuery contextQuery, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
    if (searcherMgr == null) {
        throw new IllegalStateException("suggester was not built");
    }
    // Term clauses are required (MUST) or optional (SHOULD) depending on allTermsRequired.
    final BooleanClause.Occur occur;
    if (allTermsRequired) {
        occur = BooleanClause.Occur.MUST;
    } else {
        occur = BooleanClause.Occur.SHOULD;
    }
    BooleanQuery.Builder query;
    Set<String> matchedTokens;
    // Stays null unless the last token ends up being matched as a prefix.
    String prefixToken = null;
    try (TokenStream ts = queryAnalyzer.tokenStream("", new StringReader(key.toString()))) {
        //long t0 = System.currentTimeMillis();
        ts.reset();
        final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        final OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
        String lastToken = null;
        query = new BooleanQuery.Builder();
        int maxEndOffset = -1;
        matchedTokens = new HashSet<>();
        // The most recent token is held back in lastToken: it is only added as an exact
        // TermQuery once a following token shows up. The final token is handled after the loop.
        while (ts.incrementToken()) {
            if (lastToken != null) {
                matchedTokens.add(lastToken);
                query.add(new TermQuery(new Term(TEXT_FIELD_NAME, lastToken)), occur);
            }
            lastToken = termAtt.toString();
            if (lastToken != null) {
                // Remember the furthest end offset reached by any token.
                maxEndOffset = Math.max(maxEndOffset, offsetAtt.endOffset());
            }
        }
        ts.end();
        if (lastToken != null) {
            Query lastQuery;
            // After end(), the offset attribute is compared with the furthest token end to
            // detect trailing characters that produced no token.
            if (maxEndOffset == offsetAtt.endOffset()) {
                // Use PrefixQuery (or the ngram equivalent) when
                // there was no trailing discarded chars in the
                // string (e.g. whitespace), so that if query does
                // not end with a space we show prefix matches for
                // that token:
                lastQuery = getLastTokenQuery(lastToken);
                prefixToken = lastToken;
            } else {
                // Use TermQuery for an exact match if there were
                // trailing discarded chars (e.g. whitespace), so
                // that if query ends with a space we only show
                // exact matches for that term:
                matchedTokens.add(lastToken);
                lastQuery = new TermQuery(new Term(TEXT_FIELD_NAME, lastToken));
            }
            // getLastTokenQuery may hand back null, in which case no clause is added.
            if (lastQuery != null) {
                query.add(lastQuery, occur);
            }
        }
        if (contextQuery != null) {
            // Find out whether the context query consists solely of MUST_NOT clauses.
            boolean allMustNot = true;
            for (BooleanClause clause : contextQuery.clauses()) {
                if (clause.getOccur() != BooleanClause.Occur.MUST_NOT) {
                    allMustNot = false;
                    break;
                }
            }
            if (allMustNot) {
                // All are MUST_NOT: add the contextQuery to the main query instead (not as sub-query)
                for (BooleanClause clause : contextQuery.clauses()) {
                    query.add(clause);
                }
            } else if (allTermsRequired == false) {
                // We must carefully upgrade the query clauses to MUST:
                // the term clauses were added as SHOULD, so the query built so far is wrapped
                // as a single MUST clause next to the (also MUST) context query.
                BooleanQuery.Builder newQuery = new BooleanQuery.Builder();
                newQuery.add(query.build(), BooleanClause.Occur.MUST);
                newQuery.add(contextQuery, BooleanClause.Occur.MUST);
                query = newQuery;
            } else {
                // Add contextQuery as sub-query
                query.add(contextQuery, BooleanClause.Occur.MUST);
            }
        }
    }
    // TODO: we could allow blended sort here, combining
    // weight w/ score.  Now we ignore score and sort only
    // by weight:
    Query finalQuery = finishQuery(query, allTermsRequired);
    //System.out.println("finalQuery=" + finalQuery);
    // Sort by weight, descending:
    TopFieldCollector c = TopFieldCollector.create(SORT, num, true, false, false);
    // We sorted postings by weight during indexing, so we
    // only retrieve the first num hits now:
    Collector c2 = new EarlyTerminatingSortingCollector(c, SORT, num);
    List<LookupResult> results = null;
    SearcherManager mgr;
    IndexSearcher searcher;
    synchronized (searcherMgrLock) {
        // acquire & release on same SearcherManager, via local reference
        // NOTE(review): presumably searcherMgr can be replaced concurrently, hence the lock
        // and the local copy - confirm against the code that assigns searcherMgr.
        mgr = searcherMgr;
        searcher = mgr.acquire();
    }
    try {
        //System.out.println("got searcher=" + searcher);
        searcher.search(finalQuery, c2);
        // Hits are read from the wrapped collector c, not from the early-terminating wrapper c2.
        TopFieldDocs hits = c.topDocs();
        // Slower way if postings are not pre-sorted by weight:
        // hits = searcher.search(query, null, num, SORT);
        results = createResults(searcher, hits, num, key, doHighlight, matchedTokens, prefixToken);
    } finally {
        // Always give the searcher back to the manager it was acquired from.
        mgr.release(searcher);
    }
    return results;
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) TokenStream(org.apache.lucene.analysis.TokenStream) Query(org.apache.lucene.search.Query) PrefixQuery(org.apache.lucene.search.PrefixQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) SearcherManager(org.apache.lucene.search.SearcherManager) StringReader(java.io.StringReader) EarlyTerminatingSortingCollector(org.apache.lucene.search.EarlyTerminatingSortingCollector) TopFieldCollector(org.apache.lucene.search.TopFieldCollector) Collector(org.apache.lucene.search.Collector) TopFieldCollector(org.apache.lucene.search.TopFieldCollector) TermQuery(org.apache.lucene.search.TermQuery) Occur(org.apache.lucene.search.BooleanClause.Occur) Term(org.apache.lucene.index.Term) BooleanClause(org.apache.lucene.search.BooleanClause) EarlyTerminatingSortingCollector(org.apache.lucene.search.EarlyTerminatingSortingCollector) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) OffsetAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute)

Example 14 with TopFieldDocs

use of org.apache.lucene.search.TopFieldDocs in project lucene-solr by apache.

the class BlobHandler method handleRequestBody.

/**
 * Serves blob requests: a {@code POST} stores a new version of a named blob, any other HTTP
 * method looks blobs up.
 * <p>
 * The request path (taken from the request context under {@code "path"}) is split on
 * {@code '/'}; the third piece is used as the blob name and the fourth, if present, as the
 * version. The error text below gives the expected shape as
 * {@code /blob/<blobName>/<version>}.
 * <ul>
 *   <li><b>POST</b>: validates the name, reads the first content stream (bounded by
 *       {@code maxSize}), rejects it if a document with the same MD5 already exists, and
 *       otherwise indexes it with a version one higher than the highest version found for
 *       that blob name.</li>
 *   <li><b>other methods</b>: when the {@code wt} parameter equals
 *       {@code ReplicationHandler.FILE_STREAM}, the raw bytes of the newest matching blob are
 *       written to the response; otherwise the request is forwarded as a metadata query.</li>
 * </ul>
 * Most problems are reported by adding an {@code "error"} entry to the response rather than
 * by throwing.
 *
 * @param req the incoming request
 * @param rsp the response to fill in
 * @throws SolrException with {@code NOT_FOUND} when a file-stream request has no blob name or
 *         matches no stored blob
 * @throws Exception for any other failure raised while reading, hashing or indexing
 */
@Override
public void handleRequestBody(final SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    String httpMethod = req.getHttpMethod();
    String path = (String) req.getContext().get("path");
    SolrConfigHandler.setWt(req, JSON);
    List<String> pieces = StrUtils.splitSmart(path, '/');
    // The third path piece, when present, is the blob name.
    String blobName = null;
    if (pieces.size() >= 3) {
        blobName = pieces.get(2);
    }
    if ("POST".equals(httpMethod)) {
        if (blobName == null || blobName.isEmpty()) {
            rsp.add("error", "Name not found");
            return;
        }
        String err = SolrConfigHandler.validateName(blobName);
        if (err != null) {
            // A name was supplied but failed validation; log that, not "no blob name".
            log.warn("invalid blob name {}: {}", blobName, err);
            rsp.add("error", err);
            return;
        }
        if (req.getContentStreams() == null) {
            log.warn("no content stream");
            rsp.add("error", "No stream");
            return;
        }
        // Only the first content stream is stored: every path through the loop body returns or breaks.
        for (ContentStream stream : req.getContentStreams()) {
            ByteBuffer payload = SimplePostTool.inputStreamToByteArray(stream.getStream(), maxSize);
            MessageDigest m = MessageDigest.getInstance("MD5");
            // MessageDigest.update takes (array, offset, length). Hash exactly the buffer's
            // readable range instead of passing limit() as the length, which is wrong (or
            // throws) as soon as the buffer does not start at array index 0.
            m.update(payload.array(), payload.arrayOffset() + payload.position(), payload.remaining());
            // NOTE(review): BigInteger.toString(16) drops leading zeros, so the hex digest can be
            // shorter than 32 characters. Left unchanged because stored "md5" values use this form.
            String md5 = new BigInteger(1, m.digest()).toString(16);
            TopDocs duplicate = req.getSearcher().search(new TermQuery(new Term("md5", md5)), 1);
            if (duplicate.totalHits > 0) {
                rsp.add("error", "duplicate entry");
                forward(req, null, new MapSolrParams((Map) makeMap("q", "md5:" + md5, "fl", "id,size,version,timestamp,blobName")), rsp);
                log.warn("duplicate entry for blob :" + blobName);
                return;
            }
            // Fetch the document with the highest version for this name (descending sort, one hit).
            TopFieldDocs docs = req.getSearcher().search(new TermQuery(new Term("blobName", blobName)), 1, new Sort(new SortField("version", SortField.Type.LONG, true)));
            long version = 0;
            if (docs.totalHits > 0) {
                Document doc = req.getSearcher().doc(docs.scoreDocs[0].doc);
                Number n = doc.getField("version").numericValue();
                version = n.longValue();
            }
            // The new blob gets the next version; the first blob of a name gets version 1.
            version++;
            String id = blobName + "/" + version;
            Map<String, Object> doc = makeMap(ID, id, "md5", md5, "blobName", blobName, VERSION, version, "timestamp", new Date(), "size", payload.limit(), "blob", payload);
            verifyWithRealtimeGet(blobName, version, req, doc);
            log.info(StrUtils.formatString("inserting new blob {0} ,size {1}, md5 {2}", doc.get(ID), String.valueOf(payload.limit()), md5));
            indexMap(req, rsp, doc);
            log.info(" Successfully Added and committed a blob with id {} and size {} ", id, payload.limit());
            break;
        }
    } else {
        // -1 means "no version given in the path".
        int version = -1;
        if (pieces.size() > 3) {
            try {
                version = Integer.parseInt(pieces.get(3));
            } catch (NumberFormatException e) {
                // Separate the label from the offending value so the message is readable.
                rsp.add("error", "Invalid version: " + pieces.get(3));
                return;
            }
        }
        if (ReplicationHandler.FILE_STREAM.equals(req.getParams().get(CommonParams.WT))) {
            if (blobName == null) {
                throw new SolrException(SolrException.ErrorCode.NOT_FOUND, "Please send the request in the format /blob/<blobName>/<version>");
            } else {
                // Look up by exact id when a version was given, otherwise by name (newest first).
                String q = "blobName:{0}";
                if (version != -1) {
                    q = "id:{0}/{1}";
                }
                QParser qparser = QParser.getParser(StrUtils.formatString(q, blobName, version), req);
                final TopDocs docs = req.getSearcher().search(qparser.parse(), 1, new Sort(new SortField("version", SortField.Type.LONG, true)));
                if (docs.totalHits > 0) {
                    // The blob bytes are written by this callback when it is invoked with the output stream.
                    rsp.add(ReplicationHandler.FILE_STREAM, new SolrCore.RawWriter() {

                        @Override
                        public void write(OutputStream os) throws IOException {
                            Document doc = req.getSearcher().doc(docs.scoreDocs[0].doc);
                            IndexableField sf = doc.getField("blob");
                            FieldType fieldType = req.getSchema().getField("blob").getType();
                            ByteBuffer buf = (ByteBuffer) fieldType.toObject(sf);
                            if (buf == null) {
                                //should never happen unless a user wrote this document directly
                                throw new SolrException(SolrException.ErrorCode.NOT_FOUND, "Invalid document . No field called blob");
                            } else {
                                os.write(buf.array(), 0, buf.limit());
                            }
                        }
                    });
                } else {
                    throw new SolrException(SolrException.ErrorCode.NOT_FOUND, StrUtils.formatString("Invalid combination of blobName {0} and version {1}", blobName, version));
                }
            }
        } else {
            // Metadata listing: all blobs, all versions of one name, or one exact id.
            String q = "*:*";
            if (blobName != null) {
                q = "blobName:{0}";
                if (version != -1) {
                    q = "id:{0}/{1}";
                }
            }
            forward(req, null, new MapSolrParams((Map) makeMap("q", StrUtils.formatString(q, blobName, version), "fl", "id,size,version,timestamp,blobName,md5", SORT, "version desc")), rsp);
        }
    }
}
Also used : SolrCore(org.apache.solr.core.SolrCore) OutputStream(java.io.OutputStream) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) SortField(org.apache.lucene.search.SortField) Document(org.apache.lucene.document.Document) SolrInputDocument(org.apache.solr.common.SolrInputDocument) TopDocs(org.apache.lucene.search.TopDocs) ContentStream(org.apache.solr.common.util.ContentStream) Sort(org.apache.lucene.search.Sort) MessageDigest(java.security.MessageDigest) SolrException(org.apache.solr.common.SolrException) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer) Date(java.util.Date) FieldType(org.apache.solr.schema.FieldType) IndexableField(org.apache.lucene.index.IndexableField) MapSolrParams(org.apache.solr.common.params.MapSolrParams) QParser(org.apache.solr.search.QParser) BigInteger(java.math.BigInteger) Map(java.util.Map) Utils.makeMap(org.apache.solr.common.util.Utils.makeMap) Collections.singletonMap(java.util.Collections.singletonMap)

Example 15 with TopFieldDocs

use of org.apache.lucene.search.TopFieldDocs in project lucene-solr by apache.

the class TestDemoExpressions method testDollarVariable.

/** Binds two variables whose names start with {@code $} and sorts by their sum. */
public void testDollarVariable() throws Exception {
    final Expression sumExpr = JavascriptCompiler.compile("$0+$score");
    // Both variables are bound to the document score, so the expression is twice the score.
    final SimpleBindings scoreBindings = new SimpleBindings();
    scoreBindings.add(new SortField("$0", SortField.Type.SCORE));
    scoreBindings.add(new SortField("$score", SortField.Type.SCORE));
    final Sort bySum = new Sort(sumExpr.getSortField(scoreBindings, true));
    final Query bodyQuery = new TermQuery(new Term("body", "contents"));
    final TopFieldDocs topHits = searcher.search(bodyQuery, 3, bySum, true, true);
    // Exactly three hits are inspected; fewer hits makes the test fail on the array access.
    for (int rank = 0; rank < 3; rank++) {
        final FieldDoc hit = (FieldDoc) topHits.scoreDocs[rank];
        final float expected = 2 * hit.score;
        final Double sortValue = (Double) hit.fields[0];
        final float actual = sortValue.floatValue();
        assertEquals(expected, actual, CheckHits.explainToleranceDelta(expected, actual));
    }
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) FieldDoc(org.apache.lucene.search.FieldDoc) Sort(org.apache.lucene.search.Sort) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) SortField(org.apache.lucene.search.SortField) Term(org.apache.lucene.index.Term)

Aggregations

TopFieldDocs (org.apache.lucene.search.TopFieldDocs)41 Sort (org.apache.lucene.search.Sort)30 SortField (org.apache.lucene.search.SortField)24 FieldDoc (org.apache.lucene.search.FieldDoc)23 IndexSearcher (org.apache.lucene.search.IndexSearcher)19 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)18 TermQuery (org.apache.lucene.search.TermQuery)15 Document (org.apache.lucene.document.Document)14 ScoreDoc (org.apache.lucene.search.ScoreDoc)12 ArrayList (java.util.ArrayList)11 Term (org.apache.lucene.index.Term)11 Query (org.apache.lucene.search.Query)11 BytesRef (org.apache.lucene.util.BytesRef)9 TopDocs (org.apache.lucene.search.TopDocs)8 Directory (org.apache.lucene.store.Directory)8 HashMap (java.util.HashMap)6 StringField (org.apache.lucene.document.StringField)6 CollapseTopFieldDocs (org.apache.lucene.search.grouping.CollapseTopFieldDocs)5 IOException (java.io.IOException)4 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)4