Example 61 with LeafReaderContext

use of org.apache.lucene.index.LeafReaderContext in project lucene-solr by apache.

the class ValueSource method asDoubleValuesSource.

/**
   * Expose this ValueSource as a DoubleValuesSource
   */
public DoubleValuesSource asDoubleValuesSource() {
    return new DoubleValuesSource() {

        @Override
        public DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
            Map context = new HashMap<>();
            FakeScorer scorer = new FakeScorer();
            context.put("scorer", scorer);
            FunctionValues fv = ValueSource.this.getValues(context, ctx);
            return new DoubleValues() {

                @Override
                public double doubleValue() throws IOException {
                    return fv.doubleVal(scorer.current);
                }

                @Override
                public boolean advanceExact(int doc) throws IOException {
                    scorer.current = doc;
                    if (scores != null && scores.advanceExact(doc)) {
                        scorer.score = (float) scores.doubleValue();
                    } else {
                        scorer.score = 0;
                    }
                    return fv.exists(doc);
                }
            };
        }

        @Override
        public boolean needsScores() {
            // be on the safe side
            return true;
        }

        @Override
        public Explanation explain(LeafReaderContext ctx, int docId, Explanation scoreExplanation) throws IOException {
            Map context = new HashMap<>();
            FakeScorer scorer = new FakeScorer();
            scorer.score = scoreExplanation.getValue();
            context.put("scorer", scorer);
            FunctionValues fv = ValueSource.this.getValues(context, ctx);
            return fv.explain(docId);
        }
    };
}
Also used : IdentityHashMap(java.util.IdentityHashMap) HashMap(java.util.HashMap) Map(java.util.Map) Explanation(org.apache.lucene.search.Explanation) DoubleValues(org.apache.lucene.search.DoubleValues) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) DoubleValuesSource(org.apache.lucene.search.DoubleValuesSource)
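
A minimal sketch of how the adapter above might be consumed: wrap a simple ValueSource, then pull per-document values leaf by leaf. The reader setup is assumed; DoubleConstValueSource comes from the Lucene queries module, and passing null for the scores argument relies on the fallback to 0 shown in advanceExact above.

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.DoubleConstValueSource;
import org.apache.lucene.search.DoubleValues;
import org.apache.lucene.search.DoubleValuesSource;

public class ValueSourceAdapterSketch {

    public static void printValues(IndexReader reader) throws IOException {
        // a trivial ValueSource; any ValueSource would work here
        ValueSource vs = new DoubleConstValueSource(42.0);
        DoubleValuesSource dvs = vs.asDoubleValuesSource();
        for (LeafReaderContext leaf : reader.leaves()) {
            // no scores are available in this context, so pass null (the adapter falls back to 0)
            DoubleValues values = dvs.getValues(leaf, null);
            for (int doc = 0; doc < leaf.reader().maxDoc(); doc++) {
                if (values.advanceExact(doc)) {
                    // docBase translates the leaf-local doc ID into an index-wide ID
                    System.out.println((leaf.docBase + doc) + " -> " + values.doubleValue());
                }
            }
        }
    }
}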

Example 62 with LeafReaderContext

use of org.apache.lucene.index.LeafReaderContext in project lucene-solr by apache.

the class LatLonDocValuesBoxQuery method createWeight.

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    return new ConstantScoreWeight(this, boost) {

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            final SortedNumericDocValues values = context.reader().getSortedNumericDocValues(field);
            if (values == null) {
                return null;
            }
            final TwoPhaseIterator iterator = new TwoPhaseIterator(values) {

                @Override
                public boolean matches() throws IOException {
                    for (int i = 0, count = values.docValueCount(); i < count; ++i) {
                        final long value = values.nextValue();
                        final int lat = (int) (value >>> 32);
                        if (lat < minLatitude || lat > maxLatitude) {
                            // not within latitude range
                            continue;
                        }
                        final int lon = (int) (value & 0xFFFFFFFF);
                        if (crossesDateline) {
                            if (lon > maxLongitude && lon < minLongitude) {
                                // not within longitude range
                                continue;
                            }
                        } else {
                            if (lon < minLongitude || lon > maxLongitude) {
                                // not within longitude range
                                continue;
                            }
                        }
                        return true;
                    }
                    return false;
                }

                @Override
                public float matchCost() {
                    // 5 comparisons
                    return 5;
                }
            };
            return new ConstantScoreScorer(this, boost, iterator);
        }
    };
}
Also used : SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) TwoPhaseIterator(org.apache.lucene.search.TwoPhaseIterator) ConstantScoreScorer(org.apache.lucene.search.ConstantScoreScorer) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) ConstantScoreWeight(org.apache.lucene.search.ConstantScoreWeight)
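
A minimal sketch of how the doc-values box query above is normally obtained and run. The field name "location" and the searcher setup are assumptions; the documents must have been indexed with a LatLonDocValuesField of that name. The factory is named newBoxQuery in the Lucene version of this snippet (later releases rename it newSlowBoxQuery).

import java.io.IOException;
import org.apache.lucene.document.LatLonDocValuesField;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;

public class DocValuesBoxQuerySketch {

    public static int countInBox(IndexSearcher searcher) throws IOException {
        // arguments are minLatitude, maxLatitude, minLongitude, maxLongitude, in degrees;
        // dateline crossing is handled by the TwoPhaseIterator shown above
        Query box = LatLonDocValuesField.newBoxQuery("location", 40.0, 45.0, -75.0, -70.0);
        return searcher.count(box);
    }
}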

Example 63 with LeafReaderContext

use of org.apache.lucene.index.LeafReaderContext in project lucene-solr by apache.

the class LatLonPoint method nearest.

/**
   * Finds the {@code n} nearest indexed points to the provided point, according to Haversine distance.
   * <p>
   * This is functionally equivalent to running {@link MatchAllDocsQuery} with a {@link LatLonDocValuesField#newDistanceSort},
   * but is far more efficient since it takes advantage of properties of the indexed BKD tree.  Currently this
   * only works with {@link Lucene60PointsFormat} (used by the default codec).  Multi-valued fields are
   * currently not de-duplicated, so if a document has multiple instances of the specified field that
   * make it into the top n, that document will appear more than once.
   * <p>
   * Documents are ordered by ascending distance from the location. The value returned in {@link FieldDoc} for
   * the hits contains a Double instance with the distance in meters.
   * 
   * @param searcher IndexSearcher to find nearest points from.
   * @param field field name. must not be null.
   * @param latitude latitude at the center: must be within standard +/-90 coordinate bounds.
   * @param longitude longitude at the center: must be within standard +/-180 coordinate bounds.
   * @param n the number of nearest neighbors to retrieve.
   * @return TopFieldDocs containing documents ordered by distance, where the field value for each {@link FieldDoc} is the distance in meters
   * @throws IllegalArgumentException if the underlying PointValues is not a {@code Lucene60PointsReader} (this is a current limitation), or
   *         if {@code field} or {@code searcher} is null, or if {@code latitude}, {@code longitude} or {@code n} are out-of-bounds
   * @throws IOException if an IOException occurs while finding the points.
   */
// TODO: what about multi-valued documents? what happens?
public static TopFieldDocs nearest(IndexSearcher searcher, String field, double latitude, double longitude, int n) throws IOException {
    GeoUtils.checkLatitude(latitude);
    GeoUtils.checkLongitude(longitude);
    if (n < 1) {
        throw new IllegalArgumentException("n must be at least 1; got " + n);
    }
    if (field == null) {
        throw new IllegalArgumentException("field must not be null");
    }
    if (searcher == null) {
        throw new IllegalArgumentException("searcher must not be null");
    }
    List<BKDReader> readers = new ArrayList<>();
    List<Integer> docBases = new ArrayList<>();
    List<Bits> liveDocs = new ArrayList<>();
    int totalHits = 0;
    for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) {
        PointValues points = leaf.reader().getPointValues(field);
        if (points != null) {
            if (points instanceof BKDReader == false) {
                throw new IllegalArgumentException("can only run on Lucene60PointsReader points implementation, but got " + points);
            }
            totalHits += points.getDocCount();
            BKDReader reader = (BKDReader) points;
            if (reader != null) {
                readers.add(reader);
                docBases.add(leaf.docBase);
                liveDocs.add(leaf.reader().getLiveDocs());
            }
        }
    }
    NearestNeighbor.NearestHit[] hits = NearestNeighbor.nearest(latitude, longitude, readers, liveDocs, docBases, n);
    // Convert to TopFieldDocs:
    ScoreDoc[] scoreDocs = new ScoreDoc[hits.length];
    for (int i = 0; i < hits.length; i++) {
        NearestNeighbor.NearestHit hit = hits[i];
        scoreDocs[i] = new FieldDoc(hit.docID, 0.0f, new Object[] { Double.valueOf(hit.distanceMeters) });
    }
    return new TopFieldDocs(totalHits, scoreDocs, null, 0.0f);
}
Also used : FieldDoc(org.apache.lucene.search.FieldDoc) ArrayList(java.util.ArrayList) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) ScoreDoc(org.apache.lucene.search.ScoreDoc) BKDReader(org.apache.lucene.util.bkd.BKDReader) PointValues(org.apache.lucene.index.PointValues) Bits(org.apache.lucene.util.Bits) LeafReaderContext(org.apache.lucene.index.LeafReaderContext)
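
A minimal sketch of calling the nearest() helper above to fetch the ten closest indexed points to a location, following the documented contract that each FieldDoc carries the distance in meters as its single field value. The field name "location" and the searcher setup are assumptions.

import java.io.IOException;
import org.apache.lucene.document.LatLonPoint;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopFieldDocs;

public class NearestNeighborSketch {

    public static void printNearest(IndexSearcher searcher) throws IOException {
        // 10 nearest points to roughly New York City, ordered by ascending Haversine distance
        TopFieldDocs hits = LatLonPoint.nearest(searcher, "location", 40.7128, -74.0060, 10);
        for (ScoreDoc sd : hits.scoreDocs) {
            FieldDoc fd = (FieldDoc) sd;
            // fields[0] holds a Double with the distance in meters, per the javadoc above
            System.out.println("doc=" + fd.doc + " distanceMeters=" + fd.fields[0]);
        }
    }
}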

Example 64 with LeafReaderContext

use of org.apache.lucene.index.LeafReaderContext in project lucene-solr by apache.

the class FuzzyLikeThisQuery method newTermQuery.

private Query newTermQuery(IndexReader reader, Term term) throws IOException {
    if (ignoreTF) {
        return new ConstantScoreQuery(new TermQuery(term));
    } else {
        // we build an artificial TermContext that will give an overall df and ttf
        // equal to 1
        TermContext context = new TermContext(reader.getContext());
        for (LeafReaderContext leafContext : reader.leaves()) {
            Terms terms = leafContext.reader().terms(term.field());
            if (terms != null) {
                TermsEnum termsEnum = terms.iterator();
                if (termsEnum.seekExact(term.bytes())) {
                    // we want the total df and ttf to be 1
                    int freq = 1 - context.docFreq();
                    context.register(termsEnum.termState(), leafContext.ord, freq, freq);
                }
            }
        }
        return new TermQuery(term, context);
    }
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) Terms(org.apache.lucene.index.Terms) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) TermContext(org.apache.lucene.index.TermContext) TermsEnum(org.apache.lucene.index.TermsEnum) FuzzyTermsEnum(org.apache.lucene.search.FuzzyTermsEnum)
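
A minimal sketch of the public API around the private newTermQuery helper above: FuzzyLikeThisQuery lives in the Lucene sandbox module, and its rewrite (triggered by the search) is where per-term queries are built. The analyzer, field name, and searcher setup are assumptions.

import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.sandbox.queries.FuzzyLikeThisQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopDocs;

public class FuzzyLikeThisSketch {

    public static TopDocs search(IndexSearcher searcher) throws IOException {
        // at most 32 expanded terms, analyzed with StandardAnalyzer
        FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(32, new StandardAnalyzer());
        // fuzzy-match "lucene" in the "body" field: similarity/edit-distance threshold 2, prefix length 1
        flt.addTerms("lucene", "body", 2f, 1);
        return searcher.search(flt, 10);
    }
}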

Example 65 with LeafReaderContext

use of org.apache.lucene.index.LeafReaderContext in project lucene-solr by apache.

the class BaseGeoPointTestCase method testMultiValued.

public void testMultiValued() throws Exception {
    int numPoints = atLeast(10000);
    // Every doc has 2 points:
    double[] lats = new double[2 * numPoints];
    double[] lons = new double[2 * numPoints];
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig();
    // We rely on docID order:
    iwc.setMergePolicy(newLogMergePolicy());
    // and on seeds being able to reproduce:
    iwc.setMergeScheduler(new SerialMergeScheduler());
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    for (int id = 0; id < numPoints; id++) {
        Document doc = new Document();
        lats[2 * id] = quantizeLat(nextLatitude());
        lons[2 * id] = quantizeLon(nextLongitude());
        doc.add(newStringField("id", "" + id, Field.Store.YES));
        addPointToDoc(FIELD_NAME, doc, lats[2 * id], lons[2 * id]);
        lats[2 * id + 1] = quantizeLat(nextLatitude());
        lons[2 * id + 1] = quantizeLon(nextLongitude());
        addPointToDoc(FIELD_NAME, doc, lats[2 * id + 1], lons[2 * id + 1]);
        if (VERBOSE) {
            System.out.println("id=" + id);
            System.out.println("  lat=" + lats[2 * id] + " lon=" + lons[2 * id]);
            System.out.println("  lat=" + lats[2 * id + 1] + " lon=" + lons[2 * id + 1]);
        }
        w.addDocument(doc);
    }
    // TODO: share w/ verify; just need parallel array of the expected ids
    if (random().nextBoolean()) {
        w.forceMerge(1);
    }
    IndexReader r = w.getReader();
    w.close();
    IndexSearcher s = newSearcher(r);
    int iters = atLeast(25);
    for (int iter = 0; iter < iters; iter++) {
        Rectangle rect = nextBox();
        if (VERBOSE) {
            System.out.println("\nTEST: iter=" + iter + " rect=" + rect);
        }
        Query query = newRectQuery(FIELD_NAME, rect.minLat, rect.maxLat, rect.minLon, rect.maxLon);
        final FixedBitSet hits = new FixedBitSet(r.maxDoc());
        s.search(query, new SimpleCollector() {

            private int docBase;

            @Override
            public boolean needsScores() {
                return false;
            }

            @Override
            protected void doSetNextReader(LeafReaderContext context) throws IOException {
                docBase = context.docBase;
            }

            @Override
            public void collect(int doc) {
                hits.set(docBase + doc);
            }
        });
        boolean fail = false;
        for (int docID = 0; docID < lats.length / 2; docID++) {
            double latDoc1 = lats[2 * docID];
            double lonDoc1 = lons[2 * docID];
            double latDoc2 = lats[2 * docID + 1];
            double lonDoc2 = lons[2 * docID + 1];
            boolean result1 = rectContainsPoint(rect, latDoc1, lonDoc1);
            boolean result2 = rectContainsPoint(rect, latDoc2, lonDoc2);
            boolean expected = result1 || result2;
            if (hits.get(docID) != expected) {
                String id = s.doc(docID).get("id");
                if (expected) {
                    System.out.println("TEST: id=" + id + " docID=" + docID + " should match but did not");
                } else {
                    System.out.println("TEST: id=" + id + " docID=" + docID + " should not match but did");
                }
                System.out.println("  rect=" + rect);
                System.out.println("  lat=" + latDoc1 + " lon=" + lonDoc1 + "\n  lat=" + latDoc2 + " lon=" + lonDoc2);
                System.out.println("  result1=" + result1 + " result2=" + result2);
                fail = true;
            }
        }
        if (fail) {
            fail("some hits were wrong");
        }
    }
    r.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) SerialMergeScheduler(org.apache.lucene.index.SerialMergeScheduler) SimpleCollector(org.apache.lucene.search.SimpleCollector) FixedBitSet(org.apache.lucene.util.FixedBitSet) IndexReader(org.apache.lucene.index.IndexReader) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)
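
A minimal sketch isolating the docBase pattern the test's collector relies on: doSetNextReader records each leaf's offset, and collect adds it to the leaf-local doc ID so hits land at index-wide positions in the bit set. The query and searcher are assumed; the needsScores override matches the Lucene version of the test above (later versions use scoreMode instead).

import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.util.FixedBitSet;

public class GlobalHitsSketch {

    public static FixedBitSet collectGlobalHits(IndexSearcher searcher, Query query) throws IOException {
        FixedBitSet hits = new FixedBitSet(searcher.getIndexReader().maxDoc());
        searcher.search(query, new SimpleCollector() {

            private int docBase;

            @Override
            public boolean needsScores() {
                return false;
            }

            @Override
            protected void doSetNextReader(LeafReaderContext context) throws IOException {
                // offset of this leaf within the composite reader
                docBase = context.docBase;
            }

            @Override
            public void collect(int doc) {
                // translate the leaf-local doc ID into an index-wide ID
                hits.set(docBase + doc);
            }
        });
        return hits;
    }
}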

Aggregations

LeafReaderContext (org.apache.lucene.index.LeafReaderContext)326 LeafReader (org.apache.lucene.index.LeafReader)70 Document (org.apache.lucene.document.Document)68 BytesRef (org.apache.lucene.util.BytesRef)66 IOException (java.io.IOException)65 Directory (org.apache.lucene.store.Directory)57 Term (org.apache.lucene.index.Term)50 IndexSearcher (org.apache.lucene.search.IndexSearcher)47 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)45 DirectoryReader (org.apache.lucene.index.DirectoryReader)44 Bits (org.apache.lucene.util.Bits)44 IndexReader (org.apache.lucene.index.IndexReader)43 NumericDocValues (org.apache.lucene.index.NumericDocValues)40 ArrayList (java.util.ArrayList)39 Terms (org.apache.lucene.index.Terms)35 Weight (org.apache.lucene.search.Weight)35 DocIdSetIterator (org.apache.lucene.search.DocIdSetIterator)34 Scorer (org.apache.lucene.search.Scorer)34 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)33 TermsEnum (org.apache.lucene.index.TermsEnum)31