Search in sources :

Example 91 with TopDocs

use of org.apache.lucene.search.TopDocs in project lucene-solr by apache.

the class BaseGeoPointTestCase method doRandomDistanceTest.

private void doRandomDistanceTest(int numDocs, int numQueries) throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig();
    // Else seeds may not reproduce:
    iwc.setMergeScheduler(new SerialMergeScheduler());
    int pointsInLeaf = 2 + random().nextInt(4);
    iwc.setCodec(new FilterCodec("Lucene70", TestUtil.getDefaultCodec()) {

        @Override
        public PointsFormat pointsFormat() {
            return new PointsFormat() {

                @Override
                public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
                    return new Lucene60PointsWriter(writeState, pointsInLeaf, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
                }

                @Override
                public PointsReader fieldsReader(SegmentReadState readState) throws IOException {
                    return new Lucene60PointsReader(readState);
                }
            };
        }
    });
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    for (int i = 0; i < numDocs; i++) {
        double latRaw = nextLatitude();
        double lonRaw = nextLongitude();
        // pre-normalize up front, so we can just use quantized value for testing and do simple exact comparisons
        double lat = quantizeLat(latRaw);
        double lon = quantizeLon(lonRaw);
        Document doc = new Document();
        addPointToDoc("field", doc, lat, lon);
        doc.add(new StoredField("lat", lat));
        doc.add(new StoredField("lon", lon));
        writer.addDocument(doc);
    }
    IndexReader reader = writer.getReader();
    IndexSearcher searcher = newSearcher(reader);
    for (int i = 0; i < numQueries; i++) {
        double lat = nextLatitude();
        double lon = nextLongitude();
        double radius = 50000000D * random().nextDouble();
        BitSet expected = new BitSet();
        for (int doc = 0; doc < reader.maxDoc(); doc++) {
            double docLatitude = reader.document(doc).getField("lat").numericValue().doubleValue();
            double docLongitude = reader.document(doc).getField("lon").numericValue().doubleValue();
            double distance = SloppyMath.haversinMeters(lat, lon, docLatitude, docLongitude);
            if (distance <= radius) {
                expected.set(doc);
            }
        }
        TopDocs topDocs = searcher.search(newDistanceQuery("field", lat, lon, radius), reader.maxDoc(), Sort.INDEXORDER);
        BitSet actual = new BitSet();
        for (ScoreDoc doc : topDocs.scoreDocs) {
            actual.set(doc.doc);
        }
        try {
            assertEquals(expected, actual);
        } catch (AssertionError e) {
            System.out.println("center: (" + lat + "," + lon + "), radius=" + radius);
            for (int doc = 0; doc < reader.maxDoc(); doc++) {
                double docLatitude = reader.document(doc).getField("lat").numericValue().doubleValue();
                double docLongitude = reader.document(doc).getField("lon").numericValue().doubleValue();
                double distance = SloppyMath.haversinMeters(lat, lon, docLatitude, docLongitude);
                System.out.println("" + doc + ": (" + docLatitude + "," + docLongitude + "), distance=" + distance);
            }
            throw e;
        }
    }
    reader.close();
    writer.close();
    dir.close();
}
Also used : Lucene60PointsWriter(org.apache.lucene.codecs.lucene60.Lucene60PointsWriter) PointsWriter(org.apache.lucene.codecs.PointsWriter) IndexSearcher(org.apache.lucene.search.IndexSearcher) Lucene60PointsReader(org.apache.lucene.codecs.lucene60.Lucene60PointsReader) SegmentReadState(org.apache.lucene.index.SegmentReadState) FixedBitSet(org.apache.lucene.util.FixedBitSet) BitSet(java.util.BitSet) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) FilterCodec(org.apache.lucene.codecs.FilterCodec) ScoreDoc(org.apache.lucene.search.ScoreDoc) SerialMergeScheduler(org.apache.lucene.index.SerialMergeScheduler) TopDocs(org.apache.lucene.search.TopDocs) StoredField(org.apache.lucene.document.StoredField) PointsFormat(org.apache.lucene.codecs.PointsFormat) PointsReader(org.apache.lucene.codecs.PointsReader) Lucene60PointsReader(org.apache.lucene.codecs.lucene60.Lucene60PointsReader) IndexReader(org.apache.lucene.index.IndexReader) SegmentWriteState(org.apache.lucene.index.SegmentWriteState) Lucene60PointsWriter(org.apache.lucene.codecs.lucene60.Lucene60PointsWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 92 with TopDocs

use of org.apache.lucene.search.TopDocs in project lucene-solr by apache.

the class BaseGeoPointTestCase method testSmallSetDistanceDateline.

public void testSmallSetDistanceDateline() throws Exception {
    TopDocs td = searchSmallSet(newDistanceQuery("point", 32.94823588839368, -179.9538113027811, 120000), 20);
    assertEquals(3, td.totalHits);
}
Also used : TopDocs(org.apache.lucene.search.TopDocs)

Example 93 with TopDocs

use of org.apache.lucene.search.TopDocs in project lucene-solr by apache.

the class BaseDocValuesFormatTestCase method testOneNumber.

public void testOneNumber() throws IOException {
    Directory directory = newDirectory();
    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
    Document doc = new Document();
    String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
    String text = "This is the text to be indexed. " + longTerm;
    doc.add(newTextField("fieldname", text, Field.Store.YES));
    doc.add(new NumericDocValuesField("dv", 5));
    iwriter.addDocument(doc);
    iwriter.close();
    // Now search the index:
    // read-only=true
    IndexReader ireader = DirectoryReader.open(directory);
    IndexSearcher isearcher = new IndexSearcher(ireader);
    assertEquals(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits);
    Query query = new TermQuery(new Term("fieldname", "text"));
    TopDocs hits = isearcher.search(query, 1);
    assertEquals(1, hits.totalHits);
    // Iterate through the results:
    for (int i = 0; i < hits.scoreDocs.length; i++) {
        Document hitDoc = isearcher.doc(hits.scoreDocs[i].doc);
        assertEquals(text, hitDoc.get("fieldname"));
        assert ireader.leaves().size() == 1;
        NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv");
        int docID = hits.scoreDocs[i].doc;
        assertEquals(docID, dv.advance(docID));
        assertEquals(5, dv.longValue());
    }
    ireader.close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) Document(org.apache.lucene.document.Document) TopDocs(org.apache.lucene.search.TopDocs) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) Directory(org.apache.lucene.store.Directory)

Example 94 with TopDocs

use of org.apache.lucene.search.TopDocs in project lucene-solr by apache.

the class TestLTRScoringQuery method testLTRScoringQuery.

@Test
public void testLTRScoringQuery() throws IOException, ModelException {
    final Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    doc.add(newStringField("id", "0", Field.Store.YES));
    doc.add(newTextField("field", "wizard the the the the the oz", Field.Store.NO));
    doc.add(new FloatDocValuesField("final-score", 1.0f));
    w.addDocument(doc);
    doc = new Document();
    doc.add(newStringField("id", "1", Field.Store.YES));
    // 1 extra token, but wizard and oz are close;
    doc.add(newTextField("field", "wizard oz the the the the the the", Field.Store.NO));
    doc.add(new FloatDocValuesField("final-score", 2.0f));
    w.addDocument(doc);
    final IndexReader r = w.getReader();
    w.close();
    // Do ordinary BooleanQuery:
    final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
    bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD);
    bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD);
    final IndexSearcher searcher = getSearcher(r);
    // first run the standard query
    final TopDocs hits = searcher.search(bqBuilder.build(), 10);
    assertEquals(2, hits.totalHits);
    assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
    assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
    List<Feature> features = makeFeatures(new int[] { 0, 1, 2 });
    final List<Feature> allFeatures = makeFeatures(new int[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 });
    List<Normalizer> norms = new ArrayList<Normalizer>(Collections.nCopies(features.size(), IdentityNormalizer.INSTANCE));
    LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test", features, norms, "test", allFeatures, makeFeatureWeights(features));
    LTRScoringQuery.ModelWeight modelWeight = performQuery(hits, searcher, hits.scoreDocs[0].doc, new LTRScoringQuery(ltrScoringModel));
    assertEquals(3, modelWeight.getModelFeatureValuesNormalized().length);
    for (int i = 0; i < 3; i++) {
        assertEquals(i, modelWeight.getModelFeatureValuesNormalized()[i], 0.0001);
    }
    int[] posVals = new int[] { 0, 1, 2 };
    int pos = 0;
    for (LTRScoringQuery.FeatureInfo fInfo : modelWeight.getFeaturesInfo()) {
        if (fInfo == null) {
            continue;
        }
        assertEquals(posVals[pos], fInfo.getValue(), 0.0001);
        assertEquals("f" + posVals[pos], fInfo.getName());
        pos++;
    }
    final int[] mixPositions = new int[] { 8, 2, 4, 9, 0 };
    features = makeFeatures(mixPositions);
    norms = new ArrayList<Normalizer>(Collections.nCopies(features.size(), IdentityNormalizer.INSTANCE));
    ltrScoringModel = TestLinearModel.createLinearModel("test", features, norms, "test", allFeatures, makeFeatureWeights(features));
    modelWeight = performQuery(hits, searcher, hits.scoreDocs[0].doc, new LTRScoringQuery(ltrScoringModel));
    assertEquals(mixPositions.length, modelWeight.getModelFeatureWeights().length);
    for (int i = 0; i < mixPositions.length; i++) {
        assertEquals(mixPositions[i], modelWeight.getModelFeatureValuesNormalized()[i], 0.0001);
    }
    final ModelException expectedModelException = new ModelException("no features declared for model test");
    final int[] noPositions = new int[] {};
    features = makeFeatures(noPositions);
    norms = new ArrayList<Normalizer>(Collections.nCopies(features.size(), IdentityNormalizer.INSTANCE));
    try {
        ltrScoringModel = TestLinearModel.createLinearModel("test", features, norms, "test", allFeatures, makeFeatureWeights(features));
        fail("unexpectedly got here instead of catching " + expectedModelException);
        modelWeight = performQuery(hits, searcher, hits.scoreDocs[0].doc, new LTRScoringQuery(ltrScoringModel));
        assertEquals(0, modelWeight.getModelFeatureWeights().length);
    } catch (ModelException actualModelException) {
        assertEquals(expectedModelException.toString(), actualModelException.toString());
    }
    // test normalizers
    features = makeFilterFeatures(mixPositions);
    final Normalizer norm = new Normalizer() {

        @Override
        public float normalize(float value) {
            return 42.42f;
        }

        @Override
        public LinkedHashMap<String, Object> paramsToMap() {
            return null;
        }

        @Override
        protected void validate() throws NormalizerException {
        }
    };
    norms = new ArrayList<Normalizer>(Collections.nCopies(features.size(), norm));
    final LTRScoringModel normMeta = TestLinearModel.createLinearModel("test", features, norms, "test", allFeatures, makeFeatureWeights(features));
    modelWeight = performQuery(hits, searcher, hits.scoreDocs[0].doc, new LTRScoringQuery(normMeta));
    normMeta.normalizeFeaturesInPlace(modelWeight.getModelFeatureValuesNormalized());
    assertEquals(mixPositions.length, modelWeight.getModelFeatureWeights().length);
    for (int i = 0; i < mixPositions.length; i++) {
        assertEquals(42.42f, modelWeight.getModelFeatureValuesNormalized()[i], 0.0001);
    }
    r.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) ArrayList(java.util.ArrayList) FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) Document(org.apache.lucene.document.Document) ValueFeature(org.apache.solr.ltr.feature.ValueFeature) Feature(org.apache.solr.ltr.feature.Feature) TopDocs(org.apache.lucene.search.TopDocs) Directory(org.apache.lucene.store.Directory) TermQuery(org.apache.lucene.search.TermQuery) ModelException(org.apache.solr.ltr.model.ModelException) Normalizer(org.apache.solr.ltr.norm.Normalizer) IdentityNormalizer(org.apache.solr.ltr.norm.IdentityNormalizer) Term(org.apache.lucene.index.Term) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) LTRScoringModel(org.apache.solr.ltr.model.LTRScoringModel) Test(org.junit.Test)

Example 95 with TopDocs

use of org.apache.lucene.search.TopDocs in project lucene-solr by apache.

the class TestLTRReRankingPipeline method testDifferentTopN.

@Ignore
@Test
public void testDifferentTopN() throws IOException {
    final Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    doc.add(newStringField("id", "0", Field.Store.YES));
    doc.add(newTextField("field", "wizard oz oz oz oz oz", Field.Store.NO));
    doc.add(new FloatDocValuesField("final-score", 1.0f));
    w.addDocument(doc);
    doc = new Document();
    doc.add(newStringField("id", "1", Field.Store.YES));
    doc.add(newTextField("field", "wizard oz oz oz oz the", Field.Store.NO));
    doc.add(new FloatDocValuesField("final-score", 2.0f));
    w.addDocument(doc);
    doc = new Document();
    doc.add(newStringField("id", "2", Field.Store.YES));
    doc.add(newTextField("field", "wizard oz oz oz the the ", Field.Store.NO));
    doc.add(new FloatDocValuesField("final-score", 3.0f));
    w.addDocument(doc);
    doc = new Document();
    doc.add(newStringField("id", "3", Field.Store.YES));
    doc.add(newTextField("field", "wizard oz oz the the the the ", Field.Store.NO));
    doc.add(new FloatDocValuesField("final-score", 4.0f));
    w.addDocument(doc);
    doc = new Document();
    doc.add(newStringField("id", "4", Field.Store.YES));
    doc.add(newTextField("field", "wizard oz the the the the the the", Field.Store.NO));
    doc.add(new FloatDocValuesField("final-score", 5.0f));
    w.addDocument(doc);
    final IndexReader r = w.getReader();
    w.close();
    // Do ordinary BooleanQuery:
    final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
    bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD);
    bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD);
    final IndexSearcher searcher = getSearcher(r);
    // first run the standard query
    TopDocs hits = searcher.search(bqBuilder.build(), 10);
    assertEquals(5, hits.totalHits);
    assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
    assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
    assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id"));
    assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id"));
    assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id"));
    final List<Feature> features = makeFieldValueFeatures(new int[] { 0, 1, 2 }, "final-score");
    final List<Normalizer> norms = new ArrayList<Normalizer>(Collections.nCopies(features.size(), IdentityNormalizer.INSTANCE));
    final List<Feature> allFeatures = makeFieldValueFeatures(new int[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, "final-score");
    final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test", features, norms, "test", allFeatures, null);
    final LTRRescorer rescorer = new LTRRescorer(new LTRScoringQuery(ltrScoringModel));
    // rerank @ 0 should not change the order
    hits = rescorer.rescore(searcher, hits, 0);
    assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
    assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
    assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id"));
    assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id"));
    assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id"));
    for (int topN = 1; topN <= 5; topN++) {
        log.info("rerank {} documents ", topN);
        hits = searcher.search(bqBuilder.build(), 10);
        final ScoreDoc[] slice = new ScoreDoc[topN];
        System.arraycopy(hits.scoreDocs, 0, slice, 0, topN);
        hits = new TopDocs(hits.totalHits, slice, hits.getMaxScore());
        hits = rescorer.rescore(searcher, hits, topN);
        for (int i = topN - 1, j = 0; i >= 0; i--, j++) {
            log.info("doc {} in pos {}", searcher.doc(hits.scoreDocs[j].doc).get("id"), j);
            assertEquals(i, Integer.parseInt(searcher.doc(hits.scoreDocs[j].doc).get("id")));
            assertEquals(i + 1, hits.scoreDocs[j].score, 0.00001);
        }
    }
    r.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) Normalizer(org.apache.solr.ltr.norm.Normalizer) IdentityNormalizer(org.apache.solr.ltr.norm.IdentityNormalizer) ArrayList(java.util.ArrayList) FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) FieldValueFeature(org.apache.solr.ltr.feature.FieldValueFeature) Feature(org.apache.solr.ltr.feature.Feature) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) LTRScoringModel(org.apache.solr.ltr.model.LTRScoringModel) Directory(org.apache.lucene.store.Directory) Ignore(org.junit.Ignore) Test(org.junit.Test)

Aggregations

TopDocs (org.apache.lucene.search.TopDocs)496 IndexSearcher (org.apache.lucene.search.IndexSearcher)302 Document (org.apache.lucene.document.Document)275 TermQuery (org.apache.lucene.search.TermQuery)189 IndexReader (org.apache.lucene.index.IndexReader)187 Term (org.apache.lucene.index.Term)174 Directory (org.apache.lucene.store.Directory)174 Query (org.apache.lucene.search.Query)172 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)144 BooleanQuery (org.apache.lucene.search.BooleanQuery)127 ScoreDoc (org.apache.lucene.search.ScoreDoc)127 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)120 Sort (org.apache.lucene.search.Sort)94 Field (org.apache.lucene.document.Field)85 SortField (org.apache.lucene.search.SortField)74 IOException (java.io.IOException)58 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)56 TextField (org.apache.lucene.document.TextField)47 PhraseQuery (org.apache.lucene.search.PhraseQuery)46 PrefixQuery (org.apache.lucene.search.PrefixQuery)45