Use of org.apache.lucene.search.TopDocs in project lucene-solr by apache.
From class BaseGeoPointTestCase, method doRandomDistanceTest:
private void doRandomDistanceTest(int numDocs, int numQueries) throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig();
  // Else seeds may not reproduce:
  iwc.setMergeScheduler(new SerialMergeScheduler());
  // Use a small leaf size so the BKD tree gets some depth even with few documents:
  int pointsInLeaf = 2 + random().nextInt(4);
  iwc.setCodec(new FilterCodec("Lucene70", TestUtil.getDefaultCodec()) {

    @Override
    public PointsFormat pointsFormat() {
      return new PointsFormat() {

        @Override
        public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
          return new Lucene60PointsWriter(writeState, pointsInLeaf, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
        }

        @Override
        public PointsReader fieldsReader(SegmentReadState readState) throws IOException {
          return new Lucene60PointsReader(readState);
        }
      };
    }
  });
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
  for (int i = 0; i < numDocs; i++) {
    double latRaw = nextLatitude();
    double lonRaw = nextLongitude();
    // Pre-normalize up front, so we can just use the quantized values for testing and do simple exact comparisons:
    double lat = quantizeLat(latRaw);
    double lon = quantizeLon(lonRaw);
    Document doc = new Document();
    addPointToDoc("field", doc, lat, lon);
    doc.add(new StoredField("lat", lat));
    doc.add(new StoredField("lon", lon));
    writer.addDocument(doc);
  }
  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);
  for (int i = 0; i < numQueries; i++) {
    double lat = nextLatitude();
    double lon = nextLongitude();
    // Random radius up to 50,000 km -- often larger than any possible distance on Earth, so some queries match every document:
    double radius = 50000000D * random().nextDouble();
    // Compute the expected match set by brute force:
    BitSet expected = new BitSet();
    for (int doc = 0; doc < reader.maxDoc(); doc++) {
      double docLatitude = reader.document(doc).getField("lat").numericValue().doubleValue();
      double docLongitude = reader.document(doc).getField("lon").numericValue().doubleValue();
      double distance = SloppyMath.haversinMeters(lat, lon, docLatitude, docLongitude);
      if (distance <= radius) {
        expected.set(doc);
      }
    }
    // Ask for every document in index order so the TopDocs contains the complete match set:
    TopDocs topDocs = searcher.search(newDistanceQuery("field", lat, lon, radius), reader.maxDoc(), Sort.INDEXORDER);
    BitSet actual = new BitSet();
    for (ScoreDoc doc : topDocs.scoreDocs) {
      actual.set(doc.doc);
    }
    try {
      assertEquals(expected, actual);
    } catch (AssertionError e) {
      // On failure, dump every document's distance from the query point to ease debugging:
      System.out.println("center: (" + lat + "," + lon + "), radius=" + radius);
      for (int doc = 0; doc < reader.maxDoc(); doc++) {
        double docLatitude = reader.document(doc).getField("lat").numericValue().doubleValue();
        double docLongitude = reader.document(doc).getField("lon").numericValue().doubleValue();
        double distance = SloppyMath.haversinMeters(lat, lon, docLatitude, docLongitude);
        System.out.println("" + doc + ": (" + docLatitude + "," + docLongitude + "), distance=" + distance);
      }
      throw e;
    }
  }
  reader.close();
  writer.close();
  dir.close();
}
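BaseGeoPointTestCase is abstract; newDistanceQuery is a hook that concrete subclasses bind to a real query, e.g. LatLonPoint.newDistanceQuery in the LatLonPoint tests. A minimal standalone sketch of the same TopDocs pattern, assuming the LatLonPoint binding (the field name and coordinates are illustrative, not from the test):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.LatLonPoint;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class DistanceQuerySketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) {
      Document doc = new Document();
      // Index a single point (near Amsterdam):
      doc.add(new LatLonPoint("field", 52.37, 4.90));
      writer.addDocument(doc);
    }
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      // Match everything within 100 km of the query point:
      Query q = LatLonPoint.newDistanceQuery("field", 52.0, 5.0, 100_000);
      // As in the test: request maxDoc() hits in index order to get the complete match set.
      TopDocs topDocs = searcher.search(q, reader.maxDoc(), Sort.INDEXORDER);
      for (ScoreDoc sd : topDocs.scoreDocs) {
        System.out.println("matched doc " + sd.doc);
      }
    }
    dir.close();
  }
}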
Use of org.apache.lucene.search.TopDocs in project lucene-solr by apache.
From class BaseGeoPointTestCase, method testSmallSetDistanceDateline:
public void testSmallSetDistanceDateline() throws Exception {
  // A 120 km distance query centered a few km from the dateline matches 3 documents from the small set:
  TopDocs td = searchSmallSet(newDistanceQuery("point", 32.94823588839368, -179.9538113027811, 120000), 20);
  assertEquals(3, td.totalHits);
}
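The query point sits roughly 4 km from the ±180° antimeridian, so the 120 km radius wraps across it. A quick check with SloppyMath.haversinMeters (the same helper the random test above uses) shows why points on opposite sides of the dateline can still be near neighbors; a minimal sketch:

import org.apache.lucene.util.SloppyMath;

public class DatelineDistanceSketch {
  public static void main(String[] args) {
    // Two points 0.1 degrees apart in longitude, but on opposite sides of the dateline:
    double meters = SloppyMath.haversinMeters(32.95, -179.95, 32.95, 179.95);
    // Prints roughly 9 km -- well inside the 120 km radius, despite the sign flip in longitude.
    System.out.println("distance = " + meters + " m");
  }
}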
Use of org.apache.lucene.search.TopDocs in project lucene-solr by apache.
From class BaseDocValuesFormatTestCase, method testOneNumber:
public void testOneNumber() throws IOException {
  Directory directory = newDirectory();
  RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
  Document doc = new Document();
  String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
  String text = "This is the text to be indexed. " + longTerm;
  doc.add(newTextField("fieldname", text, Field.Store.YES));
  doc.add(new NumericDocValuesField("dv", 5));
  iwriter.addDocument(doc);
  iwriter.close();
  // Now search the index (DirectoryReader.open gives a read-only view):
  IndexReader ireader = DirectoryReader.open(directory);
  IndexSearcher isearcher = new IndexSearcher(ireader);
  assertEquals(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits);
  Query query = new TermQuery(new Term("fieldname", "text"));
  TopDocs hits = isearcher.search(query, 1);
  assertEquals(1, hits.totalHits);
  // Iterate through the results:
  for (int i = 0; i < hits.scoreDocs.length; i++) {
    Document hitDoc = isearcher.doc(hits.scoreDocs[i].doc);
    assertEquals(text, hitDoc.get("fieldname"));
    assert ireader.leaves().size() == 1;
    NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv");
    int docID = hits.scoreDocs[i].doc;
    // Doc values are an iterator in Lucene 7; advance() returns the doc it lands on:
    assertEquals(docID, dv.advance(docID));
    assertEquals(5, dv.longValue());
  }
  ireader.close();
  directory.close();
}
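The advance() call matters because, since Lucene 7, NumericDocValues is an iterator rather than random-access: it must be positioned on a document before longValue() may be read. A minimal helper sketch, assuming a LeafReader and using advanceExact (which also reports whether the document has a value at all); the field name "dv" mirrors the test:

import java.io.IOException;

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;

final class DocValuesReadSketch {
  // Returns the "dv" value for docID, or defaultValue if the document has none.
  static long readDocValue(LeafReader leaf, int docID, long defaultValue) throws IOException {
    NumericDocValues dv = leaf.getNumericDocValues("dv");
    if (dv != null && dv.advanceExact(docID)) {
      return dv.longValue();
    }
    return defaultValue;
  }
}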
Use of org.apache.lucene.search.TopDocs in project lucene-solr by apache.
From class TestLTRScoringQuery, method testLTRScoringQuery:
@Test
public void testLTRScoringQuery() throws IOException, ModelException {
  final Directory dir = newDirectory();
  final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  doc.add(newStringField("id", "0", Field.Store.YES));
  doc.add(newTextField("field", "wizard the the the the the oz", Field.Store.NO));
  doc.add(new FloatDocValuesField("final-score", 1.0f));
  w.addDocument(doc);
  doc = new Document();
  doc.add(newStringField("id", "1", Field.Store.YES));
  // 1 extra token, but wizard and oz are close:
  doc.add(newTextField("field", "wizard oz the the the the the the", Field.Store.NO));
  doc.add(new FloatDocValuesField("final-score", 2.0f));
  w.addDocument(doc);
  final IndexReader r = w.getReader();
  w.close();
  // Do ordinary BooleanQuery:
  final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
  bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD);
  bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD);
  final IndexSearcher searcher = getSearcher(r);
  // First run the standard query:
  final TopDocs hits = searcher.search(bqBuilder.build(), 10);
  assertEquals(2, hits.totalHits);
  assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
  assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
  // A model over features f0..f2, declared against the full feature store f0..f9:
  List<Feature> features = makeFeatures(new int[] { 0, 1, 2 });
  final List<Feature> allFeatures = makeFeatures(new int[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 });
  List<Normalizer> norms = new ArrayList<Normalizer>(Collections.nCopies(features.size(), IdentityNormalizer.INSTANCE));
  LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test", features, norms, "test", allFeatures, makeFeatureWeights(features));
  LTRScoringQuery.ModelWeight modelWeight = performQuery(hits, searcher, hits.scoreDocs[0].doc, new LTRScoringQuery(ltrScoringModel));
  assertEquals(3, modelWeight.getModelFeatureValuesNormalized().length);
  for (int i = 0; i < 3; i++) {
    assertEquals(i, modelWeight.getModelFeatureValuesNormalized()[i], 0.0001);
  }
  int[] posVals = new int[] { 0, 1, 2 };
  int pos = 0;
  for (LTRScoringQuery.FeatureInfo fInfo : modelWeight.getFeaturesInfo()) {
    if (fInfo == null) {
      continue;
    }
    assertEquals(posVals[pos], fInfo.getValue(), 0.0001);
    assertEquals("f" + posVals[pos], fInfo.getName());
    pos++;
  }
  // Features may be declared in any order relative to the store:
  final int[] mixPositions = new int[] { 8, 2, 4, 9, 0 };
  features = makeFeatures(mixPositions);
  norms = new ArrayList<Normalizer>(Collections.nCopies(features.size(), IdentityNormalizer.INSTANCE));
  ltrScoringModel = TestLinearModel.createLinearModel("test", features, norms, "test", allFeatures, makeFeatureWeights(features));
  modelWeight = performQuery(hits, searcher, hits.scoreDocs[0].doc, new LTRScoringQuery(ltrScoringModel));
  assertEquals(mixPositions.length, modelWeight.getModelFeatureWeights().length);
  for (int i = 0; i < mixPositions.length; i++) {
    assertEquals(mixPositions[i], modelWeight.getModelFeatureValuesNormalized()[i], 0.0001);
  }
  // A model with no features must fail to build:
  final ModelException expectedModelException = new ModelException("no features declared for model test");
  final int[] noPositions = new int[] {};
  features = makeFeatures(noPositions);
  norms = new ArrayList<Normalizer>(Collections.nCopies(features.size(), IdentityNormalizer.INSTANCE));
  try {
    ltrScoringModel = TestLinearModel.createLinearModel("test", features, norms, "test", allFeatures, makeFeatureWeights(features));
    fail("unexpectedly got here instead of catching " + expectedModelException);
    // Not reached: createLinearModel is expected to throw before this point.
    modelWeight = performQuery(hits, searcher, hits.scoreDocs[0].doc, new LTRScoringQuery(ltrScoringModel));
    assertEquals(0, modelWeight.getModelFeatureWeights().length);
  } catch (ModelException actualModelException) {
    assertEquals(expectedModelException.toString(), actualModelException.toString());
  }
  // Test normalizers: every feature value is replaced by the constant 42.42f.
  features = makeFilterFeatures(mixPositions);
  final Normalizer norm = new Normalizer() {

    @Override
    public float normalize(float value) {
      return 42.42f;
    }

    @Override
    public LinkedHashMap<String, Object> paramsToMap() {
      return null;
    }

    @Override
    protected void validate() throws NormalizerException {
    }
  };
  norms = new ArrayList<Normalizer>(Collections.nCopies(features.size(), norm));
  final LTRScoringModel normMeta = TestLinearModel.createLinearModel("test", features, norms, "test", allFeatures, makeFeatureWeights(features));
  modelWeight = performQuery(hits, searcher, hits.scoreDocs[0].doc, new LTRScoringQuery(normMeta));
  normMeta.normalizeFeaturesInPlace(modelWeight.getModelFeatureValuesNormalized());
  assertEquals(mixPositions.length, modelWeight.getModelFeatureWeights().length);
  for (int i = 0; i < mixPositions.length; i++) {
    assertEquals(42.42f, modelWeight.getModelFeatureValuesNormalized()[i], 0.0001);
  }
  r.close();
  dir.close();
}
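The anonymous Normalizer above maps every feature value to the constant 42.42f, which is what makes the final assertion trivial. For comparison, a hypothetical min-max style normalizer implementing the same three hooks; this is an illustrative sketch only (the min/max fields and message are invented, and it assumes NormalizerException lives alongside Normalizer in org.apache.solr.ltr.norm and accepts a message string):

import java.util.LinkedHashMap;

import org.apache.solr.ltr.norm.Normalizer;
import org.apache.solr.ltr.norm.NormalizerException;

public class RangeNormalizerSketch extends Normalizer {
  private float min = 0f;
  private float max = 1f;

  @Override
  public float normalize(float value) {
    // Linearly rescale value from [min, max] into [0, 1]:
    return (value - min) / (max - min);
  }

  @Override
  public LinkedHashMap<String, Object> paramsToMap() {
    final LinkedHashMap<String, Object> params = new LinkedHashMap<>();
    params.put("min", min);
    params.put("max", max);
    return params;
  }

  @Override
  protected void validate() throws NormalizerException {
    if (max <= min) {
      throw new NormalizerException("max must be greater than min");
    }
  }
}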
Use of org.apache.lucene.search.TopDocs in project lucene-solr by apache.
From class TestLTRReRankingPipeline, method testDifferentTopN:
@Ignore
@Test
public void testDifferentTopN() throws IOException {
  final Directory dir = newDirectory();
  final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  // Five docs: more "oz" occurrences rank higher in BM25, while final-score increases the other way:
  Document doc = new Document();
  doc.add(newStringField("id", "0", Field.Store.YES));
  doc.add(newTextField("field", "wizard oz oz oz oz oz", Field.Store.NO));
  doc.add(new FloatDocValuesField("final-score", 1.0f));
  w.addDocument(doc);
  doc = new Document();
  doc.add(newStringField("id", "1", Field.Store.YES));
  doc.add(newTextField("field", "wizard oz oz oz oz the", Field.Store.NO));
  doc.add(new FloatDocValuesField("final-score", 2.0f));
  w.addDocument(doc);
  doc = new Document();
  doc.add(newStringField("id", "2", Field.Store.YES));
  doc.add(newTextField("field", "wizard oz oz oz the the ", Field.Store.NO));
  doc.add(new FloatDocValuesField("final-score", 3.0f));
  w.addDocument(doc);
  doc = new Document();
  doc.add(newStringField("id", "3", Field.Store.YES));
  doc.add(newTextField("field", "wizard oz oz the the the the ", Field.Store.NO));
  doc.add(new FloatDocValuesField("final-score", 4.0f));
  w.addDocument(doc);
  doc = new Document();
  doc.add(newStringField("id", "4", Field.Store.YES));
  doc.add(newTextField("field", "wizard oz the the the the the the", Field.Store.NO));
  doc.add(new FloatDocValuesField("final-score", 5.0f));
  w.addDocument(doc);
  final IndexReader r = w.getReader();
  w.close();
  // Do ordinary BooleanQuery:
  final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
  bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD);
  bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD);
  final IndexSearcher searcher = getSearcher(r);
  // First run the standard query:
  TopDocs hits = searcher.search(bqBuilder.build(), 10);
  assertEquals(5, hits.totalHits);
  assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
  assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
  assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id"));
  assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id"));
  assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id"));
  final List<Feature> features = makeFieldValueFeatures(new int[] { 0, 1, 2 }, "final-score");
  final List<Normalizer> norms = new ArrayList<Normalizer>(Collections.nCopies(features.size(), IdentityNormalizer.INSTANCE));
  final List<Feature> allFeatures = makeFieldValueFeatures(new int[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, "final-score");
  final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test", features, norms, "test", allFeatures, null);
  final LTRRescorer rescorer = new LTRRescorer(new LTRScoringQuery(ltrScoringModel));
  // Rerank @ 0 should not change the order:
  hits = rescorer.rescore(searcher, hits, 0);
  assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
  assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
  assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id"));
  assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id"));
  assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id"));
  for (int topN = 1; topN <= 5; topN++) {
    log.info("rerank {} documents ", topN);
    hits = searcher.search(bqBuilder.build(), 10);
    // Hand only the first topN hits to the rescorer:
    final ScoreDoc[] slice = new ScoreDoc[topN];
    System.arraycopy(hits.scoreDocs, 0, slice, 0, topN);
    hits = new TopDocs(hits.totalHits, slice, hits.getMaxScore());
    hits = rescorer.rescore(searcher, hits, topN);
    // The model scores by the final-score field, so the sliced docs come back in reverse order:
    for (int i = topN - 1, j = 0; i >= 0; i--, j++) {
      log.info("doc {} in pos {}", searcher.doc(hits.scoreDocs[j].doc).get("id"), j);
      assertEquals(i, Integer.parseInt(searcher.doc(hits.scoreDocs[j].doc).get("id")));
      assertEquals(i + 1, hits.scoreDocs[j].score, 0.00001);
    }
  }
  r.close();
  dir.close();
}
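The slicing step inside the loop is the heart of the test: only the first topN hits are handed to the rescorer and the rest are dropped. Factored into a helper (the class and method names are illustrative; the TopDocs constructor and getMaxScore() are the 7.x APIs the test itself uses):

import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;

final class RerankSliceSketch {
  // Keep only the first topN hits, preserving the original totalHits and maxScore.
  static TopDocs truncate(TopDocs hits, int topN) {
    if (topN >= hits.scoreDocs.length) {
      return hits;
    }
    final ScoreDoc[] slice = new ScoreDoc[topN];
    System.arraycopy(hits.scoreDocs, 0, slice, 0, topN);
    return new TopDocs(hits.totalHits, slice, hits.getMaxScore());
  }
}

With this helper, the loop body above would read: hits = rescorer.rescore(searcher, RerankSliceSketch.truncate(hits, topN), topN).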