use of org.apache.lucene.search.ScoreDoc in project lucene-solr by apache.
the class BaseGeoPointTestCase method doRandomDistanceTest.
private void doRandomDistanceTest(int numDocs, int numQueries) throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig();
// Else seeds may not reproduce:
iwc.setMergeScheduler(new SerialMergeScheduler());
int pointsInLeaf = 2 + random().nextInt(4);
iwc.setCodec(new FilterCodec("Lucene70", TestUtil.getDefaultCodec()) {
@Override
public PointsFormat pointsFormat() {
return new PointsFormat() {
@Override
public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
return new Lucene60PointsWriter(writeState, pointsInLeaf, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
}
@Override
public PointsReader fieldsReader(SegmentReadState readState) throws IOException {
return new Lucene60PointsReader(readState);
}
};
}
});
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
for (int i = 0; i < numDocs; i++) {
double latRaw = nextLatitude();
double lonRaw = nextLongitude();
// pre-normalize up front, so we can just use quantized value for testing and do simple exact comparisons
double lat = quantizeLat(latRaw);
double lon = quantizeLon(lonRaw);
Document doc = new Document();
addPointToDoc("field", doc, lat, lon);
doc.add(new StoredField("lat", lat));
doc.add(new StoredField("lon", lon));
writer.addDocument(doc);
}
IndexReader reader = writer.getReader();
IndexSearcher searcher = newSearcher(reader);
for (int i = 0; i < numQueries; i++) {
double lat = nextLatitude();
double lon = nextLongitude();
double radius = 50000000D * random().nextDouble();
BitSet expected = new BitSet();
for (int doc = 0; doc < reader.maxDoc(); doc++) {
double docLatitude = reader.document(doc).getField("lat").numericValue().doubleValue();
double docLongitude = reader.document(doc).getField("lon").numericValue().doubleValue();
double distance = SloppyMath.haversinMeters(lat, lon, docLatitude, docLongitude);
if (distance <= radius) {
expected.set(doc);
}
}
TopDocs topDocs = searcher.search(newDistanceQuery("field", lat, lon, radius), reader.maxDoc(), Sort.INDEXORDER);
BitSet actual = new BitSet();
for (ScoreDoc doc : topDocs.scoreDocs) {
actual.set(doc.doc);
}
try {
assertEquals(expected, actual);
} catch (AssertionError e) {
System.out.println("center: (" + lat + "," + lon + "), radius=" + radius);
for (int doc = 0; doc < reader.maxDoc(); doc++) {
double docLatitude = reader.document(doc).getField("lat").numericValue().doubleValue();
double docLongitude = reader.document(doc).getField("lon").numericValue().doubleValue();
double distance = SloppyMath.haversinMeters(lat, lon, docLatitude, docLongitude);
System.out.println("" + doc + ": (" + docLatitude + "," + docLongitude + "), distance=" + distance);
}
throw e;
}
}
reader.close();
writer.close();
dir.close();
}
use of org.apache.lucene.search.ScoreDoc in project lucene-solr by apache.
the class LTRRescorer method heapAdjust.
private void heapAdjust(ScoreDoc[] hits, int size, int root) {
final ScoreDoc doc = hits[root];
final float score = doc.score;
int i = root;
while (i <= ((size >> 1) - 1)) {
final int lchild = (i << 1) + 1;
final ScoreDoc ldoc = hits[lchild];
final float lscore = ldoc.score;
float rscore = Float.MAX_VALUE;
final int rchild = (i << 1) + 2;
ScoreDoc rdoc = null;
if (rchild < size) {
rdoc = hits[rchild];
rscore = rdoc.score;
}
if (lscore < score) {
if (rscore < lscore) {
hits[i] = rdoc;
hits[rchild] = doc;
i = rchild;
} else {
hits[i] = ldoc;
hits[lchild] = doc;
i = lchild;
}
} else if (rscore < score) {
hits[i] = rdoc;
hits[rchild] = doc;
i = rchild;
} else {
return;
}
}
}
use of org.apache.lucene.search.ScoreDoc in project lucene-solr by apache.
the class TestLTRReRankingPipeline method testDifferentTopN.
@Ignore
@Test
public void testDifferentTopN() throws IOException {
final Directory dir = newDirectory();
final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(newStringField("id", "0", Field.Store.YES));
doc.add(newTextField("field", "wizard oz oz oz oz oz", Field.Store.NO));
doc.add(new FloatDocValuesField("final-score", 1.0f));
w.addDocument(doc);
doc = new Document();
doc.add(newStringField("id", "1", Field.Store.YES));
doc.add(newTextField("field", "wizard oz oz oz oz the", Field.Store.NO));
doc.add(new FloatDocValuesField("final-score", 2.0f));
w.addDocument(doc);
doc = new Document();
doc.add(newStringField("id", "2", Field.Store.YES));
doc.add(newTextField("field", "wizard oz oz oz the the ", Field.Store.NO));
doc.add(new FloatDocValuesField("final-score", 3.0f));
w.addDocument(doc);
doc = new Document();
doc.add(newStringField("id", "3", Field.Store.YES));
doc.add(newTextField("field", "wizard oz oz the the the the ", Field.Store.NO));
doc.add(new FloatDocValuesField("final-score", 4.0f));
w.addDocument(doc);
doc = new Document();
doc.add(newStringField("id", "4", Field.Store.YES));
doc.add(newTextField("field", "wizard oz the the the the the the", Field.Store.NO));
doc.add(new FloatDocValuesField("final-score", 5.0f));
w.addDocument(doc);
final IndexReader r = w.getReader();
w.close();
// Do ordinary BooleanQuery:
final BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
bqBuilder.add(new TermQuery(new Term("field", "wizard")), BooleanClause.Occur.SHOULD);
bqBuilder.add(new TermQuery(new Term("field", "oz")), BooleanClause.Occur.SHOULD);
final IndexSearcher searcher = getSearcher(r);
// first run the standard query
TopDocs hits = searcher.search(bqBuilder.build(), 10);
assertEquals(5, hits.totalHits);
assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id"));
assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id"));
assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id"));
final List<Feature> features = makeFieldValueFeatures(new int[] { 0, 1, 2 }, "final-score");
final List<Normalizer> norms = new ArrayList<Normalizer>(Collections.nCopies(features.size(), IdentityNormalizer.INSTANCE));
final List<Feature> allFeatures = makeFieldValueFeatures(new int[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, "final-score");
final LTRScoringModel ltrScoringModel = TestLinearModel.createLinearModel("test", features, norms, "test", allFeatures, null);
final LTRRescorer rescorer = new LTRRescorer(new LTRScoringQuery(ltrScoringModel));
// rerank @ 0 should not change the order
hits = rescorer.rescore(searcher, hits, 0);
assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));
assertEquals("2", searcher.doc(hits.scoreDocs[2].doc).get("id"));
assertEquals("3", searcher.doc(hits.scoreDocs[3].doc).get("id"));
assertEquals("4", searcher.doc(hits.scoreDocs[4].doc).get("id"));
for (int topN = 1; topN <= 5; topN++) {
log.info("rerank {} documents ", topN);
hits = searcher.search(bqBuilder.build(), 10);
final ScoreDoc[] slice = new ScoreDoc[topN];
System.arraycopy(hits.scoreDocs, 0, slice, 0, topN);
hits = new TopDocs(hits.totalHits, slice, hits.getMaxScore());
hits = rescorer.rescore(searcher, hits, topN);
for (int i = topN - 1, j = 0; i >= 0; i--, j++) {
log.info("doc {} in pos {}", searcher.doc(hits.scoreDocs[j].doc).get("id"), j);
assertEquals(i, Integer.parseInt(searcher.doc(hits.scoreDocs[j].doc).get("id")));
assertEquals(i + 1, hits.scoreDocs[j].score, 0.00001);
}
}
r.close();
dir.close();
}
use of org.apache.lucene.search.ScoreDoc in project lucene-solr by apache.
the class StoredFieldsShardRequestFactory method mapShardToDocs.
private void mapShardToDocs(HashMap<String, Set<ShardDoc>> shardMap, ScoreDoc[] scoreDocs) {
for (ScoreDoc scoreDoc : scoreDocs) {
ShardDoc solrDoc = (ShardDoc) scoreDoc;
Set<ShardDoc> shardDocs = shardMap.get(solrDoc.shard);
if (shardDocs == null) {
shardMap.put(solrDoc.shard, shardDocs = new HashSet<>());
}
shardDocs.add(solrDoc);
}
}
use of org.apache.lucene.search.ScoreDoc in project lucene-solr by apache.
the class TestDrillSideways method testRandom.
public void testRandom() throws Exception {
while (aChance == 0.0) {
aChance = random().nextDouble();
}
while (bChance == 0.0) {
bChance = random().nextDouble();
}
while (cChance == 0.0) {
cChance = random().nextDouble();
}
//aChance = .01;
//bChance = 0.5;
//cChance = 1.0;
double sum = aChance + bChance + cChance;
aChance /= sum;
bChance /= sum;
cChance /= sum;
int numDims = TestUtil.nextInt(random(), 2, 5);
//int numDims = 3;
int numDocs = atLeast(3000);
//int numDocs = 20;
if (VERBOSE) {
System.out.println("numDims=" + numDims + " numDocs=" + numDocs + " aChance=" + aChance + " bChance=" + bChance + " cChance=" + cChance);
}
String[][] dimValues = new String[numDims][];
int valueCount = 2;
for (int dim = 0; dim < numDims; dim++) {
Set<String> values = new HashSet<>();
while (values.size() < valueCount) {
String s = TestUtil.randomRealisticUnicodeString(random());
//String s = _TestUtil.randomString(random());
if (s.length() > 0) {
values.add(s);
}
}
dimValues[dim] = values.toArray(new String[values.size()]);
valueCount *= 2;
}
List<Doc> docs = new ArrayList<>();
for (int i = 0; i < numDocs; i++) {
Doc doc = new Doc();
doc.id = "" + i;
doc.contentToken = randomContentToken(false);
doc.dims = new int[numDims];
doc.dims2 = new int[numDims];
for (int dim = 0; dim < numDims; dim++) {
if (random().nextInt(5) == 3) {
// This doc is missing this dim:
doc.dims[dim] = -1;
} else if (dimValues[dim].length <= 4) {
int dimUpto = 0;
doc.dims[dim] = dimValues[dim].length - 1;
while (dimUpto < dimValues[dim].length) {
if (random().nextBoolean()) {
doc.dims[dim] = dimUpto;
break;
}
dimUpto++;
}
} else {
doc.dims[dim] = random().nextInt(dimValues[dim].length);
}
if (random().nextInt(5) == 3) {
// 2nd value:
doc.dims2[dim] = random().nextInt(dimValues[dim].length);
} else {
doc.dims2[dim] = -1;
}
}
docs.add(doc);
}
Directory d = newDirectory();
Directory td = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
iwc.setInfoStream(InfoStream.NO_OUTPUT);
RandomIndexWriter w = new RandomIndexWriter(random(), d, iwc);
DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(td, IndexWriterConfig.OpenMode.CREATE);
FacetsConfig config = new FacetsConfig();
for (int i = 0; i < numDims; i++) {
config.setMultiValued("dim" + i, true);
}
boolean doUseDV = random().nextBoolean();
for (Doc rawDoc : docs) {
Document doc = new Document();
doc.add(newStringField("id", rawDoc.id, Field.Store.YES));
doc.add(new SortedDocValuesField("id", new BytesRef(rawDoc.id)));
doc.add(newStringField("content", rawDoc.contentToken, Field.Store.NO));
if (VERBOSE) {
System.out.println(" doc id=" + rawDoc.id + " token=" + rawDoc.contentToken);
}
for (int dim = 0; dim < numDims; dim++) {
int dimValue = rawDoc.dims[dim];
if (dimValue != -1) {
if (doUseDV) {
doc.add(new SortedSetDocValuesFacetField("dim" + dim, dimValues[dim][dimValue]));
} else {
doc.add(new FacetField("dim" + dim, dimValues[dim][dimValue]));
}
doc.add(new StringField("dim" + dim, dimValues[dim][dimValue], Field.Store.YES));
if (VERBOSE) {
System.out.println(" dim" + dim + "=" + new BytesRef(dimValues[dim][dimValue]));
}
}
int dimValue2 = rawDoc.dims2[dim];
if (dimValue2 != -1) {
if (doUseDV) {
doc.add(new SortedSetDocValuesFacetField("dim" + dim, dimValues[dim][dimValue2]));
} else {
doc.add(new FacetField("dim" + dim, dimValues[dim][dimValue2]));
}
doc.add(new StringField("dim" + dim, dimValues[dim][dimValue2], Field.Store.YES));
if (VERBOSE) {
System.out.println(" dim" + dim + "=" + new BytesRef(dimValues[dim][dimValue2]));
}
}
}
w.addDocument(config.build(tw, doc));
}
if (random().nextBoolean()) {
// Randomly delete a few docs:
int numDel = TestUtil.nextInt(random(), 1, (int) (numDocs * 0.05));
if (VERBOSE) {
System.out.println("delete " + numDel);
}
int delCount = 0;
while (delCount < numDel) {
Doc doc = docs.get(random().nextInt(docs.size()));
if (!doc.deleted) {
if (VERBOSE) {
System.out.println(" delete id=" + doc.id);
}
doc.deleted = true;
w.deleteDocuments(new Term("id", doc.id));
delCount++;
}
}
}
if (random().nextBoolean()) {
if (VERBOSE) {
System.out.println("TEST: forceMerge(1)...");
}
w.forceMerge(1);
}
IndexReader r = w.getReader();
final SortedSetDocValuesReaderState sortedSetDVState;
IndexSearcher s = newSearcher(r);
if (doUseDV) {
sortedSetDVState = new DefaultSortedSetDocValuesReaderState(s.getIndexReader());
} else {
sortedSetDVState = null;
}
if (VERBOSE) {
System.out.println("r.numDocs() = " + r.numDocs());
}
// NRT open
TaxonomyReader tr = new DirectoryTaxonomyReader(tw);
int numIters = atLeast(10);
for (int iter = 0; iter < numIters; iter++) {
String contentToken = random().nextInt(30) == 17 ? null : randomContentToken(true);
int numDrillDown = TestUtil.nextInt(random(), 1, Math.min(4, numDims));
if (VERBOSE) {
System.out.println("\nTEST: iter=" + iter + " baseQuery=" + contentToken + " numDrillDown=" + numDrillDown + " useSortedSetDV=" + doUseDV);
}
String[][] drillDowns = new String[numDims][];
int count = 0;
boolean anyMultiValuedDrillDowns = false;
while (count < numDrillDown) {
int dim = random().nextInt(numDims);
if (drillDowns[dim] == null) {
if (random().nextBoolean()) {
// Drill down on one value:
drillDowns[dim] = new String[] { dimValues[dim][random().nextInt(dimValues[dim].length)] };
} else {
int orCount = TestUtil.nextInt(random(), 1, Math.min(5, dimValues[dim].length));
drillDowns[dim] = new String[orCount];
anyMultiValuedDrillDowns |= orCount > 1;
for (int i = 0; i < orCount; i++) {
while (true) {
String value = dimValues[dim][random().nextInt(dimValues[dim].length)];
for (int j = 0; j < i; j++) {
if (value.equals(drillDowns[dim][j])) {
value = null;
break;
}
}
if (value != null) {
drillDowns[dim][i] = value;
break;
}
}
}
}
if (VERBOSE) {
BytesRef[] values = new BytesRef[drillDowns[dim].length];
for (int i = 0; i < values.length; i++) {
values[i] = new BytesRef(drillDowns[dim][i]);
}
System.out.println(" dim" + dim + "=" + Arrays.toString(values));
}
count++;
}
}
Query baseQuery;
if (contentToken == null) {
baseQuery = new MatchAllDocsQuery();
} else {
baseQuery = new TermQuery(new Term("content", contentToken));
}
DrillDownQuery ddq = new DrillDownQuery(config, baseQuery);
for (int dim = 0; dim < numDims; dim++) {
if (drillDowns[dim] != null) {
for (String value : drillDowns[dim]) {
ddq.add("dim" + dim, value);
}
}
}
Query filter;
if (random().nextInt(7) == 6) {
if (VERBOSE) {
System.out.println(" only-even filter");
}
filter = new Query() {
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
return new ConstantScoreWeight(this, boost) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
DocIdSetIterator approximation = DocIdSetIterator.all(context.reader().maxDoc());
return new ConstantScoreScorer(this, score(), new TwoPhaseIterator(approximation) {
@Override
public boolean matches() throws IOException {
int docID = approximation.docID();
return (Integer.parseInt(context.reader().document(docID).get("id")) & 1) == 0;
}
@Override
public float matchCost() {
return 1000f;
}
});
}
};
}
@Override
public String toString(String field) {
return "drillSidewaysTestFilter";
}
@Override
public boolean equals(Object o) {
return o == this;
}
@Override
public int hashCode() {
return System.identityHashCode(this);
}
};
} else {
filter = null;
}
// Verify docs are always collected in order. If we
// had an AssertingScorer it could catch it when
// Weight.scoresDocsOutOfOrder lies!:
getNewDrillSideways(s, config, tr).search(ddq, new SimpleCollector() {
int lastDocID;
@Override
public void collect(int doc) {
assert doc > lastDocID;
lastDocID = doc;
}
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
lastDocID = -1;
}
@Override
public boolean needsScores() {
return false;
}
});
// subScorers are on the same docID:
if (!anyMultiValuedDrillDowns) {
// Can only do this test when there are no OR'd
// drill-down values, because in that case it's
// easily possible for one of the DD terms to be on
// a future docID:
getNewDrillSidewaysScoreSubdocsAtOnce(s, config, tr).search(ddq, new AssertingSubDocsAtOnceCollector());
}
TestFacetResult expected = slowDrillSidewaysSearch(s, docs, contentToken, drillDowns, dimValues, filter);
Sort sort = new Sort(new SortField("id", SortField.Type.STRING));
DrillSideways ds;
if (doUseDV) {
ds = getNewDrillSideways(s, config, sortedSetDVState);
} else {
ds = getNewDrillSidewaysBuildFacetsResult(s, config, tr);
}
// Retrieve all facets:
DrillSidewaysResult actual = ds.search(ddq, filter, null, numDocs, sort, true, true);
TopDocs hits = s.search(baseQuery, numDocs);
Map<String, Float> scores = new HashMap<>();
for (ScoreDoc sd : hits.scoreDocs) {
scores.put(s.doc(sd.doc).get("id"), sd.score);
}
if (VERBOSE) {
System.out.println(" verify all facets");
}
verifyEquals(dimValues, s, expected, actual, scores, doUseDV);
// Make sure drill down doesn't change score:
Query q = ddq;
if (filter != null) {
q = new BooleanQuery.Builder().add(q, Occur.MUST).add(filter, Occur.FILTER).build();
}
TopDocs ddqHits = s.search(q, numDocs);
assertEquals(expected.hits.size(), ddqHits.totalHits);
for (int i = 0; i < expected.hits.size(); i++) {
// Score should be IDENTICAL:
assertEquals(scores.get(expected.hits.get(i).id), ddqHits.scoreDocs[i].score, 0.0f);
}
}
w.close();
IOUtils.close(r, tr, tw, d, td);
}
Aggregations