Use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
Class DrillSidewaysScorer, method doDrillDownAdvanceScoring:
/** Used when drill downs are highly constraining vs
 *  baseQuery. */
private void doDrillDownAdvanceScoring(Bits acceptDocs, LeafCollector collector, DocsAndCost[] dims) throws IOException {
  final int maxDoc = context.reader().maxDoc();
  final int numDims = dims.length;

  //if (DEBUG) {
  //  System.out.println("  doDrillDownAdvanceScoring");
  //}

  // TODO: maybe a class like BS, instead of parallel arrays
  int[] filledSlots = new int[CHUNK];
  int[] docIDs = new int[CHUNK];
  float[] scores = new float[CHUNK];
  int[] missingDims = new int[CHUNK];
  int[] counts = new int[CHUNK];

  docIDs[0] = -1;
  int nextChunkStart = CHUNK;

  final FixedBitSet seen = new FixedBitSet(CHUNK);
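  // Docs are processed in fixed-size chunks: docID & MASK maps a doc into
  // its chunk-local slot, and 'seen' records which slots dim 0 or dim 1
  // touched so the later passes can skip every other slot.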
  while (true) {
    //if (DEBUG) {
    //  System.out.println("\ncycle nextChunkStart=" + nextChunkStart + " docIds[0]=" + docIDs[0]);
    //}

    // First dim:
    //if (DEBUG) {
    //  System.out.println("  dim0");
    //}
    DocsAndCost dc = dims[0];
    int docID = dc.approximation.docID();
    while (docID < nextChunkStart) {
      if (acceptDocs == null || acceptDocs.get(docID)) {
        int slot = docID & MASK;
        if (docIDs[slot] != docID && (dc.twoPhase == null || dc.twoPhase.matches())) {
          seen.set(slot);
          // Mark slot as valid:
          //if (DEBUG) {
          //  System.out.println("    set docID=" + docID + " id=" + context.reader().document(docID).get("id"));
          //}
          docIDs[slot] = docID;
          missingDims[slot] = 1;
          counts[slot] = 1;
        }
      }
      docID = dc.approximation.nextDoc();
    }
    // Second dim:
    //if (DEBUG) {
    //  System.out.println("  dim1");
    //}
    dc = dims[1];
    docID = dc.approximation.docID();
    while (docID < nextChunkStart) {
      if (acceptDocs == null || acceptDocs.get(docID) && (dc.twoPhase == null || dc.twoPhase.matches())) {
        int slot = docID & MASK;
        if (docIDs[slot] != docID) {
          // Mark slot as valid:
          seen.set(slot);
          //if (DEBUG) {
          //  System.out.println("    set docID=" + docID + " missingDim=0 id=" + context.reader().document(docID).get("id"));
          //}
          docIDs[slot] = docID;
          missingDims[slot] = 0;
          counts[slot] = 1;
        } else {
          // TODO: single-valued dims will always be true
          // below; we could somehow specialize
          if (missingDims[slot] >= 1) {
            missingDims[slot] = 2;
            counts[slot] = 2;
            //if (DEBUG) {
            //  System.out.println("    set docID=" + docID + " missingDim=2 id=" + context.reader().document(docID).get("id"));
            //}
          } else {
            counts[slot] = 1;
            //if (DEBUG) {
            //  System.out.println("    set docID=" + docID + " missingDim=" + missingDims[slot] + " id=" + context.reader().document(docID).get("id"));
            //}
          }
        }
      }
      docID = dc.approximation.nextDoc();
    }
    // After this we can "upgrade" to conjunction, because
    // any doc not seen by either dim 0 or dim 1 cannot be
    // a hit or a near miss:

    //if (DEBUG) {
    //  System.out.println("  baseScorer");
    //}

    // Fold in baseScorer, using advance:
    int filledCount = 0;
    int slot0 = 0;
    while (slot0 < CHUNK && (slot0 = seen.nextSetBit(slot0)) != DocIdSetIterator.NO_MORE_DOCS) {
      int ddDocID = docIDs[slot0];
      assert ddDocID != -1;

      int baseDocID = baseIterator.docID();
      if (baseDocID < ddDocID) {
        baseDocID = baseIterator.advance(ddDocID);
      }
      if (baseDocID == ddDocID) {
        //if (DEBUG) {
        //  System.out.println("    keep docID=" + ddDocID + " id=" + context.reader().document(ddDocID).get("id"));
        //}
        scores[slot0] = baseScorer.score();
        filledSlots[filledCount++] = slot0;
        counts[slot0]++;
      } else {
        //if (DEBUG) {
        //  System.out.println("    no docID=" + ddDocID + " id=" + context.reader().document(ddDocID).get("id"));
        //}
        docIDs[slot0] = -1;

        // TODO: we could jump slot0 forward to the
        // baseDocID ... but we'd need to set docIDs for
        // intervening slots to -1
      }
      slot0++;
    }
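    // Reset the per-chunk bits so the same FixedBitSet can be reused for
    // the next chunk: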
    seen.clear(0, CHUNK);

    if (filledCount == 0) {
      if (nextChunkStart >= maxDoc) {
        break;
      }
      nextChunkStart += CHUNK;
      continue;
    }
    // TODO: factor this out & share w/ union scorer,
    // except we start from dim=2 instead:
    for (int dim = 2; dim < numDims; dim++) {
      //if (DEBUG) {
      //  System.out.println("  dim=" + dim + " [" + dims[dim].dim + "]");
      //}
      dc = dims[dim];
      docID = dc.approximation.docID();
      while (docID < nextChunkStart) {
        int slot = docID & MASK;
        if (docIDs[slot] == docID && counts[slot] >= dim && (dc.twoPhase == null || dc.twoPhase.matches())) {
          // TODO: single-valued dims will always be true
          // below; we could somehow specialize
          if (missingDims[slot] >= dim) {
            //if (DEBUG) {
            //  System.out.println("    set docID=" + docID + " count=" + (dim+2));
            //}
            missingDims[slot] = dim + 1;
            counts[slot] = dim + 2;
          } else {
            //if (DEBUG) {
            //  System.out.println("    set docID=" + docID + " missing count=" + (dim+1));
            //}
            counts[slot] = dim + 1;
          }
        }

        // TODO: sometimes use advance?
        docID = dc.approximation.nextDoc();
      }
    }
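    // Collect: counts[slot] == 1 + numDims means the base query and every
    // dim matched (a true hit); counts[slot] == numDims means exactly one
    // dim missed, and missingDims[slot] identifies that dim for sideways
    // collection.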
    for (int i = 0; i < filledCount; i++) {
      int slot = filledSlots[i];
      collectDocID = docIDs[slot];
      collectScore = scores[slot];
      if (counts[slot] == 1 + numDims) {
        collectHit(collector, dims);
      } else if (counts[slot] == numDims) {
        collectNearMiss(dims[missingDims[slot]].sidewaysLeafCollector);
      }
    }

    if (nextChunkStart >= maxDoc) {
      break;
    }
    nextChunkStart += CHUNK;
  }
}
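The FixedBitSet usage above is a chunked "seen slots" pattern: a bit set sized to the chunk marks which slots were touched, is swept with nextSetBit, and is cleared in bulk for reuse. Below is a minimal standalone sketch of just that pattern; the class name SeenSlotsSketch and the sample docIDs are illustrative, while CHUNK/MASK mirror the scorer's power-of-two constants.

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.FixedBitSet;

public class SeenSlotsSketch {
  private static final int CHUNK = 2048; // must be a power of two
  private static final int MASK = CHUNK - 1;

  public static void main(String[] args) {
    FixedBitSet seen = new FixedBitSet(CHUNK);
    int[] docIDs = new int[CHUNK];

    // Mark a few docs falling into the current chunk:
    for (int docID : new int[] {3, 700, 2047}) {
      int slot = docID & MASK; // chunk-local slot
      docIDs[slot] = docID;
      seen.set(slot);
    }

    // Sweep only the touched slots; the slot < CHUNK guard keeps
    // nextSetBit's index in range, exactly as in the scorer above:
    int slot = 0;
    while (slot < CHUNK && (slot = seen.nextSetBit(slot)) != DocIdSetIterator.NO_MORE_DOCS) {
      System.out.println("slot=" + slot + " docID=" + docIDs[slot]);
      slot++;
    }

    // Clear the whole range so the bit set can be reused for the next chunk:
    seen.clear(0, CHUNK);
  }
}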
Use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
Class BaseGeoPointTestCase, method testMultiValued:
public void testMultiValued() throws Exception {
  int numPoints = atLeast(10000);

  // Every doc has 2 points:
  double[] lats = new double[2 * numPoints];
  double[] lons = new double[2 * numPoints];
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig();

  // We rely on docID order:
  iwc.setMergePolicy(newLogMergePolicy());
  // and on seeds being able to reproduce:
  iwc.setMergeScheduler(new SerialMergeScheduler());
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);

  for (int id = 0; id < numPoints; id++) {
    Document doc = new Document();
    lats[2 * id] = quantizeLat(nextLatitude());
    lons[2 * id] = quantizeLon(nextLongitude());
    doc.add(newStringField("id", "" + id, Field.Store.YES));
    addPointToDoc(FIELD_NAME, doc, lats[2 * id], lons[2 * id]);
    lats[2 * id + 1] = quantizeLat(nextLatitude());
    lons[2 * id + 1] = quantizeLon(nextLongitude());
    addPointToDoc(FIELD_NAME, doc, lats[2 * id + 1], lons[2 * id + 1]);
    if (VERBOSE) {
      System.out.println("id=" + id);
      System.out.println("  lat=" + lats[2 * id] + " lon=" + lons[2 * id]);
      System.out.println("  lat=" + lats[2 * id + 1] + " lon=" + lons[2 * id + 1]);
    }
    w.addDocument(doc);
  }

  // TODO: share w/ verify; just need parallel array of the expected ids
  if (random().nextBoolean()) {
    w.forceMerge(1);
  }
  IndexReader r = w.getReader();
  w.close();

  IndexSearcher s = newSearcher(r);

  int iters = atLeast(25);
  for (int iter = 0; iter < iters; iter++) {
    Rectangle rect = nextBox();
    if (VERBOSE) {
      System.out.println("\nTEST: iter=" + iter + " rect=" + rect);
    }

    Query query = newRectQuery(FIELD_NAME, rect.minLat, rect.maxLat, rect.minLon, rect.maxLon);
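    // 'hits' is sized to the top-level reader's maxDoc; each leaf's docBase
    // (captured in doSetNextReader below) maps leaf-local doc ids into that
    // global space: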
    final FixedBitSet hits = new FixedBitSet(r.maxDoc());
    s.search(query, new SimpleCollector() {
      private int docBase;

      @Override
      public boolean needsScores() {
        return false;
      }

      @Override
      protected void doSetNextReader(LeafReaderContext context) throws IOException {
        docBase = context.docBase;
      }

      @Override
      public void collect(int doc) {
        hits.set(docBase + doc);
      }
    });

    boolean fail = false;
    for (int docID = 0; docID < lats.length / 2; docID++) {
      double latDoc1 = lats[2 * docID];
      double lonDoc1 = lons[2 * docID];
      double latDoc2 = lats[2 * docID + 1];
      double lonDoc2 = lons[2 * docID + 1];

      boolean result1 = rectContainsPoint(rect, latDoc1, lonDoc1);
      boolean result2 = rectContainsPoint(rect, latDoc2, lonDoc2);
      boolean expected = result1 || result2;

      if (hits.get(docID) != expected) {
        String id = s.doc(docID).get("id");
        if (expected) {
          System.out.println("TEST: id=" + id + " docID=" + docID + " should match but did not");
        } else {
          System.out.println("TEST: id=" + id + " docID=" + docID + " should not match but did");
        }
        System.out.println("  rect=" + rect);
        System.out.println("  lat=" + latDoc1 + " lon=" + lonDoc1 + "\n  lat=" + latDoc2 + " lon=" + lonDoc2);
        System.out.println("  result1=" + result1 + " result2=" + result2);
        fail = true;
      }
    }
    if (fail) {
      fail("some hits were wrong");
    }
  }
  r.close();
  dir.close();
}
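Because the collected hits live in a FixedBitSet, the per-doc comparison loop above could also be phrased with the class's bulk operations. A sketch, assuming the hits, r, rect, lats and lons from the test, and assuming docID order matches id order as the test's merge policy arranges; clone, xor and cardinality are existing FixedBitSet methods:

FixedBitSet expectedHits = new FixedBitSet(r.maxDoc());
for (int docID = 0; docID < lats.length / 2; docID++) {
  if (rectContainsPoint(rect, lats[2 * docID], lons[2 * docID])
      || rectContainsPoint(rect, lats[2 * docID + 1], lons[2 * docID + 1])) {
    expectedHits.set(docID);
  }
}
FixedBitSet diff = hits.clone();
diff.xor(expectedHits); // a set bit marks a docID where actual != expected
assertEquals("mismatched docIDs: " + diff.cardinality(), 0, diff.cardinality());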
Use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
Class BaseGeoPointTestCase, method verifyRandomRectangles:
protected void verifyRandomRectangles(double[] lats, double[] lons) throws Exception {
  IndexWriterConfig iwc = newIndexWriterConfig();
  // Else seeds may not reproduce:
  iwc.setMergeScheduler(new SerialMergeScheduler());
  // Else we can get O(N^2) merging:
  int mbd = iwc.getMaxBufferedDocs();
  if (mbd != -1 && mbd < lats.length / 100) {
    iwc.setMaxBufferedDocs(lats.length / 100);
  }
  Directory dir;
  if (lats.length > 100000) {
    dir = newFSDirectory(createTempDir(getClass().getSimpleName()));
  } else {
    dir = newDirectory();
  }

  Set<Integer> deleted = new HashSet<>();
  // RandomIndexWriter is too slow here:
  IndexWriter w = new IndexWriter(dir, iwc);
  for (int id = 0; id < lats.length; id++) {
    Document doc = new Document();
    doc.add(newStringField("id", "" + id, Field.Store.NO));
    doc.add(new NumericDocValuesField("id", id));
    if (Double.isNaN(lats[id]) == false) {
      addPointToDoc(FIELD_NAME, doc, lats[id], lons[id]);
    }
    w.addDocument(doc);
    if (id > 0 && random().nextInt(100) == 42) {
      int idToDelete = random().nextInt(id);
      w.deleteDocuments(new Term("id", "" + idToDelete));
      deleted.add(idToDelete);
      if (VERBOSE) {
        System.out.println("  delete id=" + idToDelete);
      }
    }
  }

  if (random().nextBoolean()) {
    w.forceMerge(1);
  }

  final IndexReader r = DirectoryReader.open(w);
  w.close();

  IndexSearcher s = newSearcher(r);

  int iters = atLeast(25);
  Bits liveDocs = MultiFields.getLiveDocs(s.getIndexReader());
  int maxDoc = s.getIndexReader().maxDoc();

  for (int iter = 0; iter < iters; iter++) {
    if (VERBOSE) {
      System.out.println("\nTEST: iter=" + iter + " s=" + s);
    }

    Rectangle rect = nextBox();

    Query query = newRectQuery(FIELD_NAME, rect.minLat, rect.maxLat, rect.minLon, rect.maxLon);
    if (VERBOSE) {
      System.out.println("  query=" + query);
    }

    final FixedBitSet hits = new FixedBitSet(maxDoc);
    s.search(query, new SimpleCollector() {
      private int docBase;

      @Override
      public boolean needsScores() {
        return false;
      }

      @Override
      protected void doSetNextReader(LeafReaderContext context) throws IOException {
        docBase = context.docBase;
      }

      @Override
      public void collect(int doc) {
        hits.set(docBase + doc);
      }
    });
    boolean fail = false;
    NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
    for (int docID = 0; docID < maxDoc; docID++) {
      assertEquals(docID, docIDToID.nextDoc());
      int id = (int) docIDToID.longValue();
      boolean expected;
      if (liveDocs != null && liveDocs.get(docID) == false) {
        // document is deleted
        expected = false;
      } else if (Double.isNaN(lats[id])) {
        expected = false;
      } else {
        expected = rectContainsPoint(rect, lats[id], lons[id]);
      }

      if (hits.get(docID) != expected) {
        StringBuilder b = new StringBuilder();
        b.append("docID=(" + docID + ")\n");
        if (expected) {
          b.append("FAIL: id=" + id + " should match but did not\n");
        } else {
          b.append("FAIL: id=" + id + " should not match but did\n");
        }
        b.append("  box=" + rect + "\n");
        b.append("  query=" + query + " docID=" + docID + "\n");
        b.append("  lat=" + lats[id] + " lon=" + lons[id] + "\n");
        b.append("  deleted?=" + (liveDocs != null && liveDocs.get(docID) == false));
        if (true) {
          fail("wrong hit (first of possibly more):\n\n" + b);
        } else {
          System.out.println(b.toString());
          fail = true;
        }
      }
    }
    if (fail) {
      fail("some hits were wrong");
    }
  }

  IOUtils.close(r, dir);
}
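When only the matched set is needed rather than a per-doc comparison, the collected FixedBitSet can be swept directly instead of probed with get(). A short sketch against the hits and maxDoc above, using the same nextSetBit idiom as the DrillSidewaysScorer snippet:

int hitCount = hits.cardinality(); // number of matching docs
int docID = 0;
while (docID < maxDoc && (docID = hits.nextSetBit(docID)) != DocIdSetIterator.NO_MORE_DOCS) {
  // process matching docID ...
  docID++;
}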
Use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
Class BaseGeoPointTestCase, method verifyRandomDistances:
protected void verifyRandomDistances(double[] lats, double[] lons) throws Exception {
  IndexWriterConfig iwc = newIndexWriterConfig();
  // Else seeds may not reproduce:
  iwc.setMergeScheduler(new SerialMergeScheduler());
  // Else we can get O(N^2) merging:
  int mbd = iwc.getMaxBufferedDocs();
  if (mbd != -1 && mbd < lats.length / 100) {
    iwc.setMaxBufferedDocs(lats.length / 100);
  }
  Directory dir;
  if (lats.length > 100000) {
    dir = newFSDirectory(createTempDir(getClass().getSimpleName()));
  } else {
    dir = newDirectory();
  }

  Set<Integer> deleted = new HashSet<>();
  // RandomIndexWriter is too slow here:
  IndexWriter w = new IndexWriter(dir, iwc);
  for (int id = 0; id < lats.length; id++) {
    Document doc = new Document();
    doc.add(newStringField("id", "" + id, Field.Store.NO));
    doc.add(new NumericDocValuesField("id", id));
    if (Double.isNaN(lats[id]) == false) {
      addPointToDoc(FIELD_NAME, doc, lats[id], lons[id]);
    }
    w.addDocument(doc);
    if (id > 0 && random().nextInt(100) == 42) {
      int idToDelete = random().nextInt(id);
      w.deleteDocuments(new Term("id", "" + idToDelete));
      deleted.add(idToDelete);
      if (VERBOSE) {
        System.out.println("  delete id=" + idToDelete);
      }
    }
  }

  if (random().nextBoolean()) {
    w.forceMerge(1);
  }

  final IndexReader r = DirectoryReader.open(w);
  w.close();

  IndexSearcher s = newSearcher(r);

  int iters = atLeast(25);
  Bits liveDocs = MultiFields.getLiveDocs(s.getIndexReader());
  int maxDoc = s.getIndexReader().maxDoc();

  for (int iter = 0; iter < iters; iter++) {
    if (VERBOSE) {
      System.out.println("\nTEST: iter=" + iter + " s=" + s);
    }

    // Distance
    final double centerLat = nextLatitude();
    final double centerLon = nextLongitude();

    // So the query can cover at most 50% of the earth's surface:
    final double radiusMeters = random().nextDouble() * GeoUtils.EARTH_MEAN_RADIUS_METERS * Math.PI / 2.0 + 1.0;

    if (VERBOSE) {
      final DecimalFormat df = new DecimalFormat("#,###.00", DecimalFormatSymbols.getInstance(Locale.ENGLISH));
      System.out.println("  radiusMeters = " + df.format(radiusMeters));
    }

    Query query = newDistanceQuery(FIELD_NAME, centerLat, centerLon, radiusMeters);
    if (VERBOSE) {
      System.out.println("  query=" + query);
    }

    final FixedBitSet hits = new FixedBitSet(maxDoc);
    s.search(query, new SimpleCollector() {
      private int docBase;

      @Override
      public boolean needsScores() {
        return false;
      }

      @Override
      protected void doSetNextReader(LeafReaderContext context) throws IOException {
        docBase = context.docBase;
      }

      @Override
      public void collect(int doc) {
        hits.set(docBase + doc);
      }
    });
    boolean fail = false;
    NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
    for (int docID = 0; docID < maxDoc; docID++) {
      assertEquals(docID, docIDToID.nextDoc());
      int id = (int) docIDToID.longValue();
      boolean expected;
      if (liveDocs != null && liveDocs.get(docID) == false) {
        // document is deleted
        expected = false;
      } else if (Double.isNaN(lats[id])) {
        expected = false;
      } else {
        expected = SloppyMath.haversinMeters(centerLat, centerLon, lats[id], lons[id]) <= radiusMeters;
      }

      if (hits.get(docID) != expected) {
        StringBuilder b = new StringBuilder();
        if (expected) {
          b.append("FAIL: id=" + id + " should match but did not\n");
        } else {
          b.append("FAIL: id=" + id + " should not match but did\n");
        }
        b.append("  query=" + query + " docID=" + docID + "\n");
        b.append("  lat=" + lats[id] + " lon=" + lons[id] + "\n");
        b.append("  deleted?=" + (liveDocs != null && liveDocs.get(docID) == false));
        if (Double.isNaN(lats[id]) == false) {
          double distanceMeters = SloppyMath.haversinMeters(centerLat, centerLon, lats[id], lons[id]);
          b.append("  centerLat=" + centerLat + " centerLon=" + centerLon + " distanceMeters=" + distanceMeters + " vs radiusMeters=" + radiusMeters);
        }
        if (true) {
          fail("wrong hit (first of possibly more):\n\n" + b);
        } else {
          System.out.println(b.toString());
          fail = true;
        }
      }
    }
    if (fail) {
      fail("some hits were wrong");
    }
  }

  IOUtils.close(r, dir);
}
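The brute-force expected answer above is simply a haversine distance test per point (SloppyMath is org.apache.lucene.util.SloppyMath); condensed to its core, the predicate is:

// true when the indexed point falls inside the query circle:
boolean inside = SloppyMath.haversinMeters(centerLat, centerLon, lats[id], lons[id]) <= radiusMeters;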
Use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
Class BaseGeoPointTestCase, method verifyRandomPolygons:
protected void verifyRandomPolygons(double[] lats, double[] lons) throws Exception {
  IndexWriterConfig iwc = newIndexWriterConfig();
  // Else seeds may not reproduce:
  iwc.setMergeScheduler(new SerialMergeScheduler());
  // Else we can get O(N^2) merging:
  int mbd = iwc.getMaxBufferedDocs();
  if (mbd != -1 && mbd < lats.length / 100) {
    iwc.setMaxBufferedDocs(lats.length / 100);
  }
  Directory dir;
  if (lats.length > 100000) {
    dir = newFSDirectory(createTempDir(getClass().getSimpleName()));
  } else {
    dir = newDirectory();
  }

  Set<Integer> deleted = new HashSet<>();
  // RandomIndexWriter is too slow here:
  IndexWriter w = new IndexWriter(dir, iwc);
  for (int id = 0; id < lats.length; id++) {
    Document doc = new Document();
    doc.add(newStringField("id", "" + id, Field.Store.NO));
    doc.add(new NumericDocValuesField("id", id));
    if (Double.isNaN(lats[id]) == false) {
      addPointToDoc(FIELD_NAME, doc, lats[id], lons[id]);
    }
    w.addDocument(doc);
    if (id > 0 && random().nextInt(100) == 42) {
      int idToDelete = random().nextInt(id);
      w.deleteDocuments(new Term("id", "" + idToDelete));
      deleted.add(idToDelete);
      if (VERBOSE) {
        System.out.println("  delete id=" + idToDelete);
      }
    }
  }

  if (random().nextBoolean()) {
    w.forceMerge(1);
  }

  final IndexReader r = DirectoryReader.open(w);
  w.close();

  // We can't wrap with "exotic" readers because points needs to work:
  IndexSearcher s = newSearcher(r);

  final int iters = atLeast(75);

  Bits liveDocs = MultiFields.getLiveDocs(s.getIndexReader());
  int maxDoc = s.getIndexReader().maxDoc();

  for (int iter = 0; iter < iters; iter++) {
    if (VERBOSE) {
      System.out.println("\nTEST: iter=" + iter + " s=" + s);
    }

    // Polygon
    Polygon polygon = nextPolygon();

    Query query = newPolygonQuery(FIELD_NAME, polygon);
    if (VERBOSE) {
      System.out.println("  query=" + query);
    }

    final FixedBitSet hits = new FixedBitSet(maxDoc);
    s.search(query, new SimpleCollector() {
      private int docBase;

      @Override
      public boolean needsScores() {
        return false;
      }

      @Override
      protected void doSetNextReader(LeafReaderContext context) throws IOException {
        docBase = context.docBase;
      }

      @Override
      public void collect(int doc) {
        hits.set(docBase + doc);
      }
    });
    boolean fail = false;
    NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
    for (int docID = 0; docID < maxDoc; docID++) {
      assertEquals(docID, docIDToID.nextDoc());
      int id = (int) docIDToID.longValue();
      boolean expected;
      if (liveDocs != null && liveDocs.get(docID) == false) {
        // document is deleted
        expected = false;
      } else if (Double.isNaN(lats[id])) {
        expected = false;
      } else {
        expected = GeoTestUtil.containsSlowly(polygon, lats[id], lons[id]);
      }

      if (hits.get(docID) != expected) {
        StringBuilder b = new StringBuilder();
        if (expected) {
          b.append("FAIL: id=" + id + " should match but did not\n");
        } else {
          b.append("FAIL: id=" + id + " should not match but did\n");
        }
        b.append("  query=" + query + " docID=" + docID + "\n");
        b.append("  lat=" + lats[id] + " lon=" + lons[id] + "\n");
        b.append("  deleted?=" + (liveDocs != null && liveDocs.get(docID) == false));
        b.append("  polygon=" + polygon);
        if (true) {
          fail("wrong hit (first of possibly more):\n\n" + b);
        } else {
          System.out.println(b.toString());
          fail = true;
        }
      }
    }
    if (fail) {
      fail("some hits were wrong");
    }
  }

  IOUtils.close(r, dir);
}
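All four geo snippets share the same collection step: size a FixedBitSet to maxDoc, then fold each leaf's hits in through its docBase. A sketch of that step factored into a helper; the name collectHits is illustrative, not from the original, and the imports (IndexSearcher, Query, SimpleCollector, LeafReaderContext, FixedBitSet) are the same ones the snippets already use:

private static FixedBitSet collectHits(IndexSearcher s, Query query, int maxDoc) throws IOException {
  final FixedBitSet hits = new FixedBitSet(maxDoc);
  s.search(query, new SimpleCollector() {
    private int docBase;

    @Override
    public boolean needsScores() {
      return false;
    }

    @Override
    protected void doSetNextReader(LeafReaderContext context) throws IOException {
      docBase = context.docBase;
    }

    @Override
    public void collect(int doc) {
      hits.set(docBase + doc);
    }
  });
  return hits;
}

Each verify* method could then replace its inline collector with a single call: FixedBitSet hits = collectHits(s, query, maxDoc);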