Use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
The class SolrIndexSplitter, method split().
public void split() throws IOException {
  List<LeafReaderContext> leaves = searcher.getRawReader().leaves();
  List<FixedBitSet[]> segmentDocSets = new ArrayList<>(leaves.size());
  log.info("SolrIndexSplitter: partitions=" + numPieces + " segments=" + leaves.size());
  for (LeafReaderContext readerContext : leaves) {
    // make sure we're going in order
    assert readerContext.ordInParent == segmentDocSets.size();
    FixedBitSet[] docSets = split(readerContext);
    segmentDocSets.add(docSets);
  }
  for (int partitionNumber = 0; partitionNumber < numPieces; partitionNumber++) {
    log.info("SolrIndexSplitter: partition #" + partitionNumber + " partitionCount=" + numPieces + (ranges != null ? " range=" + ranges.get(partitionNumber) : ""));
    boolean success = false;
    RefCounted<IndexWriter> iwRef = null;
    IndexWriter iw = null;
    if (cores != null) {
      SolrCore subCore = cores.get(partitionNumber);
      iwRef = subCore.getUpdateHandler().getSolrCoreState().getIndexWriter(subCore);
      iw = iwRef.get();
    } else {
      SolrCore core = searcher.getCore();
      String path = paths.get(partitionNumber);
      iw = SolrIndexWriter.create(core, "SplittingIndexWriter" + partitionNumber + (ranges != null ? " " + ranges.get(partitionNumber) : ""), path, core.getDirectoryFactory(), true, core.getLatestSchema(), core.getSolrConfig().indexConfig, core.getDeletionPolicy(), core.getCodec());
    }
    try {
      // This removes deletions but optimize might still be needed because sub-shards will have the same number of segments as the parent shard.
      for (int segmentNumber = 0; segmentNumber < leaves.size(); segmentNumber++) {
        log.info("SolrIndexSplitter: partition #" + partitionNumber + " partitionCount=" + numPieces + (ranges != null ? " range=" + ranges.get(partitionNumber) : "") + " segment #" + segmentNumber + " segmentCount=" + leaves.size());
        CodecReader subReader = SlowCodecReaderWrapper.wrap(leaves.get(segmentNumber).reader());
        iw.addIndexes(new LiveDocsReader(subReader, segmentDocSets.get(segmentNumber)[partitionNumber]));
      }
      // we commit explicitly instead of sending a CommitUpdateCommand through the processor chain
      // because the sub-shard cores will just ignore such a commit because the update log is not
      // in active state at this time.
      //TODO no commitUpdateCommand
      SolrIndexWriter.setCommitData(iw, -1);
      iw.commit();
      success = true;
    } finally {
      if (iwRef != null) {
        iwRef.decref();
      } else {
        if (success) {
          iw.close();
        } else {
          IOUtils.closeWhileHandlingException(iw);
        }
      }
    }
  }
}
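LiveDocsReader, passed to addIndexes() above, is a private helper inside SolrIndexSplitter and is not shown on this page. A minimal sketch of the idea, assuming the Lucene 6.x FilterCodecReader API: the wrapper substitutes the partition's FixedBitSet for the segment's live docs, so addIndexes() copies only the documents whose bits are set.

// Sketch only, not the verbatim Solr class: a CodecReader wrapper whose
// live docs are the partition's FixedBitSet.
private static class LiveDocsReader extends FilterCodecReader {

  final FixedBitSet liveDocs;
  final int numDocs;

  public LiveDocsReader(CodecReader in, FixedBitSet liveDocs) {
    super(in);
    this.liveDocs = liveDocs;
    this.numDocs = liveDocs.cardinality();  // live doc count = set bits
  }

  @Override
  public int numDocs() {
    return numDocs;
  }

  @Override
  public Bits getLiveDocs() {
    return liveDocs;  // docs with a cleared bit are treated as deleted
  }
}

Because addIndexes(CodecReader...) honors the reader's live docs, this is what makes each sub-shard receive only the documents assigned to its partition.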
Use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
The class SolrIndexSplitter, method split(LeafReaderContext).
FixedBitSet[] split(LeafReaderContext readerContext) throws IOException {
  LeafReader reader = readerContext.reader();
  FixedBitSet[] docSets = new FixedBitSet[numPieces];
  for (int i = 0; i < docSets.length; i++) {
    docSets[i] = new FixedBitSet(reader.maxDoc());
  }
  Bits liveDocs = reader.getLiveDocs();
  Fields fields = reader.fields();
  Terms terms = fields == null ? null : fields.terms(field.getName());
  TermsEnum termsEnum = terms == null ? null : terms.iterator();
  if (termsEnum == null)
    return docSets;
  BytesRef term = null;
  PostingsEnum postingsEnum = null;
  int[] docsMatchingRanges = null;
  if (ranges != null) {
    // +1 because documents can belong to *zero*, one, several or all ranges in rangesArr
    docsMatchingRanges = new int[rangesArr.length + 1];
  }
  CharsRefBuilder idRef = new CharsRefBuilder();
  for (;;) {
    term = termsEnum.next();
    if (term == null)
      break;
    // figure out the hash for the term
    // FUTURE: if conversion to strings costs too much, we could
    // specialize and use the hash function that can work over bytes.
    field.getType().indexedToReadable(term, idRef);
    String idString = idRef.toString();
    if (splitKey != null) {
      // todo have composite routers support these kind of things instead
      String part1 = getRouteKey(idString);
      if (part1 == null)
        continue;
      if (!splitKey.equals(part1)) {
        continue;
      }
    }
    int hash = 0;
    if (hashRouter != null) {
      hash = hashRouter.sliceHash(idString, null, null, null);
    }
    postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
    postingsEnum = BitsFilteredPostingsEnum.wrap(postingsEnum, liveDocs);
    for (;;) {
      int doc = postingsEnum.nextDoc();
      if (doc == DocIdSetIterator.NO_MORE_DOCS)
        break;
      if (ranges == null) {
        docSets[currPartition].set(doc);
        currPartition = (currPartition + 1) % numPieces;
      } else {
        int matchingRangesCount = 0;
        for (int i = 0; i < rangesArr.length; i++) {
          // inner-loop: use array here for extra speed.
          if (rangesArr[i].includes(hash)) {
            docSets[i].set(doc);
            ++matchingRangesCount;
          }
        }
        docsMatchingRanges[matchingRangesCount]++;
      }
    }
  }
  if (docsMatchingRanges != null) {
    for (int ii = 0; ii < docsMatchingRanges.length; ii++) {
      if (0 == docsMatchingRanges[ii])
        continue;
      switch (ii) {
        case 0:
          // document loss
          log.error("Splitting {}: {} documents belong to no shards and will be dropped", reader, docsMatchingRanges[ii]);
          break;
        case 1:
          // normal case, each document moves to one of the sub-shards
          log.info("Splitting {}: {} documents will move into a sub-shard", reader, docsMatchingRanges[ii]);
          break;
        default:
          // document duplication
          log.error("Splitting {}: {} documents will be moved to multiple ({}) sub-shards", reader, docsMatchingRanges[ii], ii);
          break;
      }
    }
  }
  return docSets;
}
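When ranges is null, the method deals documents out round-robin (the docSets[currPartition].set(doc) branch above). A standalone illustration of that pattern; numPieces and maxDoc are made-up values here:

// Round-robin assignment of doc IDs to partitions, mirroring the
// ranges == null branch of split(LeafReaderContext).
int numPieces = 3;
int maxDoc = 10;
FixedBitSet[] docSets = new FixedBitSet[numPieces];
for (int i = 0; i < numPieces; i++) {
  docSets[i] = new FixedBitSet(maxDoc);    // one bit per doc in the segment
}
int currPartition = 0;
for (int doc = 0; doc < maxDoc; doc++) {
  docSets[currPartition].set(doc);         // assign doc to the current partition
  currPartition = (currPartition + 1) % numPieces;
}
// docSets[0] holds docs 0, 3, 6, 9; docSets[1] holds 1, 4, 7; docSets[2] holds 2, 5, 8.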
Use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
The class FieldCacheImpl, method setDocsWithField().
// null Bits means no docs matched
void setDocsWithField(LeafReader reader, String field, Bits docsWithField, Parser parser) {
  final int maxDoc = reader.maxDoc();
  final Bits bits;
  if (docsWithField == null) {
    bits = new Bits.MatchNoBits(maxDoc);
  } else if (docsWithField instanceof FixedBitSet) {
    final int numSet = ((FixedBitSet) docsWithField).cardinality();
    if (numSet >= maxDoc) {
      // The cardinality of the BitSet is maxDoc if all documents have a value.
      assert numSet == maxDoc;
      bits = new Bits.MatchAllBits(maxDoc);
    } else {
      bits = docsWithField;
    }
  } else {
    bits = docsWithField;
  }
  caches.get(DocsWithFieldCache.class).put(reader, new CacheKey(field, parser), new BitsEntry(bits));
}
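The FixedBitSet detail worth noting here is the cardinality() shortcut: a completely full bit set is replaced by Bits.MatchAllBits, which answers get() as a constant without consulting any bit storage. A small self-contained illustration of the same check (values are made up):

// Collapse a full FixedBitSet into a constant Bits, as setDocsWithField does.
int maxDoc = 5;
FixedBitSet docsWithField = new FixedBitSet(maxDoc);
docsWithField.set(0, maxDoc);                    // set the whole range [0, maxDoc)
Bits bits = docsWithField.cardinality() == maxDoc
    ? new Bits.MatchAllBits(maxDoc)              // constant-true Bits, no bit scan
    : docsWithField;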
Use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
The class WithinPrefixTreeQuery, method getDocIdSet().
@Override
protected DocIdSet getDocIdSet(LeafReaderContext context) throws IOException {
  return new VisitorTemplate(context) {

    private FixedBitSet inside;
    private FixedBitSet outside;

    @Override
    protected void start() {
      inside = new FixedBitSet(maxDoc);
      outside = new FixedBitSet(maxDoc);
    }

    @Override
    protected DocIdSet finish() {
      inside.andNot(outside);
      return new BitDocIdSet(inside);
    }

    @Override
    protected CellIterator findSubCellsToVisit(Cell cell) {
      // use buffered query shape instead of orig. Works with null too.
      return cell.getNextLevelCells(bufferedQueryShape);
    }

    @Override
    protected boolean visitPrefix(Cell cell) throws IOException {
      // cell.relate is based on the bufferedQueryShape; we need to examine what
      // the relation is against the queryShape
      SpatialRelation visitRelation = cell.getShape().relate(queryShape);
      if (cell.getLevel() == detailLevel) {
        collectDocs(visitRelation.intersects() ? inside : outside);
        return false;
      } else if (visitRelation == SpatialRelation.WITHIN) {
        collectDocs(inside);
        return false;
      } else if (visitRelation == SpatialRelation.DISJOINT) {
        collectDocs(outside);
        return false;
      }
      return true;
    }

    @Override
    protected void visitLeaf(Cell cell) throws IOException {
      if (allCellsIntersectQuery(cell))
        collectDocs(inside);
      else
        collectDocs(outside);
    }

    /** Returns true if the provided cell and all its sub-cells down to
     *  detailLevel intersect the queryShape. */
    private boolean allCellsIntersectQuery(Cell cell) {
      SpatialRelation relate = cell.getShape().relate(queryShape);
      if (cell.getLevel() == detailLevel)
        return relate.intersects();
      if (relate == SpatialRelation.WITHIN)
        return true;
      if (relate == SpatialRelation.DISJOINT)
        return false;
      // Note: Generating all these cells just to determine intersection is not ideal.
      // The real solution is LUCENE-4869.
      CellIterator subCells = cell.getNextLevelCells(null);
      while (subCells.hasNext()) {
        Cell subCell = subCells.next();
        if (!allCellsIntersectQuery(subCell)) // recursion
          return false;
      }
      return true;
    }

    @Override
    protected void visitScanned(Cell cell) throws IOException {
      // collects as we want, even if not a leaf
      visitLeaf(cell);
      // if (cell.isLeaf()) {
      //   visitLeaf(cell);
      // } else {
      //   visitPrefix(cell);
      // }
    }
  }.getDocIdSet();
}
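The finish() step relies on FixedBitSet.andNot(): any document that ever matched an "outside" cell is cleared from inside, so only documents wholly within the query shape survive. The same operation in isolation (doc IDs are made up):

// andNot() clears every bit of `inside` that is also set in `outside`.
FixedBitSet inside = new FixedBitSet(8);
FixedBitSet outside = new FixedBitSet(8);
inside.set(1);
inside.set(3);
inside.set(5);
outside.set(3);          // doc 3 also touched a cell outside the query shape
inside.andNot(outside);
// inside now contains only docs 1 and 5; doc 3 was cleared.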
Use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
The class TestGeo3DPoint, method verify().
private static void verify(double[] lats, double[] lons) throws Exception {
  IndexWriterConfig iwc = newIndexWriterConfig();
  GeoPoint[] points = new GeoPoint[lats.length];
  GeoPoint[] unquantizedPoints = new GeoPoint[lats.length];
  // Pre-quantize all lat/lons:
  for (int i = 0; i < lats.length; i++) {
    if (Double.isNaN(lats[i]) == false) {
      //System.out.println("lats[" + i + "] = " + lats[i]);
      unquantizedPoints[i] = new GeoPoint(PlanetModel.WGS84, toRadians(lats[i]), toRadians(lons[i]));
      points[i] = quantize(unquantizedPoints[i]);
    }
  }
  // Else we can get O(N^2) merging:
  int mbd = iwc.getMaxBufferedDocs();
  if (mbd != -1 && mbd < points.length / 100) {
    iwc.setMaxBufferedDocs(points.length / 100);
  }
  iwc.setCodec(getCodec());
  Directory dir;
  if (points.length > 100000) {
    dir = newFSDirectory(createTempDir("TestBKDTree"));
  } else {
    dir = getDirectory();
  }
  Set<Integer> deleted = new HashSet<>();
  // RandomIndexWriter is too slow here:
  IndexWriter w = new IndexWriter(dir, iwc);
  for (int id = 0; id < points.length; id++) {
    Document doc = new Document();
    doc.add(newStringField("id", "" + id, Field.Store.NO));
    doc.add(new NumericDocValuesField("id", id));
    GeoPoint point = points[id];
    if (point != null) {
      doc.add(new Geo3DPoint("point", point.x, point.y, point.z));
    }
    w.addDocument(doc);
    if (id > 0 && random().nextInt(100) == 42) {
      int idToDelete = random().nextInt(id);
      w.deleteDocuments(new Term("id", "" + idToDelete));
      deleted.add(idToDelete);
      if (VERBOSE) {
        System.err.println("  delete id=" + idToDelete);
      }
    }
  }
  if (random().nextBoolean()) {
    w.forceMerge(1);
  }
  final IndexReader r = DirectoryReader.open(w);
  if (VERBOSE) {
    System.out.println("TEST: using reader " + r);
  }
  w.close();
  // We can't wrap with "exotic" readers because the geo3d query must see the Geo3DDVFormat:
  IndexSearcher s = newSearcher(r, false);
  final int iters = atLeast(100);
  for (int iter = 0; iter < iters; iter++) {
    /*
    GeoShape shape = randomShape();
    if (VERBOSE) {
      System.err.println("\nTEST: iter=" + iter + " shape=" + shape);
    }
    */
    // Geo3DPoint.newShapeQuery("point", shape);
    Query query = random3DQuery("point");
    if (VERBOSE) {
      System.err.println("  using query: " + query);
    }
    final FixedBitSet hits = new FixedBitSet(r.maxDoc());
    s.search(query, new SimpleCollector() {

      private int docBase;

      @Override
      public boolean needsScores() {
        return false;
      }

      @Override
      protected void doSetNextReader(LeafReaderContext context) throws IOException {
        docBase = context.docBase;
      }

      @Override
      public void collect(int doc) {
        hits.set(docBase + doc);
      }
    });
    if (VERBOSE) {
      System.err.println("  hitCount: " + hits.cardinality());
    }
    NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
    for (int docID = 0; docID < r.maxDoc(); docID++) {
      assertEquals(docID, docIDToID.nextDoc());
      int id = (int) docIDToID.longValue();
      GeoPoint point = points[id];
      GeoPoint unquantizedPoint = unquantizedPoints[id];
      if (point != null && unquantizedPoint != null) {
        GeoShape shape = ((PointInGeo3DShapeQuery) query).getShape();
        XYZBounds bounds = new XYZBounds();
        shape.getBounds(bounds);
        XYZSolid solid = XYZSolidFactory.makeXYZSolid(PlanetModel.WGS84, bounds.getMinimumX(), bounds.getMaximumX(), bounds.getMinimumY(), bounds.getMaximumY(), bounds.getMinimumZ(), bounds.getMaximumZ());
        boolean expected = (deleted.contains(id) == false) && shape.isWithin(point);
        if (hits.get(docID) != expected) {
          StringBuilder b = new StringBuilder();
          if (expected) {
            b.append("FAIL: id=" + id + " should have matched but did not\n");
          } else {
            b.append("FAIL: id=" + id + " should not have matched but did\n");
          }
          b.append("  shape=" + shape + "\n");
          b.append("  bounds=" + bounds + "\n");
          b.append("  world bounds=(" + " minX=" + PlanetModel.WGS84.getMinimumXValue() + " maxX=" + PlanetModel.WGS84.getMaximumXValue() + " minY=" + PlanetModel.WGS84.getMinimumYValue() + " maxY=" + PlanetModel.WGS84.getMaximumYValue() + " minZ=" + PlanetModel.WGS84.getMinimumZValue() + " maxZ=" + PlanetModel.WGS84.getMaximumZValue() + ")\n");
          b.append("  quantized point=" + point + " within shape? " + shape.isWithin(point) + " within bounds? " + solid.isWithin(point) + "\n");
          b.append("  unquantized point=" + unquantizedPoint + " within shape? " + shape.isWithin(unquantizedPoint) + " within bounds? " + solid.isWithin(unquantizedPoint) + "\n");
          b.append("  docID=" + docID + " deleted?=" + deleted.contains(id) + "\n");
          b.append("  query=" + query + "\n");
          b.append("  explanation:\n    " + explain("point", shape, point, unquantizedPoint, r, docID).replace("\n", "\n  "));
          fail(b.toString());
        }
      } else {
        assertFalse(hits.get(docID));
      }
    }
  }
  IOUtils.close(r, dir);
}
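The SimpleCollector above records each hit in one index-wide FixedBitSet by offsetting per-segment doc IDs with context.docBase. To walk the collected hits afterwards, one could wrap the set in a BitSetIterator; this is not part of the test, just a usage sketch:

// Iterate the set bits of the collected FixedBitSet as a DocIdSetIterator.
BitSetIterator it = new BitSetIterator(hits, hits.cardinality());
for (int docID = it.nextDoc(); docID != DocIdSetIterator.NO_MORE_DOCS; docID = it.nextDoc()) {
  System.out.println("hit docID=" + docID);   // global doc ID in the composite reader
}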