Example 26 with FixedBitSet

Use of org.apache.lucene.util.FixedBitSet in the project lucene-solr by apache.

The class SolrIndexSplitter, method split().

public void split() throws IOException {
    List<LeafReaderContext> leaves = searcher.getRawReader().leaves();
    List<FixedBitSet[]> segmentDocSets = new ArrayList<>(leaves.size());
    log.info("SolrIndexSplitter: partitions=" + numPieces + " segments=" + leaves.size());
    for (LeafReaderContext readerContext : leaves) {
        // make sure we're going in order
        assert readerContext.ordInParent == segmentDocSets.size();
        FixedBitSet[] docSets = split(readerContext);
        segmentDocSets.add(docSets);
    }
    for (int partitionNumber = 0; partitionNumber < numPieces; partitionNumber++) {
        log.info("SolrIndexSplitter: partition #" + partitionNumber + " partitionCount=" + numPieces + (ranges != null ? " range=" + ranges.get(partitionNumber) : ""));
        boolean success = false;
        RefCounted<IndexWriter> iwRef = null;
        IndexWriter iw = null;
        if (cores != null) {
            SolrCore subCore = cores.get(partitionNumber);
            iwRef = subCore.getUpdateHandler().getSolrCoreState().getIndexWriter(subCore);
            iw = iwRef.get();
        } else {
            SolrCore core = searcher.getCore();
            String path = paths.get(partitionNumber);
            iw = SolrIndexWriter.create(core, "SplittingIndexWriter" + partitionNumber + (ranges != null ? " " + ranges.get(partitionNumber) : ""), path, core.getDirectoryFactory(), true, core.getLatestSchema(), core.getSolrConfig().indexConfig, core.getDeletionPolicy(), core.getCodec());
        }
        try {
            // This removes deletions but optimize might still be needed because sub-shards will have the same number of segments as the parent shard.
            for (int segmentNumber = 0; segmentNumber < leaves.size(); segmentNumber++) {
                log.info("SolrIndexSplitter: partition #" + partitionNumber + " partitionCount=" + numPieces + (ranges != null ? " range=" + ranges.get(partitionNumber) : "") + " segment #" + segmentNumber + " segmentCount=" + leaves.size());
                CodecReader subReader = SlowCodecReaderWrapper.wrap(leaves.get(segmentNumber).reader());
                iw.addIndexes(new LiveDocsReader(subReader, segmentDocSets.get(segmentNumber)[partitionNumber]));
            }
            // we commit explicitly instead of sending a CommitUpdateCommand through the processor chain
            // because the sub-shard cores will just ignore such a commit because the update log is not
            // in active state at this time.
            //TODO no commitUpdateCommand
            SolrIndexWriter.setCommitData(iw, -1);
            iw.commit();
            success = true;
        } finally {
            if (iwRef != null) {
                iwRef.decref();
            } else {
                if (success) {
                    iw.close();
                } else {
                    IOUtils.closeWhileHandlingException(iw);
                }
            }
        }
    }
}
Also used : FilterCodecReader(org.apache.lucene.index.FilterCodecReader) CodecReader(org.apache.lucene.index.CodecReader) SolrCore(org.apache.solr.core.SolrCore) ArrayList(java.util.ArrayList) IndexWriter(org.apache.lucene.index.IndexWriter) FixedBitSet(org.apache.lucene.util.FixedBitSet) LeafReaderContext(org.apache.lucene.index.LeafReaderContext)
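
The interesting part of this driver is that each segment contributes one FixedBitSet per target partition, and that bitset is handed to addIndexes through LiveDocsReader, which presents it as the segment's live docs so only the selected documents are copied into the sub-shard writer. Below is a minimal sketch of that idea; the class name is hypothetical (not Solr's actual LiveDocsReader) and it assumes a Lucene 7.x-style FilterCodecReader where the cache-helper methods are abstract, so earlier versions would differ.

import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.FilterCodecReader;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;

// Hypothetical stand-in for SolrIndexSplitter.LiveDocsReader: the partition's
// FixedBitSet is exposed as the reader's live docs, so
// IndexWriter.addIndexes(CodecReader...) copies only documents whose bit is set.
// Assumes a Lucene 7.x FilterCodecReader.
class PartitionDocsReader extends FilterCodecReader {

    private final FixedBitSet partitionDocs;
    private final int numDocs;

    PartitionDocsReader(CodecReader in, FixedBitSet partitionDocs) {
        super(in);
        this.partitionDocs = partitionDocs;
        this.numDocs = partitionDocs.cardinality();
    }

    @Override
    public Bits getLiveDocs() {
        return partitionDocs;
    }

    @Override
    public int numDocs() {
        return numDocs;
    }

    @Override
    public CacheHelper getCoreCacheHelper() {
        return in.getCoreCacheHelper();
    }

    @Override
    public CacheHelper getReaderCacheHelper() {
        // The effective live docs differ from the wrapped reader's,
        // so its reader-level cache key must not be reused.
        return null;
    }
}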

Example 27 with FixedBitSet

Use of org.apache.lucene.util.FixedBitSet in the project lucene-solr by apache.

The class SolrIndexSplitter, method split(LeafReaderContext).

FixedBitSet[] split(LeafReaderContext readerContext) throws IOException {
    LeafReader reader = readerContext.reader();
    FixedBitSet[] docSets = new FixedBitSet[numPieces];
    for (int i = 0; i < docSets.length; i++) {
        docSets[i] = new FixedBitSet(reader.maxDoc());
    }
    Bits liveDocs = reader.getLiveDocs();
    Fields fields = reader.fields();
    Terms terms = fields == null ? null : fields.terms(field.getName());
    TermsEnum termsEnum = terms == null ? null : terms.iterator();
    if (termsEnum == null)
        return docSets;
    BytesRef term = null;
    PostingsEnum postingsEnum = null;
    int[] docsMatchingRanges = null;
    if (ranges != null) {
        // +1 because documents can belong to *zero*, one, several or all ranges in rangesArr
        docsMatchingRanges = new int[rangesArr.length + 1];
    }
    CharsRefBuilder idRef = new CharsRefBuilder();
    for (; ; ) {
        term = termsEnum.next();
        if (term == null)
            break;
        // figure out the hash for the term
        // FUTURE: if conversion to strings costs too much, we could
        // specialize and use the hash function that can work over bytes.
        field.getType().indexedToReadable(term, idRef);
        String idString = idRef.toString();
        if (splitKey != null) {
            // todo have composite routers support these kind of things instead
            String part1 = getRouteKey(idString);
            if (part1 == null)
                continue;
            if (!splitKey.equals(part1)) {
                continue;
            }
        }
        int hash = 0;
        if (hashRouter != null) {
            hash = hashRouter.sliceHash(idString, null, null, null);
        }
        postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
        postingsEnum = BitsFilteredPostingsEnum.wrap(postingsEnum, liveDocs);
        for (; ; ) {
            int doc = postingsEnum.nextDoc();
            if (doc == DocIdSetIterator.NO_MORE_DOCS)
                break;
            if (ranges == null) {
                docSets[currPartition].set(doc);
                currPartition = (currPartition + 1) % numPieces;
            } else {
                int matchingRangesCount = 0;
                for (int i = 0; i < rangesArr.length; i++) {
                    // inner-loop: use array here for extra speed.
                    if (rangesArr[i].includes(hash)) {
                        docSets[i].set(doc);
                        ++matchingRangesCount;
                    }
                }
                docsMatchingRanges[matchingRangesCount]++;
            }
        }
    }
    if (docsMatchingRanges != null) {
        for (int ii = 0; ii < docsMatchingRanges.length; ii++) {
            if (0 == docsMatchingRanges[ii])
                continue;
            switch(ii) {
                case 0:
                    // document loss
                    log.error("Splitting {}: {} documents belong to no shards and will be dropped", reader, docsMatchingRanges[ii]);
                    break;
                case 1:
                    // normal case, each document moves to one of the sub-shards
                    log.info("Splitting {}: {} documents will move into a sub-shard", reader, docsMatchingRanges[ii]);
                    break;
                default:
                    // document duplication
                    log.error("Splitting {}: {} documents will be moved to multiple ({}) sub-shards", reader, docsMatchingRanges[ii], ii);
                    break;
            }
        }
    }
    return docSets;
}
Also used : LeafReader(org.apache.lucene.index.LeafReader) Terms(org.apache.lucene.index.Terms) TermsEnum(org.apache.lucene.index.TermsEnum) Fields(org.apache.lucene.index.Fields) FixedBitSet(org.apache.lucene.util.FixedBitSet) Bits(org.apache.lucene.util.Bits) CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) BitsFilteredPostingsEnum(org.apache.solr.search.BitsFilteredPostingsEnum) PostingsEnum(org.apache.lucene.index.PostingsEnum) BytesRef(org.apache.lucene.util.BytesRef)
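
When hash ranges are supplied, a document's bit may be set in zero, one, or several partition sets, and docsMatchingRanges records how often each case occurred: index 0 means document loss, indexes above 1 mean duplication across sub-shards. The toy example below illustrates that bookkeeping with hypothetical hash values and plain integer ranges instead of Solr's DocRouter ranges.

import org.apache.lucene.util.FixedBitSet;

// Toy illustration (hypothetical hashes and ranges, not Solr's DocRouter.Range) of
// the bookkeeping in split(...): each live doc's bit is set in every partition whose
// range includes its hash, and the histogram index is the number of matching ranges.
public class RangeAssignmentDemo {
    public static void main(String[] args) {
        int maxDoc = 8;
        int[] hashes = {5, 17, 42, 63, 70, 91, 120, 200}; // hypothetical per-doc hashes
        int[][] ranges = {{0, 63}, {64, 127}};             // two hypothetical partitions

        FixedBitSet[] docSets = new FixedBitSet[ranges.length];
        for (int i = 0; i < ranges.length; i++) {
            docSets[i] = new FixedBitSet(maxDoc);
        }
        int[] docsMatchingRanges = new int[ranges.length + 1];

        for (int doc = 0; doc < maxDoc; doc++) {
            int matching = 0;
            for (int i = 0; i < ranges.length; i++) {
                if (hashes[doc] >= ranges[i][0] && hashes[doc] <= ranges[i][1]) {
                    docSets[i].set(doc);
                    matching++;
                }
            }
            docsMatchingRanges[matching]++;
        }

        // Hash 200 falls outside both ranges, so one document would be dropped.
        System.out.println("lost=" + docsMatchingRanges[0]
            + " single=" + docsMatchingRanges[1]
            + " duplicated=" + (maxDoc - docsMatchingRanges[0] - docsMatchingRanges[1]));
    }
}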

Example 28 with FixedBitSet

Use of org.apache.lucene.util.FixedBitSet in the project lucene-solr by apache.

The class FieldCacheImpl, method setDocsWithField().

// null Bits means no docs matched
void setDocsWithField(LeafReader reader, String field, Bits docsWithField, Parser parser) {
    final int maxDoc = reader.maxDoc();
    final Bits bits;
    if (docsWithField == null) {
        bits = new Bits.MatchNoBits(maxDoc);
    } else if (docsWithField instanceof FixedBitSet) {
        final int numSet = ((FixedBitSet) docsWithField).cardinality();
        if (numSet >= maxDoc) {
            // The cardinality of the BitSet is maxDoc if all documents have a value.
            assert numSet == maxDoc;
            bits = new Bits.MatchAllBits(maxDoc);
        } else {
            bits = docsWithField;
        }
    } else {
        bits = docsWithField;
    }
    caches.get(DocsWithFieldCache.class).put(reader, new CacheKey(field, parser), new BitsEntry(bits));
}
Also used : FixedBitSet(org.apache.lucene.util.FixedBitSet) Bits(org.apache.lucene.util.Bits)
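
The point of this helper is normalization: a null set becomes Bits.MatchNoBits, and a FixedBitSet whose cardinality equals maxDoc carries no information beyond maxDoc, so it can be replaced with the cheaper Bits.MatchAllBits. A standalone sketch of just that normalization, using a hypothetical helper method, might look like this:

import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;

// Minimal sketch of the normalization above; normalize(...) is a hypothetical helper.
public class DocsWithFieldNormalization {

    static Bits normalize(Bits docsWithField, int maxDoc) {
        if (docsWithField == null) {
            // no document has a value for the field
            return new Bits.MatchNoBits(maxDoc);
        }
        if (docsWithField instanceof FixedBitSet
                && ((FixedBitSet) docsWithField).cardinality() == maxDoc) {
            // every document has a value, so a constant-true Bits is enough
            return new Bits.MatchAllBits(maxDoc);
        }
        return docsWithField;
    }

    public static void main(String[] args) {
        FixedBitSet all = new FixedBitSet(4);
        all.set(0, 4); // set every bit in [0, 4)
        System.out.println(normalize(all, 4) instanceof Bits.MatchAllBits); // true
        System.out.println(normalize(null, 4) instanceof Bits.MatchNoBits); // true
    }
}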

Example 29 with FixedBitSet

Use of org.apache.lucene.util.FixedBitSet in the project lucene-solr by apache.

The class WithinPrefixTreeQuery, method getDocIdSet().

@Override
protected DocIdSet getDocIdSet(LeafReaderContext context) throws IOException {
    return new VisitorTemplate(context) {

        private FixedBitSet inside;

        private FixedBitSet outside;

        @Override
        protected void start() {
            inside = new FixedBitSet(maxDoc);
            outside = new FixedBitSet(maxDoc);
        }

        @Override
        protected DocIdSet finish() {
            inside.andNot(outside);
            return new BitDocIdSet(inside);
        }

        @Override
        protected CellIterator findSubCellsToVisit(Cell cell) {
            //use buffered query shape instead of orig.  Works with null too.
            return cell.getNextLevelCells(bufferedQueryShape);
        }

        @Override
        protected boolean visitPrefix(Cell cell) throws IOException {
            //cell.relate is based on the bufferedQueryShape; we need to examine what
            // the relation is against the queryShape
            SpatialRelation visitRelation = cell.getShape().relate(queryShape);
            if (cell.getLevel() == detailLevel) {
                collectDocs(visitRelation.intersects() ? inside : outside);
                return false;
            } else if (visitRelation == SpatialRelation.WITHIN) {
                collectDocs(inside);
                return false;
            } else if (visitRelation == SpatialRelation.DISJOINT) {
                collectDocs(outside);
                return false;
            }
            return true;
        }

        @Override
        protected void visitLeaf(Cell cell) throws IOException {
            if (allCellsIntersectQuery(cell))
                collectDocs(inside);
            else
                collectDocs(outside);
        }

        /** Returns true if the provided cell, and all its sub-cells down to
         * detailLevel, all intersect the queryShape.
         */
        private boolean allCellsIntersectQuery(Cell cell) {
            SpatialRelation relate = cell.getShape().relate(queryShape);
            if (cell.getLevel() == detailLevel)
                return relate.intersects();
            if (relate == SpatialRelation.WITHIN)
                return true;
            if (relate == SpatialRelation.DISJOINT)
                return false;
            // Note: Generating all these cells just to determine intersection is not ideal.
            // The real solution is LUCENE-4869.
            CellIterator subCells = cell.getNextLevelCells(null);
            while (subCells.hasNext()) {
                Cell subCell = subCells.next();
                if (!allCellsIntersectQuery(subCell)) // recursion
                    return false;
            }
            return true;
        }

        @Override
        protected void visitScanned(Cell cell) throws IOException {
            //collects as we want, even if not a leaf
            visitLeaf(cell);
        //        if (cell.isLeaf()) {
        //          visitLeaf(cell);
        //        } else {
        //          visitPrefix(cell);
        //        }
        }
    }.getDocIdSet();
}
Also used : BitDocIdSet(org.apache.lucene.util.BitDocIdSet) FixedBitSet(org.apache.lucene.util.FixedBitSet) DocIdSet(org.apache.lucene.search.DocIdSet) BitDocIdSet(org.apache.lucene.util.BitDocIdSet) CellIterator(org.apache.lucene.spatial.prefix.tree.CellIterator) IOException(java.io.IOException) Cell(org.apache.lucene.spatial.prefix.tree.Cell) SpatialRelation(org.locationtech.spatial4j.shape.SpatialRelation)
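
The two bitsets divide the collected documents into those seen only inside the query shape and those seen outside it; finish() then keeps a document only if it was never collected into outside, which is exactly what inside.andNot(outside) computes before the result is wrapped in a BitDocIdSet. A small self-contained sketch of that final step, with hypothetical doc ids:

import java.io.IOException;

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.FixedBitSet;

// Sketch of the finish() step above: subtract the "outside" docs from the "inside"
// docs and expose the survivors as a DocIdSet. The doc ids are hypothetical.
public class InsideOutsideDemo {
    public static void main(String[] args) throws IOException {
        FixedBitSet inside = new FixedBitSet(8);
        FixedBitSet outside = new FixedBitSet(8);

        inside.set(1);
        inside.set(3);
        inside.set(5);
        outside.set(3); // doc 3 also touched a cell outside the query shape

        inside.andNot(outside); // only docs never seen outside remain

        DocIdSetIterator it = new BitDocIdSet(inside).iterator();
        for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
            System.out.println("match doc=" + doc); // prints 1 and 5
        }
    }
}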

Example 30 with FixedBitSet

Use of org.apache.lucene.util.FixedBitSet in the project lucene-solr by apache.

The class TestGeo3DPoint, method verify().

private static void verify(double[] lats, double[] lons) throws Exception {
    IndexWriterConfig iwc = newIndexWriterConfig();
    GeoPoint[] points = new GeoPoint[lats.length];
    GeoPoint[] unquantizedPoints = new GeoPoint[lats.length];
    // Pre-quantize all lat/lons:
    for (int i = 0; i < lats.length; i++) {
        if (Double.isNaN(lats[i]) == false) {
            //System.out.println("lats[" + i + "] = " + lats[i]);
            unquantizedPoints[i] = new GeoPoint(PlanetModel.WGS84, toRadians(lats[i]), toRadians(lons[i]));
            points[i] = quantize(unquantizedPoints[i]);
        }
    }
    // Else we can get O(N^2) merging:
    int mbd = iwc.getMaxBufferedDocs();
    if (mbd != -1 && mbd < points.length / 100) {
        iwc.setMaxBufferedDocs(points.length / 100);
    }
    iwc.setCodec(getCodec());
    Directory dir;
    if (points.length > 100000) {
        dir = newFSDirectory(createTempDir("TestBKDTree"));
    } else {
        dir = getDirectory();
    }
    Set<Integer> deleted = new HashSet<>();
    // RandomIndexWriter is too slow here:
    IndexWriter w = new IndexWriter(dir, iwc);
    for (int id = 0; id < points.length; id++) {
        Document doc = new Document();
        doc.add(newStringField("id", "" + id, Field.Store.NO));
        doc.add(new NumericDocValuesField("id", id));
        GeoPoint point = points[id];
        if (point != null) {
            doc.add(new Geo3DPoint("point", point.x, point.y, point.z));
        }
        w.addDocument(doc);
        if (id > 0 && random().nextInt(100) == 42) {
            int idToDelete = random().nextInt(id);
            w.deleteDocuments(new Term("id", "" + idToDelete));
            deleted.add(idToDelete);
            if (VERBOSE) {
                System.err.println("  delete id=" + idToDelete);
            }
        }
    }
    if (random().nextBoolean()) {
        w.forceMerge(1);
    }
    final IndexReader r = DirectoryReader.open(w);
    if (VERBOSE) {
        System.out.println("TEST: using reader " + r);
    }
    w.close();
    // We can't wrap with "exotic" readers because the geo3d query must see the Geo3DDVFormat:
    IndexSearcher s = newSearcher(r, false);
    final int iters = atLeast(100);
    for (int iter = 0; iter < iters; iter++) {
        /*
      GeoShape shape = randomShape();

      if (VERBOSE) {
        System.err.println("\nTEST: iter=" + iter + " shape="+shape);
      }
      */
        // Geo3DPoint.newShapeQuery("point", shape);
        Query query = random3DQuery("point");
        if (VERBOSE) {
            System.err.println("  using query: " + query);
        }
        final FixedBitSet hits = new FixedBitSet(r.maxDoc());
        s.search(query, new SimpleCollector() {

            private int docBase;

            @Override
            public boolean needsScores() {
                return false;
            }

            @Override
            protected void doSetNextReader(LeafReaderContext context) throws IOException {
                docBase = context.docBase;
            }

            @Override
            public void collect(int doc) {
                hits.set(docBase + doc);
            }
        });
        if (VERBOSE) {
            System.err.println("  hitCount: " + hits.cardinality());
        }
        NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
        for (int docID = 0; docID < r.maxDoc(); docID++) {
            assertEquals(docID, docIDToID.nextDoc());
            int id = (int) docIDToID.longValue();
            GeoPoint point = points[id];
            GeoPoint unquantizedPoint = unquantizedPoints[id];
            if (point != null && unquantizedPoint != null) {
                GeoShape shape = ((PointInGeo3DShapeQuery) query).getShape();
                XYZBounds bounds = new XYZBounds();
                shape.getBounds(bounds);
                XYZSolid solid = XYZSolidFactory.makeXYZSolid(PlanetModel.WGS84, bounds.getMinimumX(), bounds.getMaximumX(), bounds.getMinimumY(), bounds.getMaximumY(), bounds.getMinimumZ(), bounds.getMaximumZ());
                boolean expected = ((deleted.contains(id) == false) && shape.isWithin(point));
                if (hits.get(docID) != expected) {
                    StringBuilder b = new StringBuilder();
                    if (expected) {
                        b.append("FAIL: id=" + id + " should have matched but did not\n");
                    } else {
                        b.append("FAIL: id=" + id + " should not have matched but did\n");
                    }
                    b.append("  shape=" + shape + "\n");
                    b.append("  bounds=" + bounds + "\n");
                    b.append("  world bounds=(" + " minX=" + PlanetModel.WGS84.getMinimumXValue() + " maxX=" + PlanetModel.WGS84.getMaximumXValue() + " minY=" + PlanetModel.WGS84.getMinimumYValue() + " maxY=" + PlanetModel.WGS84.getMaximumYValue() + " minZ=" + PlanetModel.WGS84.getMinimumZValue() + " maxZ=" + PlanetModel.WGS84.getMaximumZValue() + "\n");
                    b.append("  quantized point=" + point + " within shape? " + shape.isWithin(point) + " within bounds? " + solid.isWithin(point) + "\n");
                    b.append("  unquantized point=" + unquantizedPoint + " within shape? " + shape.isWithin(unquantizedPoint) + " within bounds? " + solid.isWithin(unquantizedPoint) + "\n");
                    b.append("  docID=" + docID + " deleted?=" + deleted.contains(id) + "\n");
                    b.append("  query=" + query + "\n");
                    b.append("  explanation:\n    " + explain("point", shape, point, unquantizedPoint, r, docID).replace("\n", "\n  "));
                    fail(b.toString());
                }
            } else {
                assertFalse(hits.get(docID));
            }
        }
    }
    IOUtils.close(r, dir);
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) NumericDocValues(org.apache.lucene.index.NumericDocValues) Query(org.apache.lucene.search.Query) GeoShape(org.apache.lucene.spatial3d.geom.GeoShape) Document(org.apache.lucene.document.Document) GeoPoint(org.apache.lucene.spatial3d.geom.GeoPoint) SimpleCollector(org.apache.lucene.search.SimpleCollector) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) FixedBitSet(org.apache.lucene.util.FixedBitSet) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) GeoPoint(org.apache.lucene.spatial3d.geom.GeoPoint) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) XYZSolid(org.apache.lucene.spatial3d.geom.XYZSolid) XYZBounds(org.apache.lucene.spatial3d.geom.XYZBounds) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)
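
Here the FixedBitSet acts as an index-wide hit set: the collector translates each segment-local doc id to a global id by adding the segment's docBase, and the verification loop later compares hits.get(docID) against the expected answer for every document in the index. A minimal, collector-free sketch of that bookkeeping, with hypothetical segments and doc ids:

import org.apache.lucene.util.FixedBitSet;

// Sketch of the hit bookkeeping above: record each hit at its index-wide id
// (segment docBase + segment-local doc), then verify membership per docID.
// The segment layout and hits are hypothetical.
public class GlobalHitSetDemo {
    public static void main(String[] args) {
        int maxDoc = 10;
        FixedBitSet hits = new FixedBitSet(maxDoc);

        // Two hypothetical segments: docBase 0 (4 docs) and docBase 4 (6 docs).
        int[] segmentDocBases = {0, 4};
        int[][] segmentLocalHits = {{1, 3}, {0, 5}};

        for (int seg = 0; seg < segmentDocBases.length; seg++) {
            for (int localDoc : segmentLocalHits[seg]) {
                hits.set(segmentDocBases[seg] + localDoc);
            }
        }

        // Index-wide verification pass, mirroring the for (docID ...) loop above.
        for (int docID = 0; docID < maxDoc; docID++) {
            boolean expected = docID == 1 || docID == 3 || docID == 4 || docID == 9;
            if (hits.get(docID) != expected) {
                throw new AssertionError("mismatch at docID=" + docID);
            }
        }
        System.out.println("hitCount=" + hits.cardinality()); // 4
    }
}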

Aggregations

FixedBitSet (org.apache.lucene.util.FixedBitSet): 162
Term (org.apache.lucene.index.Term): 27
LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 26
Directory (org.apache.lucene.store.Directory): 25
BytesRef (org.apache.lucene.util.BytesRef): 22
IOException (java.io.IOException): 19
Document (org.apache.lucene.document.Document): 17
ArrayList (java.util.ArrayList): 15
Query (org.apache.lucene.search.Query): 15
NumericDocValues (org.apache.lucene.index.NumericDocValues): 14
BitDocIdSet (org.apache.lucene.util.BitDocIdSet): 13
Bits (org.apache.lucene.util.Bits): 13
LeafReader (org.apache.lucene.index.LeafReader): 12
IndexSearcher (org.apache.lucene.search.IndexSearcher): 12
TermQuery (org.apache.lucene.search.TermQuery): 12
IndexReader (org.apache.lucene.index.IndexReader): 11
HashSet (java.util.HashSet): 10
NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField): 10
DocIterator (org.apache.solr.search.DocIterator): 10
MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 9