Search in sources :

Example 41 with FixedBitSet

use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.

the class IndexedDISI method writeBitSet.

/**
 * Consumes {@code it} to exhaustion and hands its doc IDs to {@code flush} in groups that share
 * the same upper bits ({@code doc >>> 16}). Within a group only the low 16 bits of each doc ID are
 * recorded in a 65536-bit buffer. After the iterator is exhausted, one extra group holding only
 * {@code NO_MORE_DOCS} is flushed.
 *
 * @param it  iterator supplying the doc IDs; it is advanced until it returns NO_MORE_DOCS
 * @param out output passed through to {@code flush}
 * @throws IOException propagated from the iterator or from {@code flush}
 */
static void writeBitSet(DocIdSetIterator it, IndexOutput out) throws IOException {
    final FixedBitSet blockBits = new FixedBitSet(1 << 16);
    // -1 means no doc has been seen yet
    int currentBlock = -1;
    // number of docs recorded in blockBits since the last flush
    int docsInBlock = 0;
    int doc;
    while ((doc = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        final int block = doc >>> 16;
        final boolean crossedIntoNewBlock = currentBlock != -1 && currentBlock != block;
        if (crossedIntoNewBlock) {
            // The previous block is complete: emit it and start over with an empty buffer.
            flush(currentBlock, blockBits, docsInBlock, out);
            blockBits.clear(0, blockBits.length());
            docsInBlock = 0;
        }
        currentBlock = block;
        blockBits.set(doc & 0xFFFF);
        docsInBlock++;
    }
    // Emit whatever is left over from the last block that received docs.
    if (docsInBlock > 0) {
        flush(currentBlock, blockBits, docsInBlock, out);
        blockBits.clear(0, blockBits.length());
    }
    // NO_MORE_DOCS is stored explicitly
    blockBits.set(DocIdSetIterator.NO_MORE_DOCS & 0xFFFF);
    flush(DocIdSetIterator.NO_MORE_DOCS >>> 16, blockBits, 1, out);
}
Also used : FixedBitSet(org.apache.lucene.util.FixedBitSet)

Example 42 with FixedBitSet

use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.

the class BKDWriter method build.

/* Recursively reorders the provided reader and writes the bkd-tree on the fly; this method is used
   * when we are writing a new segment directly from IndexWriter's indexing buffer (MutablePointsReader).
   *
   * Nodes with nodeID >= leafNodeOffset are treated as leaves: the points in [from, to) are sorted on
   * one dimension and written as a leaf block to out, and the block's start file pointer is recorded
   * in leafBlockFPs. All other nodes are inner nodes: the range is partitioned around its midpoint on
   * a split dimension, the split dimension and value are recorded in splitPackedValues, and the two
   * halves are built recursively as nodes nodeID * 2 and nodeID * 2 + 1.
   *
   * minPackedValue / maxPackedValue are the bounds passed down for this node; parentSplits is
   * incremented for the split dimension while recursing and restored afterwards; spareDocIds is a
   * caller-supplied scratch array used to collect the doc IDs of a leaf. */
private void build(int nodeID, int leafNodeOffset, MutablePointValues reader, int from, int to, IndexOutput out, byte[] minPackedValue, byte[] maxPackedValue, int[] parentSplits, byte[] splitPackedValues, long[] leafBlockFPs, int[] spareDocIds) throws IOException {
    if (nodeID >= leafNodeOffset) {
        // leaf node
        final int count = to - from;
        assert count <= maxPointsInLeafNode;
        // Compute common prefixes
        // Start from the maximum (bytesPerDim) and shrink each dimension's prefix length whenever a
        // value differs from the first value of the range, which is held in scratchBytesRef1.
        Arrays.fill(commonPrefixLengths, bytesPerDim);
        reader.getValue(from, scratchBytesRef1);
        for (int i = from + 1; i < to; ++i) {
            reader.getValue(i, scratchBytesRef2);
            for (int dim = 0; dim < numDims; dim++) {
                final int offset = dim * bytesPerDim;
                // Only the bytes still believed to be common need to be compared.
                for (int j = 0; j < commonPrefixLengths[dim]; j++) {
                    if (scratchBytesRef1.bytes[scratchBytesRef1.offset + offset + j] != scratchBytesRef2.bytes[scratchBytesRef2.offset + offset + j]) {
                        commonPrefixLengths[dim] = j;
                        break;
                    }
                }
            }
        }
        // Find the dimension that has the least number of unique bytes at commonPrefixLengths[dim]
        // Dimensions whose values are entirely identical (prefix == bytesPerDim) get no bit set and
        // are skipped below.
        FixedBitSet[] usedBytes = new FixedBitSet[numDims];
        for (int dim = 0; dim < numDims; ++dim) {
            if (commonPrefixLengths[dim] < bytesPerDim) {
                usedBytes[dim] = new FixedBitSet(256);
            }
        }
        // NOTE(review): this loop starts at from + 1, so the byte of the first point in the range is
        // not recorded; the cardinality is only used to choose sortedDim below -- confirm intended.
        for (int i = from + 1; i < to; ++i) {
            for (int dim = 0; dim < numDims; dim++) {
                if (usedBytes[dim] != null) {
                    // First byte after the common prefix of this dimension
                    byte b = reader.getByteAt(i, dim * bytesPerDim + commonPrefixLengths[dim]);
                    usedBytes[dim].set(Byte.toUnsignedInt(b));
                }
            }
        }
        // Pick the dimension with the smallest cardinality; stays at 0 when no dimension has a bit set.
        int sortedDim = 0;
        int sortedDimCardinality = Integer.MAX_VALUE;
        for (int dim = 0; dim < numDims; ++dim) {
            if (usedBytes[dim] != null) {
                final int cardinality = usedBytes[dim].cardinality();
                if (cardinality < sortedDimCardinality) {
                    sortedDim = dim;
                    sortedDimCardinality = cardinality;
                }
            }
        }
        // sort by sortedDim
        MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths, reader, from, to, scratchBytesRef1, scratchBytesRef2);
        // Save the block file pointer:
        leafBlockFPs[nodeID - leafNodeOffset] = out.getFilePointer();
        // The leaf block is assembled in scratchOut first, which must be empty at this point.
        assert scratchOut.getPosition() == 0;
        // Write doc IDs
        // docIDs aliases spareDocIds; assumes spareDocIds.length >= count -- TODO confirm with caller
        int[] docIDs = spareDocIds;
        for (int i = from; i < to; ++i) {
            docIDs[i - from] = reader.getDocID(i);
        }
        //System.out.println("writeLeafBlock pos=" + out.getFilePointer());
        writeLeafBlockDocs(scratchOut, docIDs, 0, count);
        // Write the common prefixes:
        // Re-read the first value (the sort above may have moved it) and copy it into scratch1.
        reader.getValue(from, scratchBytesRef1);
        System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset, scratch1, 0, packedBytesLength);
        writeCommonPrefixes(scratchOut, commonPrefixLengths, scratch1);
        // Write the full values:
        // Every call refills and returns the same scratchBytesRef1 instance, so a returned value is
        // only valid until the next call to apply.
        IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {

            @Override
            public BytesRef apply(int i) {
                reader.getValue(from + i, scratchBytesRef1);
                return scratchBytesRef1;
            }
        };
        assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues, docIDs, 0);
        writeLeafBlockPackedValues(scratchOut, commonPrefixLengths, count, sortedDim, packedValues);
        // Copy the assembled block to the real output and empty the scratch buffer for the next leaf.
        out.writeBytes(scratchOut.getBytes(), 0, scratchOut.getPosition());
        scratchOut.reset();
    } else {
        // inner node
        // compute the split dimension and partition around it
        final int splitDim = split(minPackedValue, maxPackedValue, parentSplits);
        // Midpoint of [from, to), rounded up; the unsigned shift keeps the result correct even if
        // the int sum overflows.
        final int mid = (from + to + 1) >>> 1;
        // Number of leading bytes shared by the min and max bounds in the split dimension
        int commonPrefixLen = bytesPerDim;
        for (int i = 0; i < bytesPerDim; ++i) {
            if (minPackedValue[splitDim * bytesPerDim + i] != maxPackedValue[splitDim * bytesPerDim + i]) {
                commonPrefixLen = i;
                break;
            }
        }
        // presumably reorders [from, to) so that the point at mid is the split point -- verify
        // against MutablePointsReaderUtils.partition
        MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLen, reader, from, to, mid, scratchBytesRef1, scratchBytesRef2);
        // set the split value
        // Each node owns (1 + bytesPerDim) bytes in splitPackedValues: one byte for the split
        // dimension followed by the split dimension's bytes of the value at mid.
        final int address = nodeID * (1 + bytesPerDim);
        splitPackedValues[address] = (byte) splitDim;
        reader.getValue(mid, scratchBytesRef1);
        System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim, splitPackedValues, address + 1, bytesPerDim);
        // Child bounds: copies of this node's bounds with the split dimension replaced by the split value.
        byte[] minSplitPackedValue = Arrays.copyOf(minPackedValue, packedBytesLength);
        byte[] maxSplitPackedValue = Arrays.copyOf(maxPackedValue, packedBytesLength);
        System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim, minSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
        System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim, maxSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
        // recurse
        // The counter for splitDim is bumped only for the duration of the two child builds.
        parentSplits[splitDim]++;
        // Left child covers [from, mid) with the max bound clamped to the split value
        build(nodeID * 2, leafNodeOffset, reader, from, mid, out, minPackedValue, maxSplitPackedValue, parentSplits, splitPackedValues, leafBlockFPs, spareDocIds);
        // Right child covers [mid, to) with the min bound raised to the split value
        build(nodeID * 2 + 1, leafNodeOffset, reader, mid, to, out, minSplitPackedValue, maxPackedValue, parentSplits, splitPackedValues, leafBlockFPs, spareDocIds);
        parentSplits[splitDim]--;
    }
}
Also used : FixedBitSet(org.apache.lucene.util.FixedBitSet) IntFunction(java.util.function.IntFunction) BytesRef(org.apache.lucene.util.BytesRef)

Example 43 with FixedBitSet

use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.

the class TestIndexSorting method testRandom1.

/**
 * Randomized test for an index sorted on the numeric doc-values field "foo".
 *
 * <p>Adds at least 1000 documents, randomly interleaving reader opens, {@code forceMerge(2)} calls
 * and deletions by id. It then verifies that every segment reports the configured index sort and
 * yields non-decreasing "foo" values in doc order, and that each id is found exactly once (with
 * matching doc value and stored field) unless it was deleted.
 */
public void testRandom1() throws IOException {
    // Not read afterwards; the draw is kept so the sequence of random() calls is unchanged.
    boolean withDeletes = random().nextBoolean();
    Directory directory = newDirectory();
    IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random()));
    Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG));
    config.setIndexSort(indexSort);
    IndexWriter writer = new IndexWriter(directory, config);
    final int numDocs = atLeast(1000);
    // Bit n is set once the document with id n has been deleted.
    final FixedBitSet deleted = new FixedBitSet(numDocs);
    for (int id = 0; id < numDocs; ++id) {
        Document newDoc = new Document();
        newDoc.add(new NumericDocValuesField("foo", random().nextInt(20)));
        newDoc.add(new StringField("id", Integer.toString(id), Store.YES));
        newDoc.add(new NumericDocValuesField("id", id));
        writer.addDocument(newDoc);

        // At most one of the following side actions is taken after each added document.
        if (random().nextInt(5) == 0) {
            writer.getReader().close();
        } else if (random().nextInt(30) == 0) {
            writer.forceMerge(2);
        } else if (random().nextInt(4) == 0) {
            final int victim = TestUtil.nextInt(random(), 0, id);
            deleted.set(victim);
            writer.deleteDocuments(new Term("id", Integer.toString(victim)));
        }
    }

    // Check that segments are sorted
    DirectoryReader reader = writer.getReader();
    for (LeafReaderContext leafContext : reader.leaves()) {
        final SegmentReader segment = (SegmentReader) leafContext.reader();
        SegmentInfo segmentInfo = segment.getSegmentInfo().info;
        switch (segmentInfo.getDiagnostics().get(IndexWriter.SOURCE)) {
            case IndexWriter.SOURCE_FLUSH:
            case IndexWriter.SOURCE_MERGE: {
                assertEquals(indexSort, segmentInfo.getIndexSort());
                final NumericDocValues fooValues = segment.getNumericDocValues("foo");
                final int maxDoc = segment.maxDoc();
                long lastSeen = Long.MIN_VALUE;
                int docID = 0;
                while (docID < maxDoc) {
                    // Every doc in the segment must have a value, in doc order.
                    assertEquals(docID, fooValues.nextDoc());
                    final long current = fooValues.longValue();
                    assertTrue(current >= lastSeen);
                    lastSeen = current;
                    ++docID;
                }
                break;
            }
            default:
                fail();
        }
    }

    // Now check that the index is consistent
    IndexSearcher searcher = newSearcher(reader);
    for (int id = 0; id < numDocs; ++id) {
        final String idString = Integer.toString(id);
        final TopDocs hits = searcher.search(new TermQuery(new Term("id", idString)), 1);
        if (deleted.get(id)) {
            assertEquals(0, hits.totalHits);
            continue;
        }
        assertEquals(1, hits.totalHits);
        // A fresh iterator is obtained for every lookup before advancing to the hit.
        NumericDocValues idValues = MultiDocValues.getNumericValues(reader, "id");
        final int hitDoc = hits.scoreDocs[0].doc;
        assertEquals(hitDoc, idValues.advance(hitDoc));
        assertEquals(id, idValues.longValue());
        Document stored = reader.document(hitDoc);
        assertEquals(idString, stored.get("id"));
    }

    reader.close();
    writer.close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) SortField(org.apache.lucene.search.SortField) SortedSetSortField(org.apache.lucene.search.SortedSetSortField) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) Document(org.apache.lucene.document.Document) BinaryPoint(org.apache.lucene.document.BinaryPoint) IntPoint(org.apache.lucene.document.IntPoint) TopDocs(org.apache.lucene.search.TopDocs) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) FixedBitSet(org.apache.lucene.util.FixedBitSet) StringField(org.apache.lucene.document.StringField) Sort(org.apache.lucene.search.Sort) Directory(org.apache.lucene.store.Directory)

Example 44 with FixedBitSet

use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.

the class TestIndexSorting method testMultiValuedRandom1.

/**
 * Randomized test for an index sorted on the multi-valued numeric doc-values field "foo".
 *
 * <p>Adds at least 1000 documents, each with between 0 and 9 "foo" values, randomly interleaving
 * reader opens, {@code forceMerge(2)} calls and deletions by id. It then verifies that each id is
 * found exactly once (with matching doc value and stored field) unless it was deleted.
 */
public void testMultiValuedRandom1() throws IOException {
    // Not read afterwards; the draw is kept so the sequence of random() calls is unchanged.
    boolean withDeletes = random().nextBoolean();
    Directory directory = newDirectory();
    IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random()));
    Sort indexSort = new Sort(new SortedNumericSortField("foo", SortField.Type.LONG));
    config.setIndexSort(indexSort);
    IndexWriter writer = new IndexWriter(directory, config);
    final int numDocs = atLeast(1000);
    // Bit n is set once the document with id n has been deleted.
    final FixedBitSet deleted = new FixedBitSet(numDocs);
    for (int id = 0; id < numDocs; ++id) {
        Document newDoc = new Document();
        final int valueCount = random().nextInt(10);
        int added = 0;
        while (added < valueCount) {
            newDoc.add(new SortedNumericDocValuesField("foo", random().nextInt(2000)));
            added++;
        }
        newDoc.add(new StringField("id", Integer.toString(id), Store.YES));
        newDoc.add(new NumericDocValuesField("id", id));
        writer.addDocument(newDoc);

        // At most one of the following side actions is taken after each added document.
        if (random().nextInt(5) == 0) {
            writer.getReader().close();
        } else if (random().nextInt(30) == 0) {
            writer.forceMerge(2);
        } else if (random().nextInt(4) == 0) {
            final int victim = TestUtil.nextInt(random(), 0, id);
            deleted.set(victim);
            writer.deleteDocuments(new Term("id", Integer.toString(victim)));
        }
    }

    DirectoryReader reader = writer.getReader();
    // Now check that the index is consistent
    IndexSearcher searcher = newSearcher(reader);
    for (int id = 0; id < numDocs; ++id) {
        final String idString = Integer.toString(id);
        final TopDocs hits = searcher.search(new TermQuery(new Term("id", idString)), 1);
        if (deleted.get(id)) {
            assertEquals(0, hits.totalHits);
            continue;
        }
        assertEquals(1, hits.totalHits);
        // A fresh iterator is obtained for every lookup before advancing to the hit.
        NumericDocValues idValues = MultiDocValues.getNumericValues(reader, "id");
        final int hitDoc = hits.scoreDocs[0].doc;
        assertEquals(hitDoc, idValues.advance(hitDoc));
        assertEquals(id, idValues.longValue());
        Document stored = reader.document(hitDoc);
        assertEquals(idString, stored.get("id"));
    }

    reader.close();
    writer.close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) Document(org.apache.lucene.document.Document) BinaryPoint(org.apache.lucene.document.BinaryPoint) IntPoint(org.apache.lucene.document.IntPoint) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) TopDocs(org.apache.lucene.search.TopDocs) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) FixedBitSet(org.apache.lucene.util.FixedBitSet) StringField(org.apache.lucene.document.StringField) Sort(org.apache.lucene.search.Sort) Directory(org.apache.lucene.store.Directory)

Example 45 with FixedBitSet

use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.

the class SortingLeafReader method getBinaryDocValues.

/**
 * Returns the binary doc values of {@code field} addressed by the new (sorted) doc IDs, or
 * {@code null} when the wrapped reader has no binary doc values for the field.
 *
 * <p>On the first request for a field, all values are read from the wrapped reader, stored at
 * their remapped doc IDs, and cached in {@code cachedBinaryDVs}; later requests wrap the cached
 * arrays in a new {@code SortingBinaryDocValues}.
 */
@Override
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
    final BinaryDocValues source = in.getBinaryDocValues(field);
    if (source == null) {
        return null;
    }
    CachedBinaryDVs cached;
    // Lookup and population both happen while holding the cache's monitor.
    synchronized (cachedBinaryDVs) {
        cached = cachedBinaryDVs.get(field);
        if (cached == null) {
            final int docCount = maxDoc();
            final FixedBitSet hasValue = new FixedBitSet(docCount);
            final BytesRef[] remapped = new BytesRef[docCount];
            for (int oldDocID = source.nextDoc(); oldDocID != NO_MORE_DOCS; oldDocID = source.nextDoc()) {
                final int sortedDocID = docMap.oldToNew(oldDocID);
                hasValue.set(sortedDocID);
                // Stored as a deep copy; presumably the iterator may reuse the returned BytesRef -- verify.
                remapped[sortedDocID] = BytesRef.deepCopyOf(source.binaryValue());
            }
            cached = new CachedBinaryDVs(remapped, hasValue);
            cachedBinaryDVs.put(field, cached);
        }
    }
    return new SortingBinaryDocValues(cached);
}
Also used : FixedBitSet(org.apache.lucene.util.FixedBitSet) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

FixedBitSet (org.apache.lucene.util.FixedBitSet)162 Term (org.apache.lucene.index.Term)27 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)26 Directory (org.apache.lucene.store.Directory)25 BytesRef (org.apache.lucene.util.BytesRef)22 IOException (java.io.IOException)19 Document (org.apache.lucene.document.Document)17 ArrayList (java.util.ArrayList)15 Query (org.apache.lucene.search.Query)15 NumericDocValues (org.apache.lucene.index.NumericDocValues)14 BitDocIdSet (org.apache.lucene.util.BitDocIdSet)13 Bits (org.apache.lucene.util.Bits)13 LeafReader (org.apache.lucene.index.LeafReader)12 IndexSearcher (org.apache.lucene.search.IndexSearcher)12 TermQuery (org.apache.lucene.search.TermQuery)12 IndexReader (org.apache.lucene.index.IndexReader)11 HashSet (java.util.HashSet)10 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)10 DocIterator (org.apache.solr.search.DocIterator)10 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)9