Search in sources :

Example 56 with FixedBitSet

use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.

the class TestSegmentMerger method testBuildDocMap.

/**
 * Verifies that the doc map returned by {@code MergeState.removeDeletes} is compact:
 * every live document is mapped to its original id minus the number of deleted
 * (non-live) documents that precede it.
 */
public void testBuildDocMap() {
    final int docCount = TestUtil.nextInt(random(), 1, 128);
    final int liveCount = TestUtil.nextInt(random(), 0, docCount);
    final FixedBitSet live = new FixedBitSet(docCount);

    // Mark exactly liveCount distinct, randomly chosen documents as live.
    for (int remaining = liveCount; remaining > 0; --remaining) {
        int candidate;
        do {
            candidate = random().nextInt(docCount);
        } while (live.get(candidate));
        live.set(candidate);
    }

    final PackedLongValues remapped = MergeState.removeDeletes(docCount, live);

    // Walk all documents, tracking how many deleted ones have been passed so far.
    int deletedSoFar = 0;
    for (int doc = 0; doc < docCount; doc++) {
        if (live.get(doc)) {
            assertEquals(doc - deletedSoFar, remapped.get(doc));
        } else {
            deletedSoFar++;
        }
    }
}
Also used : PackedLongValues(org.apache.lucene.util.packed.PackedLongValues) FixedBitSet(org.apache.lucene.util.FixedBitSet)

Example 57 with FixedBitSet

use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.

the class SimpleTextBKDWriter method build.

/**
 * Recursively reorders the provided reader and writes the bkd-tree on the fly.
 *
 * <p>When {@code nodeID >= leafNodeOffset} the range {@code [from, to)} is written as one leaf
 * block: per-dimension common prefixes are computed, the points are sorted by the dimension
 * with the fewest distinct bytes right after its common prefix, and the doc IDs followed by
 * the packed values are written to {@code out}. Otherwise the range is partitioned around
 * its midpoint along a split dimension, the split is recorded in {@code splitPackedValues},
 * and the two halves are built as children {@code nodeID * 2} and {@code nodeID * 2 + 1}.
 *
 * @param nodeID            id of the node being built; its children are {@code 2 * nodeID} and {@code 2 * nodeID + 1}
 * @param leafNodeOffset    node ids at or above this value are treated as leaves; also the base
 *                          subtracted from {@code nodeID} to index {@code leafBlockFPs}
 * @param reader            the points to index; reordered in place by the sort/partition helpers
 * @param from              inclusive start of the range of points handled by this node
 * @param to                exclusive end of the range of points handled by this node
 * @param out               output the leaf blocks are written to
 * @param minPackedValue    lower bound passed to {@code split} and to the ordering/bounds assertion
 *                          (presumably the per-dimension minimum of the range; confirm with caller)
 * @param maxPackedValue    upper bound, used like {@code minPackedValue}
 * @param splitPackedValues receives, at offset {@code nodeID * (1 + bytesPerDim)}, one byte holding the
 *                          split dimension followed by {@code bytesPerDim} bytes of split value
 * @param leafBlockFPs      receives the file pointer of each leaf block at index {@code nodeID - leafNodeOffset}
 * @param spareDocIds       scratch array reused for the doc IDs of each leaf; must hold at least
 *                          {@code to - from} entries for any leaf
 * @throws IOException propagated from the output and the leaf-writing helpers
 */
private void build(int nodeID, int leafNodeOffset, MutablePointValues reader, int from, int to, IndexOutput out, byte[] minPackedValue, byte[] maxPackedValue, byte[] splitPackedValues, long[] leafBlockFPs, int[] spareDocIds) throws IOException {
    if (nodeID >= leafNodeOffset) {
        // leaf node
        final int count = to - from;
        assert count <= maxPointsInLeafNode;
        // Compute common prefixes
        // Start by assuming every dimension is fully shared, then shrink each length
        // at the first byte where a value differs from the first value of the range.
        Arrays.fill(commonPrefixLengths, bytesPerDim);
        reader.getValue(from, scratchBytesRef1);
        for (int i = from + 1; i < to; ++i) {
            reader.getValue(i, scratchBytesRef2);
            for (int dim = 0; dim < numDims; dim++) {
                final int offset = dim * bytesPerDim;
                for (int j = 0; j < commonPrefixLengths[dim]; j++) {
                    if (scratchBytesRef1.bytes[scratchBytesRef1.offset + offset + j] != scratchBytesRef2.bytes[scratchBytesRef2.offset + offset + j]) {
                        commonPrefixLengths[dim] = j;
                        break;
                    }
                }
            }
        }
        // Find the dimension that has the least number of unique bytes at commonPrefixLengths[dim]
        // Only dimensions that are not fully shared get a bit set (one bit per unsigned byte value);
        // fully shared dimensions stay null and are skipped below.
        FixedBitSet[] usedBytes = new FixedBitSet[numDims];
        for (int dim = 0; dim < numDims; ++dim) {
            if (commonPrefixLengths[dim] < bytesPerDim) {
                usedBytes[dim] = new FixedBitSet(256);
            }
        }
        // NOTE(review): this loop starts at from + 1, so the byte of the first point is not
        // recorded. It only influences which dimension is chosen for sorting; confirm intent.
        for (int i = from + 1; i < to; ++i) {
            for (int dim = 0; dim < numDims; dim++) {
                if (usedBytes[dim] != null) {
                    byte b = reader.getByteAt(i, dim * bytesPerDim + commonPrefixLengths[dim]);
                    usedBytes[dim].set(Byte.toUnsignedInt(b));
                }
            }
        }
        // If every dimension is fully shared, sortedDim stays at its default of 0.
        int sortedDim = 0;
        int sortedDimCardinality = Integer.MAX_VALUE;
        for (int dim = 0; dim < numDims; ++dim) {
            if (usedBytes[dim] != null) {
                final int cardinality = usedBytes[dim].cardinality();
                if (cardinality < sortedDimCardinality) {
                    sortedDim = dim;
                    sortedDimCardinality = cardinality;
                }
            }
        }
        // sort by sortedDim
        MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths, reader, from, to, scratchBytesRef1, scratchBytesRef2);
        // Save the block file pointer:
        leafBlockFPs[nodeID - leafNodeOffset] = out.getFilePointer();
        // Write doc IDs
        // Doc IDs are gathered in the post-sort order so they line up with the values written below.
        int[] docIDs = spareDocIds;
        for (int i = from; i < to; ++i) {
            docIDs[i - from] = reader.getDocID(i);
        }
        writeLeafBlockDocs(out, docIDs, 0, count);
        // Write the common prefixes:
        // Copies the first value of the range into scratch1. scratch1 is not read again in this
        // method; presumably a helper consumes it (TODO confirm).
        reader.getValue(from, scratchBytesRef1);
        System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset, scratch1, 0, packedBytesLength);
        // Write the full values:
        // The function returns the shared scratchBytesRef1 on every call, so a returned
        // BytesRef is only valid until the next apply().
        IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {

            @Override
            public BytesRef apply(int i) {
                reader.getValue(from + i, scratchBytesRef1);
                return scratchBytesRef1;
            }
        };
        // Sanity check, evaluated only when assertions are enabled.
        assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues, docIDs, 0);
        writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues);
    } else {
        // inner node
        // compute the split dimension and partition around it
        final int splitDim = split(minPackedValue, maxPackedValue);
        // Midpoint of [from, to), rounded up.
        final int mid = (from + to + 1) >>> 1;
        // Number of leading bytes that min and max share in the split dimension.
        int commonPrefixLen = bytesPerDim;
        for (int i = 0; i < bytesPerDim; ++i) {
            if (minPackedValue[splitDim * bytesPerDim + i] != maxPackedValue[splitDim * bytesPerDim + i]) {
                commonPrefixLen = i;
                break;
            }
        }
        MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLen, reader, from, to, mid, scratchBytesRef1, scratchBytesRef2);
        // set the split value
        // Layout per node: 1 byte for the split dimension, then bytesPerDim bytes of split value.
        final int address = nodeID * (1 + bytesPerDim);
        splitPackedValues[address] = (byte) splitDim;
        // The split value is the splitDim slice of the point that ends up at mid after partitioning.
        reader.getValue(mid, scratchBytesRef1);
        System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim, splitPackedValues, address + 1, bytesPerDim);
        // Copies of the incoming bounds with the splitDim slice replaced by the split value;
        // the caller's arrays are left unmodified.
        byte[] minSplitPackedValue = Arrays.copyOf(minPackedValue, packedBytesLength);
        byte[] maxSplitPackedValue = Arrays.copyOf(maxPackedValue, packedBytesLength);
        System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim, minSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
        System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim, maxSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
        // recurse
        // Left child covers [from, mid) with the split value as its upper bound in splitDim;
        // right child covers [mid, to) with the split value as its lower bound in splitDim.
        build(nodeID * 2, leafNodeOffset, reader, from, mid, out, minPackedValue, maxSplitPackedValue, splitPackedValues, leafBlockFPs, spareDocIds);
        build(nodeID * 2 + 1, leafNodeOffset, reader, mid, to, out, minSplitPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs, spareDocIds);
    }
}
Also used : FixedBitSet(org.apache.lucene.util.FixedBitSet) IntFunction(java.util.function.IntFunction) BytesRef(org.apache.lucene.util.BytesRef)

Example 58 with FixedBitSet

use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.

the class TestScorerPerf method addClause.

/**
 * Adds a required (MUST) clause backed by a randomly chosen bit set from {@code sets}
 * to the given builder.
 *
 * <p>When {@code validate} is enabled, the expected result is tracked as well: the first
 * clause yields a clone of the chosen set, and each later clause is AND-ed into
 * {@code result} in place. When {@code validate} is disabled, {@code result} is returned
 * untouched.
 *
 * @param bq     builder receiving the new clause
 * @param result expected matches accumulated so far, or {@code null} if none yet
 * @return the updated expected matches (may be {@code null} when not validating)
 */
FixedBitSet addClause(BooleanQuery.Builder bq, FixedBitSet result) {
    final int pick = random().nextInt(sets.length);
    final FixedBitSet chosen = sets[pick];
    bq.add(new BitSetQuery(chosen), BooleanClause.Occur.MUST);

    if (!validate) {
        return result;
    }
    if (result == null) {
        // First clause: start from a private copy so the shared set is never mutated.
        return chosen.clone();
    }
    result.and(chosen);
    return result;
}
Also used : FixedBitSet(org.apache.lucene.util.FixedBitSet)

Example 59 with FixedBitSet

use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.

the class TestScorerPerf method doConjunctions.

/**
 * Runs {@code iter} random conjunction queries, each with between 2 and
 * {@code maxClauses} required clauses, and returns the sum of the collectors' sums.
 *
 * <p>When {@code validate} is enabled, each query's hit count is checked against the
 * cardinality of the expected bit set built up by {@code addClause}.
 *
 * @param iter       number of queries to run
 * @param maxClauses upper bound on the number of clauses per query
 * @return the accumulated {@code getSum()} over all collectors
 * @throws IOException if the search fails
 */
public int doConjunctions(int iter, int maxClauses) throws IOException {
    int total = 0;
    for (int round = 0; round < iter; round++) {
        // min 2 clauses
        final int clauseCount = random().nextInt(maxClauses - 1) + 2;
        final BooleanQuery.Builder builder = new BooleanQuery.Builder();
        FixedBitSet expected = null;
        for (int c = 0; c < clauseCount; c++) {
            expected = addClause(builder, expected);
        }

        final CountingHitCollector collector;
        if (validate) {
            collector = new MatchingHitCollector(expected);
        } else {
            collector = new CountingHitCollector();
        }

        s.search(builder.build(), collector);
        total += collector.getSum();
        if (validate) {
            assertEquals(expected.cardinality(), collector.getCount());
        }
    }
    return total;
}
Also used : FixedBitSet(org.apache.lucene.util.FixedBitSet)

Example 60 with FixedBitSet

use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.

the class TestScorerPerf method doNestedConjunctions.

/**
 * Runs {@code iter} random nested conjunction queries and returns the sum of the
 * collectors' sums.
 *
 * <p>Each outer query has between 2 and {@code maxOuterClauses} required sub-queries, and
 * each sub-query has between 2 and {@code maxClauses} required clauses added through
 * {@code addClause}. When {@code validate} is enabled, the hit count of every outer query
 * is checked against the cardinality of the expected bit set.
 *
 * <p>When {@code VERBOSE} is set, the average number of matches per iteration is printed.
 * The average is skipped when {@code iter} is zero, because it is undefined and the integer
 * division would otherwise throw {@link ArithmeticException}.
 *
 * @param iter            number of outer queries to run
 * @param maxOuterClauses upper bound on the number of sub-queries per outer query
 * @param maxClauses      upper bound on the number of clauses per sub-query
 * @return the accumulated {@code getSum()} over all collectors
 * @throws IOException if the search fails
 */
public int doNestedConjunctions(int iter, int maxOuterClauses, int maxClauses) throws IOException {
    int ret = 0;
    long nMatches = 0;
    for (int i = 0; i < iter; i++) {
        int oClauses = random().nextInt(maxOuterClauses - 1) + 2;
        BooleanQuery.Builder oq = new BooleanQuery.Builder();
        // Shared across all sub-queries: every clause at every level is a MUST clause,
        // so the expected result is the intersection of all chosen sets.
        FixedBitSet result = null;
        for (int o = 0; o < oClauses; o++) {
            // min 2 clauses
            int nClauses = random().nextInt(maxClauses - 1) + 2;
            BooleanQuery.Builder bq = new BooleanQuery.Builder();
            for (int j = 0; j < nClauses; j++) {
                result = addClause(bq, result);
            }
            oq.add(bq.build(), BooleanClause.Occur.MUST);
        }
        // outer
        CountingHitCollector hc = validate ? new MatchingHitCollector(result) : new CountingHitCollector();
        s.search(oq.build(), hc);
        nMatches += hc.getCount();
        ret += hc.getSum();
        if (validate) {
            assertEquals(result.cardinality(), hc.getCount());
        }
    }
    // Guard against division by zero when no iterations were requested.
    if (VERBOSE && iter != 0) {
        System.out.println("Average number of matches=" + (nMatches / iter));
    }
    return ret;
}
Also used : FixedBitSet(org.apache.lucene.util.FixedBitSet)

Aggregations

FixedBitSet (org.apache.lucene.util.FixedBitSet)162 Term (org.apache.lucene.index.Term)27 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)26 Directory (org.apache.lucene.store.Directory)25 BytesRef (org.apache.lucene.util.BytesRef)22 IOException (java.io.IOException)19 Document (org.apache.lucene.document.Document)17 ArrayList (java.util.ArrayList)15 Query (org.apache.lucene.search.Query)15 NumericDocValues (org.apache.lucene.index.NumericDocValues)14 BitDocIdSet (org.apache.lucene.util.BitDocIdSet)13 Bits (org.apache.lucene.util.Bits)13 LeafReader (org.apache.lucene.index.LeafReader)12 IndexSearcher (org.apache.lucene.search.IndexSearcher)12 TermQuery (org.apache.lucene.search.TermQuery)12 IndexReader (org.apache.lucene.index.IndexReader)11 HashSet (java.util.HashSet)10 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)10 DocIterator (org.apache.solr.search.DocIterator)10 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)9