Search in sources :

Example 6 with Relation

use of org.apache.lucene.index.PointValues.Relation in project lucene-solr by apache.

the class BasePointsFormatTestCase method testMerge.

/**
 * Indexes 20 one-dimensional int points (committing once after document 10), force-merges
 * the index down to a single segment, and verifies that {@code intersect} hands every
 * document to the visitor together with the value that was indexed for it.
 *
 * @throws Exception if indexing, merging or reading fails
 */
public void testMerge() throws Exception {
    final int numDocs = 20;
    // try-with-resources: the directory, writer and reader are released even when an
    // assertion fails part-way through the test.
    try (Directory dir = getDirectory(numDocs)) {
        IndexWriterConfig iwc = newIndexWriterConfig();
        iwc.setMergePolicy(newLogMergePolicy());
        try (IndexWriter w = new IndexWriter(dir, iwc)) {
            byte[] point = new byte[4];
            for (int i = 0; i < numDocs; i++) {
                Document doc = new Document();
                // The indexed value is the loop counter, which the visitor below compares to the docID.
                NumericUtils.intToSortableBytes(i, point, 0);
                doc.add(new BinaryPoint("dim", point));
                w.addDocument(doc);
                if (i == 10) {
                    w.commit();
                }
            }
            w.forceMerge(1);
        }
        try (DirectoryReader r = DirectoryReader.open(dir)) {
            LeafReader sub = getOnlyLeafReader(r);
            PointValues values = sub.getPointValues("dim");
            // Simple test: make sure intersect can visit every doc:
            BitSet seen = new BitSet();
            values.intersect(new IntersectVisitor() {

                @Override
                public Relation compare(byte[] minPacked, byte[] maxPacked) {
                    // Always "crosses", so the test expects only the per-value callback to be used.
                    return Relation.CELL_CROSSES_QUERY;
                }

                @Override
                public void visit(int docID) {
                    // The docID-only callback is not expected in this test.
                    throw new IllegalStateException();
                }

                @Override
                public void visit(int docID, byte[] packedValue) {
                    seen.set(docID);
                    assertEquals(docID, NumericUtils.sortableBytesToInt(packedValue, 0));
                }
            });
            assertEquals(numDocs, seen.cardinality());
        }
    }
}
Also used : IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) BinaryPoint(org.apache.lucene.document.BinaryPoint) BitSet(java.util.BitSet) Document(org.apache.lucene.document.Document) BinaryPoint(org.apache.lucene.document.BinaryPoint) IntPoint(org.apache.lucene.document.IntPoint) Relation(org.apache.lucene.index.PointValues.Relation) Directory(org.apache.lucene.store.Directory)

Example 7 with Relation

use of org.apache.lucene.index.PointValues.Relation in project lucene-solr by apache.

the class TestBKD method verify.

/**
 * Writes {@code docValues} with {@link BKDWriter} (either as one tree, or as several
 * segments that are then merged into one), reopens the result with {@link BKDReader}, and
 * checks a series of random N-dimensional box queries against a brute-force scan of
 * {@code docValues}.
 *
 * @param dir directory that receives the "bkd" (and, on the merge path, "bkd2") files
 * @param docValues values indexed as {@code [ord][dim]}; {@code numBytesPerDim} bytes are read from each
 * @param docIDs docID for each ord, or {@code null} to use the ord itself as the docID
 * @param numDims number of dimensions per point
 * @param numBytesPerDim number of bytes per dimension
 * @param maxPointsInLeafNode passed to {@link BKDWriter}; re-randomized for each extra segment on the merge path
 * @param maxMB passed to {@link BKDWriter}; re-randomized for each extra segment on the merge path
 * @throws Exception if writing, merging or reading fails
 */
private void verify(Directory dir, byte[][][] docValues, int[] docIDs, int numDims, int numBytesPerDim, int maxPointsInLeafNode, double maxMB) throws Exception {
    int numValues = docValues.length;
    if (VERBOSE) {
        System.out.println("TEST: numValues=" + numValues + " numDims=" + numDims + " numBytesPerDim=" + numBytesPerDim + " maxPointsInLeafNode=" + maxPointsInLeafNode + " maxMB=" + maxMB);
    }
    // Both lists stay null unless the merge path actually finishes at least one segment.
    List<Long> toMerge = null;
    List<MergeState.DocMap> docMaps = null;
    int seg = 0;
    BKDWriter w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, docValues.length, false);
    IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);
    IndexInput in = null;
    // Set only once everything passed; the finally block cleans up when it is still false.
    boolean success = false;
    try {
        // Reused buffer holding all dimensions of one point back to back.
        byte[] scratch = new byte[numBytesPerDim * numDims];
        int lastDocIDBase = 0;
        // The merge path is only tried for 1D data with at least 10 values, and then only randomly.
        boolean useMerge = numDims == 1 && numValues >= 10 && random().nextBoolean();
        int valuesInThisSeg;
        if (useMerge) {
            // Sometimes we will call merge with a single segment:
            valuesInThisSeg = TestUtil.nextInt(random(), numValues / 10, numValues);
        } else {
            valuesInThisSeg = 0;
        }
        // Number of values added to the current writer so far.
        int segCount = 0;
        for (int ord = 0; ord < numValues; ord++) {
            int docID;
            if (docIDs == null) {
                docID = ord;
            } else {
                docID = docIDs[ord];
            }
            if (VERBOSE) {
                System.out.println("  ord=" + ord + " docID=" + docID + " lastDocIDBase=" + lastDocIDBase);
            }
            for (int dim = 0; dim < numDims; dim++) {
                if (VERBOSE) {
                    System.out.println("    " + dim + " -> " + new BytesRef(docValues[ord][dim]));
                }
                System.arraycopy(docValues[ord][dim], 0, scratch, dim * numBytesPerDim, numBytesPerDim);
            }
            // docIDs are written relative to the current base; the DocMap below adds the base back.
            w.add(scratch, docID - lastDocIDBase);
            segCount++;
            if (useMerge && segCount == valuesInThisSeg) {
                // Current segment is full: finish it, remember its file pointer and doc map, start a new writer.
                if (toMerge == null) {
                    toMerge = new ArrayList<>();
                    docMaps = new ArrayList<>();
                }
                final int curDocIDBase = lastDocIDBase;
                docMaps.add(new MergeState.DocMap() {

                    @Override
                    public int get(int docID) {
                        return curDocIDBase + docID;
                    }
                });
                toMerge.add(w.finish(out));
                valuesInThisSeg = TestUtil.nextInt(random(), numValues / 10, numValues / 2);
                segCount = 0;
                seg++;
                maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 1000);
                maxMB = (float) 3.0 + (3 * random().nextDouble());
                w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, docValues.length, false);
                // The next segment's base is the docID of the value that was just added.
                lastDocIDBase = docID;
            }
        }
        // File pointer handed to the final BKDReader after seeking.
        long indexFP;
        if (toMerge != null) {
            if (segCount > 0) {
                // Flush the trailing, partially filled segment as well.
                toMerge.add(w.finish(out));
                final int curDocIDBase = lastDocIDBase;
                docMaps.add(new MergeState.DocMap() {

                    @Override
                    public int get(int docID) {
                        return curDocIDBase + docID;
                    }
                });
            }
            out.close();
            in = dir.openInput("bkd", IOContext.DEFAULT);
            seg++;
            w = new BKDWriter(numValues, dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, docValues.length, false);
            // One reader per finished segment, all reading from the shared "bkd" input.
            List<BKDReader> readers = new ArrayList<>();
            for (long fp : toMerge) {
                in.seek(fp);
                readers.add(new BKDReader(in));
            }
            // The merged tree is written to a second file, which is then reopened for querying.
            out = dir.createOutput("bkd2", IOContext.DEFAULT);
            indexFP = w.merge(out, docMaps, readers);
            out.close();
            in.close();
            in = dir.openInput("bkd2", IOContext.DEFAULT);
        } else {
            // No merge: the single writer's output is queried directly.
            indexFP = w.finish(out);
            out.close();
            in = dir.openInput("bkd", IOContext.DEFAULT);
        }
        in.seek(indexFP);
        BKDReader r = new BKDReader(in);
        int iters = atLeast(100);
        for (int iter = 0; iter < iters; iter++) {
            if (VERBOSE) {
                System.out.println("\nTEST: iter=" + iter);
            }
            // Random N dims rect query:
            byte[][] queryMin = new byte[numDims][];
            byte[][] queryMax = new byte[numDims][];
            for (int dim = 0; dim < numDims; dim++) {
                queryMin[dim] = new byte[numBytesPerDim];
                random().nextBytes(queryMin[dim]);
                queryMax[dim] = new byte[numBytesPerDim];
                random().nextBytes(queryMax[dim]);
                // Swap so that queryMin <= queryMax in every dimension.
                if (StringHelper.compare(numBytesPerDim, queryMin[dim], 0, queryMax[dim], 0) > 0) {
                    byte[] x = queryMin[dim];
                    queryMin[dim] = queryMax[dim];
                    queryMax[dim] = x;
                }
            }
            // docIDs reported by the reader for this query.
            final BitSet hits = new BitSet();
            r.intersect(new IntersectVisitor() {

                @Override
                public void visit(int docID) {
                    // Recorded as a hit without looking at the value.
                    hits.set(docID);
                //System.out.println("visit docID=" + docID);
                }

                @Override
                public void visit(int docID, byte[] packedValue) {
                    //System.out.println("visit check docID=" + docID);
                    // Recorded as a hit only when the value lies inside the box in every dimension.
                    for (int dim = 0; dim < numDims; dim++) {
                        if (StringHelper.compare(numBytesPerDim, packedValue, dim * numBytesPerDim, queryMin[dim], 0) < 0 || StringHelper.compare(numBytesPerDim, packedValue, dim * numBytesPerDim, queryMax[dim], 0) > 0) {
                            //System.out.println("  no");
                            return;
                        }
                    }
                    //System.out.println("  yes");
                    hits.set(docID);
                }

                @Override
                public Relation compare(byte[] minPacked, byte[] maxPacked) {
                    // Outside if any dimension is disjoint from the query; crosses if any dimension
                    // sticks out of the query; otherwise the cell is fully inside.
                    boolean crosses = false;
                    for (int dim = 0; dim < numDims; dim++) {
                        if (StringHelper.compare(numBytesPerDim, maxPacked, dim * numBytesPerDim, queryMin[dim], 0) < 0 || StringHelper.compare(numBytesPerDim, minPacked, dim * numBytesPerDim, queryMax[dim], 0) > 0) {
                            return Relation.CELL_OUTSIDE_QUERY;
                        } else if (StringHelper.compare(numBytesPerDim, minPacked, dim * numBytesPerDim, queryMin[dim], 0) < 0 || StringHelper.compare(numBytesPerDim, maxPacked, dim * numBytesPerDim, queryMax[dim], 0) > 0) {
                            crosses = true;
                        }
                    }
                    if (crosses) {
                        return Relation.CELL_CROSSES_QUERY;
                    } else {
                        return Relation.CELL_INSIDE_QUERY;
                    }
                }
            });
            // Brute-force expectation computed straight from the input values.
            BitSet expected = new BitSet();
            for (int ord = 0; ord < numValues; ord++) {
                boolean matches = true;
                for (int dim = 0; dim < numDims; dim++) {
                    byte[] x = docValues[ord][dim];
                    if (StringHelper.compare(numBytesPerDim, x, 0, queryMin[dim], 0) < 0 || StringHelper.compare(numBytesPerDim, x, 0, queryMax[dim], 0) > 0) {
                        matches = false;
                        break;
                    }
                }
                if (matches) {
                    int docID;
                    if (docIDs == null) {
                        docID = ord;
                    } else {
                        docID = docIDs[ord];
                    }
                    expected.set(docID);
                }
            }
            // Compare bit by bit so a failure names the offending docID.
            int limit = Math.max(expected.length(), hits.length());
            for (int docID = 0; docID < limit; docID++) {
                assertEquals("docID=" + docID, expected.get(docID), hits.get(docID));
            }
        }
        in.close();
        dir.deleteFile("bkd");
        if (toMerge != null) {
            dir.deleteFile("bkd2");
        }
        success = true;
    } finally {
        // Failure path only: close whatever is still open and remove the files, ignoring further errors.
        if (success == false) {
            IOUtils.closeWhileHandlingException(w, in, out);
            IOUtils.deleteFilesIgnoringExceptions(dir, "bkd", "bkd2");
        }
    }
}
Also used : IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) MergeState(org.apache.lucene.index.MergeState) ArrayList(java.util.ArrayList) BitSet(java.util.BitSet) CorruptingIndexOutput(org.apache.lucene.store.CorruptingIndexOutput) IndexOutput(org.apache.lucene.store.IndexOutput) Relation(org.apache.lucene.index.PointValues.Relation) IndexInput(org.apache.lucene.store.IndexInput) BytesRef(org.apache.lucene.util.BytesRef)

Example 8 with Relation

use of org.apache.lucene.index.PointValues.Relation in project lucene-solr by apache.

the class TestBKD method testBigIntNDims.

/**
 * Tests on N-dimensional points where each dimension is a BigInteger: random points are
 * written with {@link BKDWriter}, then random box queries run through {@link BKDReader}
 * are compared, document by document, with a brute-force check of the original values.
 */
public void testBigIntNDims() throws Exception {
    final int docCount = atLeast(1000);
    try (Directory directory = getDirectory(docCount)) {
        final int bytesPerDim = TestUtil.nextInt(random(), 2, 30);
        final int dimCount = TestUtil.nextInt(random(), 1, 5);
        final int leafCapacity = TestUtil.nextInt(random(), 50, 100);
        final float heapMB = (float) 3.0 + (3 * random().nextFloat());
        BKDWriter writer = new BKDWriter(docCount, directory, "tmp", dimCount, bytesPerDim, leafCapacity, heapMB, docCount, true);

        // Index phase: remember every point so the queries can be checked by brute force.
        final BigInteger[][] indexed = new BigInteger[docCount][];
        final byte[] packed = new byte[bytesPerDim * dimCount];
        for (int doc = 0; doc < docCount; doc++) {
            if (VERBOSE) {
                System.out.println("  docID=" + doc);
            }
            final BigInteger[] point = new BigInteger[dimCount];
            for (int dim = 0; dim < dimCount; dim++) {
                final BigInteger value = randomBigInt(bytesPerDim);
                point[dim] = value;
                NumericUtils.bigIntToSortableBytes(value, bytesPerDim, packed, dim * bytesPerDim);
                if (VERBOSE) {
                    System.out.println("    " + dim + " -> " + value);
                }
            }
            indexed[doc] = point;
            writer.add(packed, doc);
        }

        final long treeStart;
        try (IndexOutput output = directory.createOutput("bkd", IOContext.DEFAULT)) {
            treeStart = writer.finish(output);
        }

        try (IndexInput input = directory.openInput("bkd", IOContext.DEFAULT)) {
            input.seek(treeStart);
            final BKDReader reader = new BKDReader(input);
            final int rounds = atLeast(100);
            for (int round = 0; round < rounds; round++) {
                if (VERBOSE) {
                    System.out.println("\nTEST: iter=" + round);
                }

                // Random N dims rect query; the two draws per dimension are ordered afterwards.
                final BigInteger[] lower = new BigInteger[dimCount];
                final BigInteger[] upper = new BigInteger[dimCount];
                for (int dim = 0; dim < dimCount; dim++) {
                    final BigInteger first = randomBigInt(bytesPerDim);
                    final BigInteger second = randomBigInt(bytesPerDim);
                    final boolean ordered = first.compareTo(second) <= 0;
                    lower[dim] = ordered ? first : second;
                    upper[dim] = ordered ? second : first;
                }

                final BitSet matched = new BitSet();
                reader.intersect(new IntersectVisitor() {

                    @Override
                    public void visit(int docID) {
                        matched.set(docID);
                    }

                    @Override
                    public void visit(int docID, byte[] packedValue) {
                        for (int dim = 0; dim < dimCount; dim++) {
                            final BigInteger value = NumericUtils.sortableBytesToBigInt(packedValue, dim * bytesPerDim, bytesPerDim);
                            final boolean inRange = value.compareTo(lower[dim]) >= 0 && value.compareTo(upper[dim]) <= 0;
                            if (!inRange) {
                                return;
                            }
                        }
                        matched.set(docID);
                    }

                    @Override
                    public Relation compare(byte[] minPacked, byte[] maxPacked) {
                        boolean sticksOut = false;
                        for (int dim = 0; dim < dimCount; dim++) {
                            final BigInteger cellMin = NumericUtils.sortableBytesToBigInt(minPacked, dim * bytesPerDim, bytesPerDim);
                            final BigInteger cellMax = NumericUtils.sortableBytesToBigInt(maxPacked, dim * bytesPerDim, bytesPerDim);
                            assert cellMax.compareTo(cellMin) >= 0;
                            if (cellMax.compareTo(lower[dim]) < 0 || cellMin.compareTo(upper[dim]) > 0) {
                                return Relation.CELL_OUTSIDE_QUERY;
                            }
                            if (cellMin.compareTo(lower[dim]) < 0 || cellMax.compareTo(upper[dim]) > 0) {
                                sticksOut = true;
                            }
                        }
                        return sticksOut ? Relation.CELL_CROSSES_QUERY : Relation.CELL_INSIDE_QUERY;
                    }
                });

                // Brute-force check of every document against the query box.
                for (int doc = 0; doc < docCount; doc++) {
                    final BigInteger[] point = indexed[doc];
                    boolean shouldMatch = true;
                    for (int dim = 0; shouldMatch && dim < dimCount; dim++) {
                        final BigInteger value = point[dim];
                        shouldMatch = value.compareTo(lower[dim]) >= 0 && value.compareTo(upper[dim]) <= 0;
                    }
                    assertEquals("docID=" + doc, shouldMatch, matched.get(doc));
                }
            }
        }
    }
}
Also used : IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) BitSet(java.util.BitSet) CorruptingIndexOutput(org.apache.lucene.store.CorruptingIndexOutput) IndexOutput(org.apache.lucene.store.IndexOutput) Relation(org.apache.lucene.index.PointValues.Relation) BigInteger(java.math.BigInteger) IndexInput(org.apache.lucene.store.IndexInput) FilterDirectory(org.apache.lucene.store.FilterDirectory) Directory(org.apache.lucene.store.Directory)

Example 9 with Relation

use of org.apache.lucene.index.PointValues.Relation in project lucene-solr by apache.

the class TestBKD method testRandomIntsNDims.

/**
 * Writes random N-dimensional int points with {@link BKDWriter}, checks that the reader's
 * min/max packed values equal the per-dimension extremes of what was written, and then
 * compares random box queries, document by document, with a brute-force check.
 */
public void testRandomIntsNDims() throws Exception {
    final int docCount = atLeast(1000);
    try (Directory directory = getDirectory(docCount)) {
        final int dimCount = TestUtil.nextInt(random(), 1, 5);
        final int leafCapacity = TestUtil.nextInt(random(), 50, 100);
        final float heapMB = (float) 3.0 + (3 * random().nextFloat());
        BKDWriter writer = new BKDWriter(docCount, directory, "tmp", dimCount, Integer.BYTES, leafCapacity, heapMB, docCount, true);
        if (VERBOSE) {
            System.out.println("TEST: numDims=" + dimCount + " numDocs=" + docCount);
        }

        // Index phase: keep every point plus the running per-dimension extremes.
        final int[][] indexed = new int[docCount][];
        final byte[] packed = new byte[Integer.BYTES * dimCount];
        final int[] smallest = new int[dimCount];
        final int[] largest = new int[dimCount];
        Arrays.fill(smallest, Integer.MAX_VALUE);
        Arrays.fill(largest, Integer.MIN_VALUE);
        for (int doc = 0; doc < docCount; doc++) {
            if (VERBOSE) {
                System.out.println("  docID=" + doc);
            }
            final int[] point = new int[dimCount];
            for (int dim = 0; dim < dimCount; dim++) {
                final int value = random().nextInt();
                point[dim] = value;
                smallest[dim] = Math.min(smallest[dim], value);
                largest[dim] = Math.max(largest[dim], value);
                NumericUtils.intToSortableBytes(value, packed, dim * Integer.BYTES);
                if (VERBOSE) {
                    System.out.println("    " + dim + " -> " + value);
                }
            }
            indexed[doc] = point;
            writer.add(packed, doc);
        }

        final long treeStart;
        try (IndexOutput output = directory.createOutput("bkd", IOContext.DEFAULT)) {
            treeStart = writer.finish(output);
        }

        try (IndexInput input = directory.openInput("bkd", IOContext.DEFAULT)) {
            input.seek(treeStart);
            final BKDReader reader = new BKDReader(input);

            // The reader's global bounds must equal the extremes tracked while indexing.
            final byte[] globalMin = reader.getMinPackedValue();
            final byte[] globalMax = reader.getMaxPackedValue();
            for (int dim = 0; dim < dimCount; dim++) {
                final int offset = dim * Integer.BYTES;
                assertEquals(smallest[dim], NumericUtils.sortableBytesToInt(globalMin, offset));
                assertEquals(largest[dim], NumericUtils.sortableBytesToInt(globalMax, offset));
            }

            final int rounds = atLeast(100);
            for (int round = 0; round < rounds; round++) {
                if (VERBOSE) {
                    System.out.println("\nTEST: iter=" + round);
                }

                // Random N dims rect query; the two draws per dimension are ordered afterwards.
                final int[] lower = new int[dimCount];
                final int[] upper = new int[dimCount];
                for (int dim = 0; dim < dimCount; dim++) {
                    final int first = random().nextInt();
                    final int second = random().nextInt();
                    lower[dim] = Math.min(first, second);
                    upper[dim] = Math.max(first, second);
                }

                final BitSet matched = new BitSet();
                reader.intersect(new IntersectVisitor() {

                    @Override
                    public void visit(int docID) {
                        matched.set(docID);
                    }

                    @Override
                    public void visit(int docID, byte[] packedValue) {
                        for (int dim = 0; dim < dimCount; dim++) {
                            final int value = NumericUtils.sortableBytesToInt(packedValue, dim * Integer.BYTES);
                            final boolean inRange = value >= lower[dim] && value <= upper[dim];
                            if (!inRange) {
                                return;
                            }
                        }
                        matched.set(docID);
                    }

                    @Override
                    public Relation compare(byte[] minPacked, byte[] maxPacked) {
                        boolean sticksOut = false;
                        for (int dim = 0; dim < dimCount; dim++) {
                            final int cellMin = NumericUtils.sortableBytesToInt(minPacked, dim * Integer.BYTES);
                            final int cellMax = NumericUtils.sortableBytesToInt(maxPacked, dim * Integer.BYTES);
                            assert cellMax >= cellMin;
                            if (cellMax < lower[dim] || cellMin > upper[dim]) {
                                return Relation.CELL_OUTSIDE_QUERY;
                            }
                            if (cellMin < lower[dim] || cellMax > upper[dim]) {
                                sticksOut = true;
                            }
                        }
                        return sticksOut ? Relation.CELL_CROSSES_QUERY : Relation.CELL_INSIDE_QUERY;
                    }
                });

                // Brute-force check of every document against the query box.
                for (int doc = 0; doc < docCount; doc++) {
                    final int[] point = indexed[doc];
                    boolean shouldMatch = true;
                    for (int dim = 0; shouldMatch && dim < dimCount; dim++) {
                        final int value = point[dim];
                        shouldMatch = value >= lower[dim] && value <= upper[dim];
                    }
                    assertEquals("docID=" + doc, shouldMatch, matched.get(doc));
                }
            }
        }
    }
}
Also used : IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) BitSet(java.util.BitSet) CorruptingIndexOutput(org.apache.lucene.store.CorruptingIndexOutput) IndexOutput(org.apache.lucene.store.IndexOutput) Relation(org.apache.lucene.index.PointValues.Relation) IndexInput(org.apache.lucene.store.IndexInput) FilterDirectory(org.apache.lucene.store.FilterDirectory) Directory(org.apache.lucene.store.Directory)

Example 10 with Relation

use of org.apache.lucene.index.PointValues.Relation in project lucene-solr by apache.

the class TestBKD method testTieBreakOrder.

/**
 * Adds 10000 points that all carry the same (all-zero) value and checks that the reader
 * visits them in strictly increasing docID order.
 *
 * @throws Exception if writing or reading the BKD tree fails
 */
public void testTieBreakOrder() throws Exception {
    try (Directory dir = newDirectory()) {
        int numDocs = 10000;
        BKDWriter w = new BKDWriter(numDocs + 1, dir, "tmp", 1, Integer.BYTES, 2, 0.01f, numDocs, true);
        for (int i = 0; i < numDocs; i++) {
            // Every point has the identical all-zero value; only the docID differs.
            w.add(new byte[Integer.BYTES], i);
        }
        // try-with-resources: the output and input are closed even when finish() throws or
        // an assertion inside the visitor fails.
        long fp;
        try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
            fp = w.finish(out);
        }
        try (IndexInput in = dir.openInput("bkd", IOContext.DEFAULT)) {
            in.seek(fp);
            BKDReader r = new BKDReader(in);
            r.intersect(new IntersectVisitor() {

                // docID seen by the previous visit call; -1 before the first one.
                int lastDocID = -1;

                @Override
                public void visit(int docID) {
                    assertTrue("lastDocID=" + lastDocID + " docID=" + docID, docID > lastDocID);
                    lastDocID = docID;
                }

                @Override
                public void visit(int docID, byte[] packedValue) {
                    // The value is irrelevant here; only the visiting order is checked.
                    visit(docID);
                }

                @Override
                public Relation compare(byte[] minPacked, byte[] maxPacked) {
                    return Relation.CELL_CROSSES_QUERY;
                }
            });
        }
    }
}
Also used : Relation(org.apache.lucene.index.PointValues.Relation) IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) IndexInput(org.apache.lucene.store.IndexInput) CorruptingIndexOutput(org.apache.lucene.store.CorruptingIndexOutput) IndexOutput(org.apache.lucene.store.IndexOutput) FilterDirectory(org.apache.lucene.store.FilterDirectory) Directory(org.apache.lucene.store.Directory)

Aggregations

IntersectVisitor (org.apache.lucene.index.PointValues.IntersectVisitor)23 Relation (org.apache.lucene.index.PointValues.Relation)23 Directory (org.apache.lucene.store.Directory)15 IOException (java.io.IOException)9 BitSet (java.util.BitSet)9 IndexInput (org.apache.lucene.store.IndexInput)9 IndexOutput (org.apache.lucene.store.IndexOutput)9 BinaryPoint (org.apache.lucene.document.BinaryPoint)8 Document (org.apache.lucene.document.Document)8 CorruptingIndexOutput (org.apache.lucene.store.CorruptingIndexOutput)8 PointValues (org.apache.lucene.index.PointValues)7 FilterDirectory (org.apache.lucene.store.FilterDirectory)7 IntPoint (org.apache.lucene.document.IntPoint)6 LeafReader (org.apache.lucene.index.LeafReader)4 BigInteger (java.math.BigInteger)2 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)2 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)2 FieldInfo (org.apache.lucene.index.FieldInfo)2 IndexReader (org.apache.lucene.index.IndexReader)2 IndexWriter (org.apache.lucene.index.IndexWriter)2