Search in sources :

Example 31 with IndexInput

use of org.apache.lucene.store.IndexInput in project lucene-solr by apache.

From the class Test2BBKDPoints, method test2D.

/**
 * Writes 26 two-dimensional points (each dimension {@code Long.BYTES} wide) for each of
 * {@code Integer.MAX_VALUE / 26 + 100} documents, so the total point count exceeds
 * {@code Integer.MAX_VALUE}, then reads the tree back and checks that the point and
 * doc counts reported by the reader match what the verifying visitor saw.
 *
 * <p>The directory, output and input are managed with try-with-resources so they are
 * released even when writing, reading or an assertion fails.
 *
 * @throws Exception if indexing, reading or verification fails
 */
public void test2D() throws Exception {
    final int pointsPerDoc = 26;
    final int numDocs = (Integer.MAX_VALUE / pointsPerDoc) + 100;
    try (Directory dir = FSDirectory.open(createTempDir("2BBKDPoints2D"))) {
        BKDWriter w = new BKDWriter(numDocs, dir, "_0", 2, Long.BYTES, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP, 26L * numDocs, false);
        int counter = 0;
        byte[] packedBytes = new byte[2 * Long.BYTES];
        for (int docID = 0; docID < numDocs; docID++) {
            for (int j = 0; j < pointsPerDoc; j++) {
                // first a random int:
                NumericUtils.intToSortableBytes(random().nextInt(), packedBytes, 0);
                // then our counter, which will overflow a bit in the end:
                NumericUtils.intToSortableBytes(counter, packedBytes, Integer.BYTES);
                // then two random ints for the 2nd dimension:
                NumericUtils.intToSortableBytes(random().nextInt(), packedBytes, Long.BYTES);
                NumericUtils.intToSortableBytes(random().nextInt(), packedBytes, Long.BYTES + Integer.BYTES);
                w.add(packedBytes, docID);
                counter++;
            }
            if (VERBOSE && docID % 100000 == 0) {
                System.out.println(docID + " of " + numDocs + "...");
            }
        }
        // finish() returns the file pointer the reader must seek to before opening the tree
        long indexFP;
        try (IndexOutput out = dir.createOutput("2d.bkd", IOContext.DEFAULT)) {
            indexFP = w.finish(out);
        }
        try (IndexInput in = dir.openInput("2d.bkd", IOContext.DEFAULT)) {
            in.seek(indexFP);
            BKDReader r = new BKDReader(in);
            CheckIndex.VerifyPointsVisitor visitor = new CheckIndex.VerifyPointsVisitor("2d", numDocs, r);
            r.intersect(visitor);
            assertEquals(r.size(), visitor.getPointCountSeen());
            assertEquals(r.getDocCount(), visitor.getDocCountSeen());
        }
    }
}
Also used : IndexInput(org.apache.lucene.store.IndexInput) IndexOutput(org.apache.lucene.store.IndexOutput) CheckIndex(org.apache.lucene.index.CheckIndex) Directory(org.apache.lucene.store.Directory) FSDirectory(org.apache.lucene.store.FSDirectory)

Example 32 with IndexInput

use of org.apache.lucene.store.IndexInput in project lucene-solr by apache.

From the class Test2BBKDPoints, method test1D.

/**
 * Writes 26 one-dimensional points ({@code Long.BYTES} wide) for each of
 * {@code Integer.MAX_VALUE / 26 + 100} documents, so the total point count exceeds
 * {@code Integer.MAX_VALUE}, then reads the tree back and checks that the point and
 * doc counts reported by the reader match what the verifying visitor saw.
 *
 * <p>The directory, output and input are managed with try-with-resources so they are
 * released even when writing, reading or an assertion fails.
 *
 * @throws Exception if indexing, reading or verification fails
 */
public void test1D() throws Exception {
    final int pointsPerDoc = 26;
    final int numDocs = (Integer.MAX_VALUE / pointsPerDoc) + 100;
    try (Directory dir = FSDirectory.open(createTempDir("2BBKDPoints1D"))) {
        BKDWriter w = new BKDWriter(numDocs, dir, "_0", 1, Long.BYTES, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP, 26L * numDocs, false);
        int counter = 0;
        byte[] packedBytes = new byte[Long.BYTES];
        for (int docID = 0; docID < numDocs; docID++) {
            for (int j = 0; j < pointsPerDoc; j++) {
                // first a random int:
                NumericUtils.intToSortableBytes(random().nextInt(), packedBytes, 0);
                // then our counter, which will overflow a bit in the end:
                NumericUtils.intToSortableBytes(counter, packedBytes, Integer.BYTES);
                w.add(packedBytes, docID);
                counter++;
            }
            if (VERBOSE && docID % 100000 == 0) {
                System.out.println(docID + " of " + numDocs + "...");
            }
        }
        // finish() returns the file pointer the reader must seek to before opening the tree
        long indexFP;
        try (IndexOutput out = dir.createOutput("1d.bkd", IOContext.DEFAULT)) {
            indexFP = w.finish(out);
        }
        try (IndexInput in = dir.openInput("1d.bkd", IOContext.DEFAULT)) {
            in.seek(indexFP);
            BKDReader r = new BKDReader(in);
            CheckIndex.VerifyPointsVisitor visitor = new CheckIndex.VerifyPointsVisitor("1d", numDocs, r);
            r.intersect(visitor);
            assertEquals(r.size(), visitor.getPointCountSeen());
            assertEquals(r.getDocCount(), visitor.getDocCountSeen());
        }
    }
}
Also used : IndexInput(org.apache.lucene.store.IndexInput) IndexOutput(org.apache.lucene.store.IndexOutput) CheckIndex(org.apache.lucene.index.CheckIndex) Directory(org.apache.lucene.store.Directory) FSDirectory(org.apache.lucene.store.FSDirectory)

Example 33 with IndexInput

use of org.apache.lucene.store.IndexInput in project lucene-solr by apache.

From the class TestBKD, method testBigIntNDims.

// Indexes random N-dimensional BigInteger points, then checks random rectangle
// queries against a brute-force scan of the same points.
public void testBigIntNDims() throws Exception {
    int numDocs = atLeast(1000);
    try (Directory dir = getDirectory(numDocs)) {
        int numBytesPerDim = TestUtil.nextInt(random(), 2, 30);
        int numDims = TestUtil.nextInt(random(), 1, 5);
        int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 100);
        float maxMB = (float) 3.0 + (3 * random().nextFloat());
        BKDWriter w = new BKDWriter(numDocs, dir, "tmp", numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, numDocs, true);

        // Keep the unencoded points around so query results can be verified by brute force
        BigInteger[][] indexedPoints = new BigInteger[numDocs][];
        byte[] packedPoint = new byte[numBytesPerDim * numDims];
        for (int docID = 0; docID < numDocs; docID++) {
            if (VERBOSE) {
                System.out.println("  docID=" + docID);
            }
            BigInteger[] point = new BigInteger[numDims];
            for (int dim = 0; dim < numDims; dim++) {
                BigInteger value = randomBigInt(numBytesPerDim);
                point[dim] = value;
                NumericUtils.bigIntToSortableBytes(value, numBytesPerDim, packedPoint, dim * numBytesPerDim);
                if (VERBOSE) {
                    System.out.println("    " + dim + " -> " + value);
                }
            }
            indexedPoints[docID] = point;
            w.add(packedPoint, docID);
        }

        long indexFP;
        try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
            indexFP = w.finish(out);
        }

        try (IndexInput in = dir.openInput("bkd", IOContext.DEFAULT)) {
            in.seek(indexFP);
            BKDReader r = new BKDReader(in);
            int iters = atLeast(100);
            for (int iter = 0; iter < iters; iter++) {
                if (VERBOSE) {
                    System.out.println("\nTEST: iter=" + iter);
                }

                // Build a random N-dim rectangle, ordering each pair so min <= max
                BigInteger[] queryMin = new BigInteger[numDims];
                BigInteger[] queryMax = new BigInteger[numDims];
                for (int dim = 0; dim < numDims; dim++) {
                    BigInteger first = randomBigInt(numBytesPerDim);
                    BigInteger second = randomBigInt(numBytesPerDim);
                    boolean swapped = first.compareTo(second) > 0;
                    queryMin[dim] = swapped ? second : first;
                    queryMax[dim] = swapped ? first : second;
                }

                final BitSet hits = new BitSet();
                r.intersect(new IntersectVisitor() {

                    @Override
                    public void visit(int docID) {
                        hits.set(docID);
                    }

                    @Override
                    public void visit(int docID, byte[] packedValue) {
                        // Record the doc only when every dimension lies inside the query rectangle
                        for (int dim = 0; dim < numDims; dim++) {
                            BigInteger value = NumericUtils.sortableBytesToBigInt(packedValue, dim * numBytesPerDim, numBytesPerDim);
                            boolean inRange = value.compareTo(queryMin[dim]) >= 0 && value.compareTo(queryMax[dim]) <= 0;
                            if (!inRange) {
                                return;
                            }
                        }
                        hits.set(docID);
                    }

                    @Override
                    public Relation compare(byte[] minPacked, byte[] maxPacked) {
                        boolean fullyInside = true;
                        for (int dim = 0; dim < numDims; dim++) {
                            BigInteger cellMin = NumericUtils.sortableBytesToBigInt(minPacked, dim * numBytesPerDim, numBytesPerDim);
                            BigInteger cellMax = NumericUtils.sortableBytesToBigInt(maxPacked, dim * numBytesPerDim, numBytesPerDim);
                            assert cellMax.compareTo(cellMin) >= 0;
                            if (cellMax.compareTo(queryMin[dim]) < 0 || cellMin.compareTo(queryMax[dim]) > 0) {
                                // Disjoint in one dimension means disjoint overall
                                return Relation.CELL_OUTSIDE_QUERY;
                            }
                            if (cellMin.compareTo(queryMin[dim]) < 0 || cellMax.compareTo(queryMax[dim]) > 0) {
                                fullyInside = false;
                            }
                        }
                        return fullyInside ? Relation.CELL_INSIDE_QUERY : Relation.CELL_CROSSES_QUERY;
                    }
                });

                // Brute-force check of every indexed point against the rectangle
                for (int docID = 0; docID < numDocs; docID++) {
                    BigInteger[] point = indexedPoints[docID];
                    boolean expected = true;
                    for (int dim = 0; expected && dim < numDims; dim++) {
                        BigInteger value = point[dim];
                        expected = value.compareTo(queryMin[dim]) >= 0 && value.compareTo(queryMax[dim]) <= 0;
                    }
                    assertEquals("docID=" + docID, expected, hits.get(docID));
                }
            }
        }
    }
}
Also used : IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) BitSet(java.util.BitSet) CorruptingIndexOutput(org.apache.lucene.store.CorruptingIndexOutput) IndexOutput(org.apache.lucene.store.IndexOutput) Relation(org.apache.lucene.index.PointValues.Relation) BigInteger(java.math.BigInteger) IndexInput(org.apache.lucene.store.IndexInput) FilterDirectory(org.apache.lucene.store.FilterDirectory) Directory(org.apache.lucene.store.Directory)

Example 34 with IndexInput

use of org.apache.lucene.store.IndexInput in project lucene-solr by apache.

From the class TestBKD, method testRandomIntsNDims.

// Indexes random N-dimensional int points, checks the reader's reported per-dimension
// min/max, then checks random rectangle queries against a brute-force scan.
public void testRandomIntsNDims() throws Exception {
    int numDocs = atLeast(1000);
    try (Directory dir = getDirectory(numDocs)) {
        int numDims = TestUtil.nextInt(random(), 1, 5);
        int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 100);
        float maxMB = (float) 3.0 + (3 * random().nextFloat());
        BKDWriter w = new BKDWriter(numDocs, dir, "tmp", numDims, 4, maxPointsInLeafNode, maxMB, numDocs, true);
        if (VERBOSE) {
            System.out.println("TEST: numDims=" + numDims + " numDocs=" + numDocs);
        }

        // Keep the raw points plus the running per-dimension extremes for later verification
        int[][] indexedPoints = new int[numDocs][];
        byte[] packedPoint = new byte[4 * numDims];
        int[] minValue = new int[numDims];
        int[] maxValue = new int[numDims];
        Arrays.fill(minValue, Integer.MAX_VALUE);
        Arrays.fill(maxValue, Integer.MIN_VALUE);
        for (int docID = 0; docID < numDocs; docID++) {
            if (VERBOSE) {
                System.out.println("  docID=" + docID);
            }
            int[] point = new int[numDims];
            for (int dim = 0; dim < numDims; dim++) {
                int value = random().nextInt();
                point[dim] = value;
                minValue[dim] = Math.min(minValue[dim], value);
                maxValue[dim] = Math.max(maxValue[dim], value);
                NumericUtils.intToSortableBytes(value, packedPoint, dim * Integer.BYTES);
                if (VERBOSE) {
                    System.out.println("    " + dim + " -> " + value);
                }
            }
            indexedPoints[docID] = point;
            w.add(packedPoint, docID);
        }

        long indexFP;
        try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
            indexFP = w.finish(out);
        }

        try (IndexInput in = dir.openInput("bkd", IOContext.DEFAULT)) {
            in.seek(indexFP);
            BKDReader r = new BKDReader(in);

            // The reader's global bounds must match the extremes tracked while indexing
            byte[] minPackedValue = r.getMinPackedValue();
            byte[] maxPackedValue = r.getMaxPackedValue();
            for (int dim = 0; dim < numDims; dim++) {
                int offset = dim * Integer.BYTES;
                assertEquals(minValue[dim], NumericUtils.sortableBytesToInt(minPackedValue, offset));
                assertEquals(maxValue[dim], NumericUtils.sortableBytesToInt(maxPackedValue, offset));
            }

            int iters = atLeast(100);
            for (int iter = 0; iter < iters; iter++) {
                if (VERBOSE) {
                    System.out.println("\nTEST: iter=" + iter);
                }

                // Build a random N-dim rectangle, ordering each pair so min <= max
                int[] queryMin = new int[numDims];
                int[] queryMax = new int[numDims];
                for (int dim = 0; dim < numDims; dim++) {
                    int first = random().nextInt();
                    int second = random().nextInt();
                    queryMin[dim] = Math.min(first, second);
                    queryMax[dim] = Math.max(first, second);
                }

                final BitSet hits = new BitSet();
                r.intersect(new IntersectVisitor() {

                    @Override
                    public void visit(int docID) {
                        hits.set(docID);
                    }

                    @Override
                    public void visit(int docID, byte[] packedValue) {
                        // Record the doc only when every dimension lies inside the query rectangle
                        for (int dim = 0; dim < numDims; dim++) {
                            int value = NumericUtils.sortableBytesToInt(packedValue, dim * Integer.BYTES);
                            boolean inRange = value >= queryMin[dim] && value <= queryMax[dim];
                            if (!inRange) {
                                return;
                            }
                        }
                        hits.set(docID);
                    }

                    @Override
                    public Relation compare(byte[] minPacked, byte[] maxPacked) {
                        boolean fullyInside = true;
                        for (int dim = 0; dim < numDims; dim++) {
                            int cellMin = NumericUtils.sortableBytesToInt(minPacked, dim * Integer.BYTES);
                            int cellMax = NumericUtils.sortableBytesToInt(maxPacked, dim * Integer.BYTES);
                            assert cellMax >= cellMin;
                            if (cellMax < queryMin[dim] || cellMin > queryMax[dim]) {
                                // Disjoint in one dimension means disjoint overall
                                return Relation.CELL_OUTSIDE_QUERY;
                            }
                            if (cellMin < queryMin[dim] || cellMax > queryMax[dim]) {
                                fullyInside = false;
                            }
                        }
                        return fullyInside ? Relation.CELL_INSIDE_QUERY : Relation.CELL_CROSSES_QUERY;
                    }
                });

                // Brute-force check of every indexed point against the rectangle
                for (int docID = 0; docID < numDocs; docID++) {
                    int[] point = indexedPoints[docID];
                    boolean expected = true;
                    for (int dim = 0; expected && dim < numDims; dim++) {
                        int value = point[dim];
                        expected = value >= queryMin[dim] && value <= queryMax[dim];
                    }
                    assertEquals("docID=" + docID, expected, hits.get(docID));
                }
            }
        }
    }
}
Also used : IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) BitSet(java.util.BitSet) CorruptingIndexOutput(org.apache.lucene.store.CorruptingIndexOutput) IndexOutput(org.apache.lucene.store.IndexOutput) Relation(org.apache.lucene.index.PointValues.Relation) IndexInput(org.apache.lucene.store.IndexInput) FilterDirectory(org.apache.lucene.store.FilterDirectory) Directory(org.apache.lucene.store.Directory)

Example 35 with IndexInput

use of org.apache.lucene.store.IndexInput in project lucene-solr by apache.

From the class TestBKD, method testTieBreakOrder.

/**
 * Indexes 10000 one-dimensional points that all share the same all-zero value, then
 * asserts that intersecting the tree visits docIDs in strictly increasing order.
 *
 * <p>The output and input are managed with try-with-resources so they are released
 * before the directory is closed, even when an assertion inside the visitor fails.
 *
 * @throws Exception if indexing, reading or verification fails
 */
public void testTieBreakOrder() throws Exception {
    try (Directory dir = newDirectory()) {
        int numDocs = 10000;
        BKDWriter w = new BKDWriter(numDocs + 1, dir, "tmp", 1, Integer.BYTES, 2, 0.01f, numDocs, true);
        for (int i = 0; i < numDocs; i++) {
            // A fresh zero-filled array: every point has an identical value
            w.add(new byte[Integer.BYTES], i);
        }
        long fp;
        try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
            fp = w.finish(out);
        }
        try (IndexInput in = dir.openInput("bkd", IOContext.DEFAULT)) {
            in.seek(fp);
            BKDReader r = new BKDReader(in);
            r.intersect(new IntersectVisitor() {

                int lastDocID = -1;

                @Override
                public void visit(int docID) {
                    assertTrue("lastDocID=" + lastDocID + " docID=" + docID, docID > lastDocID);
                    lastDocID = docID;
                }

                @Override
                public void visit(int docID, byte[] packedValue) {
                    visit(docID);
                }

                @Override
                public Relation compare(byte[] minPacked, byte[] maxPacked) {
                    // Always report "crosses" rather than inside or outside for every cell
                    return Relation.CELL_CROSSES_QUERY;
                }
            });
        }
    }
}
Also used : Relation(org.apache.lucene.index.PointValues.Relation) IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) IndexInput(org.apache.lucene.store.IndexInput) CorruptingIndexOutput(org.apache.lucene.store.CorruptingIndexOutput) IndexOutput(org.apache.lucene.store.IndexOutput) FilterDirectory(org.apache.lucene.store.FilterDirectory) Directory(org.apache.lucene.store.Directory)

Aggregations

IndexInput (org.apache.lucene.store.IndexInput)173 Directory (org.apache.lucene.store.Directory)75 IndexOutput (org.apache.lucene.store.IndexOutput)75 ChecksumIndexInput (org.apache.lucene.store.ChecksumIndexInput)49 IOException (java.io.IOException)26 RAMDirectory (org.apache.lucene.store.RAMDirectory)25 FilterDirectory (org.apache.lucene.store.FilterDirectory)23 CorruptIndexException (org.apache.lucene.index.CorruptIndexException)21 BytesRef (org.apache.lucene.util.BytesRef)18 ArrayList (java.util.ArrayList)17 BufferedChecksumIndexInput (org.apache.lucene.store.BufferedChecksumIndexInput)17 Test (org.junit.Test)17 BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder)13 NodeBuilder (org.apache.jackrabbit.oak.spi.state.NodeBuilder)10 IndexFormatTooNewException (org.apache.lucene.index.IndexFormatTooNewException)10 IndexFormatTooOldException (org.apache.lucene.index.IndexFormatTooOldException)10 CorruptingIndexOutput (org.apache.lucene.store.CorruptingIndexOutput)10 NRTCachingDirectory (org.apache.lucene.store.NRTCachingDirectory)10 IntersectVisitor (org.apache.lucene.index.PointValues.IntersectVisitor)9 Relation (org.apache.lucene.index.PointValues.Relation)9