Search in sources :

Example 11 with IntersectVisitor

use of org.apache.lucene.index.PointValues.IntersectVisitor in project lucene-solr by apache.

the class SimpleTextBKDWriter method writeField1Dim.

/**
 * Writes a single-dimension field.
 *
 * <p>In the 1D case the points can simply be sorted in ascending order and then pushed
 * through the same writing logic that is used at merge time.
 *
 * @param out       output handed to the {@code OneDimensionBKDWriter}
 * @param fieldName name of the field being written (not read by this method)
 * @param reader    the points to sort in place and then write
 * @return the value returned by {@code OneDimensionBKDWriter.finish()}
 * @throws IOException if adding a point or finishing the writer fails
 */
private long writeField1Dim(IndexOutput out, String fieldName, MutablePointValues reader) throws IOException {
    // toIntExact throws rather than silently truncating if the point count does not fit an int.
    final int pointCount = Math.toIntExact(reader.size());
    MutablePointsReaderUtils.sort(maxDoc, packedBytesLength, reader, 0, pointCount);

    final OneDimensionBKDWriter sortedWriter = new OneDimensionBKDWriter(out);

    // Forwards every (value, docID) pair of the now-sorted reader to the 1D writer.
    final IntersectVisitor forwardEveryPoint = new IntersectVisitor() {

        @Override
        public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
            // Never report a cell as fully inside or outside: every cell "crosses".
            return Relation.CELL_CROSSES_QUERY;
        }

        @Override
        public void visit(int docID, byte[] packedValue) throws IOException {
            sortedWriter.add(packedValue, docID);
        }

        @Override
        public void visit(int docID) throws IOException {
            // The value-less callback is not expected here; treat it as a broken invariant.
            throw new IllegalStateException();
        }
    };

    reader.intersect(forwardEveryPoint);
    return sortedWriter.finish();
}
Also used : Relation(org.apache.lucene.index.PointValues.Relation) IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) IOException(java.io.IOException)

Example 12 with IntersectVisitor

use of org.apache.lucene.index.PointValues.IntersectVisitor in project lucene-solr by apache.

the class SimpleTextPointsWriter method writeField.

/**
 * Writes all points of one field using {@code SimpleTextBKDWriter}.
 *
 * <p>Every point exposed by {@code reader} for {@code fieldInfo.name} is added to a fresh
 * writer. If at least one point was added, the result of {@code writer.finish(dataOut)} is
 * recorded in {@code indexFPs} under the field name; otherwise nothing is recorded.
 *
 * @param fieldInfo describes the field (name, dimension count, bytes per dimension)
 * @param reader    source of the field's point values
 * @throws IOException if adding a point or finishing the writer fails
 */
@Override
public void writeField(FieldInfo fieldInfo, PointsReader reader) throws IOException {
    PointValues values = reader.getValues(fieldInfo.name);
    // As many points as documents with points means no document carries more than one value.
    boolean singleValuePerDoc = values.size() == values.getDocCount();
    // We use our own fork of the BKDWriter to customize how it writes the index and blocks to disk:
    try (SimpleTextBKDWriter writer = new SimpleTextBKDWriter(writeState.segmentInfo.maxDoc(), writeState.directory, writeState.segmentInfo.name, fieldInfo.getPointDimensionCount(), fieldInfo.getPointNumBytes(), SimpleTextBKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE, SimpleTextBKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP, values.size(), singleValuePerDoc)) {
        values.intersect(new IntersectVisitor() {

            @Override
            public void visit(int docID) {
                // The value-less callback is not expected here; treat it as a broken invariant.
                throw new IllegalStateException();
            }

            // Annotated like its siblings so a signature drift in IntersectVisitor fails at compile time.
            @Override
            public void visit(int docID, byte[] packedValue) throws IOException {
                writer.add(packedValue, docID);
            }

            @Override
            public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                // Never report a cell as fully inside or outside: every cell "crosses".
                return Relation.CELL_CROSSES_QUERY;
            }
        });
        // We could have 0 points on merge since all docs with points may be deleted:
        if (writer.getPointCount() > 0) {
            indexFPs.put(fieldInfo.name, writer.finish(dataOut));
        }
    }
}
Also used : PointValues(org.apache.lucene.index.PointValues) Relation(org.apache.lucene.index.PointValues.Relation) IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) IOException(java.io.IOException)

Example 13 with IntersectVisitor

use of org.apache.lucene.index.PointValues.IntersectVisitor in project lucene-solr by apache.

the class BKDWriter method writeField1Dim.

/**
 * One-dimensional write path: sort the incoming points in ascending order, then reuse the
 * same writing logic that is used at merge time.
 *
 * @param out       output handed to the {@code OneDimensionBKDWriter}
 * @param fieldName name of the field being written (not read by this method)
 * @param reader    the points to sort in place and then write
 * @return the value returned by {@code OneDimensionBKDWriter.finish()}
 * @throws IOException if adding a point or finishing the writer fails
 */
private long writeField1Dim(IndexOutput out, String fieldName, MutablePointValues reader) throws IOException {
    // Fails with an exception, instead of truncating, when the count exceeds the int range.
    final int numPoints = Math.toIntExact(reader.size());
    MutablePointsReaderUtils.sort(maxDoc, packedBytesLength, reader, 0, numPoints);

    final OneDimensionBKDWriter sink = new OneDimensionBKDWriter(out);

    // Streams each sorted (value, docID) pair into the 1D writer.
    final IntersectVisitor copyAll = new IntersectVisitor() {

        @Override
        public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
            // Every cell is reported as crossing; none is skipped or accepted wholesale.
            return Relation.CELL_CROSSES_QUERY;
        }

        @Override
        public void visit(int docID) throws IOException {
            // A callback without a value is not expected on this path.
            throw new IllegalStateException();
        }

        @Override
        public void visit(int docID, byte[] packedValue) throws IOException {
            sink.add(packedValue, docID);
        }
    };

    reader.intersect(copyAll);
    return sink.finish();
}
Also used : Relation(org.apache.lucene.index.PointValues.Relation) IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) IOException(java.io.IOException)

Example 14 with IntersectVisitor

use of org.apache.lucene.index.PointValues.IntersectVisitor in project lucene-solr by apache.

the class BasePointsFormatTestCase method testBigIntNDims.

// Tests on N-dimensional points where each dimension is a BigInteger.
//
// Outline:
//   1. Index numDocs documents, each with one random N-dimensional point in "field".
//   2. For a number of iterations, build a random N-dimensional rectangle query and collect
//      the matching documents through an IntersectVisitor.
//   3. Compare the collected hits against a brute-force check of every indexed document.
public void testBigIntNDims() throws Exception {
    int numDocs = atLeast(1000);
    try (Directory dir = getDirectory(numDocs)) {
        int numBytesPerDim = TestUtil.nextInt(random(), 2, PointValues.MAX_NUM_BYTES);
        int numDims = TestUtil.nextInt(random(), 1, PointValues.MAX_DIMENSIONS);
        IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
        // We rely on docIDs not changing:
        iwc.setMergePolicy(newLogMergePolicy());
        RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
        // docs[i] holds the per-dimension values indexed for the i-th added document; the
        // verification loop at the end looks documents up by this index.
        BigInteger[][] docs = new BigInteger[numDocs][];
        for (int docID = 0; docID < numDocs; docID++) {
            BigInteger[] values = new BigInteger[numDims];
            if (VERBOSE) {
                System.out.println("  docID=" + docID);
            }
            // One sortable-bytes encoding per dimension, each numBytesPerDim long.
            byte[][] bytes = new byte[numDims][];
            for (int dim = 0; dim < numDims; dim++) {
                values[dim] = randomBigInt(numBytesPerDim);
                bytes[dim] = new byte[numBytesPerDim];
                NumericUtils.bigIntToSortableBytes(values[dim], numBytesPerDim, bytes[dim], 0);
                if (VERBOSE) {
                    System.out.println("    " + dim + " -> " + values[dim]);
                }
            }
            docs[docID] = values;
            Document doc = new Document();
            doc.add(new BinaryPoint("field", bytes));
            w.addDocument(doc);
        }
        // Obtain the reader from the writer before closing the writer.
        DirectoryReader r = w.getReader();
        w.close();
        int iters = atLeast(100);
        for (int iter = 0; iter < iters; iter++) {
            if (VERBOSE) {
                System.out.println("\nTEST: iter=" + iter);
            }
            // Random N dims rect query:
            BigInteger[] queryMin = new BigInteger[numDims];
            BigInteger[] queryMax = new BigInteger[numDims];
            for (int dim = 0; dim < numDims; dim++) {
                queryMin[dim] = randomBigInt(numBytesPerDim);
                queryMax[dim] = randomBigInt(numBytesPerDim);
                // Swap so that queryMin[dim] <= queryMax[dim].
                if (queryMin[dim].compareTo(queryMax[dim]) > 0) {
                    BigInteger x = queryMin[dim];
                    queryMin[dim] = queryMax[dim];
                    queryMax[dim] = x;
                }
                if (VERBOSE) {
                    System.out.println("  " + dim + "\n    min=" + queryMin[dim] + "\n    max=" + queryMax[dim]);
                }
            }
            // Hits are recorded with reader-wide doc ids (docBase + per-leaf docID).
            final BitSet hits = new BitSet();
            for (LeafReaderContext ctx : r.leaves()) {
                PointValues dimValues = ctx.reader().getPointValues("field");
                if (dimValues == null) {
                    // This leaf has no point values for "field"; nothing to intersect.
                    continue;
                }
                final int docBase = ctx.docBase;
                dimValues.intersect(new IntersectVisitor() {

                    // Value-less callback: the document is recorded as a hit without any check.
                    // NOTE(review): presumably only invoked for cells that compare() reported as
                    // CELL_INSIDE_QUERY - confirm against the IntersectVisitor documentation.
                    @Override
                    public void visit(int docID) {
                        hits.set(docBase + docID);
                    //System.out.println("visit docID=" + docID);
                    }

                    // Callback with a value: decode each dimension and record the document only
                    // if every dimension lies within [queryMin, queryMax] (both inclusive).
                    @Override
                    public void visit(int docID, byte[] packedValue) {
                        //System.out.println("visit check docID=" + docID);
                        for (int dim = 0; dim < numDims; dim++) {
                            BigInteger x = NumericUtils.sortableBytesToBigInt(packedValue, dim * numBytesPerDim, numBytesPerDim);
                            if (x.compareTo(queryMin[dim]) < 0 || x.compareTo(queryMax[dim]) > 0) {
                                //System.out.println("  no");
                                return;
                            }
                        }
                        //System.out.println("  yes");
                        hits.set(docBase + docID);
                    }

                    // Classifies a cell [minPacked, maxPacked] against the query rectangle:
                    //   - OUTSIDE as soon as one dimension does not overlap the query range,
                    //   - CROSSES if some dimension sticks out of the query range,
                    //   - INSIDE when every dimension is fully contained.
                    @Override
                    public Relation compare(byte[] minPacked, byte[] maxPacked) {
                        boolean crosses = false;
                        for (int dim = 0; dim < numDims; dim++) {
                            BigInteger min = NumericUtils.sortableBytesToBigInt(minPacked, dim * numBytesPerDim, numBytesPerDim);
                            BigInteger max = NumericUtils.sortableBytesToBigInt(maxPacked, dim * numBytesPerDim, numBytesPerDim);
                            assert max.compareTo(min) >= 0;
                            if (max.compareTo(queryMin[dim]) < 0 || min.compareTo(queryMax[dim]) > 0) {
                                return Relation.CELL_OUTSIDE_QUERY;
                            } else if (min.compareTo(queryMin[dim]) < 0 || max.compareTo(queryMax[dim]) > 0) {
                                crosses = true;
                            }
                        }
                        if (crosses) {
                            return Relation.CELL_CROSSES_QUERY;
                        } else {
                            return Relation.CELL_INSIDE_QUERY;
                        }
                    }
                });
            }
            // Brute-force verification: a document is expected to match exactly when all of
            // its dimensions fall inside the query rectangle.
            for (int docID = 0; docID < numDocs; docID++) {
                BigInteger[] docValues = docs[docID];
                boolean expected = true;
                for (int dim = 0; dim < numDims; dim++) {
                    BigInteger x = docValues[dim];
                    if (x.compareTo(queryMin[dim]) < 0 || x.compareTo(queryMax[dim]) > 0) {
                        expected = false;
                        break;
                    }
                }
                boolean actual = hits.get(docID);
                assertEquals("docID=" + docID, expected, actual);
            }
        }
        r.close();
    }
}
Also used : IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) BinaryPoint(org.apache.lucene.document.BinaryPoint) BitSet(java.util.BitSet) Document(org.apache.lucene.document.Document) BinaryPoint(org.apache.lucene.document.BinaryPoint) IntPoint(org.apache.lucene.document.IntPoint) Relation(org.apache.lucene.index.PointValues.Relation) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) BigInteger(java.math.BigInteger) Directory(org.apache.lucene.store.Directory)

Example 15 with IntersectVisitor

use of org.apache.lucene.index.PointValues.IntersectVisitor in project lucene-solr by apache.

the class BasePointsFormatTestCase method testAllPointDocsDeletedInSegment.

/**
 * Indexes ten documents that each carry a 4-byte point ("dim"), a numeric doc value ("id")
 * and a marker term ("x"), plus one completely empty document, then deletes every document
 * containing the marker term and optionally force-merges to a single segment.
 *
 * <p>Afterwards it verifies that exactly one document is left, that every point still
 * visited decodes to the "id" of its document, and that none of the visited points
 * belongs to a live document.
 */
public void testAllPointDocsDeletedInSegment() throws Exception {
    Directory dir = getDirectory(20);
    IndexWriterConfig iwc = newIndexWriterConfig();
    IndexWriter w = new IndexWriter(dir, iwc);
    byte[] point = new byte[4];
    for (int i = 0; i < 10; i++) {
        Document doc = new Document();
        NumericUtils.intToSortableBytes(i, point, 0);
        doc.add(new BinaryPoint("dim", point));
        doc.add(new NumericDocValuesField("id", i));
        doc.add(newStringField("x", "x", Field.Store.NO));
        w.addDocument(doc);
    }
    // The only document without the marker term, hence the only one that survives the delete.
    w.addDocument(new Document());
    w.deleteDocuments(new Term("x", "x"));
    if (random().nextBoolean()) {
        w.forceMerge(1);
    }
    w.close();
    DirectoryReader r = DirectoryReader.open(dir);
    assertEquals(1, r.numDocs());
    // Reader-wide live docs: must be indexed with docBase + per-leaf docID.
    Bits liveDocs = MultiFields.getLiveDocs(r);
    for (LeafReaderContext ctx : r.leaves()) {
        PointValues values = ctx.reader().getPointValues("dim");
        NumericDocValues idValues = ctx.reader().getNumericDocValues("id");
        if (idValues == null) {
            // Nothing to check in this leaf: 100% deleted segments are dropped, and the "id"
            // field never exists in the final single doc segment
            continue;
        }
        // Per-leaf mapping from docID to the "id" value stored for that document.
        int[] docIDToID = new int[ctx.reader().maxDoc()];
        int docID;
        while ((docID = idValues.nextDoc()) != NO_MORE_DOCS) {
            docIDToID[docID] = (int) idValues.longValue();
        }
        if (values != null) {
            final int docBase = ctx.docBase;
            BitSet seen = new BitSet();
            values.intersect(new IntersectVisitor() {

                @Override
                public Relation compare(byte[] minPacked, byte[] maxPacked) {
                    // Never report a cell as fully inside or outside: every cell "crosses".
                    return Relation.CELL_CROSSES_QUERY;
                }

                @Override
                public void visit(int docID) {
                    // The value-less callback is not expected here; treat it as a broken invariant.
                    throw new IllegalStateException();
                }

                @Override
                public void visit(int docID, byte[] packedValue) {
                    // docID is relative to this leaf while liveDocs covers the whole reader,
                    // so shift by the leaf's docBase before the lookup.
                    if (liveDocs.get(docBase + docID)) {
                        seen.set(docID);
                    }
                    assertEquals(docIDToID[docID], NumericUtils.sortableBytesToInt(packedValue, 0));
                }
            });
            // Every document that had a point was deleted, so no visited point may be live.
            assertEquals(0, seen.cardinality());
        }
    }
    IOUtils.close(r, dir);
}
Also used : IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) BinaryPoint(org.apache.lucene.document.BinaryPoint) BitSet(java.util.BitSet) Document(org.apache.lucene.document.Document) BinaryPoint(org.apache.lucene.document.BinaryPoint) IntPoint(org.apache.lucene.document.IntPoint) Relation(org.apache.lucene.index.PointValues.Relation) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) Bits(org.apache.lucene.util.Bits) Directory(org.apache.lucene.store.Directory)

Aggregations

IntersectVisitor (org.apache.lucene.index.PointValues.IntersectVisitor)24 Relation (org.apache.lucene.index.PointValues.Relation)22 Directory (org.apache.lucene.store.Directory)14 IOException (java.io.IOException)9 PointValues (org.apache.lucene.index.PointValues)9 IndexInput (org.apache.lucene.store.IndexInput)9 IndexOutput (org.apache.lucene.store.IndexOutput)9 BitSet (java.util.BitSet)8 CorruptingIndexOutput (org.apache.lucene.store.CorruptingIndexOutput)8 BinaryPoint (org.apache.lucene.document.BinaryPoint)7 Document (org.apache.lucene.document.Document)7 FilterDirectory (org.apache.lucene.store.FilterDirectory)7 IntPoint (org.apache.lucene.document.IntPoint)6 LeafReader (org.apache.lucene.index.LeafReader)6 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)4 DocIdSetBuilder (org.apache.lucene.util.DocIdSetBuilder)4 FieldInfo (org.apache.lucene.index.FieldInfo)3 ConstantScoreScorer (org.apache.lucene.search.ConstantScoreScorer)3 ConstantScoreWeight (org.apache.lucene.search.ConstantScoreWeight)3 BigInteger (java.math.BigInteger)2