Search in sources :

Example 26 with BinaryPoint

use of org.apache.lucene.document.BinaryPoint in project lucene-solr by apache.

the class TestLucene60PointsFormat method testEstimatePointCount2Dims.

// Exercises PointValues.estimatePointCount on a field with two dimensions.
// The tree is always balanced in the N dims case, and leaves are
// not all full so things are a bit different
public void testEstimatePointCount2Dims() throws IOException {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
    // Scratch buffers, re-randomized for every ordinary document.
    final byte[][] scratch = new byte[2][3];
    // The value that is indexed exactly once; the loop below guarantees that
    // no other document shares either of its two dimensions.
    final byte[][] loneValue = new byte[2][3];
    random().nextBytes(loneValue[0]);
    random().nextBytes(loneValue[1]);
    // make sure we have several leaves
    final int numDocs = atLeast(10000);
    final int loneDocIndex = numDocs / 2;
    for (int docIndex = 0; docIndex < numDocs; docIndex++) {
        final Document doc = new Document();
        if (docIndex != loneDocIndex) {
            // Redraw until neither dimension equals the lone value's.
            while (true) {
                random().nextBytes(scratch[0]);
                random().nextBytes(scratch[1]);
                if (!Arrays.equals(scratch[0], loneValue[0]) && !Arrays.equals(scratch[1], loneValue[1])) {
                    break;
                }
            }
            doc.add(new BinaryPoint("f", scratch));
        } else {
            doc.add(new BinaryPoint("f", loneValue));
        }
        w.addDocument(doc);
    }
    w.forceMerge(1);
    final IndexReader r = DirectoryReader.open(w);
    w.close();
    final PointValues points = getOnlyLeafReader(r).getPointValues("f");

    // With >1 dims, the tree is balanced: keep halving (rounding up) until
    // the leaf size fits under the configured maximum.
    int leafSize = numDocs;
    while (leafSize > maxPointsInLeafNode) {
        leafSize = (leafSize + 1) / 2;
    }
    final int numLeaves = Integer.highestOneBit((numDocs - 1) / leafSize) << 1;

    // If all points match, then the point count is numLeaves * leafSize
    final IntersectVisitor everythingInside = new IntersectVisitor() {

        @Override
        public void visit(int docID) throws IOException {
        }

        @Override
        public void visit(int docID, byte[] packedValue) throws IOException {
        }

        @Override
        public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
            return Relation.CELL_INSIDE_QUERY;
        }
    };
    assertEquals(numLeaves * leafSize, points.estimatePointCount(everythingInside));

    // Return 0 if no points match
    final IntersectVisitor everythingOutside = new IntersectVisitor() {

        @Override
        public void visit(int docID) throws IOException {
        }

        @Override
        public void visit(int docID, byte[] packedValue) throws IOException {
        }

        @Override
        public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
            return Relation.CELL_OUTSIDE_QUERY;
        }
    };
    assertEquals(0, points.estimatePointCount(everythingOutside));

    // If only one point matches, then the point count is (leafSize + 1) / 2
    // in general, or maybe 2x that if the point is a split value
    final IntersectVisitor onlyLoneValue = new IntersectVisitor() {

        @Override
        public void visit(int docID) throws IOException {
        }

        @Override
        public void visit(int docID, byte[] packedValue) throws IOException {
        }

        @Override
        public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
            // A cell can only hold the lone value if, in both dimensions,
            // the value lies within [min, max] of the cell.
            for (int dim = 0; dim < 2; ++dim) {
                final int offset = dim * 3;
                if (StringHelper.compare(3, loneValue[dim], 0, maxPackedValue, offset) > 0) {
                    return Relation.CELL_OUTSIDE_QUERY;
                }
                if (StringHelper.compare(3, loneValue[dim], 0, minPackedValue, offset) < 0) {
                    return Relation.CELL_OUTSIDE_QUERY;
                }
            }
            return Relation.CELL_CROSSES_QUERY;
        }
    };
    final long pointCount = points.estimatePointCount(onlyLoneValue);
    final int halfLeaf = (leafSize + 1) / 2;
    // common case
    final boolean matchesOneHalf = pointCount == halfLeaf;
    // if the point is a split value
    final boolean matchesTwoHalves = pointCount == 2 * halfLeaf;
    assertTrue("" + pointCount, matchesOneHalf || matchesTwoHalves);

    r.close();
    dir.close();
}
Also used : IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) BinaryPoint(org.apache.lucene.document.BinaryPoint) LeafReader(org.apache.lucene.index.LeafReader) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) BinaryPoint(org.apache.lucene.document.BinaryPoint) PointValues(org.apache.lucene.index.PointValues) Relation(org.apache.lucene.index.PointValues.Relation) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) Directory(org.apache.lucene.store.Directory)

Example 27 with BinaryPoint

use of org.apache.lucene.document.BinaryPoint in project lucene-solr by apache.

the class TestMemoryIndex method testIndexingPointsAndDocValues.

// Puts a single BinaryPoint, whose field type also enables BINARY doc values,
// into a MemoryIndex and checks that both the point and the doc value can be
// read back from the resulting leaf reader.
public void testIndexingPointsAndDocValues() throws Exception {
    final byte[] termBytes = "term".getBytes(StandardCharsets.UTF_8);

    // Field type carrying both point dimensions (1, 4) and BINARY doc values.
    final FieldType pointWithDocValues = new FieldType();
    pointWithDocValues.setDimensions(1, 4);
    pointWithDocValues.setDocValuesType(DocValuesType.BINARY);
    pointWithDocValues.freeze();

    final Document document = new Document();
    document.add(new BinaryPoint("field", termBytes, pointWithDocValues));

    final MemoryIndex memoryIndex = MemoryIndex.fromDocument(document, analyzer);
    final LeafReader reader = memoryIndex.createSearcher().getIndexReader().leaves().get(0).reader();

    // Point side: exactly one value, so min and max are both that value.
    assertEquals(1, reader.getPointValues("field").size());
    assertArrayEquals(termBytes, reader.getPointValues("field").getMinPackedValue());
    assertArrayEquals(termBytes, reader.getPointValues("field").getMaxPackedValue());

    // Doc-values side: the first document returned is doc 0 and holds "term".
    final BinaryDocValues docValues = reader.getBinaryDocValues("field");
    assertEquals(0, docValues.nextDoc());
    assertEquals("term", docValues.binaryValue().utf8ToString());
}
Also used : BinaryPoint(org.apache.lucene.document.BinaryPoint) LeafReader(org.apache.lucene.index.LeafReader) Document(org.apache.lucene.document.Document) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) FieldType(org.apache.lucene.document.FieldType)

Example 28 with BinaryPoint

use of org.apache.lucene.document.BinaryPoint in project lucene-solr by apache.

the class TestPointValues method testIllegalNumBytesChangeViaAddIndexesSlowCodecReader.

// Builds one index where point field "dim" has 4-byte values and a second
// writer that already indexed "dim" with 6-byte values, then expects
// TestUtil.addIndexesSlowly to reject merging the first into the second.
public void testIllegalNumBytesChangeViaAddIndexesSlowCodecReader() throws Exception {
    // Source index: "dim" with 4 bytes per value.
    final Directory sourceDir = newDirectory();
    final IndexWriter sourceWriter = new IndexWriter(sourceDir, new IndexWriterConfig(new MockAnalyzer(random())));
    final Document fourByteDoc = new Document();
    fourByteDoc.add(new BinaryPoint("dim", new byte[4]));
    sourceWriter.addDocument(fourByteDoc);
    sourceWriter.close();

    // Target writer: "dim" with 6 bytes per value; left open for the addIndexes attempt.
    final Directory targetDir = newDirectory();
    final IndexWriter targetWriter = new IndexWriter(targetDir, new IndexWriterConfig(new MockAnalyzer(random())));
    final Document sixByteDoc = new Document();
    sixByteDoc.add(new BinaryPoint("dim", new byte[6]));
    targetWriter.addDocument(sixByteDoc);

    final DirectoryReader sourceReader = DirectoryReader.open(sourceDir);
    final IllegalArgumentException expected = expectThrows(IllegalArgumentException.class,
        () -> TestUtil.addIndexesSlowly(targetWriter, sourceReader));
    assertEquals("cannot change point numBytes from 6 to 4 for field=\"dim\"", expected.getMessage());

    IOUtils.close(sourceReader, targetWriter, sourceDir, targetDir);
}
Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) BinaryPoint(org.apache.lucene.document.BinaryPoint) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) FSDirectory(org.apache.lucene.store.FSDirectory)

Example 29 with BinaryPoint

use of org.apache.lucene.document.BinaryPoint in project lucene-solr by apache.

the class TestPointValues method testIllegalNumBytesChangeViaAddIndexesCodecReader.

// Builds one index where point field "dim" has 4-byte values and a second
// writer that already indexed "dim" with 6-byte values, then expects
// IndexWriter.addIndexes(CodecReader...) to reject the first index's reader.
public void testIllegalNumBytesChangeViaAddIndexesCodecReader() throws Exception {
    // Source index: "dim" with 4 bytes per value.
    final Directory sourceDir = newDirectory();
    final IndexWriter sourceWriter = new IndexWriter(sourceDir, new IndexWriterConfig(new MockAnalyzer(random())));
    final Document fourByteDoc = new Document();
    fourByteDoc.add(new BinaryPoint("dim", new byte[4]));
    sourceWriter.addDocument(fourByteDoc);
    sourceWriter.close();

    // Target writer: "dim" with 6 bytes per value; left open for the addIndexes attempt.
    final Directory targetDir = newDirectory();
    final IndexWriter targetWriter = new IndexWriter(targetDir, new IndexWriterConfig(new MockAnalyzer(random())));
    final Document sixByteDoc = new Document();
    sixByteDoc.add(new BinaryPoint("dim", new byte[6]));
    targetWriter.addDocument(sixByteDoc);

    final DirectoryReader sourceReader = DirectoryReader.open(sourceDir);
    // The single leaf of the source index is handed over as the only varargs element.
    final IllegalArgumentException expected = expectThrows(IllegalArgumentException.class,
        () -> targetWriter.addIndexes((CodecReader) getOnlyLeafReader(sourceReader)));
    assertEquals("cannot change point numBytes from 6 to 4 for field=\"dim\"", expected.getMessage());

    IOUtils.close(sourceReader, targetWriter, sourceDir, targetDir);
}
Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) BinaryPoint(org.apache.lucene.document.BinaryPoint) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) FSDirectory(org.apache.lucene.store.FSDirectory)

Example 30 with BinaryPoint

use of org.apache.lucene.document.BinaryPoint in project lucene-solr by apache.

the class TestPointValues method testIllegalNumBytesChangeViaAddIndexesDirectory.

// Builds one index where point field "dim" has 4-byte values and a second
// writer that already indexed "dim" with 6-byte values, then expects
// IndexWriter.addIndexes(Directory...) to reject the first index's directory.
public void testIllegalNumBytesChangeViaAddIndexesDirectory() throws Exception {
    // Source index: "dim" with 4 bytes per value.
    final Directory sourceDir = newDirectory();
    final IndexWriter sourceWriter = new IndexWriter(sourceDir, new IndexWriterConfig(new MockAnalyzer(random())));
    final Document fourByteDoc = new Document();
    fourByteDoc.add(new BinaryPoint("dim", new byte[4]));
    sourceWriter.addDocument(fourByteDoc);
    sourceWriter.close();

    // Target writer: "dim" with 6 bytes per value; left open for the addIndexes attempt.
    final Directory targetDir = newDirectory();
    final IndexWriter targetWriter = new IndexWriter(targetDir, new IndexWriterConfig(new MockAnalyzer(random())));
    final Document sixByteDoc = new Document();
    sixByteDoc.add(new BinaryPoint("dim", new byte[6]));
    targetWriter.addDocument(sixByteDoc);

    // The source directory is handed over as the only varargs element.
    final IllegalArgumentException expected = expectThrows(IllegalArgumentException.class,
        () -> targetWriter.addIndexes(sourceDir));
    assertEquals("cannot change point numBytes from 6 to 4 for field=\"dim\"", expected.getMessage());

    IOUtils.close(targetWriter, sourceDir, targetDir);
}
Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) BinaryPoint(org.apache.lucene.document.BinaryPoint) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) FSDirectory(org.apache.lucene.store.FSDirectory)

Aggregations

BinaryPoint (org.apache.lucene.document.BinaryPoint): 40; Document (org.apache.lucene.document.Document): 38; Directory (org.apache.lucene.store.Directory): 35; MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 20; FSDirectory (org.apache.lucene.store.FSDirectory): 18; RAMDirectory (org.apache.lucene.store.RAMDirectory): 18; IntPoint (org.apache.lucene.document.IntPoint): 17; IndexReader (org.apache.lucene.index.IndexReader): 11; DoublePoint (org.apache.lucene.document.DoublePoint): 10; FloatPoint (org.apache.lucene.document.FloatPoint): 10; LongPoint (org.apache.lucene.document.LongPoint): 10; IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 10; RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 9; BitSet (java.util.BitSet): 7; NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField): 7; IndexWriter (org.apache.lucene.index.IndexWriter): 7; IntersectVisitor (org.apache.lucene.index.PointValues.IntersectVisitor): 7; Relation (org.apache.lucene.index.PointValues.Relation): 7; IOException (java.io.IOException): 5; FieldType (org.apache.lucene.document.FieldType): 4