Search in sources :

Example 6 with BinaryPoint

use of org.apache.lucene.document.BinaryPoint in project lucene-solr by apache.

the class TestBackwardsCompatibility method addDoc.

private void addDoc(IndexWriter writer, int id) throws IOException {
    Document doc = new Document();
    doc.add(new TextField("content", "aaa", Field.Store.NO));
    doc.add(new StringField("id", Integer.toString(id), Field.Store.YES));
    FieldType customType2 = new FieldType(TextField.TYPE_STORED);
    customType2.setStoreTermVectors(true);
    customType2.setStoreTermVectorPositions(true);
    customType2.setStoreTermVectorOffsets(true);
    doc.add(new Field("autf8", "Luš„žceš… ne  ā˜  abń•°—cd", customType2));
    doc.add(new Field("utf8", "Luš„žceš… ne  ā˜  abń•°—cd", customType2));
    doc.add(new Field("content2", "here is more content with aaa aaa aaa", customType2));
    doc.add(new Field("fieā±·ld", "field with non-ascii name", customType2));
    // add docvalues fields
    doc.add(new NumericDocValuesField("dvByte", (byte) id));
    byte[] bytes = new byte[] { (byte) (id >>> 24), (byte) (id >>> 16), (byte) (id >>> 8), (byte) id };
    BytesRef ref = new BytesRef(bytes);
    doc.add(new BinaryDocValuesField("dvBytesDerefFixed", ref));
    doc.add(new BinaryDocValuesField("dvBytesDerefVar", ref));
    doc.add(new SortedDocValuesField("dvBytesSortedFixed", ref));
    doc.add(new SortedDocValuesField("dvBytesSortedVar", ref));
    doc.add(new BinaryDocValuesField("dvBytesStraightFixed", ref));
    doc.add(new BinaryDocValuesField("dvBytesStraightVar", ref));
    doc.add(new DoubleDocValuesField("dvDouble", (double) id));
    doc.add(new FloatDocValuesField("dvFloat", (float) id));
    doc.add(new NumericDocValuesField("dvInt", id));
    doc.add(new NumericDocValuesField("dvLong", id));
    doc.add(new NumericDocValuesField("dvPacked", id));
    doc.add(new NumericDocValuesField("dvShort", (short) id));
    doc.add(new SortedSetDocValuesField("dvSortedSet", ref));
    doc.add(new SortedNumericDocValuesField("dvSortedNumeric", id));
    doc.add(new IntPoint("intPoint1d", id));
    doc.add(new IntPoint("intPoint2d", id, 2 * id));
    doc.add(new FloatPoint("floatPoint1d", (float) id));
    doc.add(new FloatPoint("floatPoint2d", (float) id, (float) 2 * id));
    doc.add(new LongPoint("longPoint1d", id));
    doc.add(new LongPoint("longPoint2d", id, 2 * id));
    doc.add(new DoublePoint("doublePoint1d", (double) id));
    doc.add(new DoublePoint("doublePoint2d", (double) id, (double) 2 * id));
    doc.add(new BinaryPoint("binaryPoint1d", bytes));
    doc.add(new BinaryPoint("binaryPoint2d", bytes, bytes));
    // a field with both offsets and term vectors for a cross-check
    FieldType customType3 = new FieldType(TextField.TYPE_STORED);
    customType3.setStoreTermVectors(true);
    customType3.setStoreTermVectorPositions(true);
    customType3.setStoreTermVectorOffsets(true);
    customType3.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    doc.add(new Field("content5", "here is more content with aaa aaa aaa", customType3));
    // a field that omits only positions
    FieldType customType4 = new FieldType(TextField.TYPE_STORED);
    customType4.setStoreTermVectors(true);
    customType4.setStoreTermVectorPositions(false);
    customType4.setStoreTermVectorOffsets(true);
    customType4.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    doc.add(new Field("content6", "here is more content with aaa aaa aaa", customType4));
    // TODO: 
    //   index different norms types via similarity (we use a random one currently?!)
    //   remove any analyzer randomness, explicitly add payloads for certain fields.
    writer.addDocument(doc);
}
Also used : BinaryPoint(org.apache.lucene.document.BinaryPoint) FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) LongPoint(org.apache.lucene.document.LongPoint) Document(org.apache.lucene.document.Document) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) FieldType(org.apache.lucene.document.FieldType) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) SortField(org.apache.lucene.search.SortField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) StringField(org.apache.lucene.document.StringField) DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField) FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) IntPoint(org.apache.lucene.document.IntPoint) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) FloatPoint(org.apache.lucene.document.FloatPoint) StringField(org.apache.lucene.document.StringField) DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) DoublePoint(org.apache.lucene.document.DoublePoint) TextField(org.apache.lucene.document.TextField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BytesRef(org.apache.lucene.util.BytesRef)

Example 7 with BinaryPoint

use of org.apache.lucene.document.BinaryPoint in project lucene-solr by apache.

the class TestLucene60PointsFormat method testEstimatePointCount.

public void testEstimatePointCount() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig();
    // number of points per leaf hard to predict
    while (iwc.getMergePolicy() instanceof MockRandomMergePolicy) {
        iwc.setMergePolicy(newMergePolicy());
    }
    IndexWriter w = new IndexWriter(dir, iwc);
    byte[] pointValue = new byte[3];
    byte[] uniquePointValue = new byte[3];
    random().nextBytes(uniquePointValue);
    // make sure we have several leaves
    final int numDocs = atLeast(10000);
    for (int i = 0; i < numDocs; ++i) {
        Document doc = new Document();
        if (i == numDocs / 2) {
            doc.add(new BinaryPoint("f", uniquePointValue));
        } else {
            do {
                random().nextBytes(pointValue);
            } while (Arrays.equals(pointValue, uniquePointValue));
            doc.add(new BinaryPoint("f", pointValue));
        }
        w.addDocument(doc);
    }
    w.forceMerge(1);
    final IndexReader r = DirectoryReader.open(w);
    w.close();
    final LeafReader lr = getOnlyLeafReader(r);
    PointValues points = lr.getPointValues("f");
    // If all points match, then the point count is numLeaves * maxPointsInLeafNode
    final int numLeaves = (int) Math.ceil((double) numDocs / maxPointsInLeafNode);
    assertEquals(numLeaves * maxPointsInLeafNode, points.estimatePointCount(new IntersectVisitor() {

        @Override
        public void visit(int docID, byte[] packedValue) throws IOException {
        }

        @Override
        public void visit(int docID) throws IOException {
        }

        @Override
        public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
            return Relation.CELL_INSIDE_QUERY;
        }
    }));
    // Return 0 if no points match
    assertEquals(0, points.estimatePointCount(new IntersectVisitor() {

        @Override
        public void visit(int docID, byte[] packedValue) throws IOException {
        }

        @Override
        public void visit(int docID) throws IOException {
        }

        @Override
        public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
            return Relation.CELL_OUTSIDE_QUERY;
        }
    }));
    // If only one point matches, then the point count is (maxPointsInLeafNode + 1) / 2
    // in general, or maybe 2x that if the point is a split value
    final long pointCount = points.estimatePointCount(new IntersectVisitor() {

        @Override
        public void visit(int docID, byte[] packedValue) throws IOException {
        }

        @Override
        public void visit(int docID) throws IOException {
        }

        @Override
        public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
            if (StringHelper.compare(3, uniquePointValue, 0, maxPackedValue, 0) > 0 || StringHelper.compare(3, uniquePointValue, 0, minPackedValue, 0) < 0) {
                return Relation.CELL_OUTSIDE_QUERY;
            }
            return Relation.CELL_CROSSES_QUERY;
        }
    });
    assertTrue("" + pointCount, // common case
    pointCount == (maxPointsInLeafNode + 1) / 2 || // if the point is a split value
    pointCount == 2 * ((maxPointsInLeafNode + 1) / 2));
    r.close();
    dir.close();
}
Also used : IntersectVisitor(org.apache.lucene.index.PointValues.IntersectVisitor) BinaryPoint(org.apache.lucene.document.BinaryPoint) LeafReader(org.apache.lucene.index.LeafReader) MockRandomMergePolicy(org.apache.lucene.index.MockRandomMergePolicy) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) BinaryPoint(org.apache.lucene.document.BinaryPoint) PointValues(org.apache.lucene.index.PointValues) Relation(org.apache.lucene.index.PointValues.Relation) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 8 with BinaryPoint

use of org.apache.lucene.document.BinaryPoint in project lucene-solr by apache.

the class TestPointValues method testIllegalDimChangeViaAddIndexesSlowCodecReader.

public void testIllegalDimChangeViaAddIndexesSlowCodecReader() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
    IndexWriter w = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new BinaryPoint("dim", new byte[4]));
    w.addDocument(doc);
    w.close();
    Directory dir2 = newDirectory();
    iwc = new IndexWriterConfig(new MockAnalyzer(random()));
    IndexWriter w2 = new IndexWriter(dir2, iwc);
    doc = new Document();
    doc.add(new BinaryPoint("dim", new byte[4], new byte[4]));
    w2.addDocument(doc);
    DirectoryReader r = DirectoryReader.open(dir);
    IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
        TestUtil.addIndexesSlowly(w2, r);
    });
    assertEquals("cannot change point dimension count from 2 to 1 for field=\"dim\"", expected.getMessage());
    IOUtils.close(r, w2, dir, dir2);
}
Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) BinaryPoint(org.apache.lucene.document.BinaryPoint) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) FSDirectory(org.apache.lucene.store.FSDirectory)

Example 9 with BinaryPoint

use of org.apache.lucene.document.BinaryPoint in project lucene-solr by apache.

the class TestPointValues method doTestMergedStats.

private void doTestMergedStats() throws IOException {
    final int numDims = TestUtil.nextInt(random(), 1, 8);
    final int numBytesPerDim = TestUtil.nextInt(random(), 1, 16);
    Directory dir = new RAMDirectory();
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
    final int numDocs = TestUtil.nextInt(random(), 10, 20);
    for (int i = 0; i < numDocs; ++i) {
        Document doc = new Document();
        final int numPoints = random().nextInt(3);
        for (int j = 0; j < numPoints; ++j) {
            doc.add(new BinaryPoint("field", randomBinaryValue(numDims, numBytesPerDim)));
        }
        w.addDocument(doc);
        if (random().nextBoolean()) {
            DirectoryReader.open(w).close();
        }
    }
    final IndexReader reader1 = DirectoryReader.open(w);
    w.forceMerge(1);
    final IndexReader reader2 = DirectoryReader.open(w);
    final PointValues expected = getOnlyLeafReader(reader2).getPointValues("field");
    if (expected == null) {
        assertNull(PointValues.getMinPackedValue(reader1, "field"));
        assertNull(PointValues.getMaxPackedValue(reader1, "field"));
        assertEquals(0, PointValues.getDocCount(reader1, "field"));
        assertEquals(0, PointValues.size(reader1, "field"));
    } else {
        assertArrayEquals(expected.getMinPackedValue(), PointValues.getMinPackedValue(reader1, "field"));
        assertArrayEquals(expected.getMaxPackedValue(), PointValues.getMaxPackedValue(reader1, "field"));
        assertEquals(expected.getDocCount(), PointValues.getDocCount(reader1, "field"));
        assertEquals(expected.size(), PointValues.size(reader1, "field"));
    }
    IOUtils.close(w, reader1, reader2, dir);
}
Also used : PointValues(org.apache.lucene.index.PointValues) BinaryPoint(org.apache.lucene.document.BinaryPoint) Document(org.apache.lucene.document.Document) LongPoint(org.apache.lucene.document.LongPoint) FloatPoint(org.apache.lucene.document.FloatPoint) BinaryPoint(org.apache.lucene.document.BinaryPoint) DoublePoint(org.apache.lucene.document.DoublePoint) IntPoint(org.apache.lucene.document.IntPoint) RAMDirectory(org.apache.lucene.store.RAMDirectory) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) FSDirectory(org.apache.lucene.store.FSDirectory)

Example 10 with BinaryPoint

use of org.apache.lucene.document.BinaryPoint in project lucene-solr by apache.

the class TestPointValues method testIllegalDimChangeTwoDocs.

public void testIllegalDimChangeTwoDocs() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
    IndexWriter w = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new BinaryPoint("dim", new byte[4]));
    w.addDocument(doc);
    Document doc2 = new Document();
    doc2.add(new BinaryPoint("dim", new byte[4], new byte[4]));
    IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
        w.addDocument(doc2);
    });
    assertEquals("cannot change point dimension count from 1 to 2 for field=\"dim\"", expected.getMessage());
    w.close();
    dir.close();
}
Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) BinaryPoint(org.apache.lucene.document.BinaryPoint) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) FSDirectory(org.apache.lucene.store.FSDirectory)

Aggregations

BinaryPoint (org.apache.lucene.document.BinaryPoint)40 Document (org.apache.lucene.document.Document)38 Directory (org.apache.lucene.store.Directory)35 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)20 FSDirectory (org.apache.lucene.store.FSDirectory)18 RAMDirectory (org.apache.lucene.store.RAMDirectory)18 IntPoint (org.apache.lucene.document.IntPoint)17 IndexReader (org.apache.lucene.index.IndexReader)11 DoublePoint (org.apache.lucene.document.DoublePoint)10 FloatPoint (org.apache.lucene.document.FloatPoint)10 LongPoint (org.apache.lucene.document.LongPoint)10 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)10 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)9 BitSet (java.util.BitSet)7 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)7 IndexWriter (org.apache.lucene.index.IndexWriter)7 IntersectVisitor (org.apache.lucene.index.PointValues.IntersectVisitor)7 Relation (org.apache.lucene.index.PointValues.Relation)7 IOException (java.io.IOException)5 FieldType (org.apache.lucene.document.FieldType)4