use of org.apache.lucene.document.BinaryPoint in project lucene-solr by apache.
the class TestBackwardsCompatibility method addDoc.
private void addDoc(IndexWriter writer, int id) throws IOException {
  Document doc = new Document();
  doc.add(new TextField("content", "aaa", Field.Store.NO));
  doc.add(new StringField("id", Integer.toString(id), Field.Store.YES));
  FieldType customType2 = new FieldType(TextField.TYPE_STORED);
  customType2.setStoreTermVectors(true);
  customType2.setStoreTermVectorPositions(true);
  customType2.setStoreTermVectorOffsets(true);
  doc.add(new Field("autf8", "Lušcešne ā abń°cd", customType2));
  doc.add(new Field("utf8", "Lušcešne ā abń°cd", customType2));
  doc.add(new Field("content2", "here is more content with aaa aaa aaa", customType2));
  doc.add(new Field("fieⱷld", "field with non-ascii name", customType2));
  // add docvalues fields
  doc.add(new NumericDocValuesField("dvByte", (byte) id));
  byte[] bytes = new byte[] { (byte) (id >>> 24), (byte) (id >>> 16), (byte) (id >>> 8), (byte) id };
  BytesRef ref = new BytesRef(bytes);
  doc.add(new BinaryDocValuesField("dvBytesDerefFixed", ref));
  doc.add(new BinaryDocValuesField("dvBytesDerefVar", ref));
  doc.add(new SortedDocValuesField("dvBytesSortedFixed", ref));
  doc.add(new SortedDocValuesField("dvBytesSortedVar", ref));
  doc.add(new BinaryDocValuesField("dvBytesStraightFixed", ref));
  doc.add(new BinaryDocValuesField("dvBytesStraightVar", ref));
  doc.add(new DoubleDocValuesField("dvDouble", (double) id));
  doc.add(new FloatDocValuesField("dvFloat", (float) id));
  doc.add(new NumericDocValuesField("dvInt", id));
  doc.add(new NumericDocValuesField("dvLong", id));
  doc.add(new NumericDocValuesField("dvPacked", id));
  doc.add(new NumericDocValuesField("dvShort", (short) id));
  doc.add(new SortedSetDocValuesField("dvSortedSet", ref));
  doc.add(new SortedNumericDocValuesField("dvSortedNumeric", id));
  doc.add(new IntPoint("intPoint1d", id));
  doc.add(new IntPoint("intPoint2d", id, 2 * id));
  doc.add(new FloatPoint("floatPoint1d", (float) id));
  doc.add(new FloatPoint("floatPoint2d", (float) id, (float) 2 * id));
  doc.add(new LongPoint("longPoint1d", id));
  doc.add(new LongPoint("longPoint2d", id, 2 * id));
  doc.add(new DoublePoint("doublePoint1d", (double) id));
  doc.add(new DoublePoint("doublePoint2d", (double) id, (double) 2 * id));
  doc.add(new BinaryPoint("binaryPoint1d", bytes));
  doc.add(new BinaryPoint("binaryPoint2d", bytes, bytes));
  // a field with both offsets and term vectors for a cross-check
  FieldType customType3 = new FieldType(TextField.TYPE_STORED);
  customType3.setStoreTermVectors(true);
  customType3.setStoreTermVectorPositions(true);
  customType3.setStoreTermVectorOffsets(true);
  customType3.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  doc.add(new Field("content5", "here is more content with aaa aaa aaa", customType3));
  // a field that omits only positions
  FieldType customType4 = new FieldType(TextField.TYPE_STORED);
  customType4.setStoreTermVectors(true);
  customType4.setStoreTermVectorPositions(false);
  customType4.setStoreTermVectorOffsets(true);
  customType4.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  doc.add(new Field("content6", "here is more content with aaa aaa aaa", customType4));
  // TODO:
  //   index different norms types via similarity (we use a random one currently?!)
  //   remove any analyzer randomness, explicitly add payloads for certain fields.
  writer.addDocument(doc);
}
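The point fields indexed by addDoc are index-only structures; to search them, BinaryPoint exposes static query factories such as newExactQuery and newRangeQuery. Below is a minimal sketch of an exact-match lookup against an index built this way (the lookup method, the Directory handle, and the searched id are assumptions for illustration, not part of the test):

import java.io.IOException;

import org.apache.lucene.document.BinaryPoint;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;

// Assumed setup: `dir` holds an index populated by addDoc, and `id` is one
// of the indexed ids. Both are hypothetical for this sketch.
void lookup(Directory dir, int id) throws IOException {
  // Encode the id the same way addDoc did for the binary point fields.
  byte[] bytes = new byte[] { (byte) (id >>> 24), (byte) (id >>> 16), (byte) (id >>> 8), (byte) id };
  try (DirectoryReader reader = DirectoryReader.open(dir)) {
    IndexSearcher searcher = new IndexSearcher(reader);
    // Exact match on the 1D binary point field; newRangeQuery would take
    // lower/upper byte[] bounds instead.
    Query q = BinaryPoint.newExactQuery("binaryPoint1d", bytes);
    TopDocs hits = searcher.search(q, 10);
    System.out.println(hits.totalHits + " matching doc(s)");
  }
}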
use of org.apache.lucene.document.BinaryPoint in project lucene-solr by apache.
the class TestLucene60PointsFormat method testEstimatePointCount.
public void testEstimatePointCount() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig();
  // avoid MockRandomMergePolicy: it makes the number of points per leaf hard to predict
  while (iwc.getMergePolicy() instanceof MockRandomMergePolicy) {
    iwc.setMergePolicy(newMergePolicy());
  }
  IndexWriter w = new IndexWriter(dir, iwc);
  byte[] pointValue = new byte[3];
  byte[] uniquePointValue = new byte[3];
  random().nextBytes(uniquePointValue);
  // make sure we have several leaves
  final int numDocs = atLeast(10000);
  for (int i = 0; i < numDocs; ++i) {
    Document doc = new Document();
    if (i == numDocs / 2) {
      doc.add(new BinaryPoint("f", uniquePointValue));
    } else {
      do {
        random().nextBytes(pointValue);
      } while (Arrays.equals(pointValue, uniquePointValue));
      doc.add(new BinaryPoint("f", pointValue));
    }
    w.addDocument(doc);
  }
  w.forceMerge(1);
  final IndexReader r = DirectoryReader.open(w);
  w.close();
  final LeafReader lr = getOnlyLeafReader(r);
  PointValues points = lr.getPointValues("f");

  // If all points match, then the point count is numLeaves * maxPointsInLeafNode
  final int numLeaves = (int) Math.ceil((double) numDocs / maxPointsInLeafNode);
  assertEquals(numLeaves * maxPointsInLeafNode, points.estimatePointCount(new IntersectVisitor() {

    @Override
    public void visit(int docID, byte[] packedValue) throws IOException {
    }

    @Override
    public void visit(int docID) throws IOException {
    }

    @Override
    public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
      return Relation.CELL_INSIDE_QUERY;
    }
  }));

  // Return 0 if no points match
  assertEquals(0, points.estimatePointCount(new IntersectVisitor() {

    @Override
    public void visit(int docID, byte[] packedValue) throws IOException {
    }

    @Override
    public void visit(int docID) throws IOException {
    }

    @Override
    public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
      return Relation.CELL_OUTSIDE_QUERY;
    }
  }));

  // If only one point matches, then the point count is (maxPointsInLeafNode + 1) / 2
  // in general, or maybe 2x that if the point is a split value
  final long pointCount = points.estimatePointCount(new IntersectVisitor() {

    @Override
    public void visit(int docID, byte[] packedValue) throws IOException {
    }

    @Override
    public void visit(int docID) throws IOException {
    }

    @Override
    public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
      if (StringHelper.compare(3, uniquePointValue, 0, maxPackedValue, 0) > 0 ||
          StringHelper.compare(3, uniquePointValue, 0, minPackedValue, 0) < 0) {
        return Relation.CELL_OUTSIDE_QUERY;
      }
      return Relation.CELL_CROSSES_QUERY;
    }
  });
  assertTrue("" + pointCount,
      pointCount == (maxPointsInLeafNode + 1) / 2 || // common case
      pointCount == 2 * ((maxPointsInLeafNode + 1) / 2)); // if the point is a split value
  r.close();
  dir.close();
}
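To make the assertions concrete with sample numbers (assuming maxPointsInLeafNode = 1024 and exactly numDocs = 10000; the test class may randomize both): the all-match estimate is ceil(10000 / 1024) * 1024 = 10 * 1024 = 10240, the no-match estimate is 0, and the single-match estimate is (1024 + 1) / 2 = 512, or 1024 when the unique point happens to fall on a split value. The estimate never visits individual points; in this format, a leaf cell that merely crosses the query boundary is charged roughly half a leaf, which is why the estimate can exceed the true count.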
use of org.apache.lucene.document.BinaryPoint in project lucene-solr by apache.
the class TestPointValues method testIllegalDimChangeViaAddIndexesSlowCodecReader.
public void testIllegalDimChangeViaAddIndexesSlowCodecReader() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter w = new IndexWriter(dir, iwc);
  Document doc = new Document();
  doc.add(new BinaryPoint("dim", new byte[4]));
  w.addDocument(doc);
  w.close();

  Directory dir2 = newDirectory();
  iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter w2 = new IndexWriter(dir2, iwc);
  doc = new Document();
  doc.add(new BinaryPoint("dim", new byte[4], new byte[4]));
  w2.addDocument(doc);

  DirectoryReader r = DirectoryReader.open(dir);
  IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
    TestUtil.addIndexesSlowly(w2, r);
  });
  assertEquals("cannot change point dimension count from 2 to 1 for field=\"dim\"", expected.getMessage());
  IOUtils.close(r, w2, dir, dir2);
}
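For contrast, a sketch of the legal variant (hypothetical, not part of the test suite): if the first index also used two dimensions for "dim", the dimension counts would agree and the same addIndexes call would succeed.

// Hypothetical legal variant: index "dim" as a 2D binary point in the
// first index too, so it matches w2's schema and addIndexesSlowly does
// not throw.
Document doc1 = new Document();
doc1.add(new BinaryPoint("dim", new byte[4], new byte[4]));
w.addDocument(doc1);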
use of org.apache.lucene.document.BinaryPoint in project lucene-solr by apache.
the class TestPointValues method doTestMergedStats.
private void doTestMergedStats() throws IOException {
  final int numDims = TestUtil.nextInt(random(), 1, 8);
  final int numBytesPerDim = TestUtil.nextInt(random(), 1, 16);
  Directory dir = new RAMDirectory();
  IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
  final int numDocs = TestUtil.nextInt(random(), 10, 20);
  for (int i = 0; i < numDocs; ++i) {
    Document doc = new Document();
    final int numPoints = random().nextInt(3);
    for (int j = 0; j < numPoints; ++j) {
      doc.add(new BinaryPoint("field", randomBinaryValue(numDims, numBytesPerDim)));
    }
    w.addDocument(doc);
    if (random().nextBoolean()) {
      DirectoryReader.open(w).close();
    }
  }
  final IndexReader reader1 = DirectoryReader.open(w);
  w.forceMerge(1);
  final IndexReader reader2 = DirectoryReader.open(w);
  final PointValues expected = getOnlyLeafReader(reader2).getPointValues("field");
  if (expected == null) {
    assertNull(PointValues.getMinPackedValue(reader1, "field"));
    assertNull(PointValues.getMaxPackedValue(reader1, "field"));
    assertEquals(0, PointValues.getDocCount(reader1, "field"));
    assertEquals(0, PointValues.size(reader1, "field"));
  } else {
    assertArrayEquals(expected.getMinPackedValue(), PointValues.getMinPackedValue(reader1, "field"));
    assertArrayEquals(expected.getMaxPackedValue(), PointValues.getMaxPackedValue(reader1, "field"));
    assertEquals(expected.getDocCount(), PointValues.getDocCount(reader1, "field"));
    assertEquals(expected.size(), PointValues.size(reader1, "field"));
  }
  IOUtils.close(w, reader1, reader2, dir);
}
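The randomBinaryValue helper is private to the test class and is not shown in this excerpt. A plausible reconstruction, consistent with the call above (BinaryPoint's constructor takes byte[]... with one array per dimension, so the helper presumably returns byte[][]):

// Plausible reconstruction of the helper used above: produces numDims
// arrays of numBytesPerDim random bytes each, which the byte[]... varargs
// of BinaryPoint spreads into one array per dimension.
private byte[][] randomBinaryValue(int numDims, int numBytesPerDim) {
  byte[][] bytes = new byte[numDims][];
  for (int i = 0; i < numDims; ++i) {
    bytes[i] = new byte[numBytesPerDim];
    random().nextBytes(bytes[i]);
  }
  return bytes;
}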
use of org.apache.lucene.document.BinaryPoint in project lucene-solr by apache.
the class TestPointValues method testIllegalDimChangeTwoDocs.
public void testIllegalDimChangeTwoDocs() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter w = new IndexWriter(dir, iwc);
  Document doc = new Document();
  doc.add(new BinaryPoint("dim", new byte[4]));
  w.addDocument(doc);

  Document doc2 = new Document();
  doc2.add(new BinaryPoint("dim", new byte[4], new byte[4]));
  IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
    w.addDocument(doc2);
  });
  assertEquals("cannot change point dimension count from 1 to 2 for field=\"dim\"", expected.getMessage());
  w.close();
  dir.close();
}
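Both dimension-change tests exercise the same invariant: once a field has indexed points with a given dimension count, IndexWriter rejects any later document, or any merged-in index, that uses a different count for that field. The usual remedies are to use a different field name or to rebuild the index with a consistent schema.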