use of org.apache.lucene.document.BinaryDocValuesField in project elasticsearch by elastic.
the class UidFieldMapper method parseCreateField.
@Override
protected void parseCreateField(ParseContext context, List<IndexableField> fields) throws IOException {
Field uid = new Field(NAME, Uid.createUid(context.sourceToParse().type(), context.sourceToParse().id()), Defaults.FIELD_TYPE);
fields.add(uid);
if (fieldType().hasDocValues()) {
fields.add(new BinaryDocValuesField(NAME, new BytesRef(uid.stringValue())));
}
}
use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by apache.
the class TestLucene54DocValuesFormat method doTestSparseDocValuesVsStoredFields.
private void doTestSparseDocValuesVsStoredFields() throws Exception {
final long[] values = new long[TestUtil.nextInt(random(), 1, 500)];
for (int i = 0; i < values.length; ++i) {
values[i] = random().nextLong();
}
Directory dir = newFSDirectory(createTempDir());
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
conf.setMergeScheduler(new SerialMergeScheduler());
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
// sparse compression is only enabled if less than 1% of docs have a value
final int avgGap = 100;
final int numDocs = atLeast(200);
for (int i = random().nextInt(avgGap * 2); i >= 0; --i) {
writer.addDocument(new Document());
}
final int maxNumValuesPerDoc = random().nextBoolean() ? 1 : TestUtil.nextInt(random(), 2, 5);
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
// single-valued
long docValue = values[random().nextInt(values.length)];
doc.add(new NumericDocValuesField("numeric", docValue));
doc.add(new SortedDocValuesField("sorted", new BytesRef(Long.toString(docValue))));
doc.add(new BinaryDocValuesField("binary", new BytesRef(Long.toString(docValue))));
doc.add(new StoredField("value", docValue));
// multi-valued
final int numValues = TestUtil.nextInt(random(), 1, maxNumValuesPerDoc);
for (int j = 0; j < numValues; ++j) {
docValue = values[random().nextInt(values.length)];
doc.add(new SortedNumericDocValuesField("sorted_numeric", docValue));
doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef(Long.toString(docValue))));
doc.add(new StoredField("values", docValue));
}
writer.addDocument(doc);
// add a gap
for (int j = TestUtil.nextInt(random(), 0, avgGap * 2); j >= 0; --j) {
writer.addDocument(new Document());
}
}
if (random().nextBoolean()) {
writer.forceMerge(1);
}
final IndexReader indexReader = writer.getReader();
TestUtil.checkReader(indexReader);
writer.close();
for (LeafReaderContext context : indexReader.leaves()) {
final LeafReader reader = context.reader();
final NumericDocValues numeric = DocValues.getNumeric(reader, "numeric");
final SortedDocValues sorted = DocValues.getSorted(reader, "sorted");
final BinaryDocValues binary = DocValues.getBinary(reader, "binary");
final SortedNumericDocValues sortedNumeric = DocValues.getSortedNumeric(reader, "sorted_numeric");
final SortedSetDocValues sortedSet = DocValues.getSortedSet(reader, "sorted_set");
for (int i = 0; i < reader.maxDoc(); ++i) {
final Document doc = reader.document(i);
final IndexableField valueField = doc.getField("value");
final Long value = valueField == null ? null : valueField.numericValue().longValue();
if (value == null) {
assertTrue(numeric.docID() + " vs " + i, numeric.docID() < i);
} else {
assertEquals(i, numeric.nextDoc());
assertEquals(i, binary.nextDoc());
assertEquals(i, sorted.nextDoc());
assertEquals(value.longValue(), numeric.longValue());
assertTrue(sorted.ordValue() >= 0);
assertEquals(new BytesRef(Long.toString(value)), sorted.lookupOrd(sorted.ordValue()));
assertEquals(new BytesRef(Long.toString(value)), binary.binaryValue());
}
final IndexableField[] valuesFields = doc.getFields("values");
if (valuesFields.length == 0) {
assertTrue(sortedNumeric.docID() + " vs " + i, sortedNumeric.docID() < i);
} else {
final Set<Long> valueSet = new HashSet<>();
for (IndexableField sf : valuesFields) {
valueSet.add(sf.numericValue().longValue());
}
assertEquals(i, sortedNumeric.nextDoc());
assertEquals(valuesFields.length, sortedNumeric.docValueCount());
for (int j = 0; j < sortedNumeric.docValueCount(); ++j) {
assertTrue(valueSet.contains(sortedNumeric.nextValue()));
}
assertEquals(i, sortedSet.nextDoc());
int sortedSetCount = 0;
while (true) {
long ord = sortedSet.nextOrd();
if (ord == SortedSetDocValues.NO_MORE_ORDS) {
break;
}
assertTrue(valueSet.contains(Long.parseLong(sortedSet.lookupOrd(ord).utf8ToString())));
sortedSetCount++;
}
assertEquals(valueSet.size(), sortedSetCount);
}
}
}
indexReader.close();
dir.close();
}
use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by apache.
the class TestBackwardsCompatibility method testCreateIndexWithDocValuesUpdates.
// Creates an index with DocValues updates
public void testCreateIndexWithDocValuesUpdates() throws Exception {
Path indexDir = getIndexDir().resolve("dvupdates");
Files.deleteIfExists(indexDir);
Directory dir = newFSDirectory(indexDir);
IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random())).setUseCompoundFile(false).setMergePolicy(NoMergePolicy.INSTANCE);
IndexWriter writer = new IndexWriter(dir, conf);
// create an index w/ few doc-values fields, some with updates and some without
for (int i = 0; i < 30; i++) {
Document doc = new Document();
doc.add(new StringField("id", "" + i, Field.Store.NO));
doc.add(new NumericDocValuesField("ndv1", i));
doc.add(new NumericDocValuesField("ndv1_c", i * 2));
doc.add(new NumericDocValuesField("ndv2", i * 3));
doc.add(new NumericDocValuesField("ndv2_c", i * 6));
doc.add(new BinaryDocValuesField("bdv1", toBytes(i)));
doc.add(new BinaryDocValuesField("bdv1_c", toBytes(i * 2)));
doc.add(new BinaryDocValuesField("bdv2", toBytes(i * 3)));
doc.add(new BinaryDocValuesField("bdv2_c", toBytes(i * 6)));
writer.addDocument(doc);
if ((i + 1) % 10 == 0) {
// flush every 10 docs
writer.commit();
}
}
// first segment: no updates
// second segment: update two fields, same gen
updateNumeric(writer, "10", "ndv1", "ndv1_c", 100L);
updateBinary(writer, "11", "bdv1", "bdv1_c", 100L);
writer.commit();
// third segment: update few fields, different gens, few docs
updateNumeric(writer, "20", "ndv1", "ndv1_c", 100L);
updateBinary(writer, "21", "bdv1", "bdv1_c", 100L);
writer.commit();
// update the field again
updateNumeric(writer, "22", "ndv1", "ndv1_c", 200L);
writer.commit();
writer.close();
dir.close();
}
use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by apache.
the class TestBackwardsCompatibility method addDoc.
private void addDoc(IndexWriter writer, int id) throws IOException {
Document doc = new Document();
doc.add(new TextField("content", "aaa", Field.Store.NO));
doc.add(new StringField("id", Integer.toString(id), Field.Store.YES));
FieldType customType2 = new FieldType(TextField.TYPE_STORED);
customType2.setStoreTermVectors(true);
customType2.setStoreTermVectorPositions(true);
customType2.setStoreTermVectorOffsets(true);
doc.add(new Field("autf8", "Lušceš
ne ā abń°cd", customType2));
doc.add(new Field("utf8", "Lušceš
ne ā abń°cd", customType2));
doc.add(new Field("content2", "here is more content with aaa aaa aaa", customType2));
doc.add(new Field("fieā±·ld", "field with non-ascii name", customType2));
// add docvalues fields
doc.add(new NumericDocValuesField("dvByte", (byte) id));
byte[] bytes = new byte[] { (byte) (id >>> 24), (byte) (id >>> 16), (byte) (id >>> 8), (byte) id };
BytesRef ref = new BytesRef(bytes);
doc.add(new BinaryDocValuesField("dvBytesDerefFixed", ref));
doc.add(new BinaryDocValuesField("dvBytesDerefVar", ref));
doc.add(new SortedDocValuesField("dvBytesSortedFixed", ref));
doc.add(new SortedDocValuesField("dvBytesSortedVar", ref));
doc.add(new BinaryDocValuesField("dvBytesStraightFixed", ref));
doc.add(new BinaryDocValuesField("dvBytesStraightVar", ref));
doc.add(new DoubleDocValuesField("dvDouble", (double) id));
doc.add(new FloatDocValuesField("dvFloat", (float) id));
doc.add(new NumericDocValuesField("dvInt", id));
doc.add(new NumericDocValuesField("dvLong", id));
doc.add(new NumericDocValuesField("dvPacked", id));
doc.add(new NumericDocValuesField("dvShort", (short) id));
doc.add(new SortedSetDocValuesField("dvSortedSet", ref));
doc.add(new SortedNumericDocValuesField("dvSortedNumeric", id));
doc.add(new IntPoint("intPoint1d", id));
doc.add(new IntPoint("intPoint2d", id, 2 * id));
doc.add(new FloatPoint("floatPoint1d", (float) id));
doc.add(new FloatPoint("floatPoint2d", (float) id, (float) 2 * id));
doc.add(new LongPoint("longPoint1d", id));
doc.add(new LongPoint("longPoint2d", id, 2 * id));
doc.add(new DoublePoint("doublePoint1d", (double) id));
doc.add(new DoublePoint("doublePoint2d", (double) id, (double) 2 * id));
doc.add(new BinaryPoint("binaryPoint1d", bytes));
doc.add(new BinaryPoint("binaryPoint2d", bytes, bytes));
// a field with both offsets and term vectors for a cross-check
FieldType customType3 = new FieldType(TextField.TYPE_STORED);
customType3.setStoreTermVectors(true);
customType3.setStoreTermVectorPositions(true);
customType3.setStoreTermVectorOffsets(true);
customType3.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
doc.add(new Field("content5", "here is more content with aaa aaa aaa", customType3));
// a field that omits only positions
FieldType customType4 = new FieldType(TextField.TYPE_STORED);
customType4.setStoreTermVectors(true);
customType4.setStoreTermVectorPositions(false);
customType4.setStoreTermVectorOffsets(true);
customType4.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
doc.add(new Field("content6", "here is more content with aaa aaa aaa", customType4));
// TODO:
// index different norms types via similarity (we use a random one currently?!)
// remove any analyzer randomness, explicitly add payloads for certain fields.
writer.addDocument(doc);
}
use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by apache.
the class TestLucene70DocValuesFormat method doTestSparseDocValuesVsStoredFields.
private void doTestSparseDocValuesVsStoredFields() throws Exception {
final long[] values = new long[TestUtil.nextInt(random(), 1, 500)];
for (int i = 0; i < values.length; ++i) {
values[i] = random().nextLong();
}
Directory dir = newFSDirectory(createTempDir());
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
conf.setMergeScheduler(new SerialMergeScheduler());
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
// sparse compression is only enabled if less than 1% of docs have a value
final int avgGap = 100;
final int numDocs = atLeast(200);
for (int i = random().nextInt(avgGap * 2); i >= 0; --i) {
writer.addDocument(new Document());
}
final int maxNumValuesPerDoc = random().nextBoolean() ? 1 : TestUtil.nextInt(random(), 2, 5);
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
// single-valued
long docValue = values[random().nextInt(values.length)];
doc.add(new NumericDocValuesField("numeric", docValue));
doc.add(new SortedDocValuesField("sorted", new BytesRef(Long.toString(docValue))));
doc.add(new BinaryDocValuesField("binary", new BytesRef(Long.toString(docValue))));
doc.add(new StoredField("value", docValue));
// multi-valued
final int numValues = TestUtil.nextInt(random(), 1, maxNumValuesPerDoc);
for (int j = 0; j < numValues; ++j) {
docValue = values[random().nextInt(values.length)];
doc.add(new SortedNumericDocValuesField("sorted_numeric", docValue));
doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef(Long.toString(docValue))));
doc.add(new StoredField("values", docValue));
}
writer.addDocument(doc);
// add a gap
for (int j = TestUtil.nextInt(random(), 0, avgGap * 2); j >= 0; --j) {
writer.addDocument(new Document());
}
}
if (random().nextBoolean()) {
writer.forceMerge(1);
}
final IndexReader indexReader = writer.getReader();
writer.close();
for (LeafReaderContext context : indexReader.leaves()) {
final LeafReader reader = context.reader();
final NumericDocValues numeric = DocValues.getNumeric(reader, "numeric");
final SortedDocValues sorted = DocValues.getSorted(reader, "sorted");
final BinaryDocValues binary = DocValues.getBinary(reader, "binary");
final SortedNumericDocValues sortedNumeric = DocValues.getSortedNumeric(reader, "sorted_numeric");
final SortedSetDocValues sortedSet = DocValues.getSortedSet(reader, "sorted_set");
for (int i = 0; i < reader.maxDoc(); ++i) {
final Document doc = reader.document(i);
final IndexableField valueField = doc.getField("value");
final Long value = valueField == null ? null : valueField.numericValue().longValue();
if (value == null) {
assertTrue(numeric.docID() + " vs " + i, numeric.docID() < i);
} else {
assertEquals(i, numeric.nextDoc());
assertEquals(i, binary.nextDoc());
assertEquals(i, sorted.nextDoc());
assertEquals(value.longValue(), numeric.longValue());
assertTrue(sorted.ordValue() >= 0);
assertEquals(new BytesRef(Long.toString(value)), sorted.lookupOrd(sorted.ordValue()));
assertEquals(new BytesRef(Long.toString(value)), binary.binaryValue());
}
final IndexableField[] valuesFields = doc.getFields("values");
if (valuesFields.length == 0) {
assertTrue(sortedNumeric.docID() + " vs " + i, sortedNumeric.docID() < i);
} else {
final Set<Long> valueSet = new HashSet<>();
for (IndexableField sf : valuesFields) {
valueSet.add(sf.numericValue().longValue());
}
assertEquals(i, sortedNumeric.nextDoc());
assertEquals(valuesFields.length, sortedNumeric.docValueCount());
for (int j = 0; j < sortedNumeric.docValueCount(); ++j) {
assertTrue(valueSet.contains(sortedNumeric.nextValue()));
}
assertEquals(i, sortedSet.nextDoc());
int sortedSetCount = 0;
while (true) {
long ord = sortedSet.nextOrd();
if (ord == SortedSetDocValues.NO_MORE_ORDS) {
break;
}
assertTrue(valueSet.contains(Long.parseLong(sortedSet.lookupOrd(ord).utf8ToString())));
sortedSetCount++;
}
assertEquals(valueSet.size(), sortedSetCount);
}
}
}
indexReader.close();
dir.close();
}
Aggregations