Use of org.apache.lucene.codecs.DocValuesFormat in project lucene-solr by apache.
From the class TestLucene54DocValuesFormat, method doTestTermsEnumRandom.
// TODO: try to refactor this and some termsenum tests into the base class.
// to do this we need to fix the test class to get a DVF not a Codec so we can setup
// the postings format correctly.
private void doTestTermsEnumRandom(int numDocs, int minLength, int maxLength) throws Exception {
  Directory dir = newFSDirectory(createTempDir());
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  conf.setMergeScheduler(new SerialMergeScheduler());
  // set to duel against a codec which has ordinals:
  final PostingsFormat pf = TestUtil.getPostingsFormatWithOrds(random());
  final DocValuesFormat dv = new Lucene54DocValuesFormat();
  conf.setCodec(new AssertingCodec() {
    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      return pf;
    }
    @Override
    public DocValuesFormat getDocValuesFormatForField(String field) {
      return dv;
    }
  });
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
  // index some docs
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
    doc.add(idField);
    final int length = TestUtil.nextInt(random(), minLength, maxLength);
    int numValues = random().nextInt(17);
    // create a random list of strings
    List<String> values = new ArrayList<>();
    for (int v = 0; v < numValues; v++) {
      values.add(TestUtil.randomSimpleString(random(), minLength, length));
    }
ArrayList<String> unordered = new ArrayList<>(values);
Collections.shuffle(unordered, random());
for (String v : values) {
doc.add(newStringField("indexed", v, Field.Store.NO));
}
    // add in any order to the dv field
    ArrayList<String> unordered2 = new ArrayList<>(values);
    Collections.shuffle(unordered2, random());
    for (String v : unordered2) {
      doc.add(new SortedSetDocValuesField("dv", new BytesRef(v)));
    }
    writer.addDocument(doc);
    if (random().nextInt(31) == 0) {
      writer.commit();
    }
  }
  // delete some docs
  int numDeletions = random().nextInt(numDocs / 10);
  for (int i = 0; i < numDeletions; i++) {
    int id = random().nextInt(numDocs);
    writer.deleteDocuments(new Term("id", Integer.toString(id)));
  }
  // compare per-segment
  DirectoryReader ir = writer.getReader();
  for (LeafReaderContext context : ir.leaves()) {
    LeafReader r = context.reader();
    Terms terms = r.terms("indexed");
    if (terms != null) {
      SortedSetDocValues ssdv = r.getSortedSetDocValues("dv");
      assertEquals(terms.size(), ssdv.getValueCount());
      TermsEnum expected = terms.iterator();
      TermsEnum actual = r.getSortedSetDocValues("dv").termsEnum();
      assertEquals(terms.size(), expected, actual);
      doTestSortedSetEnumAdvanceIndependently(ssdv);
    }
  }
  ir.close();
  writer.forceMerge(1);
  // now compare again after the merge
  ir = writer.getReader();
  LeafReader ar = getOnlyLeafReader(ir);
  Terms terms = ar.terms("indexed");
  if (terms != null) {
    assertEquals(terms.size(), ar.getSortedSetDocValues("dv").getValueCount());
    TermsEnum expected = terms.iterator();
    TermsEnum actual = ar.getSortedSetDocValues("dv").termsEnum();
    assertEquals(terms.size(), expected, actual);
  }
  ir.close();
  writer.close();
  dir.close();
}
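This helper is not a test by itself; it is driven by small test methods that choose the document count and the string-length bounds. A hypothetical caller might look like the following (the method name, iteration count, and bounds are illustrative assumptions, not taken from the actual test class):

public void testTermsEnumRandomStrings() throws Exception {
  int numIterations = atLeast(1);
  for (int i = 0; i < numIterations; i++) {
    // index between 1025 and 5121 documents, with values of 1 to 10 characters
    doTestTermsEnumRandom(TestUtil.nextInt(random(), 1025, 5121), 1, 10);
  }
}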
Use of org.apache.lucene.codecs.DocValuesFormat in project lucene-solr by apache.
From the class TestLucene70DocValuesFormat, method doTestTermsEnumRandom.
// TODO: try to refactor this and some termsenum tests into the base class.
// to do this we need to fix the test class to get a DVF not a Codec so we can setup
// the postings format correctly.
private void doTestTermsEnumRandom(int numDocs, Supplier<String> valuesProducer) throws Exception {
  Directory dir = newFSDirectory(createTempDir());
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  conf.setMergeScheduler(new SerialMergeScheduler());
  // set to duel against a codec which has ordinals:
  final PostingsFormat pf = TestUtil.getPostingsFormatWithOrds(random());
  final DocValuesFormat dv = new Lucene70DocValuesFormat();
  conf.setCodec(new AssertingCodec() {
    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      return pf;
    }
    @Override
    public DocValuesFormat getDocValuesFormatForField(String field) {
      return dv;
    }
  });
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
  // index some docs
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
    doc.add(idField);
    int numValues = random().nextInt(17);
    // create a random list of strings
    List<String> values = new ArrayList<>();
    for (int v = 0; v < numValues; v++) {
      values.add(valuesProducer.get());
    }
    // add in any order to the indexed field
    ArrayList<String> unordered = new ArrayList<>(values);
    Collections.shuffle(unordered, random());
    for (String v : unordered) {
      doc.add(newStringField("indexed", v, Field.Store.NO));
    }
    // add in any order to the dv field
    ArrayList<String> unordered2 = new ArrayList<>(values);
    Collections.shuffle(unordered2, random());
    for (String v : unordered2) {
      doc.add(new SortedSetDocValuesField("dv", new BytesRef(v)));
    }
    writer.addDocument(doc);
    if (random().nextInt(31) == 0) {
      writer.commit();
    }
  }
  // delete some docs
  int numDeletions = random().nextInt(numDocs / 10);
  for (int i = 0; i < numDeletions; i++) {
    int id = random().nextInt(numDocs);
    writer.deleteDocuments(new Term("id", Integer.toString(id)));
  }
  // compare per-segment
  DirectoryReader ir = writer.getReader();
  for (LeafReaderContext context : ir.leaves()) {
    LeafReader r = context.reader();
    Terms terms = r.terms("indexed");
    if (terms != null) {
      SortedSetDocValues ssdv = r.getSortedSetDocValues("dv");
      assertEquals(terms.size(), ssdv.getValueCount());
      TermsEnum expected = terms.iterator();
      TermsEnum actual = r.getSortedSetDocValues("dv").termsEnum();
      assertEquals(terms.size(), expected, actual);
      doTestSortedSetEnumAdvanceIndependently(ssdv);
    }
  }
  ir.close();
  writer.forceMerge(1);
  // now compare again after the merge
  ir = writer.getReader();
  LeafReader ar = getOnlyLeafReader(ir);
  Terms terms = ar.terms("indexed");
  if (terms != null) {
    assertEquals(terms.size(), ar.getSortedSetDocValues("dv").getValueCount());
    TermsEnum expected = terms.iterator();
    TermsEnum actual = ar.getSortedSetDocValues("dv").termsEnum();
    assertEquals(terms.size(), expected, actual);
  }
  ir.close();
  writer.close();
  dir.close();
}
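Compared with the Lucene54 variant above, this version takes a Supplier<String> instead of length bounds, so the caller decides how the random values are produced. A hypothetical invocation (the method name and document-count bounds are illustrative assumptions) could be:

public void testTermsEnumFixedWidth() throws Exception {
  // every value is exactly 10 characters long, produced by the supplier
  doTestTermsEnumRandom(TestUtil.nextInt(random(), 1025, 5121),
      () -> TestUtil.randomSimpleString(random(), 10, 10));
}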
Use of org.apache.lucene.codecs.DocValuesFormat in project lucene-solr by apache.
From the class TestPerFieldDocValuesFormat, method testMergeCalledOnTwoFormats.
public void testMergeCalledOnTwoFormats() throws IOException {
  MergeRecordingDocValueFormatWrapper dvf1 = new MergeRecordingDocValueFormatWrapper(TestUtil.getDefaultDocValuesFormat());
  MergeRecordingDocValueFormatWrapper dvf2 = new MergeRecordingDocValueFormatWrapper(TestUtil.getDefaultDocValuesFormat());
  IndexWriterConfig iwc = new IndexWriterConfig();
  iwc.setCodec(new AssertingCodec() {
    @Override
    public DocValuesFormat getDocValuesFormatForField(String field) {
      switch (field) {
        case "dv1":
        case "dv2":
          return dvf1;
        case "dv3":
          return dvf2;
        default:
          return super.getDocValuesFormatForField(field);
      }
    }
  });
  Directory directory = newDirectory();
  IndexWriter iwriter = new IndexWriter(directory, iwc);
  Document doc = new Document();
  doc.add(new NumericDocValuesField("dv1", 5));
  doc.add(new NumericDocValuesField("dv2", 42));
  doc.add(new BinaryDocValuesField("dv3", new BytesRef("hello world")));
  iwriter.addDocument(doc);
  iwriter.commit();
  doc = new Document();
  doc.add(new NumericDocValuesField("dv1", 8));
  doc.add(new NumericDocValuesField("dv2", 45));
  doc.add(new BinaryDocValuesField("dv3", new BytesRef("goodbye world")));
  iwriter.addDocument(doc);
  iwriter.commit();
  iwriter.forceMerge(1, true);
  iwriter.close();
  assertEquals(1, dvf1.nbMergeCalls);
  assertEquals(new HashSet<>(Arrays.asList("dv1", "dv2")), new HashSet<>(dvf1.fieldNames));
  assertEquals(1, dvf2.nbMergeCalls);
  assertEquals(Collections.singletonList("dv3"), dvf2.fieldNames);
  directory.close();
}
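MergeRecordingDocValueFormatWrapper is a private helper inside the test class and is not shown by the aggregation above. As a rough sketch of what such a wrapper could look like, the following delegates all writing to the wrapped format and only counts merge() calls and records the doc-values fields being merged; the exact implementation in lucene-solr may differ, and the usual org.apache.lucene.codecs and org.apache.lucene.index imports are assumed:

private static final class MergeRecordingDocValueFormatWrapper extends DocValuesFormat {
  private final DocValuesFormat delegate;
  final List<String> fieldNames = new ArrayList<>();
  volatile int nbMergeCalls = 0;

  MergeRecordingDocValueFormatWrapper(DocValuesFormat delegate) {
    super(delegate.getName());
    this.delegate = delegate;
  }

  @Override
  public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
    final DocValuesConsumer consumer = delegate.fieldsConsumer(state);
    return new DocValuesConsumer() {
      @Override
      public void merge(MergeState mergeState) throws IOException {
        nbMergeCalls++;
        // remember which doc-values fields this format was asked to merge
        for (FieldInfo fi : mergeState.mergeFieldInfos) {
          if (fi.getDocValuesType() != DocValuesType.NONE) {
            fieldNames.add(fi.name);
          }
        }
        consumer.merge(mergeState);
      }

      @Override
      public void addNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
        consumer.addNumericField(field, valuesProducer);
      }

      @Override
      public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
        consumer.addBinaryField(field, valuesProducer);
      }

      @Override
      public void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
        consumer.addSortedField(field, valuesProducer);
      }

      @Override
      public void addSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
        consumer.addSortedNumericField(field, valuesProducer);
      }

      @Override
      public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
        consumer.addSortedSetField(field, valuesProducer);
      }

      @Override
      public void close() throws IOException {
        consumer.close();
      }
    };
  }

  @Override
  public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
    return delegate.fieldsProducer(state);
  }
}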
Use of org.apache.lucene.codecs.DocValuesFormat in project lucene-solr by apache.
From the class TestBinaryDocValuesUpdates, method testChangeCodec.
public void testChangeCodec() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  // disable merges to simplify test assertions.
  conf.setMergePolicy(NoMergePolicy.INSTANCE);
  conf.setCodec(new AssertingCodec() {
    @Override
    public DocValuesFormat getDocValuesFormatForField(String field) {
      return TestUtil.getDefaultDocValuesFormat();
    }
  });
  IndexWriter writer = new IndexWriter(dir, conf);
  Document doc = new Document();
  doc.add(new StringField("id", "d0", Store.NO));
  doc.add(new BinaryDocValuesField("f1", toBytes(5L)));
  doc.add(new BinaryDocValuesField("f2", toBytes(13L)));
  writer.addDocument(doc);
  writer.close();
  // change format
  conf = newIndexWriterConfig(new MockAnalyzer(random()));
  // disable merges to simplify test assertions.
  conf.setMergePolicy(NoMergePolicy.INSTANCE);
  conf.setCodec(new AssertingCodec() {
    @Override
    public DocValuesFormat getDocValuesFormatForField(String field) {
      return new AssertingDocValuesFormat();
    }
  });
  writer = new IndexWriter(dir, conf);
  doc = new Document();
  doc.add(new StringField("id", "d1", Store.NO));
  doc.add(new BinaryDocValuesField("f1", toBytes(17L)));
  doc.add(new BinaryDocValuesField("f2", toBytes(2L)));
  writer.addDocument(doc);
  writer.updateBinaryDocValue(new Term("id", "d0"), "f1", toBytes(12L));
  writer.close();
  DirectoryReader reader = DirectoryReader.open(dir);
  BinaryDocValues f1 = MultiDocValues.getBinaryValues(reader, "f1");
  BinaryDocValues f2 = MultiDocValues.getBinaryValues(reader, "f2");
  assertEquals(0, f1.nextDoc());
  assertEquals(0, f2.nextDoc());
  assertEquals(12L, getValue(f1));
  assertEquals(13L, getValue(f2));
  assertEquals(1, f1.nextDoc());
  assertEquals(1, f2.nextDoc());
  assertEquals(17L, getValue(f1));
  assertEquals(2L, getValue(f2));
  reader.close();
  dir.close();
}
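The toBytes and getValue helpers used above are defined elsewhere in TestBinaryDocValuesUpdates. A minimal sketch, assuming the values are simply packed as 8 big-endian bytes (the real helpers may use a different encoding), would be:

// encode a long into a BytesRef (hypothetical fixed 8-byte big-endian layout)
static BytesRef toBytes(long value) {
  byte[] bytes = new byte[8];
  for (int i = 7; i >= 0; i--) {
    bytes[i] = (byte) (value & 0xFFL);
    value >>>= 8;
  }
  return new BytesRef(bytes);
}

// decode the current document's value back into a long
static long getValue(BinaryDocValues values) throws IOException {
  BytesRef bytes = values.binaryValue();
  long value = 0;
  for (int i = 0; i < 8; i++) {
    value = (value << 8) | (bytes.bytes[bytes.offset + i] & 0xFF);
  }
  return value;
}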
Use of org.apache.lucene.codecs.DocValuesFormat in project lucene-solr by apache.
From the class TestNumericDocValuesUpdates, method testChangeCodec.
@Test
public void testChangeCodec() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  // disable merges to simplify test assertions.
  conf.setMergePolicy(NoMergePolicy.INSTANCE);
  conf.setCodec(new AssertingCodec() {
    @Override
    public DocValuesFormat getDocValuesFormatForField(String field) {
      return TestUtil.getDefaultDocValuesFormat();
    }
  });
  IndexWriter writer = new IndexWriter(dir, conf);
  Document doc = new Document();
  doc.add(new StringField("id", "d0", Store.NO));
  doc.add(new NumericDocValuesField("f1", 5L));
  doc.add(new NumericDocValuesField("f2", 13L));
  writer.addDocument(doc);
  writer.close();
  // change format
  conf = newIndexWriterConfig(new MockAnalyzer(random()));
  // disable merges to simplify test assertions.
  conf.setMergePolicy(NoMergePolicy.INSTANCE);
  conf.setCodec(new AssertingCodec() {
    @Override
    public DocValuesFormat getDocValuesFormatForField(String field) {
      return new AssertingDocValuesFormat();
    }
  });
  writer = new IndexWriter(dir, conf);
  doc = new Document();
  doc.add(new StringField("id", "d1", Store.NO));
  doc.add(new NumericDocValuesField("f1", 17L));
  doc.add(new NumericDocValuesField("f2", 2L));
  writer.addDocument(doc);
  writer.updateNumericDocValue(new Term("id", "d0"), "f1", 12L);
  writer.close();
  DirectoryReader reader = DirectoryReader.open(dir);
  NumericDocValues f1 = MultiDocValues.getNumericValues(reader, "f1");
  NumericDocValues f2 = MultiDocValues.getNumericValues(reader, "f2");
  assertEquals(0, f1.nextDoc());
  assertEquals(12L, f1.longValue());
  assertEquals(0, f2.nextDoc());
  assertEquals(13L, f2.longValue());
  assertEquals(1, f1.nextDoc());
  assertEquals(17L, f1.longValue());
  assertEquals(1, f2.nextDoc());
  assertEquals(2L, f2.longValue());
  reader.close();
  dir.close();
}
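As the assertions above show, NumericDocValues in this branch of Lucene follows the DocIdSetIterator pattern: nextDoc() positions the iterator and longValue() reads the current document's value. The same pattern generalizes to scanning every document, for example (illustrative fragment, reusing the "f1" field from the test):

NumericDocValues f1 = MultiDocValues.getNumericValues(reader, "f1");
for (int docID = f1.nextDoc(); docID != DocIdSetIterator.NO_MORE_DOCS; docID = f1.nextDoc()) {
  System.out.println("doc " + docID + " -> f1=" + f1.longValue());
}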