use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.
the class TestSortedSetFieldSource method testSimple.
/**
 * Indexes two documents with a sorted-set doc-values field named "value" (one with a single
 * value, one with two values) and checks the strings exposed by a {@code SortedSetFieldSource}
 * over that field, as well as the {@code SortField} that the value source produces.
 */
public void testSimple() throws Exception {
  Directory directory = newDirectory();
  IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(null));

  // First document: a single value.
  Document singleValued = new Document();
  singleValued.add(new SortedSetDocValuesField("value", new BytesRef("baz")));
  singleValued.add(newStringField("id", "2", Field.Store.YES));
  indexWriter.addDocument(singleValued);

  // Second document: two values for the same field.
  Document multiValued = new Document();
  multiValued.add(new SortedSetDocValuesField("value", new BytesRef("foo")));
  multiValued.add(new SortedSetDocValuesField("value", new BytesRef("bar")));
  multiValued.add(newStringField("id", "1", Field.Store.YES));
  indexWriter.addDocument(multiValued);

  // Collapse everything into one segment so a single leaf reader covers both documents.
  indexWriter.forceMerge(1);
  indexWriter.close();

  DirectoryReader reader = DirectoryReader.open(directory);
  IndexSearcher searcher = newSearcher(reader);
  LeafReader leaf = getOnlyLeafReader(reader);

  ValueSource source = new SortedSetFieldSource("value");
  FunctionValues functionValues = source.getValues(Collections.emptyMap(), leaf.getContext());
  assertEquals("baz", functionValues.strVal(0));
  // For the two-valued document the expected string is "bar", the smaller of "foo" and "bar".
  assertEquals("bar", functionValues.strVal(1));

  // test SortField optimization
  final boolean descending = random().nextBoolean();
  SortField fromValueSource = source.getSortField(descending);
  SortField expected = new SortedSetSortField("value", descending);
  assertEquals(expected, fromValueSource);

  // The two sort fields must still be equal once rewritten against the searcher.
  SortField rewrittenFromValueSource = fromValueSource.rewrite(searcher);
  SortField rewrittenExpected = expected.rewrite(searcher);
  assertEquals(rewrittenExpected, rewrittenFromValueSource);

  reader.close();
  directory.close();
}
use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.
the class BaseDocValuesFormatTestCase method testSortedSetEnumAdvanceIndependently.
/**
 * Builds a single-segment index of three documents sharing the sorted-set field "field" and
 * hands the resulting doc values to {@code doTestSortedSetEnumAdvanceIndependently}.
 *
 * <p>The same {@code Document} instance is added three times; its two field objects are mutated
 * between additions, so the indexed value pairs are ("2","3"), ("1","3") and ("1","2").
 */
public void testSortedSetEnumAdvanceIndependently() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random()));
  config.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), dir, config);

  SortedSetDocValuesField first = new SortedSetDocValuesField("field", new BytesRef("2"));
  SortedSetDocValuesField second = new SortedSetDocValuesField("field", new BytesRef("3"));
  Document reused = new Document();
  reused.add(first);
  reused.add(second);

  // Document 1: values "2" and "3".
  indexWriter.addDocument(reused);

  // Document 2: values "1" and "3".
  first.setBytesValue(new BytesRef("1"));
  indexWriter.addDocument(reused);

  // Document 3: values "1" and "2".
  second.setBytesValue(new BytesRef("2"));
  indexWriter.addDocument(reused);

  indexWriter.commit();
  indexWriter.forceMerge(1);

  DirectoryReader reader = indexWriter.getReader();
  indexWriter.close();

  doTestSortedSetEnumAdvanceIndependently(getOnlyLeafReader(reader).getSortedSetDocValues("field"));

  reader.close();
  dir.close();
}
use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.
the class BaseDocValuesFormatTestCase method addRandomFields.
/**
 * Adds randomly generated doc-values fields to {@code doc}.
 *
 * <p>When {@code usually()} returns true, one numeric ("ndv"), one binary ("bdv") and one sorted
 * ("sdv") field are added. Independently of that, a random number of sorted-set ("ssdv") and
 * sorted-numeric ("sndv") fields are added, each count drawn via {@code random().nextInt(5)}.
 *
 * @param doc the document that receives the fields
 */
@Override
protected void addRandomFields(Document doc) {
  if (usually()) {
    final int numericValue = random().nextInt(1 << 12);
    doc.add(new NumericDocValuesField("ndv", numericValue));

    final String binaryValue = TestUtil.randomSimpleString(random());
    doc.add(new BinaryDocValuesField("bdv", new BytesRef(binaryValue)));

    final String sortedValue = TestUtil.randomSimpleString(random(), 2);
    doc.add(new SortedDocValuesField("sdv", new BytesRef(sortedValue)));
  }

  // Multi-valued sorted-set field.
  for (int remaining = random().nextInt(5); remaining > 0; remaining--) {
    final String setValue = TestUtil.randomSimpleString(random(), 2);
    doc.add(new SortedSetDocValuesField("ssdv", new BytesRef(setValue)));
  }

  // Multi-valued sorted-numeric field spanning the full long range.
  for (int remaining = random().nextInt(5); remaining > 0; remaining--) {
    final long numeric = TestUtil.nextLong(random(), Long.MIN_VALUE, Long.MAX_VALUE);
    doc.add(new SortedNumericDocValuesField("sndv", numeric));
  }
}
use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.
the class BaseDocValuesFormatTestCase method testSortedSetTwoDocumentsFirstMissingMerge.
/**
 * Indexes an empty document, commits, then indexes a second document carrying the single
 * sorted-set value "hello" and force-merges to one segment. Verifies that the merged doc values
 * hold exactly one value, that the first document with a value is doc 1, and that its only
 * ordinal (0) resolves to "hello".
 */
public void testSortedSetTwoDocumentsFirstMissingMerge() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random()));
  config.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), dir, config);

  // Document 0 has no value for "field"; commit before adding the next document.
  Document withoutValue = new Document();
  indexWriter.addDocument(withoutValue);
  indexWriter.commit();

  // Document 1 carries the only value.
  Document withValue = new Document();
  withValue.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
  indexWriter.addDocument(withValue);
  indexWriter.forceMerge(1);

  DirectoryReader reader = indexWriter.getReader();
  indexWriter.close();

  SortedSetDocValues sortedSet = getOnlyLeafReader(reader).getSortedSetDocValues("field");
  assertEquals(1, sortedSet.getValueCount());
  // The iterator is expected to skip document 0 and land on document 1.
  assertEquals(1, sortedSet.nextDoc());
  assertEquals(0, sortedSet.nextOrd());
  assertEquals(NO_MORE_ORDS, sortedSet.nextOrd());
  assertEquals(new BytesRef("hello"), sortedSet.lookupOrd(0));

  reader.close();
  dir.close();
}
use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.
the class BaseDocValuesFormatTestCase method doTestSortedSetVsStoredFields.
/**
 * Indexes random documents whose values are written both as stored fields named "stored" (in
 * sorted order) and as sorted-set doc values named "dv" (in shuffled order), deletes some
 * documents, and then checks that the doc values agree with the stored fields. The check runs
 * once on the index as written and once more after a force-merge to a single segment.
 *
 * @param numDocs number of documents to index
 * @param minLength lower length bound passed to {@code TestUtil.nextInt} for each random value
 * @param maxLength upper length bound passed to {@code TestUtil.nextInt} for each random value
 * @param maxValuesPerDoc upper bound passed to {@code TestUtil.nextInt} for the number of values
 *     drawn per document (duplicates collapse, so a document may end up with fewer)
 * @param maxUniqueValues the pool of distinct values stops growing once it reaches this size
 * @throws Exception if indexing, reading or verification fails
 */
protected void doTestSortedSetVsStoredFields(int numDocs, int minLength, int maxLength, int maxValuesPerDoc, int maxUniqueValues) throws Exception {
  Directory dir = newFSDirectory(createTempDir("dvduel"));
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);

  // Build the pool of candidate values; the attempt count is capped at 10000 so the loop ends
  // even if fewer than maxUniqueValues distinct strings can be generated.
  Set<String> valueSet = new HashSet<>();
  for (int i = 0; i < 10000 && valueSet.size() < maxUniqueValues; ++i) {
    final int length = TestUtil.nextInt(random(), minLength, maxLength);
    valueSet.add(TestUtil.randomSimpleString(random(), length));
  }
  String[] uniqueValues = valueSet.toArray(new String[0]);

  // index some docs
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
    doc.add(idField);
    int numValues = TestUtil.nextInt(random(), 0, maxValuesPerDoc);
    // create a random set of strings (TreeSet: de-duplicated and sorted)
    Set<String> values = new TreeSet<>();
    for (int v = 0; v < numValues; v++) {
      values.add(RandomPicks.randomFrom(random(), uniqueValues));
    }

    // add ordered to the stored field
    for (String v : values) {
      doc.add(new StoredField("stored", v));
    }

    // add in any order to the dv field
    ArrayList<String> unordered = new ArrayList<>(values);
    Collections.shuffle(unordered, random());
    for (String v : unordered) {
      doc.add(new SortedSetDocValuesField("dv", new BytesRef(v)));
    }

    writer.addDocument(doc);
    if (random().nextInt(31) == 0) {
      writer.commit();
    }
  }

  // delete some docs
  // nextInt(bound) rejects a non-positive bound, so with fewer than 10 documents no deletions
  // are attempted instead of failing with an IllegalArgumentException.
  int deletionBound = numDocs / 10;
  int numDeletions = deletionBound > 0 ? random().nextInt(deletionBound) : 0;
  for (int i = 0; i < numDeletions; i++) {
    int id = random().nextInt(numDocs);
    writer.deleteDocuments(new Term("id", Integer.toString(id)));
  }

  // compare
  DirectoryReader ir = writer.getReader();
  TestUtil.checkReader(ir);
  assertSortedSetMatchesStoredFields(ir);
  ir.close();

  writer.forceMerge(1);

  // compare again
  ir = writer.getReader();
  TestUtil.checkReader(ir);
  assertSortedSetMatchesStoredFields(ir);
  ir.close();
  writer.close();
  dir.close();
}

/**
 * For every leaf of {@code ir}, walks all documents and asserts that the ordinals of the "dv"
 * sorted-set doc values resolve, in order, to the document's "stored" field values.
 *
 * <p>A leaf whose "dv" doc values are {@code null} is tolerated and not compared; the stored
 * fields of its documents are still loaded.
 *
 * @param ir reader to verify
 * @throws IOException if reading stored fields or doc values fails
 */
private void assertSortedSetMatchesStoredFields(DirectoryReader ir) throws IOException {
  for (LeafReaderContext context : ir.leaves()) {
    LeafReader r = context.reader();
    SortedSetDocValues docValues = r.getSortedSetDocValues("dv");
    for (int i = 0; i < r.maxDoc(); i++) {
      String[] stringValues = r.document(i).getValues("stored");
      if (docValues == null) {
        // Nothing to compare against in this leaf; guard before any dereference.
        continue;
      }
      // Step the iterator forward only when it is still positioned before document i.
      if (docValues.docID() < i) {
        docValues.nextDoc();
      }
      if (stringValues.length > 0) {
        assertEquals(i, docValues.docID());
        for (String expected : stringValues) {
          long ord = docValues.nextOrd();
          assert ord != NO_MORE_ORDS;
          BytesRef scratch = docValues.lookupOrd(ord);
          assertEquals(expected, scratch.utf8ToString());
        }
        // The document must not expose more ordinals than it has stored values.
        assertEquals(NO_MORE_ORDS, docValues.nextOrd());
      }
    }
  }
}
Aggregations