Use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.
From the class TestNumericDocValuesUpdates, method testMultipleDocValuesTypes:
@Test
public void testMultipleDocValuesTypes() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  // prevent merges
  conf.setMaxBufferedDocs(10);
  IndexWriter writer = new IndexWriter(dir, conf);
  for (int i = 0; i < 4; i++) {
    Document doc = new Document();
    doc.add(new StringField("dvUpdateKey", "dv", Store.NO));
    doc.add(new NumericDocValuesField("ndv", i));
    doc.add(new BinaryDocValuesField("bdv", new BytesRef(Integer.toString(i))));
    doc.add(new SortedDocValuesField("sdv", new BytesRef(Integer.toString(i))));
    doc.add(new SortedSetDocValuesField("ssdv", new BytesRef(Integer.toString(i))));
    doc.add(new SortedSetDocValuesField("ssdv", new BytesRef(Integer.toString(i * 2))));
    writer.addDocument(doc);
  }
  writer.commit();
  // update all docs' ndv field
  writer.updateNumericDocValue(new Term("dvUpdateKey", "dv"), "ndv", 17L);
  writer.close();
  final DirectoryReader reader = DirectoryReader.open(dir);
  LeafReader r = reader.leaves().get(0).reader();
  NumericDocValues ndv = r.getNumericDocValues("ndv");
  BinaryDocValues bdv = r.getBinaryDocValues("bdv");
  SortedDocValues sdv = r.getSortedDocValues("sdv");
  SortedSetDocValues ssdv = r.getSortedSetDocValues("ssdv");
  for (int i = 0; i < r.maxDoc(); i++) {
    assertEquals(i, ndv.nextDoc());
    assertEquals(17, ndv.longValue());
    assertEquals(i, bdv.nextDoc());
    BytesRef term = bdv.binaryValue();
    assertEquals(new BytesRef(Integer.toString(i)), term);
    assertEquals(i, sdv.nextDoc());
    term = sdv.binaryValue();
    assertEquals(new BytesRef(Integer.toString(i)), term);
    assertEquals(i, ssdv.nextDoc());
    long ord = ssdv.nextOrd();
    term = ssdv.lookupOrd(ord);
    assertEquals(i, Integer.parseInt(term.utf8ToString()));
    if (i != 0) {
      ord = ssdv.nextOrd();
      term = ssdv.lookupOrd(ord);
      assertEquals(i * 2, Integer.parseInt(term.utf8ToString()));
    }
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, ssdv.nextOrd());
  }
  reader.close();
  dir.close();
}
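For reference, the basic write/read cycle for a SortedSetDocValuesField, stripped of the update logic above, can be sketched as follows. This is not part of the test; the field name "labels" and its values are made up for illustration, and the sketch reuses the same LuceneTestCase helpers (newDirectory, newIndexWriterConfig, MockAnalyzer, random) that the test uses.

Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
doc.add(new SortedSetDocValuesField("labels", new BytesRef("apple")));
doc.add(new SortedSetDocValuesField("labels", new BytesRef("banana")));
writer.addDocument(doc);
writer.close();
DirectoryReader reader = DirectoryReader.open(dir);
SortedSetDocValues dv = reader.leaves().get(0).reader().getSortedSetDocValues("labels");
assertEquals(0, dv.nextDoc());  // advance to the first (and only) document with values
long ord;
while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
  System.out.println(dv.lookupOrd(ord).utf8ToString());  // prints "apple", then "banana"
}
reader.close();
dir.close();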
Use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.
From the class TestDocValuesIndexing, method testMixedTypesAfterReopenAppend2:
public void testMixedTypesAfterReopenAppend2() throws IOException {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  doc.add(new SortedSetDocValuesField("foo", new BytesRef("foo")));
  w.addDocument(doc);
  w.close();
  Document doc2 = new Document();
  IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  doc2.add(new StringField("foo", "bar", Field.Store.NO));
  doc2.add(new BinaryDocValuesField("foo", new BytesRef("foo")));
  // NOTE: this case follows a different code path inside
  // DefaultIndexingChain/FieldInfos, because the field (foo)
  // is first added without DocValues:
  expectThrows(IllegalArgumentException.class, () -> {
    w2.addDocument(doc2);
  });
  w2.forceMerge(1);
  w2.close();
  dir.close();
}
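The IllegalArgumentException above is Lucene's index-wide doc-values type check: "foo" was first written with SORTED_SET doc values, so a BinaryDocValuesField under the same name is rejected even after the index is reopened. As a rough sketch (not part of the test; the field name "fooBinary" is made up), a document built like this instead would be accepted by w2, because it either reuses the existing SORTED_SET type or puts the binary values under a different field name:

Document doc3 = new Document();
doc3.add(new SortedSetDocValuesField("foo", new BytesRef("bar")));     // reuses the existing SORTED_SET type
doc3.add(new BinaryDocValuesField("fooBinary", new BytesRef("bar")));  // binary values under a new field name
w2.addDocument(doc3);                                                  // no exception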
Use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.
From the class TestMultiDocValues, method testSortedSetWithDups:
// tries to make more dups than testSortedSet
public void testSortedSetWithDups() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(random(), null);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
  int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    int numValues = random().nextInt(5);
    for (int j = 0; j < numValues; j++) {
      doc.add(new SortedSetDocValuesField("bytes", new BytesRef(TestUtil.randomSimpleString(random(), 2))));
    }
    iw.addDocument(doc);
    if (random().nextInt(17) == 0) {
      iw.commit();
    }
  }
  DirectoryReader ir = iw.getReader();
  iw.forceMerge(1);
  DirectoryReader ir2 = iw.getReader();
  LeafReader merged = getOnlyLeafReader(ir2);
  iw.close();
  SortedSetDocValues multi = MultiDocValues.getSortedSetValues(ir, "bytes");
  SortedSetDocValues single = merged.getSortedSetDocValues("bytes");
  if (multi == null) {
    assertNull(single);
  } else {
    assertEquals(single.getValueCount(), multi.getValueCount());
    // check values
    for (long i = 0; i < single.getValueCount(); i++) {
      final BytesRef expected = BytesRef.deepCopyOf(single.lookupOrd(i));
      final BytesRef actual = multi.lookupOrd(i);
      assertEquals(expected, actual);
    }
    // check ord list
    while (true) {
      int docID = single.nextDoc();
      assertEquals(docID, multi.nextDoc());
      if (docID == NO_MORE_DOCS) {
        break;
      }
      ArrayList<Long> expectedList = new ArrayList<>();
      long ord;
      while ((ord = single.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
        expectedList.add(ord);
      }
      int upto = 0;
      while ((ord = multi.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
        assertEquals(expectedList.get(upto).longValue(), ord);
        upto++;
      }
      assertEquals(expectedList.size(), upto);
    }
  }
  testRandomAdvance(merged.getSortedSetDocValues("bytes"), MultiDocValues.getSortedSetValues(ir, "bytes"));
  testRandomAdvanceExact(merged.getSortedSetDocValues("bytes"), MultiDocValues.getSortedSetValues(ir, "bytes"), merged.maxDoc());
  ir.close();
  ir2.close();
  dir.close();
}
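A detail the duplicate values above rely on: SORTED_SET doc values deduplicate and sort the values of each document, which is also why the i == 0 case in the first example on this page yields only one ord. A minimal sketch of that behavior, reusing the same test helpers and a hypothetical set of values:

Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(new SortedSetDocValuesField("bytes", new BytesRef("b")));
doc.add(new SortedSetDocValuesField("bytes", new BytesRef("a")));
doc.add(new SortedSetDocValuesField("bytes", new BytesRef("a")));  // duplicate value, stored only once
iw.addDocument(doc);
DirectoryReader r = iw.getReader();
iw.close();
SortedSetDocValues dv = getOnlyLeafReader(r).getSortedSetDocValues("bytes");
assertEquals(0, dv.nextDoc());
assertEquals("a", dv.lookupOrd(dv.nextOrd()).utf8ToString());      // values come back sorted
assertEquals("b", dv.lookupOrd(dv.nextOrd()).utf8ToString());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, dv.nextOrd());       // only two ords, not three
r.close();
dir.close();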
Use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.
From the class TestDocValuesRewriteMethod, method setUp:
@Override
public void setUp() throws Exception {
  super.setUp();
  dir = newDirectory();
  // sometimes use an empty string as field name
  fieldName = random().nextBoolean() ? "field" : "";
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false))
          .setMaxBufferedDocs(TestUtil.nextInt(random(), 50, 1000)));
  List<String> terms = new ArrayList<>();
  int num = atLeast(200);
  for (int i = 0; i < num; i++) {
    Document doc = new Document();
    doc.add(newStringField("id", Integer.toString(i), Field.Store.NO));
    int numTerms = random().nextInt(4);
    for (int j = 0; j < numTerms; j++) {
      String s = TestUtil.randomUnicodeString(random());
      doc.add(newStringField(fieldName, s, Field.Store.NO));
      doc.add(new SortedSetDocValuesField(fieldName, new BytesRef(s)));
      terms.add(s);
    }
    writer.addDocument(doc);
  }
  if (VERBOSE) {
    // utf16 order
    Collections.sort(terms);
    System.out.println("UTF16 order:");
    for (String s : terms) {
      System.out.println(" " + UnicodeUtil.toHexString(s) + " " + s);
    }
  }
  int numDeletions = random().nextInt(num / 10);
  for (int i = 0; i < numDeletions; i++) {
    writer.deleteDocuments(new Term("id", Integer.toString(random().nextInt(num))));
  }
  reader = writer.getReader();
  searcher1 = newSearcher(reader);
  searcher2 = newSearcher(reader);
  writer.close();
}
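This setUp only builds the index; the tests in this class then run multi-term queries against the doc-values field. As a sketch of that pattern (the regular expression is made up, and it assumes the MultiTermQuery.setRewriteMethod API of the Lucene 7/8 line used by lucene-solr), a query is pointed at the SortedSetDocValuesField like this:

RegexpQuery q = new RegexpQuery(new Term(fieldName, "ab.*"));
q.setRewriteMethod(new DocValuesRewriteMethod());  // evaluate against doc values instead of the terms index
TopDocs hits = searcher1.search(q, 10);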
Use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.
From the class TestIndexWriterOnVMError, method doTest:
// just one thread, serial merge policy, hopefully debuggable
private void doTest(MockDirectoryWrapper.Failure failOn) throws Exception {
  // log all exceptions we hit, in case we fail (for debugging)
  ByteArrayOutputStream exceptionLog = new ByteArrayOutputStream();
  PrintStream exceptionStream = new PrintStream(exceptionLog, true, "UTF-8");
  //PrintStream exceptionStream = System.out;
  final long analyzerSeed = random().nextLong();
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      // we are gonna make it angry
      tokenizer.setEnableChecks(false);
      TokenStream stream = tokenizer;
      // emit some payloads
      if (fieldName.contains("payloads")) {
        stream = new MockVariableLengthPayloadFilter(new Random(analyzerSeed), stream);
      }
      return new TokenStreamComponents(tokenizer, stream);
    }
  };
  MockDirectoryWrapper dir = null;
  final int numIterations = TEST_NIGHTLY ? atLeast(100) : atLeast(5);
  STARTOVER: for (int iter = 0; iter < numIterations; iter++) {
    try {
      // close from last run
      if (dir != null) {
        dir.close();
      }
      // disable slow things: we don't rely upon sleeps here.
      dir = newMockDirectory();
      dir.setThrottling(MockDirectoryWrapper.Throttling.NEVER);
      dir.setUseSlowOpenClosers(false);
      IndexWriterConfig conf = newIndexWriterConfig(analyzer);
      // just for now, try to keep this test reproducible
      conf.setMergeScheduler(new SerialMergeScheduler());
      // test never makes it this far...
      int numDocs = atLeast(2000);
      IndexWriter iw = new IndexWriter(dir, conf);
      // ensure there is always a commit
      iw.commit();
      dir.failOn(failOn);
      for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        doc.add(newStringField("id", Integer.toString(i), Field.Store.NO));
        doc.add(new NumericDocValuesField("dv", i));
        doc.add(new BinaryDocValuesField("dv2", new BytesRef(Integer.toString(i))));
        doc.add(new SortedDocValuesField("dv3", new BytesRef(Integer.toString(i))));
        doc.add(new SortedSetDocValuesField("dv4", new BytesRef(Integer.toString(i))));
        doc.add(new SortedSetDocValuesField("dv4", new BytesRef(Integer.toString(i - 1))));
        doc.add(new SortedNumericDocValuesField("dv5", i));
        doc.add(new SortedNumericDocValuesField("dv5", i - 1));
        doc.add(newTextField("text1", TestUtil.randomAnalysisString(random(), 20, true), Field.Store.NO));
        // ensure we store something
        doc.add(new StoredField("stored1", "foo"));
        doc.add(new StoredField("stored1", "bar"));
        // ensure we get some payloads
        doc.add(newTextField("text_payloads", TestUtil.randomAnalysisString(random(), 6, true), Field.Store.NO));
        // ensure we get some vectors
        FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
        ft.setStoreTermVectors(true);
        doc.add(newField("text_vectors", TestUtil.randomAnalysisString(random(), 6, true), ft));
        doc.add(new IntPoint("point", random().nextInt()));
        doc.add(new IntPoint("point2d", random().nextInt(), random().nextInt()));
        if (random().nextInt(10) > 0) {
          // single doc
          try {
            iw.addDocument(doc);
            // we made it, sometimes delete our doc, or update a dv
            int thingToDo = random().nextInt(4);
            if (thingToDo == 0) {
              iw.deleteDocuments(new Term("id", Integer.toString(i)));
            } else if (thingToDo == 1) {
              iw.updateNumericDocValue(new Term("id", Integer.toString(i)), "dv", i + 1L);
            } else if (thingToDo == 2) {
              iw.updateBinaryDocValue(new Term("id", Integer.toString(i)), "dv2", new BytesRef(Integer.toString(i + 1)));
            }
          } catch (VirtualMachineError | AlreadyClosedException disaster) {
            getTragedy(disaster, iw, exceptionStream);
            continue STARTOVER;
          }
        } else {
          // block docs
          Document doc2 = new Document();
          doc2.add(newStringField("id", Integer.toString(-i), Field.Store.NO));
          doc2.add(newTextField("text1", TestUtil.randomAnalysisString(random(), 20, true), Field.Store.NO));
          doc2.add(new StoredField("stored1", "foo"));
          doc2.add(new StoredField("stored1", "bar"));
          doc2.add(newField("text_vectors", TestUtil.randomAnalysisString(random(), 6, true), ft));
          try {
            iw.addDocuments(Arrays.asList(doc, doc2));
            // we made it, sometimes delete our docs
            if (random().nextBoolean()) {
              iw.deleteDocuments(new Term("id", Integer.toString(i)), new Term("id", Integer.toString(-i)));
            }
          } catch (VirtualMachineError | AlreadyClosedException disaster) {
            getTragedy(disaster, iw, exceptionStream);
            continue STARTOVER;
          }
        }
        if (random().nextInt(10) == 0) {
          // trigger flush:
          try {
            if (random().nextBoolean()) {
              DirectoryReader ir = null;
              try {
                ir = DirectoryReader.open(iw, random().nextBoolean(), false);
                TestUtil.checkReader(ir);
              } finally {
                IOUtils.closeWhileHandlingException(ir);
              }
            } else {
              iw.commit();
            }
            if (DirectoryReader.indexExists(dir)) {
              TestUtil.checkIndex(dir);
            }
          } catch (VirtualMachineError | AlreadyClosedException disaster) {
            getTragedy(disaster, iw, exceptionStream);
            continue STARTOVER;
          }
        }
      }
      try {
        iw.close();
      } catch (VirtualMachineError | AlreadyClosedException disaster) {
        getTragedy(disaster, iw, exceptionStream);
        continue STARTOVER;
      }
    } catch (Throwable t) {
      System.out.println("Unexpected exception: dumping fake-exception-log:...");
      exceptionStream.flush();
      System.out.println(exceptionLog.toString("UTF-8"));
      System.out.flush();
      Rethrow.rethrow(t);
    }
  }
  dir.close();
  if (VERBOSE) {
    System.out.println("TEST PASSED: dumping fake-exception-log:...");
    System.out.println(exceptionLog.toString("UTF-8"));
  }
}
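The getTragedy helper called above is not shown on this page. Purely as a hypothetical illustration of the pattern (the name logTragedy and its body are assumptions, not the test's actual helper), such a handler could log the disaster and pull the error that IndexWriter recorded via its public getTragicException() accessor:

private void logTragedy(Throwable disaster, IndexWriter iw, PrintStream exceptionStream) {
  // hypothetical sketch: after a VirtualMachineError the writer is unusable, so just record what happened
  disaster.printStackTrace(exceptionStream);
  Throwable tragic = iw.getTragicException();  // the throwable IndexWriter recorded as its tragic event, if any
  if (tragic != null && tragic != disaster) {
    tragic.printStackTrace(exceptionStream);
  }
}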