Use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache: class TestDocValuesIndexing, method testTypeChangeViaAddIndexes2.
public void testTypeChangeViaAddIndexes2() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter writer = new IndexWriter(dir, conf);
  Document doc = new Document();
  doc.add(new NumericDocValuesField("dv", 0L));
  writer.addDocument(doc);
  writer.close();
  Directory dir2 = newDirectory();
  conf = newIndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter writer2 = new IndexWriter(dir2, conf);
  writer2.addIndexes(dir);
  Document doc2 = new Document();
  doc2.add(new SortedDocValuesField("dv", new BytesRef("foo")));
  expectThrows(IllegalArgumentException.class, () -> {
    writer2.addDocument(doc2);
  });
  writer2.close();
  dir2.close();
  dir.close();
}
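The rule this test exercises: once a field has been written with one doc-values type, every later document for that field must use the same type, even when the earlier documents arrive via addIndexes. Below is a minimal standalone sketch of the same failure, assuming a plain Lucene setup (RAMDirectory, StandardAnalyzer, and the class name are ours, not from the test):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;

public class DocValuesTypeChangeSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      Document first = new Document();
      first.add(new NumericDocValuesField("dv", 0L)); // "dv" is now NUMERIC index-wide
      writer.addDocument(first);

      Document second = new Document();
      second.add(new SortedDocValuesField("dv", new BytesRef("foo"))); // SORTED on the same field
      try {
        writer.addDocument(second); // rejected: a field's doc-values type cannot change
      } catch (IllegalArgumentException expected) {
        System.out.println("rejected as expected: " + expected.getMessage());
      }
    }
    dir.close();
  }
}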
Use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache: class TestMultiDocValues, method testSorted.
public void testSorted() throws Exception {
  Directory dir = newDirectory();
  Document doc = new Document();
  Field field = new SortedDocValuesField("bytes", new BytesRef());
  doc.add(field);
  IndexWriterConfig iwc = newIndexWriterConfig(random(), null);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
  int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
  for (int i = 0; i < numDocs; i++) {
    BytesRef ref = new BytesRef(TestUtil.randomUnicodeString(random()));
    field.setBytesValue(ref);
    if (random().nextInt(7) == 0) {
      iw.addDocument(new Document());
    }
    iw.addDocument(doc);
    if (random().nextInt(17) == 0) {
      iw.commit();
    }
  }
  DirectoryReader ir = iw.getReader();
  iw.forceMerge(1);
  DirectoryReader ir2 = iw.getReader();
  LeafReader merged = getOnlyLeafReader(ir2);
  iw.close();
  SortedDocValues multi = MultiDocValues.getSortedValues(ir, "bytes");
  SortedDocValues single = merged.getSortedDocValues("bytes");
  assertEquals(single.getValueCount(), multi.getValueCount());
  while (true) {
    assertEquals(single.nextDoc(), multi.nextDoc());
    if (single.docID() == NO_MORE_DOCS) {
      break;
    }
    // check value
    final BytesRef expected = BytesRef.deepCopyOf(single.binaryValue());
    final BytesRef actual = multi.binaryValue();
    assertEquals(expected, actual);
    // check ord
    assertEquals(single.ordValue(), multi.ordValue());
  }
  testRandomAdvance(merged.getSortedDocValues("bytes"), MultiDocValues.getSortedValues(ir, "bytes"));
  testRandomAdvanceExact(merged.getSortedDocValues("bytes"), MultiDocValues.getSortedValues(ir, "bytes"), merged.maxDoc());
  ir.close();
  ir2.close();
  dir.close();
}
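MultiDocValues.getSortedValues builds a merged, index-wide view over the per-segment SortedDocValues, remapping each segment's ords onto a single global ordering; the test asserts that this view agrees with a physically merged one-segment index. A small standalone sketch of iterating such a merged view follows (class name and sample values are ours; one commit per document forces the reader to have several leaves):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;

public class MultiSortedDocValuesSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
    for (String value : new String[] {"banana", "apple", "cherry"}) {
      Document doc = new Document();
      doc.add(new SortedDocValuesField("bytes", new BytesRef(value)));
      writer.addDocument(doc);
      writer.commit(); // one segment per document, so the reader has several leaves
    }
    writer.close();

    DirectoryReader reader = DirectoryReader.open(dir);
    // Merged view over all segments; per-segment ords are remapped globally,
    // so "apple" gets ord 0 regardless of which segment holds it.
    SortedDocValues values = MultiDocValues.getSortedValues(reader, "bytes");
    while (values.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      System.out.println("doc=" + values.docID()
          + " ord=" + values.ordValue()
          + " value=" + values.binaryValue().utf8ToString());
    }
    reader.close();
    dir.close();
  }
}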
Use of org.apache.lucene.document.SortedDocValuesField in project lucene-solr by apache: class TestIndexSearcher, method setUp.
@Override
public void setUp() throws Exception {
  super.setUp();
  dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
  for (int i = 0; i < 100; i++) {
    Document doc = new Document();
    doc.add(newStringField("field", Integer.toString(i), Field.Store.NO));
    doc.add(newStringField("field2", Boolean.toString(i % 2 == 0), Field.Store.NO));
    doc.add(new SortedDocValuesField("field2", new BytesRef(Boolean.toString(i % 2 == 0))));
    iw.addDocument(doc);
  }
  reader = iw.getReader();
  iw.close();
}
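Indexing the same logical field both as a StringField (for matching) and as a SortedDocValuesField (for sorting and faceting), as "field2" is here, is the standard dual-indexing pattern. A minimal sketch of what the doc-values side enables, sorting search hits without touching stored fields (class and field names are illustrative, not from the test):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;

public class SortByDocValuesSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
    for (String id : new String[] {"c", "a", "b"}) {
      Document doc = new Document();
      doc.add(new StringField("id", id, Field.Store.YES)); // indexed + stored, for matching and retrieval
      doc.add(new SortedDocValuesField("id", new BytesRef(id))); // column-stride value, for sorting
      writer.addDocument(doc);
    }
    writer.close();

    DirectoryReader reader = DirectoryReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);
    // SortField.Type.STRING reads the SortedDocValuesField ords, not stored fields.
    TopDocs top = searcher.search(new MatchAllDocsQuery(), 10,
        new Sort(new SortField("id", SortField.Type.STRING)));
    for (ScoreDoc sd : top.scoreDocs) {
      System.out.println(searcher.doc(sd.doc).get("id")); // prints a, b, c
    }
    reader.close();
    dir.close();
  }
}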
Use of org.apache.lucene.document.SortedDocValuesField in project elasticsearch by elastic: class StoreTests, method testRecoveryDiff.
public void testRecoveryDiff() throws IOException, InterruptedException {
  int numDocs = 2 + random().nextInt(100);
  List<Document> docs = new ArrayList<>();
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    doc.add(new StringField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    doc.add(new TextField("body", TestUtil.randomRealisticUnicodeString(random()), random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    doc.add(new SortedDocValuesField("dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
    docs.add(doc);
  }
  long seed = random().nextLong();
  Store.MetadataSnapshot first;
  {
    Random random = new Random(seed);
    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random)).setCodec(TestUtil.getDefaultCodec());
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setUseCompoundFile(random.nextBoolean());
    final ShardId shardId = new ShardId("index", "_na_", 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random);
    Store store = new Store(shardId, INDEX_SETTINGS, directoryService, new DummyShardLock(shardId));
    IndexWriter writer = new IndexWriter(store.directory(), iwc);
    final boolean lotsOfSegments = rarely(random);
    for (Document d : docs) {
      writer.addDocument(d);
      if (lotsOfSegments && random.nextBoolean()) {
        writer.commit();
      } else if (rarely(random)) {
        writer.commit();
      }
    }
    writer.commit();
    writer.close();
    first = store.getMetadata(null);
    assertDeleteContent(store, directoryService);
    store.close();
  }
  long time = new Date().getTime();
  while (time == new Date().getTime()) {
    // bump the time
    Thread.sleep(10);
  }
  Store.MetadataSnapshot second;
  Store store;
  {
    Random random = new Random(seed);
    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random)).setCodec(TestUtil.getDefaultCodec());
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setUseCompoundFile(random.nextBoolean());
    final ShardId shardId = new ShardId("index", "_na_", 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random);
    store = new Store(shardId, INDEX_SETTINGS, directoryService, new DummyShardLock(shardId));
    IndexWriter writer = new IndexWriter(store.directory(), iwc);
    final boolean lotsOfSegments = rarely(random);
    for (Document d : docs) {
      writer.addDocument(d);
      if (lotsOfSegments && random.nextBoolean()) {
        writer.commit();
      } else if (rarely(random)) {
        writer.commit();
      }
    }
    writer.commit();
    writer.close();
    second = store.getMetadata(null);
  }
  Store.RecoveryDiff diff = first.recoveryDiff(second);
  assertThat(first.size(), equalTo(second.size()));
  for (StoreFileMetaData md : first) {
    assertThat(second.get(md.name()), notNullValue());
    // si files are different - containing timestamps etc
    assertThat(second.get(md.name()).isSame(md), equalTo(false));
  }
  assertThat(diff.different.size(), equalTo(first.size()));
  // in lucene 5 nothing is identical - we use random ids in file headers
  assertThat(diff.identical.size(), equalTo(0));
  assertThat(diff.missing, empty());
  // check the self diff
  Store.RecoveryDiff selfDiff = first.recoveryDiff(first);
  assertThat(selfDiff.identical.size(), equalTo(first.size()));
  assertThat(selfDiff.different, empty());
  assertThat(selfDiff.missing, empty());
  // let's add some deletes
  Random random = new Random(seed);
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random)).setCodec(TestUtil.getDefaultCodec());
  iwc.setMergePolicy(NoMergePolicy.INSTANCE);
  iwc.setUseCompoundFile(random.nextBoolean());
  iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
  IndexWriter writer = new IndexWriter(store.directory(), iwc);
  writer.deleteDocuments(new Term("id", Integer.toString(random().nextInt(numDocs))));
  writer.commit();
  writer.close();
  Store.MetadataSnapshot metadata = store.getMetadata(null);
  StoreFileMetaData delFile = null;
  for (StoreFileMetaData md : metadata) {
    if (md.name().endsWith(".liv")) {
      delFile = md;
      break;
    }
  }
  Store.RecoveryDiff afterDeleteDiff = metadata.recoveryDiff(second);
  if (delFile != null) {
    // segments_N + del file
    assertThat(afterDeleteDiff.identical.size(), equalTo(metadata.size() - 2));
    assertThat(afterDeleteDiff.different.size(), equalTo(0));
    assertThat(afterDeleteDiff.missing.size(), equalTo(2));
  } else {
    // an entire segment must be missing (single doc segment got dropped)
    assertThat(afterDeleteDiff.identical.size(), greaterThan(0));
    assertThat(afterDeleteDiff.different.size(), equalTo(0));
    // the commit file is different
    assertThat(afterDeleteDiff.missing.size(), equalTo(1));
  }
  // check the self diff
  selfDiff = metadata.recoveryDiff(metadata);
  assertThat(selfDiff.identical.size(), equalTo(metadata.size()));
  assertThat(selfDiff.different, empty());
  assertThat(selfDiff.missing, empty());
  // add a new commit
  iwc = new IndexWriterConfig(new MockAnalyzer(random)).setCodec(TestUtil.getDefaultCodec());
  iwc.setMergePolicy(NoMergePolicy.INSTANCE);
  // force CFS - easier to test here since we know it will add 3 files
  iwc.setUseCompoundFile(true);
  iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
  writer = new IndexWriter(store.directory(), iwc);
  writer.addDocument(docs.get(0));
  writer.close();
  Store.MetadataSnapshot newCommitMetaData = store.getMetadata(null);
  Store.RecoveryDiff newCommitDiff = newCommitMetaData.recoveryDiff(metadata);
  if (delFile != null) {
    // segments_N, del file, cfs, cfe, si for the new segment
    assertThat(newCommitDiff.identical.size(), equalTo(newCommitMetaData.size() - 5));
    // the del file must be different
    assertThat(newCommitDiff.different.size(), equalTo(1));
    assertThat(newCommitDiff.different.get(0).name(), endsWith(".liv"));
    // segments_N, cfs, cfe, si for the new segment
    assertThat(newCommitDiff.missing.size(), equalTo(4));
  } else {
    // segments_N, cfs, cfe, si for the new segment
    assertThat(newCommitDiff.identical.size(), equalTo(newCommitMetaData.size() - 4));
    assertThat(newCommitDiff.different.size(), equalTo(0));
    // an entire segment must be missing (single doc segment got dropped) plus the commit is different
    assertThat(newCommitDiff.missing.size(), equalTo(4));
  }
  deleteContent(store.directory());
  IOUtils.close(store);
}
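The assertion that nothing is identical between the two logically equal snapshots rests on a Lucene property the test's own comment names: every index file header embeds a random id, so byte-for-byte equality never holds across separate writes. A Lucene-only sketch of that property, comparing per-file checksums of two indexes built from the same document (class and helper names are ours; with overwhelming probability every shared file differs):

import java.util.Arrays;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.RAMDirectory;

public class HeaderIdSketch {
  static Directory writeSameDoc() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
    Document doc = new Document();
    doc.add(new StringField("id", "0", Field.Store.YES));
    writer.addDocument(doc);
    writer.commit();
    writer.close();
    return dir;
  }

  public static void main(String[] args) throws Exception {
    Directory first = writeSameDoc();
    Directory second = writeSameDoc();
    for (String file : first.listAll()) {
      if (file.equals(IndexWriter.WRITE_LOCK_NAME)) {
        continue; // skip any lock file; it has no codec footer
      }
      if (Arrays.asList(second.listAll()).contains(file)) {
        try (IndexInput a = first.openInput(file, IOContext.READONCE);
             IndexInput b = second.openInput(file, IOContext.READONCE)) {
          // Checksums differ because each file header embeds a random id,
          // even though both indexes hold exactly the same document.
          System.out.println(file + " same checksum: "
              + (CodecUtil.retrieveChecksum(a) == CodecUtil.retrieveChecksum(b)));
        }
      }
    }
    first.close();
    second.close();
  }
}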
Use of org.apache.lucene.document.SortedDocValuesField in project elasticsearch by elastic: class StoreTests, method testMarkCorruptedOnTruncatedSegmentsFile.
public void testMarkCorruptedOnTruncatedSegmentsFile() throws IOException {
  IndexWriterConfig iwc = newIndexWriterConfig();
  final ShardId shardId = new ShardId("index", "_na_", 1);
  DirectoryService directoryService = new LuceneManagedDirectoryService(random());
  Store store = new Store(shardId, INDEX_SETTINGS, directoryService, new DummyShardLock(shardId));
  IndexWriter writer = new IndexWriter(store.directory(), iwc);
  int numDocs = 1 + random().nextInt(10);
  List<Document> docs = new ArrayList<>();
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    doc.add(new StringField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    doc.add(new TextField("body", TestUtil.randomRealisticUnicodeString(random()), random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    doc.add(new SortedDocValuesField("dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
    docs.add(doc);
  }
  for (Document d : docs) {
    writer.addDocument(d);
  }
  writer.commit();
  writer.close();
  SegmentInfos segmentCommitInfos = store.readLastCommittedSegmentsInfo();
  store.directory().deleteFile(segmentCommitInfos.getSegmentsFileName());
  try (IndexOutput out = store.directory().createOutput(segmentCommitInfos.getSegmentsFileName(), IOContext.DEFAULT)) {
    // empty file
  }
  try {
    if (randomBoolean()) {
      store.getMetadata(null);
    } else {
      store.readLastCommittedSegmentsInfo();
    }
    fail("corrupted segments_N file");
  } catch (CorruptIndexException ex) {
    // expected
  }
  assertTrue(store.isMarkedCorrupted());
  // we have to remove the index since it's corrupted and might fail the MockDirectoryWrapper checkIndex call
  Lucene.cleanLuceneIndex(store.directory());
  store.close();
}
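The corruption is detected purely on the Lucene side: once segments_N is replaced with an empty file, any attempt to read the commit fails, and Elasticsearch's Store wraps that failure and marks itself corrupted. A Lucene-only sketch of the same manipulation, without the Store wrapper (the class name is ours; the exact exception type depends on where the truncation falls, so the sketch catches IOException broadly):

import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;

public class TruncatedSegmentsSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
    Document doc = new Document();
    doc.add(new StringField("id", "0", Field.Store.YES));
    writer.addDocument(doc);
    writer.close();

    // Replace the commit point (segments_N) with an empty file, as the test does.
    String segmentsFile = SegmentInfos.readLatestCommit(dir).getSegmentsFileName();
    dir.deleteFile(segmentsFile);
    try (IndexOutput out = dir.createOutput(segmentsFile, IOContext.DEFAULT)) {
      // intentionally left empty
    }

    try {
      DirectoryReader.open(dir); // must fail: the commit metadata is unreadable
      System.out.println("unexpected: open succeeded");
    } catch (IOException expected) {
      // Lucene surfaces this as an EOF or corrupt-index failure depending on
      // the truncation point; either way the commit cannot be read.
      System.out.println("open failed as expected: " + expected);
    }
    dir.close();
  }
}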