Example usage of org.apache.lucene.document.SortedDocValuesField in the elastic/elasticsearch project:
class StoreTests, method testNewChecksums.
/**
 * Indexes a random number of documents with the default codec (which checksums every
 * segment file), optionally applies random in-place updates and an NRT flush, commits,
 * and then verifies that the store's metadata snapshot reports a matching checksum and
 * writer version for every file, plus a non-empty hash for .si / segments_N files.
 * Also asserts that metadata retrieval fails before the first commit.
 */
public void testNewChecksums() throws IOException {
    final ShardId shardId = new ShardId("index", "_na_", 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random());
    Store store = new Store(shardId, INDEX_SETTINGS, directoryService, new DummyShardLock(shardId));
    // use the default codec so every segment file carries a checksum
    IndexWriter writer = new IndexWriter(store.directory(),
            newIndexWriterConfig(random(), new MockAnalyzer(random())).setCodec(TestUtil.getDefaultCodec()));
    final int numDocs = 1 + random().nextInt(100);
    for (int i = 0; i < numDocs; i++) {
        Document document = new Document();
        document.add(new TextField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        document.add(new TextField("body", TestUtil.randomRealisticUnicodeString(random()),
                random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        document.add(new SortedDocValuesField("dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
        writer.addDocument(document);
    }
    if (random().nextBoolean()) {
        // randomly overwrite a subset of the documents
        for (int i = 0; i < numDocs; i++) {
            if (random().nextBoolean()) {
                Document document = new Document();
                document.add(new TextField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
                document.add(new TextField("body", TestUtil.randomRealisticUnicodeString(random()),
                        random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
                writer.updateDocument(new Term("id", "" + i), document);
            }
        }
    }
    if (random().nextBoolean()) {
        // opening and closing an NRT reader forces a flush
        DirectoryReader.open(writer).close();
    }
    // before the first commit there is no index, so metadata retrieval must fail
    try {
        store.getMetadata(null);
        fail("no index present - expected exception");
    } catch (IndexNotFoundException ex) {
        // expected
    }
    writer.commit();
    writer.close();
    Store.MetadataSnapshot metadata = store.getMetadata(null);
    assertThat(metadata.asMap().isEmpty(), is(false));
    for (StoreFileMetaData meta : metadata) {
        try (IndexInput input = store.directory().openInput(meta.name(), IOContext.DEFAULT)) {
            String expectedChecksum = Store.digestToString(CodecUtil.retrieveChecksum(input));
            assertThat("File: " + meta.name() + " has a different checksum", meta.checksum(), equalTo(expectedChecksum));
            assertThat(meta.writtenBy(), equalTo(Version.LATEST));
            if (meta.name().endsWith(".si") || meta.name().startsWith("segments_")) {
                assertThat(meta.hash().length, greaterThan(0));
            }
        }
    }
    assertConsistent(store, metadata);
    TestUtil.checkIndex(store.directory());
    assertDeleteContent(store, directoryService);
    IOUtils.close(store);
}
Example usage of org.apache.lucene.document.SortedDocValuesField in the elastic/elasticsearch project:
class StoreTests, method testCleanupFromSnapshot.
/**
 * Indexes random documents across multiple commits (NoDeletionPolicy keeps every commit
 * alive), snapshots the metadata of an early commit and of the final commit, then runs
 * Store#cleanupAndVerify against one of the two snapshots and verifies that only files
 * belonging to the retained snapshot (plus the write lock) survive, and that at least
 * one surviving file is absent from the other snapshot.
 *
 * Fixes over the original: removed two unused {@code numChecksums} locals, corrected a
 * misleading comment (the writer uses the default codec, not a random one), and folded
 * the duplicated verification branches into a single helper.
 */
public void testCleanupFromSnapshot() throws IOException {
    final ShardId shardId = new ShardId("index", "_na_", 1);
    DirectoryService directoryService = new LuceneManagedDirectoryService(random());
    Store store = new Store(shardId, INDEX_SETTINGS, directoryService, new DummyShardLock(shardId));
    // default codec - all segments need checksums
    IndexWriterConfig indexWriterConfig = newIndexWriterConfig(random(), new MockAnalyzer(random())).setCodec(TestUtil.getDefaultCodec());
    // we keep all commits and that allows us to clean based on multiple snapshots
    indexWriterConfig.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    IndexWriter writer = new IndexWriter(store.directory(), indexWriterConfig);
    int docs = 1 + random().nextInt(100);
    int numCommits = 0;
    for (int i = 0; i < docs; i++) {
        if (i > 0 && randomIntBetween(0, 10) == 0) {
            writer.commit();
            numCommits++;
        }
        Document doc = new Document();
        doc.add(new TextField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        doc.add(new TextField("body", TestUtil.randomRealisticUnicodeString(random()), random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        doc.add(new SortedDocValuesField("dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
        writer.addDocument(doc);
    }
    if (numCommits < 1) {
        // guarantee at least one intermediate commit before the final one
        writer.commit();
        Document doc = new Document();
        doc.add(new TextField("id", "" + docs++, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        doc.add(new TextField("body", TestUtil.randomRealisticUnicodeString(random()), random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
        doc.add(new SortedDocValuesField("dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
        writer.addDocument(doc);
    }
    Store.MetadataSnapshot firstMeta = store.getMetadata(null);
    if (random().nextBoolean()) {
        // randomly overwrite a subset of the documents before the final commit
        for (int i = 0; i < docs; i++) {
            if (random().nextBoolean()) {
                Document doc = new Document();
                doc.add(new TextField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
                doc.add(new TextField("body", TestUtil.randomRealisticUnicodeString(random()), random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
                writer.updateDocument(new Term("id", "" + i), doc);
            }
        }
    }
    writer.commit();
    writer.close();
    Store.MetadataSnapshot secondMeta = store.getMetadata(null);
    // clean with one of the two snapshots and check exactly its files were retained
    if (randomBoolean()) {
        store.cleanupAndVerify("test", firstMeta);
        assertCleanupRetained(store, firstMeta, secondMeta);
    } else {
        store.cleanupAndVerify("test", secondMeta);
        assertCleanupRetained(store, secondMeta, firstMeta);
    }
    deleteContent(store.directory());
    IOUtils.close(store);
}

/**
 * Asserts that every file remaining in the store belongs to the retained snapshot
 * (ignoring "extra" files and the write lock), and that at least one remaining file
 * is not part of the other snapshot - proving the two commits actually differed.
 */
private void assertCleanupRetained(Store store, Store.MetadataSnapshot retained, Store.MetadataSnapshot other) throws IOException {
    int numNotFound = 0;
    for (String file : store.directory().listAll()) {
        if (file.startsWith("extra")) {
            // NOTE(review): "extra" files appear to be test-framework artifacts - skipped, as in the original
            continue;
        }
        assertTrue(file, retained.contains(file) || file.equals("write.lock"));
        if (other.contains(file) == false) {
            numNotFound++;
        }
    }
    assertTrue("at least one file must not be in here since we have two commits?", numNotFound > 0);
}
Example usage of org.apache.lucene.document.SortedDocValuesField in the elastic/elasticsearch project:
class ReplaceMissingTests, method test.
/**
 * Verifies BytesRefFieldComparatorSource.ReplaceMissing over a three-document segment
 * (doc 0 = "cat", doc 1 = no value, doc 2 = "dog"): documents without a value get the
 * ordinal of the substitute term. If the substitute already exists in the dictionary
 * the value count is unchanged; otherwise a new ordinal is inserted at its sorted
 * position and the existing ordinals shift around it.
 */
public void test() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig config = newIndexWriterConfig(null);
    config.setMergePolicy(newLogMergePolicy());
    IndexWriter indexWriter = new IndexWriter(dir, config);
    // doc 0 -> "cat"
    Document document = new Document();
    document.add(new SortedDocValuesField("field", new BytesRef("cat")));
    indexWriter.addDocument(document);
    // doc 1 -> no value for "field"
    indexWriter.addDocument(new Document());
    // doc 2 -> "dog"
    document = new Document();
    document.add(new SortedDocValuesField("field", new BytesRef("dog")));
    indexWriter.addDocument(document);
    indexWriter.forceMerge(1);
    indexWriter.close();
    DirectoryReader reader = DirectoryReader.open(dir);
    LeafReader leaf = getOnlyLeafReader(reader);
    SortedDocValues raw = leaf.getSortedDocValues("field");
    assertEquals(2, raw.getValueCount());
    // substitutes that already exist: ordinal space stays the same
    SortedDocValues replaced = new BytesRefFieldComparatorSource.ReplaceMissing(raw, new BytesRef("cat"));
    assertEquals(2, replaced.getValueCount());
    assertEquals("cat", replaced.lookupOrd(0).utf8ToString());
    assertEquals("dog", replaced.lookupOrd(1).utf8ToString());
    assertEquals(0, replaced.getOrd(0));
    assertEquals(0, replaced.getOrd(1));
    assertEquals(1, replaced.getOrd(2));
    replaced = new BytesRefFieldComparatorSource.ReplaceMissing(raw, new BytesRef("dog"));
    assertEquals(2, replaced.getValueCount());
    assertEquals("cat", replaced.lookupOrd(0).utf8ToString());
    assertEquals("dog", replaced.lookupOrd(1).utf8ToString());
    assertEquals(0, replaced.getOrd(0));
    assertEquals(1, replaced.getOrd(1));
    assertEquals(1, replaced.getOrd(2));
    // substitutes not in the dictionary: a new ordinal is inserted in sorted position
    replaced = new BytesRefFieldComparatorSource.ReplaceMissing(raw, new BytesRef("apple"));
    assertEquals(3, replaced.getValueCount());
    assertEquals("apple", replaced.lookupOrd(0).utf8ToString());
    assertEquals("cat", replaced.lookupOrd(1).utf8ToString());
    assertEquals("dog", replaced.lookupOrd(2).utf8ToString());
    assertEquals(1, replaced.getOrd(0));
    assertEquals(0, replaced.getOrd(1));
    assertEquals(2, replaced.getOrd(2));
    replaced = new BytesRefFieldComparatorSource.ReplaceMissing(raw, new BytesRef("company"));
    assertEquals(3, replaced.getValueCount());
    assertEquals("cat", replaced.lookupOrd(0).utf8ToString());
    assertEquals("company", replaced.lookupOrd(1).utf8ToString());
    assertEquals("dog", replaced.lookupOrd(2).utf8ToString());
    assertEquals(0, replaced.getOrd(0));
    assertEquals(1, replaced.getOrd(1));
    assertEquals(2, replaced.getOrd(2));
    replaced = new BytesRefFieldComparatorSource.ReplaceMissing(raw, new BytesRef("ebay"));
    assertEquals(3, replaced.getValueCount());
    assertEquals("cat", replaced.lookupOrd(0).utf8ToString());
    assertEquals("dog", replaced.lookupOrd(1).utf8ToString());
    assertEquals("ebay", replaced.lookupOrd(2).utf8ToString());
    assertEquals(0, replaced.getOrd(0));
    assertEquals(2, replaced.getOrd(1));
    assertEquals(1, replaced.getOrd(2));
    reader.close();
    dir.close();
}
Example usage of org.apache.lucene.document.SortedDocValuesField in the elastic/elasticsearch project:
class CollapsingTopDocsCollectorTests, method testEmptySortedSegment.
/**
 * A segment that has no values at all for the collapse field must still be handled:
 * its documents collapse under the missing (null) group value, which sorts first,
 * followed by "0", "1" and "10" in binary term order.
 */
public void testEmptySortedSegment() throws Exception {
    final Directory dir = newDirectory();
    final RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
    // first segment: groups "0" and "1"
    Document document = new Document();
    document.add(new SortedDocValuesField("group", new BytesRef("0")));
    writer.addDocument(document);
    document = new Document();
    document.add(new SortedDocValuesField("group", new BytesRef("1")));
    writer.addDocument(document);
    writer.commit();
    // second segment: group "10"
    document = new Document();
    document.add(new SortedDocValuesField("group", new BytesRef("10")));
    writer.addDocument(document);
    writer.commit();
    // third segment: no "group" doc values at all
    document = new Document();
    document.add(new NumericDocValuesField("category", 0));
    writer.addDocument(document);
    writer.commit();
    final IndexReader reader = writer.getReader();
    final IndexSearcher searcher = newSearcher(reader);
    final Sort sort = new Sort(new SortField("group", SortField.Type.STRING_VAL));
    final CollapsingTopDocsCollector collector = CollapsingTopDocsCollector.createKeyword("group", sort, 10, false);
    searcher.search(new MatchAllDocsQuery(), collector);
    final CollapseTopFieldDocs topDocs = collector.getTopDocs();
    assertEquals(4, topDocs.scoreDocs.length);
    assertEquals(4, topDocs.collapseValues.length);
    // the missing group sorts first as null
    assertNull(topDocs.collapseValues[0]);
    assertEquals(new BytesRef("0"), topDocs.collapseValues[1]);
    assertEquals(new BytesRef("1"), topDocs.collapseValues[2]);
    assertEquals(new BytesRef("10"), topDocs.collapseValues[3]);
    writer.close();
    reader.close();
    dir.close();
}
Example usage of org.apache.lucene.document.SortedDocValuesField in the elastic/elasticsearch project:
class SimpleLuceneTests, method testSortValues.
/**
 * Indexes ten documents whose "str" field holds ascending two-character strings
 * ("aa" .. "jj") both as a stored text field and as sorted doc values, then verifies
 * that searching with a STRING sort returns them in exactly that order.
 *
 * Fixes a resource leak in the original: the reader, the writer and the directory
 * were never closed; they are now released at the end of the test.
 */
public void testSortValues() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    for (int i = 0; i < 10; i++) {
        Document document = new Document();
        String text = new String(new char[] { (char) (97 + i), (char) (97 + i) });
        document.add(new TextField("str", text, Field.Store.YES));
        // same field name for stored text and doc values; the sort reads the doc values
        document.add(new SortedDocValuesField("str", new BytesRef(text)));
        indexWriter.addDocument(document);
    }
    IndexReader reader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(indexWriter));
    IndexSearcher searcher = new IndexSearcher(reader);
    TopFieldDocs docs = searcher.search(new MatchAllDocsQuery(), 10, new Sort(new SortField("str", SortField.Type.STRING)));
    for (int i = 0; i < 10; i++) {
        FieldDoc fieldDoc = (FieldDoc) docs.scoreDocs[i];
        String expected = new String(new char[] { (char) (97 + i), (char) (97 + i) });
        assertThat((BytesRef) fieldDoc.fields[0], equalTo(new BytesRef(expected)));
    }
    // release all resources (reader first, then the writer it was opened from)
    reader.close();
    indexWriter.close();
    dir.close();
}
Aggregations