Use of org.apache.lucene.analysis.MockAnalyzer in project lucene-solr by apache.
From the class TestCompressingTermVectorsFormat, the method testChunkCleanup:
/**
 * writes some tiny segments with incomplete compressed blocks,
 * and ensures merge recompresses them.
 */
public void testChunkCleanup() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
  iwConf.setMergePolicy(NoMergePolicy.INSTANCE);
  // we have to enforce certain things like maxDocsPerChunk to cause dirty chunks to be created
  // by this test.
  iwConf.setCodec(CompressingCodec.randomInstance(random(), 4 * 1024, 100, false, 8));
  IndexWriter iw = new IndexWriter(dir, iwConf);
  DirectoryReader ir = DirectoryReader.open(iw);
  for (int i = 0; i < 5; i++) {
    Document doc = new Document();
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setStoreTermVectors(true);
    doc.add(new Field("text", "not very long at all", ft));
    iw.addDocument(doc);
    // force flush
    DirectoryReader ir2 = DirectoryReader.openIfChanged(ir);
    assertNotNull(ir2);
    ir.close();
    ir = ir2;
    // examine dirty counts:
    for (LeafReaderContext leaf : ir2.leaves()) {
      CodecReader sr = (CodecReader) leaf.reader();
      CompressingTermVectorsReader reader = (CompressingTermVectorsReader) sr.getTermVectorsReader();
      assertEquals(1, reader.getNumChunks());
      assertEquals(1, reader.getNumDirtyChunks());
    }
  }
  iw.getConfig().setMergePolicy(newLogMergePolicy());
  iw.forceMerge(1);
  DirectoryReader ir2 = DirectoryReader.openIfChanged(ir);
  assertNotNull(ir2);
  ir.close();
  ir = ir2;
  CodecReader sr = (CodecReader) getOnlyLeafReader(ir);
  CompressingTermVectorsReader reader = (CompressingTermVectorsReader) sr.getTermVectorsReader();
  // we could get lucky, and have zero, but typically one.
  assertTrue(reader.getNumDirtyChunks() <= 1);
  ir.close();
  iw.close();
  dir.close();
}
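The per-document flush above leans on Lucene's near-real-time reader idiom: open a DirectoryReader straight from the IndexWriter, then ask for a refreshed view after each change with openIfChanged. A minimal sketch of that refresh step, with the helper name refreshNRT assumed for illustration:

static DirectoryReader refreshNRT(DirectoryReader current) throws IOException {
  DirectoryReader newer = DirectoryReader.openIfChanged(current);
  if (newer == null) {
    // nothing has changed since the current reader was opened
    return current;
  }
  // switch to the refreshed view and release the old reader's files
  current.close();
  return newer;
}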
Use of org.apache.lucene.analysis.MockAnalyzer in project lucene-solr by apache.
From the class TestCompressingStoredFieldsFormat, the method testDeletePartiallyWrittenFilesIfAbort:
public void testDeletePartiallyWrittenFilesIfAbort() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
  iwConf.setMaxBufferedDocs(RandomNumbers.randomIntBetween(random(), 2, 30));
  iwConf.setCodec(CompressingCodec.randomInstance(random()));
  // disable CFS because this test checks file names
  iwConf.setMergePolicy(newLogMergePolicy(false));
  iwConf.setUseCompoundFile(false);
  // Cannot use RIW because this test wants CFS to stay off:
  IndexWriter iw = new IndexWriter(dir, iwConf);
  final Document validDoc = new Document();
  validDoc.add(new IntPoint("id", 0));
  validDoc.add(new StoredField("id", 0));
  iw.addDocument(validDoc);
  iw.commit();
  // make sure that #writeField will fail to trigger an abort
  final Document invalidDoc = new Document();
  FieldType fieldType = new FieldType();
  fieldType.setStored(true);
  invalidDoc.add(new Field("invalid", fieldType) {
    @Override
    public String stringValue() {
      // abort the segment!! We should fix this.
      return null;
    }
  });
  try {
    iw.addDocument(invalidDoc);
    iw.commit();
  } catch (IllegalArgumentException iae) {
    // expected
    assertEquals(iae, iw.getTragicException());
  }
  // Writer should be closed by tragedy
  assertFalse(iw.isOpen());
  dir.close();
}
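The anonymous Field above deliberately returns null from stringValue(), so the stored fields writer throws an IllegalArgumentException and the writer records it as a tragic exception. For contrast, a minimal sketch of a valid stored-only field (the field name "payload" and its value are assumed for illustration):

// a stored-only FieldType: not indexed, just written to the stored fields files
FieldType storedOnly = new FieldType();
storedOnly.setStored(true);
storedOnly.freeze();

Document ok = new Document();
// unlike the invalid field above, stringValue() here is non-null, so writing succeeds
ok.add(new Field("payload", "some stored text", storedOnly));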
Use of org.apache.lucene.analysis.MockAnalyzer in project lucene-solr by apache.
From the class TestBufferedIndexInput, the method testSetBufferSize:
public void testSetBufferSize() throws IOException {
  Path indexDir = createTempDir("testSetBufferSize");
  MockFSDirectory dir = new MockFSDirectory(indexDir, random());
  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random()))
      .setOpenMode(OpenMode.CREATE)
      .setMergePolicy(newLogMergePolicy(false)));
  for (int i = 0; i < 37; i++) {
    Document doc = new Document();
    doc.add(newTextField("content", "aaa bbb ccc ddd" + i, Field.Store.YES));
    doc.add(newTextField("id", "" + i, Field.Store.YES));
    writer.addDocument(doc);
  }
  dir.allIndexInputs.clear();
  IndexReader reader = DirectoryReader.open(writer);
  Term aaa = new Term("content", "aaa");
  Term bbb = new Term("content", "bbb");
  reader.close();
  dir.tweakBufferSizes();
  writer.deleteDocuments(new Term("id", "0"));
  reader = DirectoryReader.open(writer);
  IndexSearcher searcher = newSearcher(reader);
  ScoreDoc[] hits = searcher.search(new TermQuery(bbb), 1000).scoreDocs;
  dir.tweakBufferSizes();
  assertEquals(36, hits.length);
  reader.close();
  dir.tweakBufferSizes();
  writer.deleteDocuments(new Term("id", "4"));
  reader = DirectoryReader.open(writer);
  searcher = newSearcher(reader);
  hits = searcher.search(new TermQuery(bbb), 1000).scoreDocs;
  dir.tweakBufferSizes();
  assertEquals(35, hits.length);
  dir.tweakBufferSizes();
  hits = searcher.search(new TermQuery(new Term("id", "33")), 1000).scoreDocs;
  dir.tweakBufferSizes();
  assertEquals(1, hits.length);
  hits = searcher.search(new TermQuery(aaa), 1000).scoreDocs;
  dir.tweakBufferSizes();
  assertEquals(35, hits.length);
  writer.close();
  reader.close();
}
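MockFSDirectory is a test-local FSDirectory wrapper that remembers every IndexInput it opens (the allIndexInputs list cleared above), and tweakBufferSizes walks that list and changes each input's buffer size mid-stream. A rough sketch of that step, with the size range assumed for illustration:

for (IndexInput in : allIndexInputs) {
  // BufferedIndexInput#setBufferSize preserves any already-buffered bytes,
  // so readers that are mid-read keep working after the resize
  BufferedIndexInput bii = (BufferedIndexInput) in;
  int newSize = 1024 + random.nextInt(32 * 1024);  // assumed range, for illustration only
  bii.setBufferSize(newSize);
}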
Use of org.apache.lucene.analysis.MockAnalyzer in project lucene-solr by apache.
From the class TestFileSwitchDirectory, the method testBasic:
/**
 * Test if writing doc stores to disk and everything else to ram works.
 */
public void testBasic() throws IOException {
  Set<String> fileExtensions = new HashSet<>();
  fileExtensions.add(CompressingStoredFieldsWriter.FIELDS_EXTENSION);
  fileExtensions.add(CompressingStoredFieldsWriter.FIELDS_INDEX_EXTENSION);
  MockDirectoryWrapper primaryDir = new MockDirectoryWrapper(random(), new RAMDirectory());
  // only part of an index
  primaryDir.setCheckIndexOnClose(false);
  MockDirectoryWrapper secondaryDir = new MockDirectoryWrapper(random(), new RAMDirectory());
  // only part of an index
  secondaryDir.setCheckIndexOnClose(false);
  FileSwitchDirectory fsd = new FileSwitchDirectory(fileExtensions, primaryDir, secondaryDir, true);
  // for now we wire the default codec because we rely upon its specific impl
  IndexWriter writer = new IndexWriter(fsd, new IndexWriterConfig(new MockAnalyzer(random()))
      .setMergePolicy(newLogMergePolicy(false))
      .setCodec(TestUtil.getDefaultCodec())
      .setUseCompoundFile(false));
  TestIndexWriterReader.createIndexNoClose(true, "ram", writer);
  IndexReader reader = DirectoryReader.open(writer);
  assertEquals(100, reader.maxDoc());
  writer.commit();
  // we should see only fdx,fdt files here
  String[] files = primaryDir.listAll();
  assertTrue(files.length > 0);
  for (int x = 0; x < files.length; x++) {
    String ext = FileSwitchDirectory.getExtension(files[x]);
    assertTrue(fileExtensions.contains(ext));
  }
  files = secondaryDir.listAll();
  assertTrue(files.length > 0);
  // we should not see fdx,fdt files here
  for (int x = 0; x < files.length; x++) {
    String ext = FileSwitchDirectory.getExtension(files[x]);
    assertFalse(fileExtensions.contains(ext));
  }
  reader.close();
  writer.close();
  files = fsd.listAll();
  for (int i = 0; i < files.length; i++) {
    assertNotNull(files[i]);
  }
  fsd.close();
}
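FileSwitchDirectory routes purely by file extension: names whose extension is in the supplied set go to the primary directory, everything else goes to the secondary. A minimal construction sketch (the literal "fdt"/"fdx" strings stand in for the codec constants used above):

Set<String> primaryExtensions = new HashSet<>(Arrays.asList("fdt", "fdx"));  // stored fields data + index
Directory primary = new RAMDirectory();    // receives only the .fdt/.fdx files
Directory secondary = new RAMDirectory();  // receives every other index file
// the last argument tells the switch directory to close both delegates on close()
Directory dir = new FileSwitchDirectory(primaryExtensions, primary, secondary, true);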
Use of org.apache.lucene.analysis.MockAnalyzer in project lucene-solr by apache.
From the class TestMultiMMap, the method assertChunking:
private void assertChunking(Random random, int chunkSize) throws Exception {
  Path path = createTempDir("mmap" + chunkSize);
  MMapDirectory mmapDir = new MMapDirectory(path, chunkSize);
  // we will map a lot, try to turn on the unmap hack
  if (MMapDirectory.UNMAP_SUPPORTED) {
    mmapDir.setUseUnmap(true);
  }
  MockDirectoryWrapper dir = new MockDirectoryWrapper(random, mmapDir);
  RandomIndexWriter writer = new RandomIndexWriter(random, dir,
      newIndexWriterConfig(new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
  Document doc = new Document();
  Field docid = newStringField("docid", "0", Field.Store.YES);
  Field junk = newStringField("junk", "", Field.Store.YES);
  doc.add(docid);
  doc.add(junk);
  int numDocs = 100;
  for (int i = 0; i < numDocs; i++) {
    docid.setStringValue("" + i);
    junk.setStringValue(TestUtil.randomUnicodeString(random));
    writer.addDocument(doc);
  }
  IndexReader reader = writer.getReader();
  writer.close();
  int numAsserts = atLeast(100);
  for (int i = 0; i < numAsserts; i++) {
    int docID = random.nextInt(numDocs);
    assertEquals("" + docID, reader.document(docID).get("docid"));
  }
  reader.close();
  dir.close();
}
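The chunkSize argument caps how many bytes one mapped buffer may cover, so files larger than a chunk are split across several ByteBuffers and the test then reads across those seams. A minimal setup sketch (the path and the 1 MB chunk size are assumed):

Path indexPath = Paths.get("/tmp/mmap-demo");
// map files in 1 MB chunks instead of the much larger platform default
MMapDirectory mmapDir = new MMapDirectory(indexPath, 1 << 20);
if (MMapDirectory.UNMAP_SUPPORTED) {
  // eagerly unmap buffers when inputs are closed rather than waiting for GC
  mmapDir.setUseUnmap(true);
}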