Use of org.apache.lucene.codecs.simpletext.SimpleTextCodec in project lucene-solr by apache.
The class BaseStoredFieldsFormatTestCase, method testWriteReadMerge:
public void testWriteReadMerge() throws IOException {
  // get another codec, other than the default: so we are merging segments across different codecs
  final Codec otherCodec;
  if ("SimpleText".equals(Codec.getDefault().getName())) {
    otherCodec = TestUtil.getDefaultCodec();
  } else {
    otherCodec = new SimpleTextCodec();
  }
  Directory dir = newDirectory();
  IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
  iwConf.setMaxBufferedDocs(RandomNumbers.randomIntBetween(random(), 2, 30));
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);

  final int docCount = atLeast(200);
  final byte[][][] data = new byte[docCount][][];
  for (int i = 0; i < docCount; ++i) {
    final int fieldCount = rarely() ? RandomNumbers.randomIntBetween(random(), 1, 500) : RandomNumbers.randomIntBetween(random(), 1, 5);
    data[i] = new byte[fieldCount][];
    for (int j = 0; j < fieldCount; ++j) {
      final int length = rarely() ? random().nextInt(1000) : random().nextInt(10);
      final int max = rarely() ? 256 : 2;
      data[i][j] = randomByteArray(length, max);
    }
  }

  final FieldType type = new FieldType(StringField.TYPE_STORED);
  type.setIndexOptions(IndexOptions.NONE);
  type.freeze();
  IntPoint id = new IntPoint("id", 0);
  StoredField idStored = new StoredField("id", 0);
  for (int i = 0; i < data.length; ++i) {
    Document doc = new Document();
    doc.add(id);
    doc.add(idStored);
    id.setIntValue(i);
    idStored.setIntValue(i);
    for (int j = 0; j < data[i].length; ++j) {
      Field f = new Field("bytes" + j, data[i][j], type);
      doc.add(f);
    }
    iw.w.addDocument(doc);
    if (random().nextBoolean() && (i % (data.length / 10) == 0)) {
      iw.w.close();
      IndexWriterConfig iwConfNew = newIndexWriterConfig(new MockAnalyzer(random()));
      // test merging against a non-compressing codec
      if (iwConf.getCodec() == otherCodec) {
        iwConfNew.setCodec(Codec.getDefault());
      } else {
        iwConfNew.setCodec(otherCodec);
      }
      iwConf = iwConfNew;
      iw = new RandomIndexWriter(random(), dir, iwConf);
    }
  }

  for (int i = 0; i < 10; ++i) {
    final int min = random().nextInt(data.length);
    final int max = min + random().nextInt(20);
    iw.deleteDocuments(IntPoint.newRangeQuery("id", min, max - 1));
  }

  iw.forceMerge(2); // force merges with deletions
  iw.commit();

  final DirectoryReader ir = DirectoryReader.open(dir);
  assertTrue(ir.numDocs() > 0);
  int numDocs = 0;
  for (int i = 0; i < ir.maxDoc(); ++i) {
    final Document doc = ir.document(i);
    if (doc == null) {
      continue;
    }
    ++numDocs;
    final int docId = doc.getField("id").numericValue().intValue();
    assertEquals(data[docId].length + 1, doc.getFields().size());
    for (int j = 0; j < data[docId].length; ++j) {
      final byte[] arr = data[docId][j];
      final BytesRef arr2Ref = doc.getBinaryValue("bytes" + j);
      final byte[] arr2 = Arrays.copyOfRange(arr2Ref.bytes, arr2Ref.offset, arr2Ref.offset + arr2Ref.length);
      assertArrayEquals(arr, arr2);
    }
  }
  assertTrue(ir.numDocs() <= numDocs);
  ir.close();

  iw.deleteAll();
  iw.commit();
  iw.forceMerge(1);
  iw.close();
  dir.close();
}
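For orientation, here is a minimal, self-contained sketch of the codec-swapping pattern the test above randomizes: write one segment with SimpleTextCodec, then reopen the same directory with the default codec and force a merge, so the merged segment is rewritten by the second writer's codec. This sketch is not taken from the Lucene test suite; the class name and the temporary directory are illustrative.

import java.nio.file.Files;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class SimpleTextCodecMergeSketch {
  public static void main(String[] args) throws Exception {
    try (Directory dir = FSDirectory.open(Files.createTempDirectory("simpletext-demo"))) {
      // First segment: written with SimpleTextCodec (human-readable index files).
      IndexWriterConfig firstConf = new IndexWriterConfig(new StandardAnalyzer());
      firstConf.setCodec(new SimpleTextCodec());
      try (IndexWriter w = new IndexWriter(dir, firstConf)) {
        Document doc = new Document();
        doc.add(new StoredField("id", 1));
        w.addDocument(doc);
      }
      // Second writer: default codec. The forced merge reads the SimpleText segment
      // and rewrites everything with this writer's codec.
      try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
        Document doc = new Document();
        doc.add(new StoredField("id", 2));
        w.addDocument(doc);
        w.forceMerge(1);
      }
    }
  }
}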
Use of org.apache.lucene.codecs.simpletext.SimpleTextCodec in project lucene-solr by apache.
The class SimpleTextCodecFactory, method init:
@Override
public void init(NamedList args) {
  super.init(args);
  assert codec == null;
  codec = new SimpleTextCodec();
}
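Solr obtains a Codec from a CodecFactory such as this one and, in solrconfig.xml, the factory is typically registered through the codecFactory element. Below is a minimal, hypothetical driver for the factory, assuming the surrounding class exposes the codec field through getCodec() as Solr's CodecFactory contract requires, and that the factory class shown above is on the classpath; the sketch class name is illustrative.

import org.apache.lucene.codecs.Codec;
import org.apache.solr.common.util.NamedList;

public class SimpleTextCodecFactorySketch {
  public static Codec loadCodec() {
    SimpleTextCodecFactory factory = new SimpleTextCodecFactory();
    factory.init(new NamedList<Object>()); // init() above creates the SimpleTextCodec
    return factory.getCodec();             // Solr core reads the codec through this accessor
  }
}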
Use of org.apache.lucene.codecs.simpletext.SimpleTextCodec in project lucene-solr by apache.
The class TestIndexWriter, method testDeleteUnusedFiles:
public void testDeleteUnusedFiles() throws Exception {
  assumeFalse("test relies on exact filenames", Codec.getDefault() instanceof SimpleTextCodec);
  assumeWorkingMMapOnWindows();

  for (int iter = 0; iter < 2; iter++) {
    // relies on windows semantics
    Path path = createTempDir();
    FileSystem fs = new WindowsFS(path.getFileSystem()).getFileSystem(URI.create("file:///"));
    Path indexPath = new FilterPath(path, fs);

    // NOTE: on Unix, we cannot use MMapDir, because WindowsFS doesn't see/think it keeps file handles open.
    // Yet, on Windows, we MUST use MMapDir because the windows OS will in fact prevent file deletion for us,
    // and fails otherwise:
    FSDirectory dir;
    if (Constants.WINDOWS) {
      dir = new MMapDirectory(indexPath);
    } else {
      dir = new NIOFSDirectory(indexPath);
    }

    MergePolicy mergePolicy = newLogMergePolicy(true);
    // This test expects all of its segments to be in CFS
    mergePolicy.setNoCFSRatio(1.0);
    mergePolicy.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);

    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(mergePolicy).setUseCompoundFile(true));
    Document doc = new Document();
    doc.add(newTextField("field", "go", Field.Store.NO));
    w.addDocument(doc);
    DirectoryReader r;
    if (iter == 0) {
      // use NRT
      r = w.getReader();
    } else {
      // don't use NRT
      w.commit();
      r = DirectoryReader.open(dir);
    }

    assertTrue(Files.exists(indexPath.resolve("_0.cfs")));
    assertTrue(Files.exists(indexPath.resolve("_0.cfe")));
    assertTrue(Files.exists(indexPath.resolve("_0.si")));
    if (iter == 1) {
      // we run a full commit so there should be a segments file etc.
      assertTrue(Files.exists(indexPath.resolve("segments_1")));
    } else {
      // this is an NRT reopen - no segments files yet
      assertFalse(Files.exists(indexPath.resolve("segments_1")));
    }

    w.addDocument(doc);
    w.forceMerge(1);
    if (iter == 1) {
      w.commit();
    }
    IndexReader r2 = DirectoryReader.openIfChanged(r);
    assertNotNull(r2);
    assertTrue(r != r2);

    // NOTE: here we rely on "Windows" behavior, ie, even
    // though IW wanted to delete _0.cfs since it was
    // merged away, because we have a reader open
    // against this file, it should still be here:
    assertTrue(Files.exists(indexPath.resolve("_0.cfs")));
    // forceMerge created this
    //assertTrue(files.contains("_2.cfs"));
    w.deleteUnusedFiles();

    // r still holds this file open
    assertTrue(Files.exists(indexPath.resolve("_0.cfs")));
    //assertTrue(files.contains("_2.cfs"));
    r.close();
    if (iter == 0) {
      // on closing NRT reader, it calls writer.deleteUnusedFiles
      assertFalse(Files.exists(indexPath.resolve("_0.cfs")));
    } else {
      // now FSDir can remove it
      dir.deletePendingFiles();
      assertFalse(Files.exists(indexPath.resolve("_0.cfs")));
    }

    w.close();
    r2.close();
    dir.close();
  }
}
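The openIfChanged call above is the standard near-real-time reopen idiom; a minimal sketch of that idiom outside the test framework follows. The helper class and method names are illustrative, not part of the Lucene API.

import java.io.IOException;
import org.apache.lucene.index.DirectoryReader;

public class NrtReopenSketch {
  // Returns the freshest reader, closing the old one only when a newer view exists.
  static DirectoryReader refresh(DirectoryReader reader) throws IOException {
    DirectoryReader newer = DirectoryReader.openIfChanged(reader); // null if nothing changed
    if (newer == null) {
      return reader;
    }
    // Closing the old NRT reader is what allows IndexWriter to delete files it no
    // longer references (the iter == 0 branch of the assertions above).
    reader.close();
    return newer;
  }
}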
Use of org.apache.lucene.codecs.simpletext.SimpleTextCodec in project lucene-solr by apache.
The class TestIndexWriter, method testEmptyDirRollback:
public void testEmptyDirRollback() throws Exception {
  // TODO: generalize this test
  assumeFalse("test makes assumptions about file counts", Codec.getDefault() instanceof SimpleTextCodec);
  // Tests that if IW is created over an empty Directory, some documents are
  // indexed, flushed (but not committed) and then IW rolls back, then no
  // files are left in the Directory.
  Directory dir = newDirectory();
  String[] origFiles = dir.listAll();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy()).setUseCompoundFile(false));
  String[] files = dir.listAll();

  // Creating over empty dir should not create any files,
  // or, at most the write.lock file
  final int extraFileCount = files.length - origFiles.length;
  if (extraFileCount == 1) {
    assertTrue(Arrays.asList(files).contains(IndexWriter.WRITE_LOCK_NAME));
  } else {
    Arrays.sort(origFiles);
    Arrays.sort(files);
    assertArrayEquals(origFiles, files);
  }

  Document doc = new Document();
  FieldType customType = new FieldType(TextField.TYPE_STORED);
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorPositions(true);
  customType.setStoreTermVectorOffsets(true);
  // create as many files as possible
  doc.add(newField("c", "val", customType));
  writer.addDocument(doc);

  // Adding just one document does not call flush yet.
  int computedExtraFileCount = 0;
  for (String file : dir.listAll()) {
    if (IndexWriter.WRITE_LOCK_NAME.equals(file)
        || file.startsWith(IndexFileNames.SEGMENTS)
        || IndexFileNames.CODEC_FILE_PATTERN.matcher(file).matches()) {
      if (file.lastIndexOf('.') < 0
          // don't count stored fields and term vectors in
          || !Arrays.asList("fdx", "fdt", "tvx", "tvd", "tvf").contains(file.substring(file.lastIndexOf('.') + 1))) {
        ++computedExtraFileCount;
      }
    }
  }
  assertEquals("only the stored and term vector files should exist in the directory", extraFileCount, computedExtraFileCount);

  doc = new Document();
  doc.add(newField("c", "val", customType));
  writer.addDocument(doc);
  // The second document should cause a flush.
  assertTrue("flush should have occurred and files should have been created", dir.listAll().length > 5 + extraFileCount);

  // After rollback, IW should remove all files
  writer.rollback();
  String[] allFiles = dir.listAll();
  assertEquals("no files should exist in the directory after rollback", origFiles.length + extraFileCount, allFiles.length);

  // Since we rolled-back above, that close should be a no-op
  writer.close();
  allFiles = dir.listAll();
  assertEquals("expected a no-op close after IW.rollback()", origFiles.length + extraFileCount, allFiles.length);

  dir.close();
}
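The rollback contract those last assertions exercise can be reduced to a few lines; here is a minimal sketch outside the test framework, grounded only in what the test itself asserts (rollback discards uncommitted changes and a later close() is a no-op). The class name and temporary directory are illustrative.

import java.nio.file.Files;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class RollbackSketch {
  public static void main(String[] args) throws Exception {
    try (Directory dir = FSDirectory.open(Files.createTempDirectory("rollback-demo"))) {
      IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
      Document doc = new Document();
      doc.add(new TextField("c", "val", Field.Store.YES));
      w.addDocument(doc); // buffered (and possibly flushed), but never committed
      w.rollback();       // discards everything since the last commit and closes the writer
      w.close();          // no-op after rollback(), as the test asserts
      // dir.listAll() now contains no segment files from the rolled-back writer
    }
  }
}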
Use of org.apache.lucene.codecs.simpletext.SimpleTextCodec in project lucene-solr by apache.
The class TestIndexFileDeleter, method testDeleteLeftoverFiles:
public void testDeleteLeftoverFiles() throws IOException {
  Directory dir = newDirectory();

  MergePolicy mergePolicy = newLogMergePolicy(true, 10);
  // This test expects all of its segments to be in CFS
  mergePolicy.setNoCFSRatio(1.0);
  mergePolicy.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);

  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(10).setMergePolicy(mergePolicy).setUseCompoundFile(true));
  int i;
  for (i = 0; i < 35; i++) {
    addDoc(writer, i);
  }

  writer.getConfig().getMergePolicy().setNoCFSRatio(0.0);
  writer.getConfig().setUseCompoundFile(false);
  for (; i < 45; i++) {
    addDoc(writer, i);
  }
  writer.close();

  // Delete one doc so we get a .del file:
  writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE).setUseCompoundFile(true));
  Term searchTerm = new Term("id", "7");
  writer.deleteDocuments(searchTerm);
  writer.close();

  // read in index to try to not depend on codec-specific filenames so much
  SegmentInfos sis = SegmentInfos.readLatestCommit(dir);
  SegmentInfo si0 = sis.info(0).info;
  SegmentInfo si1 = sis.info(1).info;
  SegmentInfo si3 = sis.info(3).info;

  // Now, artificially create an extra .del file & extra
  // .s0 file:
  String[] files = dir.listAll();
  /*
  for (int j = 0; j < files.length; j++) {
    System.out.println(j + ": " + files[j]);
  }
  */

  // TODO: fix this test better
  String ext = ".liv";

  // Create a bogus separate del file for a
  // segment that already has a separate del file:
  copyFile(dir, "_0_1" + ext, "_0_2" + ext);

  // Create a bogus separate del file for a
  // segment that does not yet have a separate del file:
  copyFile(dir, "_0_1" + ext, "_1_1" + ext);

  // Create a bogus separate del file for a
  // non-existent segment:
  copyFile(dir, "_0_1" + ext, "_188_1" + ext);

  String[] cfsFiles0 = si0.getCodec() instanceof SimpleTextCodec ? new String[] { "_0.scf" } : new String[] { "_0.cfs", "_0.cfe" };

  // Create a bogus segment file:
  copyFile(dir, cfsFiles0[0], "_188.cfs");

  // Create a bogus fnm file when the CFS already exists:
  copyFile(dir, cfsFiles0[0], "_0.fnm");

  // Create a bogus cfs file shadowing a non-cfs segment:
  // TODO: assert is bogus (relies upon codec-specific filenames)
  assertTrue(slowFileExists(dir, "_3.fdt") || slowFileExists(dir, "_3.fld"));

  String[] cfsFiles3 = si3.getCodec() instanceof SimpleTextCodec ? new String[] { "_3.scf" } : new String[] { "_3.cfs", "_3.cfe" };
  for (String f : cfsFiles3) {
    assertTrue(!slowFileExists(dir, f));
  }

  String[] cfsFiles1 = si1.getCodec() instanceof SimpleTextCodec ? new String[] { "_1.scf" } : new String[] { "_1.cfs", "_1.cfe" };
  copyFile(dir, cfsFiles1[0], "_3.cfs");

  String[] filesPre = dir.listAll();

  // Open & close a writer: it should delete the above files and nothing more:
  writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.APPEND));
  writer.close();

  String[] files2 = dir.listAll();
  dir.close();

  Arrays.sort(files);
  Arrays.sort(files2);

  Set<String> dif = difFiles(files, files2);

  if (!Arrays.equals(files, files2)) {
    fail("IndexFileDeleter failed to delete unreferenced extra files: should have deleted " + (filesPre.length - files.length) + " files but only deleted " + (filesPre.length - files2.length) + "; expected files:\n " + asString(files) + "\n actual files:\n " + asString(files2) + "\ndiff: " + dif);
  }
}
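The SegmentInfos lookup above is how the test avoids hard-coding codec-specific filenames: SimpleTextCodec uses a different compound-file extension (.scf in this test) than the default codec's .cfs/.cfe pair. A minimal, standalone sketch of that per-segment codec inspection follows; the index path and class name are illustrative.

import java.nio.file.Paths;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class SegmentCodecInspector {
  public static void main(String[] args) throws Exception {
    try (Directory dir = FSDirectory.open(Paths.get("/path/to/index"))) {
      SegmentInfos sis = SegmentInfos.readLatestCommit(dir);
      for (SegmentCommitInfo sci : sis) {
        // prints the segment name and the name of the codec that wrote it, e.g. "_0 -> SimpleText"
        System.out.println(sci.info.name + " -> " + sci.info.getCodec().getName());
      }
    }
  }
}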