use of org.apache.lucene.store.MMapDirectory in project lucene-solr by apache.
the class MMapDirectoryFactory method create.
@Override
protected Directory create(String path, LockFactory lockFactory, DirContext dirContext) throws IOException {
  // the lock factory supplied by the caller is passed straight through to the directory:
  MMapDirectory mapDirectory = new MMapDirectory(new File(path).toPath(), lockFactory, maxChunk);
  try {
    mapDirectory.setUseUnmap(unmapHack);
  } catch (IllegalArgumentException e) {
    log.warn("Unmap not supported on this JVM, continuing on without setting unmap", e);
  }
  mapDirectory.setPreload(preload);
  return mapDirectory;
}
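For context, here is a minimal standalone sketch (not from Solr; the path and settings are illustrative) of constructing an MMapDirectory directly and applying the same optional unmap and preload settings the factory configures:
import java.nio.file.Paths;
import org.apache.lucene.store.MMapDirectory;

public class MMapDirectorySketch {
  public static void main(String[] args) throws Exception {
    // default lock factory and chunk size; the path is a placeholder
    MMapDirectory dir = new MMapDirectory(Paths.get("/tmp/demo-index"));
    if (MMapDirectory.UNMAP_SUPPORTED) {
      dir.setUseUnmap(true); // eagerly unmap mapped buffers on close where the JVM allows it
    }
    dir.setPreload(false); // true would touch all pages when a file is opened
    dir.close();
  }
}
Guarding with MMapDirectory.UNMAP_SUPPORTED avoids the IllegalArgumentException the Solr factory catches above.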
use of org.apache.lucene.store.MMapDirectory in project lucene-solr by apache.
the class TestIndexWriter method testDeleteUnusedFiles.
public void testDeleteUnusedFiles() throws Exception {
  assumeFalse("test relies on exact filenames", Codec.getDefault() instanceof SimpleTextCodec);
  assumeWorkingMMapOnWindows();
  for (int iter = 0; iter < 2; iter++) {
    // relies on windows semantics
    Path path = createTempDir();
    FileSystem fs = new WindowsFS(path.getFileSystem()).getFileSystem(URI.create("file:///"));
    Path indexPath = new FilterPath(path, fs);
    // NOTE: on Unix, we cannot use MMapDir, because WindowsFS doesn't see/think it keeps file handles open. Yet, on Windows, we MUST use
    // MMapDir because the windows OS will in fact prevent file deletion for us, and fails otherwise:
    FSDirectory dir;
    if (Constants.WINDOWS) {
      dir = new MMapDirectory(indexPath);
    } else {
      dir = new NIOFSDirectory(indexPath);
    }
    MergePolicy mergePolicy = newLogMergePolicy(true);
    // This test expects all of its segments to be in CFS
    mergePolicy.setNoCFSRatio(1.0);
    mergePolicy.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(mergePolicy).setUseCompoundFile(true));
    Document doc = new Document();
    doc.add(newTextField("field", "go", Field.Store.NO));
    w.addDocument(doc);
    DirectoryReader r;
    if (iter == 0) {
      // use NRT
      r = w.getReader();
    } else {
      // don't use NRT
      w.commit();
      r = DirectoryReader.open(dir);
    }
    assertTrue(Files.exists(indexPath.resolve("_0.cfs")));
    assertTrue(Files.exists(indexPath.resolve("_0.cfe")));
    assertTrue(Files.exists(indexPath.resolve("_0.si")));
    if (iter == 1) {
      // we run a full commit so there should be a segments file etc.
      assertTrue(Files.exists(indexPath.resolve("segments_1")));
    } else {
      // this is an NRT reopen - no segments files yet
      assertFalse(Files.exists(indexPath.resolve("segments_1")));
    }
    w.addDocument(doc);
    w.forceMerge(1);
    if (iter == 1) {
      w.commit();
    }
    IndexReader r2 = DirectoryReader.openIfChanged(r);
    assertNotNull(r2);
    assertTrue(r != r2);
    // NOTE: here we rely on "Windows" behavior, ie, even
    // though IW wanted to delete _0.cfs since it was
    // merged away, because we have a reader open
    // against this file, it should still be here:
    assertTrue(Files.exists(indexPath.resolve("_0.cfs")));
    // forceMerge created this
    //assertTrue(files.contains("_2.cfs"));
    w.deleteUnusedFiles();
    // r still holds this file open
    assertTrue(Files.exists(indexPath.resolve("_0.cfs")));
    //assertTrue(files.contains("_2.cfs"));
    r.close();
    if (iter == 0) {
      // on closing NRT reader, it calls writer.deleteUnusedFiles
      assertFalse(Files.exists(indexPath.resolve("_0.cfs")));
    } else {
      // now FSDir can remove it
      dir.deletePendingFiles();
      assertFalse(Files.exists(indexPath.resolve("_0.cfs")));
    }
    w.close();
    r2.close();
    dir.close();
  }
}
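Outside of tests that must pin a specific implementation like the one above, callers usually let Lucene choose: FSDirectory.open returns an MMapDirectory on 64-bit JVMs and falls back to other FSDirectory implementations elsewhere. A minimal sketch (the path is illustrative):
import java.nio.file.Paths;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class OpenBestDirectory {
  public static void main(String[] args) throws Exception {
    // picks MMapDirectory on 64-bit platforms, another FSDirectory otherwise
    Directory dir = FSDirectory.open(Paths.get("/tmp/demo-index"));
    System.out.println(dir.getClass().getSimpleName());
    dir.close();
  }
}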
use of org.apache.lucene.store.MMapDirectory in project lucene-solr by apache.
the class Test2BFST method test.
public void test() throws Exception {
  assumeWorkingMMapOnWindows();
  int[] ints = new int[7];
  IntsRef input = new IntsRef(ints, 0, ints.length);
  long seed = random().nextLong();
  Directory dir = new MMapDirectory(createTempDir("2BFST"));
  for (int iter = 0; iter < 1; iter++) {
    // Build FST w/ NoOutputs and stop when nodeCount > 2.2B
    {
      System.out.println("\nTEST: 3B nodes; doPack=false output=NO_OUTPUTS");
      Outputs<Object> outputs = NoOutputs.getSingleton();
      Object NO_OUTPUT = outputs.getNoOutput();
      final Builder<Object> b = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, true, 15);
      int count = 0;
      Random r = new Random(seed);
      int[] ints2 = new int[200];
      IntsRef input2 = new IntsRef(ints2, 0, ints2.length);
      while (true) {
        //System.out.println("add: " + input + " -> " + output);
        for (int i = 10; i < ints2.length; i++) {
          ints2[i] = r.nextInt(256);
        }
        b.add(input2, NO_OUTPUT);
        count++;
        if (count % 100000 == 0) {
          System.out.println(count + ": " + b.fstRamBytesUsed() + " bytes; " + b.getNodeCount() + " nodes");
        }
        if (b.getNodeCount() > Integer.MAX_VALUE + 100L * 1024 * 1024) {
          break;
        }
        nextInput(r, ints2);
      }
      FST<Object> fst = b.finish();
      for (int verify = 0; verify < 2; verify++) {
        System.out.println("\nTEST: now verify [fst size=" + fst.ramBytesUsed() + "; nodeCount=" + b.getNodeCount() + "; arcCount=" + b.getArcCount() + "]");
        Arrays.fill(ints2, 0);
        r = new Random(seed);
        for (int i = 0; i < count; i++) {
          if (i % 1000000 == 0) {
            System.out.println(i + "...: ");
          }
          for (int j = 10; j < ints2.length; j++) {
            ints2[j] = r.nextInt(256);
          }
          assertEquals(NO_OUTPUT, Util.get(fst, input2));
          nextInput(r, ints2);
        }
        System.out.println("\nTEST: enum all input/outputs");
        IntsRefFSTEnum<Object> fstEnum = new IntsRefFSTEnum<>(fst);
        Arrays.fill(ints2, 0);
        r = new Random(seed);
        int upto = 0;
        while (true) {
          IntsRefFSTEnum.InputOutput<Object> pair = fstEnum.next();
          if (pair == null) {
            break;
          }
          for (int j = 10; j < ints2.length; j++) {
            ints2[j] = r.nextInt(256);
          }
          assertEquals(input2, pair.input);
          assertEquals(NO_OUTPUT, pair.output);
          upto++;
          nextInput(r, ints2);
        }
        assertEquals(count, upto);
        if (verify == 0) {
          System.out.println("\nTEST: save/load FST and re-verify");
          IndexOutput out = dir.createOutput("fst", IOContext.DEFAULT);
          fst.save(out);
          out.close();
          IndexInput in = dir.openInput("fst", IOContext.DEFAULT);
          fst = new FST<>(in, outputs);
          in.close();
        } else {
          dir.deleteFile("fst");
        }
      }
    }
    // Build FST w/ ByteSequenceOutputs and stop when FST
    // size = 3GB
    {
      System.out.println("\nTEST: 3 GB size; outputs=bytes");
      Outputs<BytesRef> outputs = ByteSequenceOutputs.getSingleton();
      final Builder<BytesRef> b = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, true, 15);
      byte[] outputBytes = new byte[20];
      BytesRef output = new BytesRef(outputBytes);
      Arrays.fill(ints, 0);
      int count = 0;
      Random r = new Random(seed);
      while (true) {
        r.nextBytes(outputBytes);
        //System.out.println("add: " + input + " -> " + output);
        b.add(input, BytesRef.deepCopyOf(output));
        count++;
        if (count % 1000000 == 0) {
          System.out.println(count + "...: " + b.fstRamBytesUsed() + " bytes");
        }
        if (b.fstRamBytesUsed() > LIMIT) {
          break;
        }
        nextInput(r, ints);
      }
      FST<BytesRef> fst = b.finish();
      for (int verify = 0; verify < 2; verify++) {
        System.out.println("\nTEST: now verify [fst size=" + fst.ramBytesUsed() + "; nodeCount=" + b.getNodeCount() + "; arcCount=" + b.getArcCount() + "]");
        r = new Random(seed);
        Arrays.fill(ints, 0);
        for (int i = 0; i < count; i++) {
          if (i % 1000000 == 0) {
            System.out.println(i + "...: ");
          }
          r.nextBytes(outputBytes);
          assertEquals(output, Util.get(fst, input));
          nextInput(r, ints);
        }
        System.out.println("\nTEST: enum all input/outputs");
        IntsRefFSTEnum<BytesRef> fstEnum = new IntsRefFSTEnum<>(fst);
        Arrays.fill(ints, 0);
        r = new Random(seed);
        int upto = 0;
        while (true) {
          IntsRefFSTEnum.InputOutput<BytesRef> pair = fstEnum.next();
          if (pair == null) {
            break;
          }
          assertEquals(input, pair.input);
          r.nextBytes(outputBytes);
          assertEquals(output, pair.output);
          upto++;
          nextInput(r, ints);
        }
        assertEquals(count, upto);
        if (verify == 0) {
          System.out.println("\nTEST: save/load FST and re-verify");
          IndexOutput out = dir.createOutput("fst", IOContext.DEFAULT);
          fst.save(out);
          out.close();
          IndexInput in = dir.openInput("fst", IOContext.DEFAULT);
          fst = new FST<>(in, outputs);
          in.close();
        } else {
          dir.deleteFile("fst");
        }
      }
    }
    // Build FST w/ PositiveIntOutputs and stop when FST
    // size = 3GB
    {
      System.out.println("\nTEST: 3 GB size; outputs=long");
      Outputs<Long> outputs = PositiveIntOutputs.getSingleton();
      final Builder<Long> b = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, true, 15);
      long output = 1;
      Arrays.fill(ints, 0);
      int count = 0;
      Random r = new Random(seed);
      while (true) {
        //System.out.println("add: " + input + " -> " + output);
        b.add(input, output);
        output += 1 + r.nextInt(10);
        count++;
        if (count % 1000000 == 0) {
          System.out.println(count + "...: " + b.fstRamBytesUsed() + " bytes");
        }
        if (b.fstRamBytesUsed() > LIMIT) {
          break;
        }
        nextInput(r, ints);
      }
      FST<Long> fst = b.finish();
      for (int verify = 0; verify < 2; verify++) {
        System.out.println("\nTEST: now verify [fst size=" + fst.ramBytesUsed() + "; nodeCount=" + b.getNodeCount() + "; arcCount=" + b.getArcCount() + "]");
        Arrays.fill(ints, 0);
        output = 1;
        r = new Random(seed);
        for (int i = 0; i < count; i++) {
          if (i % 1000000 == 0) {
            System.out.println(i + "...: ");
          }
          // forward lookup:
          assertEquals(output, Util.get(fst, input).longValue());
          // reverse lookup:
          assertEquals(input, Util.getByOutput(fst, output));
          output += 1 + r.nextInt(10);
          nextInput(r, ints);
        }
        System.out.println("\nTEST: enum all input/outputs");
        IntsRefFSTEnum<Long> fstEnum = new IntsRefFSTEnum<>(fst);
        Arrays.fill(ints, 0);
        r = new Random(seed);
        int upto = 0;
        output = 1;
        while (true) {
          IntsRefFSTEnum.InputOutput<Long> pair = fstEnum.next();
          if (pair == null) {
            break;
          }
          assertEquals(input, pair.input);
          assertEquals(output, pair.output.longValue());
          output += 1 + r.nextInt(10);
          upto++;
          nextInput(r, ints);
        }
        assertEquals(count, upto);
        if (verify == 0) {
          System.out.println("\nTEST: save/load FST and re-verify");
          IndexOutput out = dir.createOutput("fst", IOContext.DEFAULT);
          fst.save(out);
          out.close();
          IndexInput in = dir.openInput("fst", IOContext.DEFAULT);
          fst = new FST<>(in, outputs);
          in.close();
        } else {
          dir.deleteFile("fst");
        }
      }
    }
  }
  dir.close();
}
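The test above exercises FSTs at multi-gigabyte scale; a much smaller sketch of the same Builder/Util API (Lucene 6/7-era signatures, with terms and outputs invented for illustration) shows the basic build-then-lookup cycle:
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;

public class SmallFSTSketch {
  public static void main(String[] args) throws Exception {
    PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
    Builder<Long> b = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
    IntsRefBuilder scratch = new IntsRefBuilder();
    String[] terms = { "cat", "dog", "fox" }; // inputs must be added in sorted order
    for (int i = 0; i < terms.length; i++) {
      b.add(Util.toIntsRef(new BytesRef(terms[i]), scratch), (long) (i + 1));
    }
    FST<Long> fst = b.finish();
    // forward lookup, as in the verification loops above
    System.out.println(Util.get(fst, Util.toIntsRef(new BytesRef("dog"), scratch))); // prints 2
  }
}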
use of org.apache.lucene.store.MMapDirectory in project Anserini by castorini.
the class Indexer method StartIndexing.
public static String StartIndexing(String dir) throws IOException {
  FileUtils.deleteDirectory(new File(dir));
  Directory index = new MMapDirectory(Paths.get(dir));
  IndexWriterConfig config = new IndexWriterConfig(ANALYZER);
  indexWriter = new IndexWriter(index, config);
  TRECIndexerRunnable its = new TRECIndexerRunnable(indexWriter);
  itsThread = new Thread(its);
  itsThread.start();
  return dir;
}
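The same pattern without the background indexing thread, as a self-contained sketch (the analyzer, field, and path here are illustrative, not Anserini's):
import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MMapDirectory;

public class MinimalIndexerSketch {
  public static void main(String[] args) throws Exception {
    // open an MMapDirectory-backed IndexWriter, add one document, and commit
    try (Directory index = new MMapDirectory(Paths.get("/tmp/demo-index"));
         IndexWriter writer = new IndexWriter(index, new IndexWriterConfig(new StandardAnalyzer()))) {
      Document doc = new Document();
      doc.add(new TextField("body", "hello lucene", Field.Store.YES));
      writer.addDocument(doc);
      writer.commit();
    }
  }
}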
use of org.apache.lucene.store.MMapDirectory in project Krill by KorAP.
the class TestBenchmarkSpans method checkBenchmark1.
@Test
public void checkBenchmark1() throws IOException {
  Properties prop = new Properties();
  InputStream fr = new FileInputStream(getClass().getResource("/krill.properties").getFile());
  prop.load(fr);
  // Get the real index
  KrillIndex ki = new KrillIndex(new MMapDirectory(Paths.get(prop.getProperty("krill.indexDir"))));
  // Create a container for virtual collections:
  KrillCollection kc = new KrillCollection(ki);
  long t1 = 0, t2 = 0;
  // cosmas20.json!!!
  String json = getString(getClass().getResource("/queries/benchmark1.jsonld").getFile());
  int rounds = 100;
  Result kr = new Result();
  t1 = System.nanoTime();
  for (int i = 1; i <= rounds; i++) {
    kr = new Krill(json).apply(ki);
  }
  t2 = System.nanoTime();
  // assertEquals("TotalResults", 30751, kr.getTotalResults());
  assertEquals("TotalResults", 4803739, kr.getTotalResults());
  // long seconds = (long) (t2 - t1 / 1000) % 60;
  double seconds = (double) (t2 - t1) / 1000000000.0;
  // System.out.println("It took " + seconds + " seconds");
  // 100 times:
  // 43.538 sec
  // 4.874
  // 1000 times:
  // 36.613 sec
  // After refactoring
  // 100 times
  // 273.58114372 seconds
  // After intro of attributes
  // 100 times
  // 350.171506379 seconds
}
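The nanosecond-to-seconds conversion in the benchmark can be written more readably with TimeUnit; a small sketch of the equivalent arithmetic:
import java.util.concurrent.TimeUnit;

public class TimingSketch {
  public static void main(String[] args) {
    long t1 = System.nanoTime();
    // ... work under measurement ...
    long t2 = System.nanoTime();
    // same as dividing by 1000000000.0, but the unit is explicit
    double seconds = (t2 - t1) / (double) TimeUnit.SECONDS.toNanos(1);
    System.out.println("It took " + seconds + " seconds");
  }
}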