Search in sources:

Example 16 with MMapDirectory

use of org.apache.lucene.store.MMapDirectory in project lucene-solr by apache.

The create method of the MMapDirectoryFactory class:

@Override
protected Directory create(String path, LockFactory lockFactory, DirContext dirContext) throws IOException {
    // Build an mmap-backed directory at the given filesystem path, forwarding the
    // supplied lock factory and the factory-configured maximum chunk size.
    final MMapDirectory directory = new MMapDirectory(new File(path).toPath(), lockFactory, maxChunk);
    try {
        // Unmap support is JVM-dependent; degrade gracefully when unavailable.
        directory.setUseUnmap(unmapHack);
    } catch (IllegalArgumentException e) {
        log.warn("Unmap not supported on this JVM, continuing on without setting unmap", e);
    }
    directory.setPreload(preload);
    return directory;
}
Also used : MMapDirectory(org.apache.lucene.store.MMapDirectory) File(java.io.File)

Example 17 with MMapDirectory

use of org.apache.lucene.store.MMapDirectory in project lucene-solr by apache.

The testDeleteUnusedFiles method of the TestIndexWriter class:

/**
 * Checks that index files referenced by an open reader survive merges and
 * deleteUnusedFiles(), and are removed only after the reader is closed.
 * Iteration 0 exercises an NRT reader (closing it triggers the delete);
 * iteration 1 exercises a committed index opened via DirectoryReader.open,
 * followed by an explicit deletePendingFiles(). WindowsFS simulates Windows
 * open-file-handle deletion semantics on non-Windows hosts.
 */
public void testDeleteUnusedFiles() throws Exception {
    assumeFalse("test relies on exact filenames", Codec.getDefault() instanceof SimpleTextCodec);
    assumeWorkingMMapOnWindows();
    for (int iter = 0; iter < 2; iter++) {
        // relies on windows semantics
        Path path = createTempDir();
        FileSystem fs = new WindowsFS(path.getFileSystem()).getFileSystem(URI.create("file:///"));
        Path indexPath = new FilterPath(path, fs);
        // NOTE: on Unix, we cannot use MMapDir, because WindowsFS doesn't see/think it keeps file handles open.  Yet, on Windows, we MUST use
        // MMapDir because the windows OS will in fact prevent file deletion for us, and fails otherwise:
        FSDirectory dir;
        if (Constants.WINDOWS) {
            dir = new MMapDirectory(indexPath);
        } else {
            dir = new NIOFSDirectory(indexPath);
        }
        MergePolicy mergePolicy = newLogMergePolicy(true);
        // This test expects all of its segments to be in CFS
        mergePolicy.setNoCFSRatio(1.0);
        mergePolicy.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
        IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(mergePolicy).setUseCompoundFile(true));
        Document doc = new Document();
        doc.add(newTextField("field", "go", Field.Store.NO));
        w.addDocument(doc);
        DirectoryReader r;
        if (iter == 0) {
            // use NRT
            r = w.getReader();
        } else {
            // don't use NRT
            w.commit();
            r = DirectoryReader.open(dir);
        }
        // First segment's compound-file trio must be on disk.
        assertTrue(Files.exists(indexPath.resolve("_0.cfs")));
        assertTrue(Files.exists(indexPath.resolve("_0.cfe")));
        assertTrue(Files.exists(indexPath.resolve("_0.si")));
        if (iter == 1) {
            // we run a full commit so there should be a segments file etc.
            assertTrue(Files.exists(indexPath.resolve("segments_1")));
        } else {
            // this is an NRT reopen - no segments files yet
            assertFalse(Files.exists(indexPath.resolve("segments_1")));
        }
        // Second doc plus forceMerge(1) merges _0 away into a new segment.
        w.addDocument(doc);
        w.forceMerge(1);
        if (iter == 1) {
            w.commit();
        }
        IndexReader r2 = DirectoryReader.openIfChanged(r);
        assertNotNull(r2);
        assertTrue(r != r2);
        // NOTE: here we rely on "Windows" behavior, ie, even
        // though IW wanted to delete _0.cfs since it was
        // merged away, because we have a reader open
        // against this file, it should still be here:
        assertTrue(Files.exists(indexPath.resolve("_0.cfs")));
        // forceMerge created this
        //assertTrue(files.contains("_2.cfs"));
        w.deleteUnusedFiles();
        // r still holds this file open
        assertTrue(Files.exists(indexPath.resolve("_0.cfs")));
        //assertTrue(files.contains("_2.cfs"));
        r.close();
        if (iter == 0) {
            // on closing NRT reader, it calls writer.deleteUnusedFiles
            assertFalse(Files.exists(indexPath.resolve("_0.cfs")));
        } else {
            // now FSDir can remove it
            dir.deletePendingFiles();
            assertFalse(Files.exists(indexPath.resolve("_0.cfs")));
        }
        w.close();
        r2.close();
        dir.close();
    }
}
Also used : FilterPath(org.apache.lucene.mockfile.FilterPath) Path(java.nio.file.Path) NIOFSDirectory(org.apache.lucene.store.NIOFSDirectory) FilterPath(org.apache.lucene.mockfile.FilterPath) SimpleTextCodec(org.apache.lucene.codecs.simpletext.SimpleTextCodec) FSDirectory(org.apache.lucene.store.FSDirectory) SimpleFSDirectory(org.apache.lucene.store.SimpleFSDirectory) NIOFSDirectory(org.apache.lucene.store.NIOFSDirectory) Document(org.apache.lucene.document.Document) MMapDirectory(org.apache.lucene.store.MMapDirectory) WindowsFS(org.apache.lucene.mockfile.WindowsFS) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) FileSystem(java.nio.file.FileSystem)

Example 18 with MMapDirectory

use of org.apache.lucene.store.MMapDirectory in project lucene-solr by apache.

The test method of the Test2BFST class:

/**
 * Stress test: builds FSTs past the 2B-node / multi-GB range using three
 * output types (NoOutputs, ByteSequenceOutputs, PositiveIntOutputs), all
 * driven by the same random seed so builds and verifications replay the
 * identical input sequence. Each FST is verified twice: pass 0 checks the
 * in-memory FST and then saves it to the MMapDirectory; pass 1 checks the
 * copy reloaded from disk.
 */
public void test() throws Exception {
    assumeWorkingMMapOnWindows();
    int[] ints = new int[7];
    IntsRef input = new IntsRef(ints, 0, ints.length);
    // Single seed reused everywhere so the random input stream is reproducible.
    long seed = random().nextLong();
    Directory dir = new MMapDirectory(createTempDir("2BFST"));
    for (int iter = 0; iter < 1; iter++) {
        // Build FST w/ NoOutputs and stop when nodeCount > 2.2B
        {
            System.out.println("\nTEST: 3B nodes; doPack=false output=NO_OUTPUTS");
            Outputs<Object> outputs = NoOutputs.getSingleton();
            Object NO_OUTPUT = outputs.getNoOutput();
            final Builder<Object> b = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, true, 15);
            int count = 0;
            Random r = new Random(seed);
            int[] ints2 = new int[200];
            IntsRef input2 = new IntsRef(ints2, 0, ints2.length);
            while (true) {
                //System.out.println("add: " + input + " -> " + output);
                for (int i = 10; i < ints2.length; i++) {
                    ints2[i] = r.nextInt(256);
                }
                b.add(input2, NO_OUTPUT);
                count++;
                if (count % 100000 == 0) {
                    System.out.println(count + ": " + b.fstRamBytesUsed() + " bytes; " + b.getNodeCount() + " nodes");
                }
                // Stop once the node count clears Integer.MAX_VALUE by ~100M.
                if (b.getNodeCount() > Integer.MAX_VALUE + 100L * 1024 * 1024) {
                    break;
                }
                nextInput(r, ints2);
            }
            FST<Object> fst = b.finish();
            // Pass 0 verifies the freshly built FST then saves it; pass 1
            // verifies the copy reloaded from the directory.
            for (int verify = 0; verify < 2; verify++) {
                System.out.println("\nTEST: now verify [fst size=" + fst.ramBytesUsed() + "; nodeCount=" + b.getNodeCount() + "; arcCount=" + b.getArcCount() + "]");
                Arrays.fill(ints2, 0);
                // Re-seed to replay the exact input sequence used during the build.
                r = new Random(seed);
                for (int i = 0; i < count; i++) {
                    if (i % 1000000 == 0) {
                        System.out.println(i + "...: ");
                    }
                    for (int j = 10; j < ints2.length; j++) {
                        ints2[j] = r.nextInt(256);
                    }
                    assertEquals(NO_OUTPUT, Util.get(fst, input2));
                    nextInput(r, ints2);
                }
                System.out.println("\nTEST: enum all input/outputs");
                IntsRefFSTEnum<Object> fstEnum = new IntsRefFSTEnum<>(fst);
                Arrays.fill(ints2, 0);
                r = new Random(seed);
                int upto = 0;
                while (true) {
                    IntsRefFSTEnum.InputOutput<Object> pair = fstEnum.next();
                    if (pair == null) {
                        break;
                    }
                    for (int j = 10; j < ints2.length; j++) {
                        ints2[j] = r.nextInt(256);
                    }
                    assertEquals(input2, pair.input);
                    assertEquals(NO_OUTPUT, pair.output);
                    upto++;
                    nextInput(r, ints2);
                }
                assertEquals(count, upto);
                if (verify == 0) {
                    System.out.println("\nTEST: save/load FST and re-verify");
                    IndexOutput out = dir.createOutput("fst", IOContext.DEFAULT);
                    fst.save(out);
                    out.close();
                    IndexInput in = dir.openInput("fst", IOContext.DEFAULT);
                    fst = new FST<>(in, outputs);
                    in.close();
                } else {
                    dir.deleteFile("fst");
                }
            }
        }
        // Build FST w/ ByteSequenceOutputs and stop when FST
        // size = 3GB
        {
            System.out.println("\nTEST: 3 GB size; outputs=bytes");
            Outputs<BytesRef> outputs = ByteSequenceOutputs.getSingleton();
            final Builder<BytesRef> b = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, true, 15);
            byte[] outputBytes = new byte[20];
            BytesRef output = new BytesRef(outputBytes);
            Arrays.fill(ints, 0);
            int count = 0;
            Random r = new Random(seed);
            while (true) {
                r.nextBytes(outputBytes);
                //System.out.println("add: " + input + " -> " + output);
                b.add(input, BytesRef.deepCopyOf(output));
                count++;
                if (count % 1000000 == 0) {
                    System.out.println(count + "...: " + b.fstRamBytesUsed() + " bytes");
                }
                if (b.fstRamBytesUsed() > LIMIT) {
                    break;
                }
                nextInput(r, ints);
            }
            FST<BytesRef> fst = b.finish();
            // Same two-pass verify as above: in-memory first, reloaded second.
            for (int verify = 0; verify < 2; verify++) {
                System.out.println("\nTEST: now verify [fst size=" + fst.ramBytesUsed() + "; nodeCount=" + b.getNodeCount() + "; arcCount=" + b.getArcCount() + "]");
                r = new Random(seed);
                Arrays.fill(ints, 0);
                for (int i = 0; i < count; i++) {
                    if (i % 1000000 == 0) {
                        System.out.println(i + "...: ");
                    }
                    r.nextBytes(outputBytes);
                    assertEquals(output, Util.get(fst, input));
                    nextInput(r, ints);
                }
                System.out.println("\nTEST: enum all input/outputs");
                IntsRefFSTEnum<BytesRef> fstEnum = new IntsRefFSTEnum<>(fst);
                Arrays.fill(ints, 0);
                r = new Random(seed);
                int upto = 0;
                while (true) {
                    IntsRefFSTEnum.InputOutput<BytesRef> pair = fstEnum.next();
                    if (pair == null) {
                        break;
                    }
                    assertEquals(input, pair.input);
                    r.nextBytes(outputBytes);
                    assertEquals(output, pair.output);
                    upto++;
                    nextInput(r, ints);
                }
                assertEquals(count, upto);
                if (verify == 0) {
                    System.out.println("\nTEST: save/load FST and re-verify");
                    IndexOutput out = dir.createOutput("fst", IOContext.DEFAULT);
                    fst.save(out);
                    out.close();
                    IndexInput in = dir.openInput("fst", IOContext.DEFAULT);
                    fst = new FST<>(in, outputs);
                    in.close();
                } else {
                    dir.deleteFile("fst");
                }
            }
        }
        // Build FST w/ PositiveIntOutputs and stop when FST
        // size = 3GB
        {
            System.out.println("\nTEST: 3 GB size; outputs=long");
            Outputs<Long> outputs = PositiveIntOutputs.getSingleton();
            final Builder<Long> b = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, true, 15);
            long output = 1;
            Arrays.fill(ints, 0);
            int count = 0;
            Random r = new Random(seed);
            while (true) {
                //System.out.println("add: " + input + " -> " + output);
                b.add(input, output);
                // Strictly increasing outputs make reverse lookup well-defined.
                output += 1 + r.nextInt(10);
                count++;
                if (count % 1000000 == 0) {
                    System.out.println(count + "...: " + b.fstRamBytesUsed() + " bytes");
                }
                if (b.fstRamBytesUsed() > LIMIT) {
                    break;
                }
                nextInput(r, ints);
            }
            FST<Long> fst = b.finish();
            for (int verify = 0; verify < 2; verify++) {
                System.out.println("\nTEST: now verify [fst size=" + fst.ramBytesUsed() + "; nodeCount=" + b.getNodeCount() + "; arcCount=" + b.getArcCount() + "]");
                Arrays.fill(ints, 0);
                output = 1;
                r = new Random(seed);
                for (int i = 0; i < count; i++) {
                    if (i % 1000000 == 0) {
                        System.out.println(i + "...: ");
                    }
                    // forward lookup:
                    assertEquals(output, Util.get(fst, input).longValue());
                    // reverse lookup:
                    assertEquals(input, Util.getByOutput(fst, output));
                    output += 1 + r.nextInt(10);
                    nextInput(r, ints);
                }
                System.out.println("\nTEST: enum all input/outputs");
                IntsRefFSTEnum<Long> fstEnum = new IntsRefFSTEnum<>(fst);
                Arrays.fill(ints, 0);
                r = new Random(seed);
                int upto = 0;
                output = 1;
                while (true) {
                    IntsRefFSTEnum.InputOutput<Long> pair = fstEnum.next();
                    if (pair == null) {
                        break;
                    }
                    assertEquals(input, pair.input);
                    assertEquals(output, pair.output.longValue());
                    output += 1 + r.nextInt(10);
                    upto++;
                    nextInput(r, ints);
                }
                assertEquals(count, upto);
                if (verify == 0) {
                    System.out.println("\nTEST: save/load FST and re-verify");
                    IndexOutput out = dir.createOutput("fst", IOContext.DEFAULT);
                    fst.save(out);
                    out.close();
                    IndexInput in = dir.openInput("fst", IOContext.DEFAULT);
                    fst = new FST<>(in, outputs);
                    in.close();
                } else {
                    dir.deleteFile("fst");
                }
            }
        }
    }
    dir.close();
}
Also used : IndexOutput(org.apache.lucene.store.IndexOutput) MMapDirectory(org.apache.lucene.store.MMapDirectory) Random(java.util.Random) IndexInput(org.apache.lucene.store.IndexInput) IntsRef(org.apache.lucene.util.IntsRef) BytesRef(org.apache.lucene.util.BytesRef) MMapDirectory(org.apache.lucene.store.MMapDirectory) Directory(org.apache.lucene.store.Directory)

Example 19 with MMapDirectory

use of org.apache.lucene.store.MMapDirectory in project Anserini by castorini.

The StartIndexing method of the Indexer class:

/**
 * Wipes any existing index at {@code dir}, opens a fresh mmap-backed index
 * there, and launches TREC indexing on a background thread. The writer and
 * thread are stored in the class-level {@code indexWriter} / {@code itsThread}
 * fields (assumed static — TODO confirm against the enclosing class).
 *
 * @param dir filesystem path for the index
 * @return the same {@code dir} that was passed in
 * @throws IOException if deleting the old directory or opening the index fails
 */
public static String StartIndexing(String dir) throws IOException {
    // Start from a clean slate: remove any stale index files.
    FileUtils.deleteDirectory(new File(dir));
    Directory indexDirectory = new MMapDirectory(Paths.get(dir));
    IndexWriterConfig writerConfig = new IndexWriterConfig(ANALYZER);
    indexWriter = new IndexWriter(indexDirectory, writerConfig);
    // Indexing runs asynchronously; this method returns immediately.
    TRECIndexerRunnable indexerTask = new TRECIndexerRunnable(indexWriter);
    itsThread = new Thread(indexerTask);
    itsThread.start();
    return dir;
}
Also used : IndexWriter(org.apache.lucene.index.IndexWriter) File(java.io.File) MMapDirectory(org.apache.lucene.store.MMapDirectory) MMapDirectory(org.apache.lucene.store.MMapDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 20 with MMapDirectory

use of org.apache.lucene.store.MMapDirectory in project Krill by KorAP.

The checkBenchmark1 method of the TestBenchmarkSpans class:

@Test
public void checkBenchmark1() throws IOException {
    Properties prop = new Properties();
    // try-with-resources: the original leaked this FileInputStream.
    try (InputStream fr = new FileInputStream(getClass().getResource("/krill.properties").getFile())) {
        prop.load(fr);
    }
    // Get the real index
    KrillIndex ki = new KrillIndex(new MMapDirectory(Paths.get(prop.getProperty("krill.indexDir"))));
    // Create a container for virtual collections:
    KrillCollection kc = new KrillCollection(ki);
    long t1 = 0, t2 = 0;
    // Query taken from cosmas20.json
    String json = getString(getClass().getResource("/queries/benchmark1.jsonld").getFile());
    int rounds = 100;
    Result kr = new Result();
    t1 = System.nanoTime();
    for (int i = 1; i <= rounds; i++) {
        kr = new Krill(json).apply(ki);
    }
    t2 = System.nanoTime();
    // JUnit assertEquals is (message, expected, actual); the original had
    // expected and actual swapped, which garbles the failure message.
    assertEquals("TotalResults", 4803739, kr.getTotalResults());
    // Elapsed wall-clock seconds; only consumed by the manual print below.
    double seconds = (double) (t2 - t1) / 1000000000.0;
    // System.out.println("It took " + seconds + " seconds");
    // Historical timings — 100 rounds: 43.538s, later 4.874s; 1000 rounds: 36.613s;
    // after refactoring (100 rounds): 273.58s; after intro of attributes: 350.17s.
}
Also used : Krill(de.ids_mannheim.korap.Krill) KrillIndex(de.ids_mannheim.korap.KrillIndex) MMapDirectory(org.apache.lucene.store.MMapDirectory) KrillCollection(de.ids_mannheim.korap.KrillCollection) Result(de.ids_mannheim.korap.response.Result) Test(org.junit.Test)

Aggregations

MMapDirectory (org.apache.lucene.store.MMapDirectory)25 Directory (org.apache.lucene.store.Directory)10 NIOFSDirectory (org.apache.lucene.store.NIOFSDirectory)8 SimpleFSDirectory (org.apache.lucene.store.SimpleFSDirectory)8 KrillIndex (de.ids_mannheim.korap.KrillIndex)6 Test (org.junit.Test)6 Krill (de.ids_mannheim.korap.Krill)5 KrillCollection (de.ids_mannheim.korap.KrillCollection)5 Result (de.ids_mannheim.korap.response.Result)5 Path (java.nio.file.Path)5 FSDirectory (org.apache.lucene.store.FSDirectory)5 Settings (org.elasticsearch.common.settings.Settings)5 IndexSettings (org.elasticsearch.index.IndexSettings)5 Document (org.apache.lucene.document.Document)4 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)3 Field (org.apache.lucene.document.Field)3 FeatureExtractors (io.anserini.ltr.feature.FeatureExtractors)2 RerankerCascade (io.anserini.rerank.RerankerCascade)2 Rm3Reranker (io.anserini.rerank.rm3.Rm3Reranker)2 Qrels (io.anserini.util.Qrels)2