Search in sources :

Example 11 with MMapDirectory

use of org.apache.lucene.store.MMapDirectory in project lucene-solr by apache.

the class MMapDirectoryFactory method create.

/**
 * Opens an {@link MMapDirectory} rooted at {@code path}.
 *
 * <p>The directory is built with the caller-supplied {@code lockFactory} and this
 * factory's {@code maxChunk} setting, then configured from the {@code unmapHack}
 * and {@code preload} fields. {@code dirContext} is not consulted here.
 *
 * @param path        file-system location of the directory
 * @param lockFactory lock factory handed straight to the {@link MMapDirectory} constructor
 * @param dirContext  unused by this implementation
 * @return the configured memory-mapped directory
 * @throws IOException declared by the signature; may be raised while opening the directory
 */
@Override
protected Directory create(String path, LockFactory lockFactory, DirContext dirContext) throws IOException {
    final MMapDirectory directory = new MMapDirectory(new File(path).toPath(), lockFactory, maxChunk);

    // Unmapping is best-effort: if the setter rejects the value, log it and carry on
    // with whatever the directory defaults to.
    try {
        directory.setUseUnmap(unmapHack);
    } catch (IllegalArgumentException e) {
        log.warn("Unmap not supported on this JVM, continuing on without setting unmap", e);
    }

    directory.setPreload(preload);
    return directory;
}
Also used : MMapDirectory(org.apache.lucene.store.MMapDirectory) File(java.io.File)

Example 12 with MMapDirectory

use of org.apache.lucene.store.MMapDirectory in project lucene-solr by apache.

the class Test2BFST method test.

/**
 * Builds three very large FSTs and checks that each one survives lookup, enumeration
 * and a save/load round trip through an {@link MMapDirectory}.
 *
 * <ol>
 *   <li>{@code NoOutputs}: 200-int inputs are added until the builder's node count
 *       exceeds {@code Integer.MAX_VALUE + 100 * 1024 * 1024}.</li>
 *   <li>{@code ByteSequenceOutputs}: 7-int inputs with random 20-byte outputs are added
 *       until {@code fstRamBytesUsed()} exceeds {@code LIMIT}.</li>
 *   <li>{@code PositiveIntOutputs}: 7-int inputs with strictly increasing long outputs
 *       are added until {@code fstRamBytesUsed()} exceeds {@code LIMIT}.</li>
 * </ol>
 *
 * <p>Every phase verifies twice (once on the freshly built FST, once after it has been
 * written to and re-read from the directory) by replaying a {@link Random} created from
 * the same seed, so the sequence of inputs and outputs seen during building is reproduced
 * exactly. The order of calls on {@code r} therefore matters and must not be changed.
 *
 * <p>{@code nextInput} and {@code LIMIT} are defined elsewhere in the enclosing class;
 * {@code nextInput} presumably advances the int array to the next input to add
 * (NOTE(review): confirm against its definition).
 *
 * @throws Exception if any FST or directory operation fails
 */
public void test() throws Exception {
    assumeWorkingMMapOnWindows();
    int[] ints = new int[7];
    IntsRef input = new IntsRef(ints, 0, ints.length);
    long seed = random().nextLong();
    // try-with-resources: the directory must be closed even when an assertion or an
    // I/O call fails part-way through this very long test.
    try (Directory dir = new MMapDirectory(createTempDir("2BFST"))) {
        for (int iter = 0; iter < 1; iter++) {
            // Build FST w/ NoOutputs and stop when nodeCount > 2.2B
            {
                System.out.println("\nTEST: 3B nodes; doPack=false output=NO_OUTPUTS");
                Outputs<Object> outputs = NoOutputs.getSingleton();
                Object NO_OUTPUT = outputs.getNoOutput();
                final Builder<Object> b = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, true, 15);
                int count = 0;
                Random r = new Random(seed);
                int[] ints2 = new int[200];
                IntsRef input2 = new IntsRef(ints2, 0, ints2.length);
                while (true) {
                    //System.out.println("add: " + input + " -> " + output);
                    // Positions 10 and up are random filler; nextInput() handles the array afterwards.
                    for (int i = 10; i < ints2.length; i++) {
                        ints2[i] = r.nextInt(256);
                    }
                    b.add(input2, NO_OUTPUT);
                    count++;
                    if (count % 100000 == 0) {
                        System.out.println(count + ": " + b.fstRamBytesUsed() + " bytes; " + b.getNodeCount() + " nodes");
                    }
                    // 100L forces long arithmetic so the threshold does not overflow int.
                    if (b.getNodeCount() > Integer.MAX_VALUE + 100L * 1024 * 1024) {
                        break;
                    }
                    nextInput(r, ints2);
                }
                FST<Object> fst = b.finish();
                for (int verify = 0; verify < 2; verify++) {
                    System.out.println("\nTEST: now verify [fst size=" + fst.ramBytesUsed() + "; nodeCount=" + b.getNodeCount() + "; arcCount=" + b.getArcCount() + "]");
                    // Reset the input and replay the same random sequence used while building.
                    Arrays.fill(ints2, 0);
                    r = new Random(seed);
                    for (int i = 0; i < count; i++) {
                        if (i % 1000000 == 0) {
                            System.out.println(i + "...: ");
                        }
                        for (int j = 10; j < ints2.length; j++) {
                            ints2[j] = r.nextInt(256);
                        }
                        assertEquals(NO_OUTPUT, Util.get(fst, input2));
                        nextInput(r, ints2);
                    }
                    System.out.println("\nTEST: enum all input/outputs");
                    IntsRefFSTEnum<Object> fstEnum = new IntsRefFSTEnum<>(fst);
                    Arrays.fill(ints2, 0);
                    r = new Random(seed);
                    int upto = 0;
                    while (true) {
                        IntsRefFSTEnum.InputOutput<Object> pair = fstEnum.next();
                        if (pair == null) {
                            break;
                        }
                        for (int j = 10; j < ints2.length; j++) {
                            ints2[j] = r.nextInt(256);
                        }
                        assertEquals(input2, pair.input);
                        assertEquals(NO_OUTPUT, pair.output);
                        upto++;
                        nextInput(r, ints2);
                    }
                    assertEquals(count, upto);
                    if (verify == 0) {
                        // First pass done on the in-memory FST; second pass runs on the reloaded copy.
                        System.out.println("\nTEST: save/load FST and re-verify");
                        fst = saveAndReload(dir, fst, outputs);
                    } else {
                        dir.deleteFile("fst");
                    }
                }
            }
            // Build FST w/ ByteSequenceOutputs and stop when FST
            // size = 3GB
            {
                System.out.println("\nTEST: 3 GB size; outputs=bytes");
                Outputs<BytesRef> outputs = ByteSequenceOutputs.getSingleton();
                final Builder<BytesRef> b = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, true, 15);
                byte[] outputBytes = new byte[20];
                // 'output' wraps outputBytes, so refilling the array changes what 'output' compares equal to.
                BytesRef output = new BytesRef(outputBytes);
                Arrays.fill(ints, 0);
                int count = 0;
                Random r = new Random(seed);
                while (true) {
                    r.nextBytes(outputBytes);
                    //System.out.println("add: " + input + " -> " + output);
                    // Deep copy: the builder must not see later mutations of outputBytes.
                    b.add(input, BytesRef.deepCopyOf(output));
                    count++;
                    if (count % 1000000 == 0) {
                        System.out.println(count + "...: " + b.fstRamBytesUsed() + " bytes");
                    }
                    if (b.fstRamBytesUsed() > LIMIT) {
                        break;
                    }
                    nextInput(r, ints);
                }
                FST<BytesRef> fst = b.finish();
                for (int verify = 0; verify < 2; verify++) {
                    System.out.println("\nTEST: now verify [fst size=" + fst.ramBytesUsed() + "; nodeCount=" + b.getNodeCount() + "; arcCount=" + b.getArcCount() + "]");
                    r = new Random(seed);
                    Arrays.fill(ints, 0);
                    for (int i = 0; i < count; i++) {
                        if (i % 1000000 == 0) {
                            System.out.println(i + "...: ");
                        }
                        r.nextBytes(outputBytes);
                        assertEquals(output, Util.get(fst, input));
                        nextInput(r, ints);
                    }
                    System.out.println("\nTEST: enum all input/outputs");
                    IntsRefFSTEnum<BytesRef> fstEnum = new IntsRefFSTEnum<>(fst);
                    Arrays.fill(ints, 0);
                    r = new Random(seed);
                    int upto = 0;
                    while (true) {
                        IntsRefFSTEnum.InputOutput<BytesRef> pair = fstEnum.next();
                        if (pair == null) {
                            break;
                        }
                        assertEquals(input, pair.input);
                        r.nextBytes(outputBytes);
                        assertEquals(output, pair.output);
                        upto++;
                        nextInput(r, ints);
                    }
                    assertEquals(count, upto);
                    if (verify == 0) {
                        System.out.println("\nTEST: save/load FST and re-verify");
                        fst = saveAndReload(dir, fst, outputs);
                    } else {
                        dir.deleteFile("fst");
                    }
                }
            }
            // Build FST w/ PositiveIntOutputs and stop when FST
            // size = 3GB
            {
                System.out.println("\nTEST: 3 GB size; outputs=long");
                Outputs<Long> outputs = PositiveIntOutputs.getSingleton();
                final Builder<Long> b = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, true, 15);
                long output = 1;
                Arrays.fill(ints, 0);
                int count = 0;
                Random r = new Random(seed);
                while (true) {
                    //System.out.println("add: " + input + " -> " + output);
                    b.add(input, output);
                    // Strictly increasing outputs, which the reverse lookup below relies on.
                    output += 1 + r.nextInt(10);
                    count++;
                    if (count % 1000000 == 0) {
                        System.out.println(count + "...: " + b.fstRamBytesUsed() + " bytes");
                    }
                    if (b.fstRamBytesUsed() > LIMIT) {
                        break;
                    }
                    nextInput(r, ints);
                }
                FST<Long> fst = b.finish();
                for (int verify = 0; verify < 2; verify++) {
                    System.out.println("\nTEST: now verify [fst size=" + fst.ramBytesUsed() + "; nodeCount=" + b.getNodeCount() + "; arcCount=" + b.getArcCount() + "]");
                    Arrays.fill(ints, 0);
                    output = 1;
                    r = new Random(seed);
                    for (int i = 0; i < count; i++) {
                        if (i % 1000000 == 0) {
                            System.out.println(i + "...: ");
                        }
                        // forward lookup:
                        assertEquals(output, Util.get(fst, input).longValue());
                        // reverse lookup:
                        assertEquals(input, Util.getByOutput(fst, output));
                        output += 1 + r.nextInt(10);
                        nextInput(r, ints);
                    }
                    System.out.println("\nTEST: enum all input/outputs");
                    IntsRefFSTEnum<Long> fstEnum = new IntsRefFSTEnum<>(fst);
                    Arrays.fill(ints, 0);
                    r = new Random(seed);
                    int upto = 0;
                    output = 1;
                    while (true) {
                        IntsRefFSTEnum.InputOutput<Long> pair = fstEnum.next();
                        if (pair == null) {
                            break;
                        }
                        assertEquals(input, pair.input);
                        assertEquals(output, pair.output.longValue());
                        output += 1 + r.nextInt(10);
                        upto++;
                        nextInput(r, ints);
                    }
                    assertEquals(count, upto);
                    if (verify == 0) {
                        System.out.println("\nTEST: save/load FST and re-verify");
                        fst = saveAndReload(dir, fst, outputs);
                    } else {
                        dir.deleteFile("fst");
                    }
                }
            }
        }
    }
}

/**
 * Writes {@code fst} to the file {@code "fst"} in {@code dir} and reads it back,
 * closing both streams even if saving or loading throws.
 *
 * @param dir     directory that receives (and then supplies) the {@code "fst"} file
 * @param fst     the FST to serialize
 * @param outputs outputs implementation used to deserialize the FST
 * @return a new FST instance loaded from the file that was just written
 * @throws Exception if writing or reading fails
 */
private static <T> FST<T> saveAndReload(Directory dir, FST<T> fst, Outputs<T> outputs) throws Exception {
    try (IndexOutput out = dir.createOutput("fst", IOContext.DEFAULT)) {
        fst.save(out);
    }
    try (IndexInput in = dir.openInput("fst", IOContext.DEFAULT)) {
        return new FST<>(in, outputs);
    }
}
Also used : IndexOutput(org.apache.lucene.store.IndexOutput) MMapDirectory(org.apache.lucene.store.MMapDirectory) Random(java.util.Random) IndexInput(org.apache.lucene.store.IndexInput) IntsRef(org.apache.lucene.util.IntsRef) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 13 with MMapDirectory

use of org.apache.lucene.store.MMapDirectory in project Anserini by castorini.

the class Indexer method StartIndexing.

/**
 * Wipes {@code dir}, opens a fresh index there and launches the background indexer.
 *
 * <p>Steps, in order: delete the directory at {@code dir}; open an {@link MMapDirectory}
 * over the same path; create an {@link IndexWriter} configured with {@code ANALYZER} and
 * store it in the static {@code indexWriter} field; wrap that writer in a
 * {@code TRECIndexerRunnable}, store the new thread in the static {@code itsThread} field
 * and start it. The method returns without waiting for the thread.
 *
 * @param dir path of the index directory; any existing content is deleted first
 * @return {@code dir}, unchanged
 * @throws IOException if the directory cannot be deleted or the index cannot be opened
 */
public static String StartIndexing(String dir) throws IOException {
    // Start from a clean slate so no earlier index content survives.
    FileUtils.deleteDirectory(new File(dir));

    final Directory directory = new MMapDirectory(Paths.get(dir));
    indexWriter = new IndexWriter(directory, new IndexWriterConfig(ANALYZER));

    // Indexing runs on its own thread; the handle is kept in a static field.
    itsThread = new Thread(new TRECIndexerRunnable(indexWriter));
    itsThread.start();
    return dir;
}
Also used : IndexWriter(org.apache.lucene.index.IndexWriter) File(java.io.File) MMapDirectory(org.apache.lucene.store.MMapDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

MMapDirectory (org.apache.lucene.store.MMapDirectory)13 Directory (org.apache.lucene.store.Directory)7 Path (java.nio.file.Path)4 FSDirectory (org.apache.lucene.store.FSDirectory)4 NIOFSDirectory (org.apache.lucene.store.NIOFSDirectory)4 SimpleFSDirectory (org.apache.lucene.store.SimpleFSDirectory)4 File (java.io.File)3 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)3 Document (org.apache.lucene.document.Document)3 FeatureExtractors (io.anserini.ltr.feature.FeatureExtractors)2 RerankerCascade (io.anserini.rerank.RerankerCascade)2 Rm3Reranker (io.anserini.rerank.rm3.Rm3Reranker)2 Qrels (io.anserini.util.Qrels)2 PrintStream (java.io.PrintStream)2 Field (org.apache.lucene.document.Field)2 FieldType (org.apache.lucene.document.FieldType)2 BM25Similarity (org.apache.lucene.search.similarities.BM25Similarity)2 LMDirichletSimilarity (org.apache.lucene.search.similarities.LMDirichletSimilarity)2 MockDirectoryWrapper (org.apache.lucene.store.MockDirectoryWrapper)2 CmdLineException (org.kohsuke.args4j.CmdLineException)2