use of org.apache.lucene.store.MMapDirectory in project elasticsearch by elastic.
the class FsDirectoryService method setPreload.
private static Directory setPreload(Directory directory, Path location, LockFactory lockFactory, Set<String> preLoadExtensions) throws IOException {
if (preLoadExtensions.isEmpty() == false && directory instanceof MMapDirectory && ((MMapDirectory) directory).getPreload() == false) {
if (preLoadExtensions.contains("*")) {
((MMapDirectory) directory).setPreload(true);
return directory;
}
MMapDirectory primary = new MMapDirectory(location, lockFactory);
primary.setPreload(true);
return new FileSwitchDirectory(preLoadExtensions, primary, directory, true) {
@Override
public String[] listAll() throws IOException {
// avoid listing twice
return primary.listAll();
}
};
}
return directory;
}
use of org.apache.lucene.store.MMapDirectory in project elasticsearch by elastic.
the class IndexStoreTests method doTestStoreDirectory.
private void doTestStoreDirectory(Index index, Path tempDir, String typeSettingValue, IndexModule.Type type) throws IOException {
Settings.Builder settingsBuilder = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT);
if (typeSettingValue != null) {
settingsBuilder.put(IndexModule.INDEX_STORE_TYPE_SETTING.getKey(), typeSettingValue);
}
Settings settings = settingsBuilder.build();
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("foo", settings);
FsDirectoryService service = new FsDirectoryService(indexSettings, null, new ShardPath(false, tempDir, tempDir, new ShardId(index, 0)));
try (Directory directory = service.newFSDirectory(tempDir, NoLockFactory.INSTANCE)) {
switch(type) {
case NIOFS:
assertTrue(type + " " + directory.toString(), directory instanceof NIOFSDirectory);
break;
case MMAPFS:
assertTrue(type + " " + directory.toString(), directory instanceof MMapDirectory);
break;
case SIMPLEFS:
assertTrue(type + " " + directory.toString(), directory instanceof SimpleFSDirectory);
break;
case FS:
if (Constants.JRE_IS_64BIT && MMapDirectory.UNMAP_SUPPORTED) {
assertTrue(directory.toString(), directory instanceof MMapDirectory);
} else if (Constants.WINDOWS) {
assertTrue(directory.toString(), directory instanceof SimpleFSDirectory);
} else {
assertTrue(directory.toString(), directory instanceof NIOFSDirectory);
}
break;
default:
fail();
}
}
}
use of org.apache.lucene.store.MMapDirectory in project lucene-solr by apache.
the class BaseStoredFieldsFormatTestCase method testBigDocuments.
@Nightly
public void testBigDocuments() throws IOException {
assumeWorkingMMapOnWindows();
// "big" as "much bigger than the chunk size"
// for this test we force a FS dir
// we can't just use newFSDirectory, because this test doesn't really index anything.
// so if we get NRTCachingDir+SimpleText, we make massive stored fields and OOM (LUCENE-4484)
Directory dir = new MockDirectoryWrapper(random(), new MMapDirectory(createTempDir("testBigDocuments")));
IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
iwConf.setMaxBufferedDocs(RandomNumbers.randomIntBetween(random(), 2, 30));
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);
if (dir instanceof MockDirectoryWrapper) {
((MockDirectoryWrapper) dir).setThrottling(Throttling.NEVER);
}
// emptyDoc
final Document emptyDoc = new Document();
// lot of small fields
final Document bigDoc1 = new Document();
// 1 very big field
final Document bigDoc2 = new Document();
final Field idField = new StringField("id", "", Store.NO);
emptyDoc.add(idField);
bigDoc1.add(idField);
bigDoc2.add(idField);
final FieldType onlyStored = new FieldType(StringField.TYPE_STORED);
onlyStored.setIndexOptions(IndexOptions.NONE);
final Field smallField = new Field("fld", randomByteArray(random().nextInt(10), 256), onlyStored);
final int numFields = RandomNumbers.randomIntBetween(random(), 500000, 1000000);
for (int i = 0; i < numFields; ++i) {
bigDoc1.add(smallField);
}
final Field bigField = new Field("fld", randomByteArray(RandomNumbers.randomIntBetween(random(), 1000000, 5000000), 2), onlyStored);
bigDoc2.add(bigField);
final int numDocs = atLeast(5);
final Document[] docs = new Document[numDocs];
for (int i = 0; i < numDocs; ++i) {
docs[i] = RandomPicks.randomFrom(random(), Arrays.asList(emptyDoc, bigDoc1, bigDoc2));
}
for (int i = 0; i < numDocs; ++i) {
idField.setStringValue("" + i);
iw.addDocument(docs[i]);
if (random().nextInt(numDocs) == 0) {
iw.commit();
}
}
iw.commit();
// look at what happens when big docs are merged
iw.forceMerge(1);
final DirectoryReader rd = DirectoryReader.open(dir);
final IndexSearcher searcher = new IndexSearcher(rd);
for (int i = 0; i < numDocs; ++i) {
final Query query = new TermQuery(new Term("id", "" + i));
final TopDocs topDocs = searcher.search(query, 1);
assertEquals("" + i, 1, topDocs.totalHits);
final Document doc = rd.document(topDocs.scoreDocs[0].doc);
assertNotNull(doc);
final IndexableField[] fieldValues = doc.getFields("fld");
assertEquals(docs[i].getFields("fld").length, fieldValues.length);
if (fieldValues.length > 0) {
assertEquals(docs[i].getFields("fld")[0].binaryValue(), fieldValues[0].binaryValue());
}
}
rd.close();
iw.close();
dir.close();
}
use of org.apache.lucene.store.MMapDirectory in project lucene-solr by apache.
the class Test4GBStoredFields method test.
@Nightly
public void test() throws Exception {
assumeWorkingMMapOnWindows();
MockDirectoryWrapper dir = new MockDirectoryWrapper(random(), new MMapDirectory(createTempDir("4GBStoredFields")));
dir.setThrottling(MockDirectoryWrapper.Throttling.NEVER);
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
iwc.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
iwc.setRAMBufferSizeMB(256.0);
iwc.setMergeScheduler(new ConcurrentMergeScheduler());
iwc.setMergePolicy(newLogMergePolicy(false, 10));
iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
// maybe we should factor out crazy cases to ExtremeCompressing? then annotations can handle this stuff...
if (random().nextBoolean()) {
iwc.setCodec(CompressingCodec.reasonableInstance(random()));
}
IndexWriter w = new IndexWriter(dir, iwc);
MergePolicy mp = w.getConfig().getMergePolicy();
if (mp instanceof LogByteSizeMergePolicy) {
// 1 petabyte:
((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024 * 1024 * 1024);
}
final Document doc = new Document();
final FieldType ft = new FieldType();
ft.setStored(true);
ft.freeze();
final int valueLength = RandomNumbers.randomIntBetween(random(), 1 << 13, 1 << 20);
final byte[] value = new byte[valueLength];
for (int i = 0; i < valueLength; ++i) {
// random so that even compressing codecs can't compress it
value[i] = (byte) random().nextInt(256);
}
final Field f = new Field("fld", value, ft);
doc.add(f);
final int numDocs = (int) ((1L << 32) / valueLength + 100);
for (int i = 0; i < numDocs; ++i) {
w.addDocument(doc);
if (VERBOSE && i % (numDocs / 10) == 0) {
System.out.println(i + " of " + numDocs + "...");
}
}
w.forceMerge(1);
w.close();
if (VERBOSE) {
boolean found = false;
for (String file : dir.listAll()) {
if (file.endsWith(".fdt")) {
final long fileLength = dir.fileLength(file);
if (fileLength >= 1L << 32) {
found = true;
}
System.out.println("File length of " + file + " : " + fileLength);
}
}
if (!found) {
System.out.println("No .fdt file larger than 4GB, test bug?");
}
}
DirectoryReader rd = DirectoryReader.open(dir);
Document sd = rd.document(numDocs - 1);
assertNotNull(sd);
assertEquals(1, sd.getFields().size());
BytesRef valueRef = sd.getBinaryValue("fld");
assertNotNull(valueRef);
assertEquals(new BytesRef(value), valueRef);
rd.close();
dir.close();
}
use of org.apache.lucene.store.MMapDirectory in project Anserini by castorini.
the class SearchWebCollection method main.
public static void main(String[] args) throws Exception {
SearchArgs searchArgs = new SearchArgs();
CmdLineParser parser = new CmdLineParser(searchArgs, ParserProperties.defaults().withUsageWidth(90));
try {
parser.parseArgument(args);
} catch (CmdLineException e) {
System.err.println(e.getMessage());
parser.printUsage(System.err);
System.err.println("Example: SearchWebCollection" + parser.printExample(OptionHandlerFilter.REQUIRED));
return;
}
LOG.info("Reading index at " + searchArgs.index);
Directory dir;
if (searchArgs.inmem) {
LOG.info("Using MMapDirectory with preload");
dir = new MMapDirectory(Paths.get(searchArgs.index));
((MMapDirectory) dir).setPreload(true);
} else {
LOG.info("Using default FSDirectory");
dir = FSDirectory.open(Paths.get(searchArgs.index));
}
Similarity similarity = null;
if (searchArgs.ql) {
LOG.info("Using QL scoring model");
similarity = new LMDirichletSimilarity(searchArgs.mu);
} else if (searchArgs.bm25) {
LOG.info("Using BM25 scoring model");
similarity = new BM25Similarity(searchArgs.k1, searchArgs.b);
} else {
LOG.error("Error: Must specify scoring model!");
System.exit(-1);
}
RerankerCascade cascade = new RerankerCascade();
boolean useQueryParser = false;
if (searchArgs.rm3) {
cascade.add(new Rm3Reranker(new EnglishAnalyzer(), FIELD_BODY, "src/main/resources/io/anserini/rerank/rm3/rm3-stoplist.gov2.txt"));
useQueryParser = true;
} else {
cascade.add(new IdentityReranker());
}
FeatureExtractors extractors = null;
if (searchArgs.extractors != null) {
extractors = FeatureExtractors.loadExtractor(searchArgs.extractors);
}
if (searchArgs.dumpFeatures) {
PrintStream out = new PrintStream(searchArgs.featureFile);
Qrels qrels = new Qrels(searchArgs.qrels);
cascade.add(new WebCollectionLtrDataGenerator(out, qrels, extractors));
}
Path topicsFile = Paths.get(searchArgs.topics);
if (!Files.exists(topicsFile) || !Files.isRegularFile(topicsFile) || !Files.isReadable(topicsFile)) {
throw new IllegalArgumentException("Topics file : " + topicsFile + " does not exist or is not a (readable) file.");
}
TopicReader tr = (TopicReader) Class.forName("io.anserini.search.query." + searchArgs.topicReader + "TopicReader").getConstructor(Path.class).newInstance(topicsFile);
SortedMap<Integer, String> topics = tr.read();
final long start = System.nanoTime();
SearchWebCollection searcher = new SearchWebCollection(searchArgs.index);
searcher.search(topics, searchArgs.output, similarity, searchArgs.hits, cascade, useQueryParser, searchArgs.keepstop);
searcher.close();
final long durationMillis = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS);
LOG.info("Total " + topics.size() + " topics searched in " + DurationFormatUtils.formatDuration(durationMillis, "HH:mm:ss"));
}
Aggregations