Usage of org.apache.lucene.index.ConcurrentMergeScheduler in the lucene-solr project by Apache: class TestSearcherManager, method testReferenceDecrementIllegally.
public void testReferenceDecrementIllegally() throws Exception {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir,
      newIndexWriterConfig(new MockAnalyzer(random()))
          .setMergeScheduler(new ConcurrentMergeScheduler()));
  SearcherManager sm = new SearcherManager(writer, false, false, new SearcherFactory());
  writer.addDocument(new Document());
  writer.commit();
  sm.maybeRefreshBlocking();

  // Balanced acquire/release pairs are perfectly legal.
  IndexSearcher first = sm.acquire();
  IndexSearcher second = sm.acquire();
  sm.release(first);
  sm.release(second);

  // Now decRef the underlying reader behind the manager's back, so the
  // manager's own release drops the reference count one step too far.
  first = sm.acquire();
  first.getIndexReader().decRef();
  sm.release(first);

  // After the illegal decrement, any further acquire must fail.
  expectThrows(IllegalStateException.class, () -> {
    sm.acquire();
  });

  // sm.close(); -- already closed
  writer.close();
  dir.close();
}
Usage of org.apache.lucene.index.ConcurrentMergeScheduler in the lucene-solr project by Apache: class CreateIndexTask, method createWriterConfig.
/**
 * Builds an {@link IndexWriterConfig} for a benchmark run, driven entirely by
 * properties read from {@code config}.
 *
 * <p>Recognized properties (with defaults): {@code merge.scheduler}
 * (ConcurrentMergeScheduler), {@code concurrent.merge.scheduler.max.thread.count},
 * {@code concurrent.merge.scheduler.max.merge.count}, {@code default.codec},
 * {@code codec.postingsFormat}, {@code merge.policy} (LogByteSizeMergePolicy),
 * {@code compound} (true), {@code merge.factor}, {@code ram.flush.mb},
 * {@code max.buffered}.
 *
 * @param config  benchmark configuration supplying the properties above
 * @param runData per-run data; supplies the analyzer
 * @param mode    open mode for the writer (create/append)
 * @param commit  optional index commit to open from; ignored when null
 * @return the fully configured {@link IndexWriterConfig}
 * @throws RuntimeException if a configured scheduler/policy/codec class cannot
 *         be loaded or instantiated reflectively
 */
public static IndexWriterConfig createWriterConfig(Config config, PerfRunData runData, OpenMode mode, IndexCommit commit) {
  // The Analyzer-taking constructor is deprecated in some Lucene versions; kept for compatibility.
  @SuppressWarnings("deprecation") IndexWriterConfig iwConf = new IndexWriterConfig(runData.getAnalyzer());
  iwConf.setOpenMode(mode);
  IndexDeletionPolicy indexDeletionPolicy = getIndexDeletionPolicy(config);
  iwConf.setIndexDeletionPolicy(indexDeletionPolicy);
  if (commit != null) {
    iwConf.setIndexCommit(commit);
  }

  // Merge scheduler: NoMergeScheduler is a singleton; anything else is instantiated reflectively.
  final String mergeScheduler = config.get("merge.scheduler", "org.apache.lucene.index.ConcurrentMergeScheduler");
  if (mergeScheduler.equals(NoMergeScheduler.class.getName())) {
    iwConf.setMergeScheduler(NoMergeScheduler.INSTANCE);
  } else {
    try {
      // getConstructor().newInstance() replaces the deprecated Class.newInstance(),
      // which propagates undeclared checked exceptions; both paths are covered by
      // the catch (Exception) below.
      iwConf.setMergeScheduler(Class.forName(mergeScheduler).asSubclass(MergeScheduler.class).getConstructor().newInstance());
    } catch (Exception e) {
      throw new RuntimeException("unable to instantiate class '" + mergeScheduler + "' as merge scheduler", e);
    }
    if (mergeScheduler.equals("org.apache.lucene.index.ConcurrentMergeScheduler")) {
      // Only CMS exposes the merge/thread caps, so this tuning is CMS-specific.
      ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler) iwConf.getMergeScheduler();
      int maxThreadCount = config.get("concurrent.merge.scheduler.max.thread.count", ConcurrentMergeScheduler.AUTO_DETECT_MERGES_AND_THREADS);
      int maxMergeCount = config.get("concurrent.merge.scheduler.max.merge.count", ConcurrentMergeScheduler.AUTO_DETECT_MERGES_AND_THREADS);
      cms.setMaxMergesAndThreads(maxMergeCount, maxThreadCount);
    }
  }

  // Codec selection: an explicit default.codec wins over codec.postingsFormat.
  final String defaultCodec = config.get("default.codec", null);
  if (defaultCodec != null) {
    try {
      Class<? extends Codec> clazz = Class.forName(defaultCodec).asSubclass(Codec.class);
      iwConf.setCodec(clazz.getConstructor().newInstance());
    } catch (Exception e) {
      throw new RuntimeException("Couldn't instantiate Codec: " + defaultCodec, e);
    }
  }
  final String postingsFormat = config.get("codec.postingsFormat", null);
  if (defaultCodec == null && postingsFormat != null) {
    try {
      // Wrap the default codec so every field uses the chosen postings format.
      final PostingsFormat postingsFormatChosen = PostingsFormat.forName(postingsFormat);
      iwConf.setCodec(new Lucene70Codec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {
          return postingsFormatChosen;
        }
      });
    } catch (Exception e) {
      throw new RuntimeException("Couldn't instantiate Postings Format: " + postingsFormat, e);
    }
  }

  // Merge policy: mirror of the scheduler handling above, plus CFS ratio and
  // LogMergePolicy-specific merge factor tuning.
  final String mergePolicy = config.get("merge.policy", "org.apache.lucene.index.LogByteSizeMergePolicy");
  boolean isCompound = config.get("compound", true);
  iwConf.setUseCompoundFile(isCompound);
  if (mergePolicy.equals(NoMergePolicy.class.getName())) {
    iwConf.setMergePolicy(NoMergePolicy.INSTANCE);
  } else {
    try {
      iwConf.setMergePolicy(Class.forName(mergePolicy).asSubclass(MergePolicy.class).getConstructor().newInstance());
    } catch (Exception e) {
      throw new RuntimeException("unable to instantiate class '" + mergePolicy + "' as merge policy", e);
    }
    iwConf.getMergePolicy().setNoCFSRatio(isCompound ? 1.0 : 0.0);
    if (iwConf.getMergePolicy() instanceof LogMergePolicy) {
      LogMergePolicy logMergePolicy = (LogMergePolicy) iwConf.getMergePolicy();
      logMergePolicy.setMergeFactor(config.get("merge.factor", OpenIndexTask.DEFAULT_MERGE_PFACTOR));
    }
  }

  // Flush triggers. NOTE(review): the two branches set the same two values and
  // differ only in order — presumably because IndexWriterConfig rejects a state
  // where both flush triggers are disabled at once, so the order avoids a
  // transient illegal combination. TODO confirm against IndexWriterConfig javadoc.
  final double ramBuffer = config.get("ram.flush.mb", OpenIndexTask.DEFAULT_RAM_FLUSH_MB);
  final int maxBuffered = config.get("max.buffered", OpenIndexTask.DEFAULT_MAX_BUFFERED);
  if (maxBuffered == IndexWriterConfig.DISABLE_AUTO_FLUSH) {
    iwConf.setRAMBufferSizeMB(ramBuffer);
    iwConf.setMaxBufferedDocs(maxBuffered);
  } else {
    iwConf.setMaxBufferedDocs(maxBuffered);
    iwConf.setRAMBufferSizeMB(ramBuffer);
  }
  return iwConf;
}
Usage of org.apache.lucene.index.ConcurrentMergeScheduler in the lucene-solr project by Apache: class SolrIndexConfigTest, method testTieredMPSolrIndexConfigCreation.
@Test
public void testTieredMPSolrIndexConfigCreation() throws Exception {
  // Load the solrconfig that declares a TieredMergePolicyFactory and build the
  // IndexWriterConfig the same way a core would.
  SolrConfig solrConfig = new SolrConfig(instanceDir, solrConfigFileNameTieredMergePolicyFactory, null);
  SolrIndexConfig solrIndexConfig = new SolrIndexConfig(solrConfig, null, null);
  IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema(schemaFileName, solrConfig);
  h.getCore().setLatestSchema(indexSchema);
  IndexWriterConfig writerConfig = solrIndexConfig.toIndexWriterConfig(h.getCore());

  // Merge scheduler must be a CMS carrying the configured merge/thread caps.
  assertNotNull("null ms", writerConfig.getMergeScheduler());
  assertTrue("ms is not CMS", writerConfig.getMergeScheduler() instanceof ConcurrentMergeScheduler);
  ConcurrentMergeScheduler scheduler = (ConcurrentMergeScheduler) writerConfig.getMergeScheduler();
  assertEquals("ms.maxMergeCount", 987, scheduler.getMaxMergeCount());
  assertEquals("ms.maxThreadCount", 42, scheduler.getMaxThreadCount());

  // Merge policy must be tiered, with the values taken from the config file.
  assertNotNull("null mp", writerConfig.getMergePolicy());
  assertTrue("mp is not TieredMergePolicy", writerConfig.getMergePolicy() instanceof TieredMergePolicy);
  TieredMergePolicy tieredPolicy = (TieredMergePolicy) writerConfig.getMergePolicy();
  assertEquals("mp.maxMergeAtOnceExplicit", 19, tieredPolicy.getMaxMergeAtOnceExplicit());
  assertEquals("mp.segmentsPerTier", 9, (int) tieredPolicy.getSegmentsPerTier());
}
Usage of org.apache.lucene.index.ConcurrentMergeScheduler in the lucene-solr project by Apache: class TestUtil, method reduceOpenFiles.
/**
 * Best-effort tuning of an already-open {@link IndexWriter} so that it keeps
 * the number of simultaneously open files low.
 */
public static void reduceOpenFiles(IndexWriter w) {
  MergePolicy policy = w.getConfig().getMergePolicy();
  // Favor compound files: each segment then contributes a single file.
  policy.setNoCFSRatio(1.0);
  if (policy instanceof LogMergePolicy) {
    LogMergePolicy logPolicy = (LogMergePolicy) policy;
    logPolicy.setMergeFactor(Math.min(5, logPolicy.getMergeFactor()));
  } else if (policy instanceof TieredMergePolicy) {
    TieredMergePolicy tieredPolicy = (TieredMergePolicy) policy;
    tieredPolicy.setMaxMergeAtOnce(Math.min(5, tieredPolicy.getMaxMergeAtOnce()));
    tieredPolicy.setSegmentsPerTier(Math.min(5, tieredPolicy.getSegmentsPerTier()));
  }
  MergeScheduler scheduler = w.getConfig().getMergeScheduler();
  if (scheduler instanceof ConcurrentMergeScheduler) {
    // Cap concurrent merges/threads to limit files open at the same time.
    // NOTE(review): 3/2 may still exceed the scheduler's defaults — verify intent.
    ((ConcurrentMergeScheduler) scheduler).setMaxMergesAndThreads(3, 2);
  }
}
Usage of org.apache.lucene.index.ConcurrentMergeScheduler in the Anserini project by castorini: class IndexCollection, method run.
// Drives one full indexing run: configures the writer, indexes every file
// segment on a fixed-size thread pool, waits for completion, then commits,
// optionally force-merges, and closes the writer. Progress and final counters
// are logged via LOG.
public void run() throws IOException, InterruptedException {
final long start = System.nanoTime();
LOG.info("Starting indexer...");
int numThreads = args.threads;
final Directory dir = FSDirectory.open(indexPath);
// keepStopwords keeps stop words by passing an empty stop-word set to the analyzer.
final EnglishAnalyzer analyzer = args.keepStopwords ? new EnglishAnalyzer(CharArraySet.EMPTY_SET) : new EnglishAnalyzer();
final IndexWriterConfig config = new IndexWriterConfig(analyzer);
config.setSimilarity(new BM25Similarity());
// CREATE: any existing index at indexPath is overwritten, not appended to.
config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
config.setRAMBufferSizeMB(args.memorybufferSize);
config.setUseCompoundFile(false);
config.setMergeScheduler(new ConcurrentMergeScheduler());
final IndexWriter writer = new IndexWriter(dir, config);
// One IndexerThread task per file segment; the shared IndexWriter handles
// concurrent addDocument calls from the pool's worker threads.
final ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(numThreads);
final List<Path> segmentPaths = collection.getFileSegmentPaths();
final int segmentCnt = segmentPaths.size();
LOG.info(segmentCnt + " files found in " + collectionPath.toString());
for (int i = 0; i < segmentCnt; i++) {
executor.execute(new IndexerThread(writer, collection, segmentPaths.get(i)));
}
// No new tasks accepted past this point; already-submitted ones keep running.
executor.shutdown();
try {
// Wait for existing tasks to terminate
while (!executor.awaitTermination(1, TimeUnit.MINUTES)) {
// Logged once a minute until the pool drains.
LOG.info(String.format("%.2f percent completed", (double) executor.getCompletedTaskCount() / segmentCnt * 100.0d));
}
} catch (InterruptedException ie) {
// (Re-)Cancel if current thread also interrupted
executor.shutdownNow();
// Preserve interrupt status
Thread.currentThread().interrupt();
}
// Sanity check: every submitted segment task must have completed; a mismatch
// (e.g. after an interrupt above) is treated as a fatal inconsistency.
if (segmentCnt != executor.getCompletedTaskCount()) {
throw new RuntimeException("totalFiles = " + segmentCnt + " is not equal to completedTaskCount = " + executor.getCompletedTaskCount());
}
// Snapshot of the document count before commit/merge, used in the final log line.
int numIndexed = writer.maxDoc();
try {
writer.commit();
if (args.optimize)
writer.forceMerge(1);
} finally {
try {
writer.close();
} catch (IOException e) {
// It is possible that this happens... but nothing much we can do at this point,
// so just log the error and move on.
LOG.error(e);
}
}
// NOTE(review): dir is never closed here — presumably acceptable since the
// process exits after indexing, but worth confirming.
LOG.info("Indexed documents: " + counters.indexedDocuments.get());
LOG.info("Empty documents: " + counters.emptyDocuments.get());
LOG.info("Errors: " + counters.errors.get());
final long durationMillis = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS);
LOG.info("Total " + numIndexed + " documents indexed in " + DurationFormatUtils.formatDuration(durationMillis, "HH:mm:ss"));
}
Aggregations