Use of org.archive.checkpointing.Checkpoint in project heritrix3 by internetarchive.
The class BdbModule, method doCheckpoint.
public void doCheckpoint(final Checkpoint checkpointInProgress) throws IOException {
    // First sync objectCaches
    for (@SuppressWarnings("rawtypes") ObjectIdentityCache oic : oiCaches.values()) {
        oic.sync();
    }
    try {
        // sync all databases
        for (DatabasePlusConfig dbc : databases.values()) {
            dbc.database.sync();
        }
        // Do a force checkpoint. That's what a sync does (i.e. doSync).
        CheckpointConfig chkptConfig = new CheckpointConfig();
        chkptConfig.setForce(true);
        // Mark Hayes of sleepycat says:
        // "The default for this property is false, which gives the current
        // behavior (allow deltas). If this property is true, deltas are
        // prohibited -- full versions of internal nodes are always logged
        // during the checkpoint. When a full version of an internal node
        // is logged during a checkpoint, recovery does not need to process
        // it at all. It is only fetched if needed by the application,
        // during normal DB operations after recovery. When a delta of an
        // internal node is logged during a checkpoint, recovery must
        // process it by fetching the full version of the node from earlier
        // in the log, and then applying the delta to it. This can be
        // pretty slow, since it is potentially a large amount of
        // random I/O."
        // chkptConfig.setMinimizeRecoveryTime(true);
        bdbEnvironment.checkpoint(chkptConfig);
        LOGGER.fine("Finished bdb checkpoint.");
        DbBackup dbBackup = new DbBackup(bdbEnvironment);
        try {
            dbBackup.startBackup();
            File envCpDir = new File(dir.getFile(), checkpointInProgress.getName());
            org.archive.util.FileUtils.ensureWriteableDirectory(envCpDir);
            File logfilesList = new File(envCpDir, "jdbfiles.manifest");
            String[] filedata = dbBackup.getLogFilesInBackupSet();
            for (int i = 0; i < filedata.length; i++) {
                File f = new File(dir.getFile(), filedata[i]);
                filedata[i] += "," + f.length();
                if (getUseHardLinkCheckpoints()) {
                    File hardLink = new File(envCpDir, filedata[i]);
                    try {
                        Files.createLink(hardLink.toPath(), f.toPath().toAbsolutePath());
                    } catch (IOException | UnsupportedOperationException e) {
                        LOGGER.log(Level.SEVERE, "unable to create required checkpoint link " + hardLink, e);
                    }
                }
            }
            FileUtils.writeLines(logfilesList, Arrays.asList(filedata));
            LOGGER.fine("Finished processing bdb log files.");
        } finally {
            dbBackup.endBackup();
        }
    } catch (DatabaseException e) {
        throw new IOException(e);
    }
    if (checkpointInProgress.getForgetAllButLatest()) {
        File[] oldEnvCpDirs = dir.getFile().listFiles(new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                return !name.equals(checkpointInProgress.getName()) && TextUtils.matches("cp\\d{5}-\\d{14}", name);
            }
        });
        for (File d : oldEnvCpDirs) {
            FileUtils.deleteDirectory(d);
        }
    }
}
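Each line that doCheckpoint() writes to jdbfiles.manifest is a log file's environment-relative name with its byte length appended after a comma. As a minimal sketch of consuming that format on the recovery side, assuming a hypothetical JdbManifestReader helper (not part of heritrix3):

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.List;

// Hypothetical helper illustrating the "name,length" lines that
// doCheckpoint() writes to jdbfiles.manifest; not heritrix3 code.
public class JdbManifestReader {
    public record ManifestEntry(String fileName, long expectedLength) {}

    public static List<ManifestEntry> read(File manifest) throws IOException {
        List<ManifestEntry> entries = new ArrayList<>();
        for (String line : Files.readAllLines(manifest.toPath())) {
            int comma = line.lastIndexOf(',');  // the length follows the last comma
            entries.add(new ManifestEntry(
                    line.substring(0, comma),
                    Long.parseLong(line.substring(comma + 1))));
        }
        return entries;
    }

    // Check that each listed log file is still present at its recorded size.
    public static boolean verify(File envDir, List<ManifestEntry> entries) {
        for (ManifestEntry e : entries) {
            File f = new File(envDir, e.fileName());
            if (!f.isFile() || f.length() != e.expectedLength()) {
                return false;
            }
        }
        return true;
    }
}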
Use of org.archive.checkpointing.Checkpoint in project heritrix3 by internetarchive.
The class BdbModule, method setup.
protected void setup(File f, boolean create) throws DatabaseException, IOException {
    EnvironmentConfig config = new EnvironmentConfig();
    config.setAllowCreate(create);
    // set to max
    config.setLockTimeout(75, TimeUnit.MINUTES);
    if (getCacheSize() > 0) {
        config.setCacheSize(getCacheSize());
        if (getCachePercent() > 0) {
            LOGGER.warning("cachePercent and cacheSize are both set. Only cacheSize will be used.");
        }
    } else if (getCachePercent() > 0) {
        config.setCachePercent(getCachePercent());
    }
    config.setSharedCache(getUseSharedCache());
    // we take the advice literally from...
    // https://web.archive.org/web/20100727081707/http://www.oracle.com/technology/products/berkeley-db/faq/je_faq.html#33
    long nLockTables = getExpectedConcurrency() - 1;
    while (!BigInteger.valueOf(nLockTables).isProbablePrime(Integer.MAX_VALUE)) {
        nLockTables--;
    }
    config.setConfigParam("je.lock.nLockTables", Long.toString(nLockTables));
    // configure the number of cleaner threads, to speed up clearing out old state files:
    int cleaners = getCleanerThreads();
    if (cleaners > 0) {
        config.setConfigParam(EnvironmentConfig.CLEANER_THREADS, Integer.toString(cleaners));
    }
    // configure the number of evictor threads, to avoid critical eviction slowdowns:
    int evictors = this.getEvictorCoreThreads();
    if (evictors > -1) {
        config.setConfigParam(EnvironmentConfig.EVICTOR_CORE_THREADS, Integer.toString(evictors));
    }
    int maxEvictors = this.getEvictorMaxThreads();
    if (maxEvictors > 0) {
        config.setConfigParam(EnvironmentConfig.EVICTOR_MAX_THREADS, Integer.toString(maxEvictors));
    }
    // triple this value to 6K because stats show many faults
    config.setConfigParam("je.log.faultReadSize", "6144");
    // set max bdb log file size. default 10M
    config.setConfigParam("je.log.fileMax", Long.toString(getMaxLogFileSize()));
    if (!getUseHardLinkCheckpoints()) {
        // to support checkpoints by textual manifest only,
        // prevent BDB's cleaner from deleting log files
        config.setConfigParam("je.cleaner.expunge", "false");
    }
    // else leave whatever other setting was already in place
    org.archive.util.FileUtils.ensureWriteableDirectory(f);
    this.bdbEnvironment = new EnhancedEnvironment(f, config);
    this.classCatalog = this.bdbEnvironment.getClassCatalog();
    if (!create) {
        // freeze last log file -- so that originating checkpoint isn't fouled
        DbBackup dbBackup = new DbBackup(bdbEnvironment);
        dbBackup.startBackup();
        dbBackup.endBackup();
    }
}
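The je.lock.nLockTables loop above walks downward from expectedConcurrency - 1 until it hits a prime, because JE hashes lock requests across its lock tables and a prime table count spreads them more evenly, per the linked FAQ. A standalone sketch of the same search, with an added guard for tiny inputs that the heritrix code (which assumes sizable concurrency) omits:

import java.math.BigInteger;

// Standalone illustration of the lock-table sizing logic in setup():
// pick the largest prime strictly below the expected concurrency.
public class LockTableSizer {
    static long largestPrimeBelow(long expectedConcurrency) {
        long n = expectedConcurrency - 1;
        // Guard added for this sketch: stop at 2 (itself prime) so the
        // loop terminates even for very small inputs.
        while (n > 2 && !BigInteger.valueOf(n).isProbablePrime(Integer.MAX_VALUE)) {
            n--;
        }
        return n;
    }

    public static void main(String[] args) {
        // e.g. 64 expected threads -> 61 lock tables (63 and 62 are composite)
        System.out.println(largestPrimeBelow(64));
    }
}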
Use of org.archive.checkpointing.Checkpoint in project heritrix3 by internetarchive.
The class CheckpointService, method requestCrawlCheckpoint.
/**
 * Run a checkpoint of the crawler.
 */
public synchronized String requestCrawlCheckpoint() throws IllegalStateException {
    if (!controller.hasStarted()) {
        LOGGER.info("crawl job has not started; ignoring");
        return null;
    }
    if (isCheckpointing()) {
        throw new IllegalStateException("Checkpoint already running.");
    }
    // prevent redundant auto-checkpoints when crawler paused or stopping
    if (controller.isPaused() || controller.getState().equals(CrawlController.State.STOPPING)) {
        if (controller.getStatisticsTracker().getSnapshot().sameProgressAs(lastCheckpointSnapshot)) {
            LOGGER.info("no progress since last checkpoint; ignoring");
            System.err.println("no progress since last checkpoint; ignoring");
            return null;
        }
    }
    long checkpointStart = System.currentTimeMillis();
    Map<String, Checkpointable> toCheckpoint = appCtx.getBeansOfType(Checkpointable.class);
    if (LOGGER.isLoggable(Level.FINE)) {
        LOGGER.fine("checkpointing beans " + toCheckpoint);
    }
    checkpointInProgress = new Checkpoint();
    try {
        checkpointInProgress.setForgetAllButLatest(getForgetAllButLatest());
        checkpointInProgress.generateFrom(getCheckpointsDir(), getNextCheckpointNumber());
        // pre (incl. acquire necessary locks)
        long startStart = System.currentTimeMillis();
        for (Checkpointable c : toCheckpoint.values()) {
            c.startCheckpoint(checkpointInProgress);
        }
        LOGGER.info("all startCheckpoint() completed in " + (System.currentTimeMillis() - startStart) + "ms");
        // flush/write
        long doStart = System.currentTimeMillis();
        for (Checkpointable c : toCheckpoint.values()) {
            long doMs = System.currentTimeMillis();
            c.doCheckpoint(checkpointInProgress);
            long doDuration = System.currentTimeMillis() - doMs;
            LOGGER.fine("doCheckpoint() " + c + " in " + doDuration + "ms");
        }
        LOGGER.info("all doCheckpoint() completed in " + (System.currentTimeMillis() - doStart) + "ms");
        if (getForgetAllButLatest() && lastCheckpoint != null) {
            try {
                long deleteStart = System.currentTimeMillis();
                FileUtils.deleteDirectory(lastCheckpoint.getCheckpointDir().getFile());
                lastCheckpoint = null;
                LOGGER.info("deleted old checkpoint in " + (System.currentTimeMillis() - deleteStart) + "ms");
            } catch (IOException e) {
                LOGGER.log(Level.SEVERE, "problem deleting last checkpoint directory " + lastCheckpoint.getCheckpointDir().getFile(), e);
            }
        }
        checkpointInProgress.setSuccess(true);
        appCtx.publishEvent(new CheckpointSuccessEvent(this, checkpointInProgress));
        // Record the stats associated with this successfully-completed checkpoint:
        lastCheckpointSnapshot = controller.getStatisticsTracker().getSnapshot();
    } catch (Exception e) {
        checkpointFailed(e);
    } finally {
        checkpointInProgress.writeValidity(controller.getStatisticsTracker().getProgressStamp());
        // close (incl. release locks)
        long finishStart = System.currentTimeMillis();
        for (Checkpointable c : toCheckpoint.values()) {
            c.finishCheckpoint(checkpointInProgress);
        }
        LOGGER.info("all finishCheckpoint() completed in " + (System.currentTimeMillis() - finishStart) + "ms");
    }
    LOGGER.info("completed checkpoint " + checkpointInProgress.getName() + " in " + (System.currentTimeMillis() - checkpointStart) + "ms");
    this.nextCheckpointNumber++;
    String nameToReport = checkpointInProgress.getSuccess() ? checkpointInProgress.getName() : null;
    this.lastCheckpoint = this.checkpointInProgress;
    this.checkpointInProgress = null;
    return nameToReport;
}
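The loops above define the three-phase contract each participating bean sees: startCheckpoint() before any writing, doCheckpoint() for the actual flush, and finishCheckpoint() in the finally block. A minimal sketch of a Checkpointable implementation follows; the JournalCounter class and its file layout are illustrative assumptions, and setRecoveryCheckpoint() is assumed to belong to the interface based on the call seen in BdbModuleTest below:

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.concurrent.atomic.AtomicLong;
import org.archive.checkpointing.Checkpoint;
import org.archive.checkpointing.Checkpointable;

// Illustrative Checkpointable bean; the class name and the
// journal-counter.txt layout are assumptions, not heritrix3 code.
public class JournalCounter implements Checkpointable {
    private final AtomicLong count = new AtomicLong();

    @Override
    public void startCheckpoint(Checkpoint checkpointInProgress) {
        // Acquire any locks needed to hold state steady during the checkpoint.
    }

    @Override
    public void doCheckpoint(Checkpoint checkpointInProgress) throws IOException {
        // Write state into this checkpoint's directory.
        File dir = checkpointInProgress.getCheckpointDir().getFile();
        Files.writeString(new File(dir, "journal-counter.txt").toPath(),
                Long.toString(count.get()));
    }

    @Override
    public void finishCheckpoint(Checkpoint checkpointInProgress) {
        // Release anything acquired in startCheckpoint().
    }

    @Override
    public void setRecoveryCheckpoint(Checkpoint recoveryCheckpoint) {
        // On recovery, reload the state a previous doCheckpoint() wrote.
        try {
            File dir = recoveryCheckpoint.getCheckpointDir().getFile();
            count.set(Long.parseLong(Files.readString(
                    new File(dir, "journal-counter.txt").toPath()).trim()));
        } catch (IOException e) {
            throw new IllegalStateException("cannot recover journal counter", e);
        }
    }
}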
Use of org.archive.checkpointing.Checkpoint in project heritrix3 by internetarchive.
The class BdbModuleTest, method testDoCheckpoint.
@SuppressWarnings("unchecked")
public void testDoCheckpoint() throws Exception {
    ConfigPath basePath = new ConfigPath("testBase", getTmpDir().getAbsolutePath());
    ConfigPath bdbDir = new ConfigPath("bdb", "bdb");
    bdbDir.setBase(basePath);
    FileUtils.deleteDirectory(bdbDir.getFile());
    BdbModule bdb = new BdbModule();
    bdb.setDir(bdbDir);
    bdb.start();
    // avoid data from prior runs being mistaken for current run
    int randomFactor = RandomUtils.nextInt();
    ObjectIdentityBdbManualCache<IdentityCacheableWrapper> testData =
            bdb.getOIBCCache("testData", false, IdentityCacheableWrapper.class);
    for (int i1 = 0; i1 < 1000; i1++) {
        String key = String.valueOf(i1);
        final String value = String.valueOf(randomFactor * i1);
        String cached = (String) testData.getOrUse(
                key,
                new Supplier<IdentityCacheableWrapper>(
                        new IdentityCacheableWrapper(key, value))).get();
        assertSame("unexpected prior entry", value, cached);
    }
    Checkpoint checkpointInProgress = new Checkpoint();
    ConfigPath checkpointsPath = new ConfigPath("checkpoints", "checkpoints");
    checkpointsPath.setBase(basePath);
    checkpointInProgress.generateFrom(checkpointsPath, 998);
    bdb.doCheckpoint(checkpointInProgress);
    String firstCheckpointName = checkpointInProgress.getName();
    for (int i2 = 1000; i2 < 2000; i2++) {
        String key = String.valueOf(i2);
        final String value = String.valueOf(randomFactor * i2);
        String cached = (String) testData.getOrUse(
                key,
                new Supplier<IdentityCacheableWrapper>(
                        new IdentityCacheableWrapper(key, value))).get();
        assertSame("unexpected prior entry", value, cached);
    }
    checkpointInProgress = new Checkpoint();
    checkpointInProgress.generateFrom(checkpointsPath, 999);
    bdb.doCheckpoint(checkpointInProgress);
    bdb.stop();
    bdb.destroy();
    BdbModule bdb2 = new BdbModule();
    bdb2.setDir(bdbDir);
    Checkpoint recoveryCheckpoint = new Checkpoint();
    ConfigPath recoverPath = new ConfigPath("recover", firstCheckpointName);
    recoverPath.setBase(basePath);
    recoveryCheckpoint.setCheckpointDir(recoverPath);
    recoveryCheckpoint.afterPropertiesSet();
    bdb2.setRecoveryCheckpoint(recoveryCheckpoint);
    bdb2.start();
    ObjectIdentityBdbManualCache<IdentityCacheableWrapper> restoreData =
            bdb2.getOIBCCache("testData", true, IdentityCacheableWrapper.class);
    assertEquals("unexpected size", 1000, restoreData.size());
    assertEquals("unexpected value", randomFactor * 999,
            Integer.parseInt((String) restoreData.get("" + 999).get()));
    bdb2.stop();
    bdb2.destroy();
}
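The recovery in this test hinges on Checkpoint.generateFrom() producing directory names matched by the cp\d{5}-\d{14} regex used in doCheckpoint(): a five-digit zero-padded serial plus a 14-digit timestamp. A small sketch of that naming scheme, with the exact formatter an assumption inferred from the regex rather than copied from heritrix3:

import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.regex.Pattern;

// Sketch of the checkpoint naming scheme implied by the cp\d{5}-\d{14}
// regex seen in BdbModule and CrawlerJournal; the timestamp formatter
// is an inference, not copied from heritrix3.
public class CheckpointNames {
    static final Pattern NAME = Pattern.compile("cp\\d{5}-\\d{14}");
    static final DateTimeFormatter STAMP = DateTimeFormatter.ofPattern("yyyyMMddHHmmss");

    static String name(int serial, LocalDateTime when) {
        return String.format("cp%05d-%s", serial, STAMP.format(when));
    }

    public static void main(String[] args) {
        String n = name(998, LocalDateTime.now());  // e.g. "cp00998-20240101120000"
        System.out.println(n + " valid=" + NAME.matcher(n).matches());
    }
}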
Use of org.archive.checkpointing.Checkpoint in project heritrix3 by internetarchive.
The class CrawlerJournal, method rotateForCheckpoint.
/**
 * Handle a checkpoint by rotating the current log to a checkpoint-named
 * file and starting a new log.
 */
public synchronized void rotateForCheckpoint(Checkpoint checkpointInProgress) {
    if (this.out == null || !this.gzipFile.exists()) {
        return;
    }
    close();
    File newName = new File(this.gzipFile.getParentFile(),
            this.gzipFile.getName() + "." + checkpointInProgress.getName());
    try {
        FileUtils.moveAsideIfExists(newName);
        if (checkpointInProgress.getForgetAllButLatest()) {
            // merge any earlier checkpointed files into new checkpoint
            // file, taking advantage of the legality of concatenating gzips
            File[] oldCheckpointeds = this.gzipFile.getParentFile().listFiles(new FilenameFilter() {
                @Override
                public boolean accept(File dir, String name) {
                    String regex = "^" + Pattern.quote(gzipFile.getName()) + "\\.cp\\d{5}-\\d{14}$";
                    return TextUtils.matches(regex, name);
                }
            });
            Arrays.sort(oldCheckpointeds);
            for (int i = 1; i < oldCheckpointeds.length; i++) {
                FileUtils.appendTo(oldCheckpointeds[0], oldCheckpointeds[i]);
                oldCheckpointeds[i].delete();
            }
            if (oldCheckpointeds.length > 0) {
                FileUtils.appendTo(oldCheckpointeds[0], this.gzipFile);
                this.gzipFile.delete();
                oldCheckpointeds[0].renameTo(newName);
            } else {
                this.gzipFile.renameTo(newName);
            }
        } else {
            this.gzipFile.renameTo(newName);
        }
        // Open new gzip file.
        this.out = initialize(this.gzipFile);
    } catch (IOException ioe) {
        LOGGER.log(Level.SEVERE, "Problem rotating recovery journal", ioe);
    }
}
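The merge branch above relies on a property of the gzip format: concatenated gzip members form a valid gzip stream, and java.util.zip.GZIPInputStream decompresses them back to back. A self-contained demonstration of that property:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

// Shows that concatenated gzip members decompress as one stream, the
// property rotateForCheckpoint() relies on when appending journal files.
public class GzipConcatDemo {
    static byte[] gzip(String s) throws IOException {
        ByteArrayOutputStream buf = new ByteArrayOutputStream();
        try (GZIPOutputStream gz = new GZIPOutputStream(buf)) {
            gz.write(s.getBytes(StandardCharsets.UTF_8));
        }
        return buf.toByteArray();
    }

    public static void main(String[] args) throws IOException {
        ByteArrayOutputStream joined = new ByteArrayOutputStream();
        joined.write(gzip("first journal\n"));   // member 1
        joined.write(gzip("second journal\n"));  // member 2
        try (GZIPInputStream in = new GZIPInputStream(
                new ByteArrayInputStream(joined.toByteArray()))) {
            // Prints both lines: "first journal" then "second journal".
            System.out.print(new String(in.readAllBytes(), StandardCharsets.UTF_8));
        }
    }
}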