Search in sources :

Example 1 with Checkpoint

use of org.archive.checkpointing.Checkpoint in project heritrix3 by internetarchive.

the class BdbModule method doCheckpoint.

/**
 * Writes the BDB portion of the given checkpoint.
 *
 * <p>Steps, in order: sync every registered object cache, sync every open
 * database, force a BDB environment checkpoint, then (inside a
 * {@link DbBackup} start/end window) write a {@code jdbfiles.manifest} file
 * into a directory named after the checkpoint, listing each log file in the
 * backup set together with its current length. When hard-link checkpoints
 * are enabled, a hard link to each log file is also created in that
 * directory. Finally, if the checkpoint asks to forget all but the latest,
 * every other checkpoint-named directory under the BDB directory is deleted.
 *
 * @param checkpointInProgress the checkpoint being written; supplies the
 *        directory name and the forget-all-but-latest flag
 * @throws IOException if a BDB operation fails (the {@link DatabaseException}
 *         is wrapped), if the manifest cannot be written, or if old
 *         checkpoint directories cannot be listed or deleted
 */
public void doCheckpoint(final Checkpoint checkpointInProgress) throws IOException {
    // First sync objectCaches
    for (@SuppressWarnings("rawtypes") ObjectIdentityCache oic : oiCaches.values()) {
        oic.sync();
    }
    try {
        // sync all databases
        for (DatabasePlusConfig dbc : databases.values()) {
            dbc.database.sync();
        }
        // Do a force checkpoint.  That's what a sync does (i.e. doSync).
        CheckpointConfig chkptConfig = new CheckpointConfig();
        chkptConfig.setForce(true);
        // Mark Hayes of sleepycat says:
        // "The default for this property is false, which gives the current
        // behavior (allow deltas).  If this property is true, deltas are
        // prohibited -- full versions of internal nodes are always logged
        // during the checkpoint. When a full version of an internal node
        // is logged during a checkpoint, recovery does not need to process
        // it at all.  It is only fetched if needed by the application,
        // during normal DB operations after recovery. When a delta of an
        // internal node is logged during a checkpoint, recovery must
        // process it by fetching the full version of the node from earlier
        // in the log, and then applying the delta to it.  This can be
        // pretty slow, since it is potentially a large amount of
        // random I/O."
        // chkptConfig.setMinimizeRecoveryTime(true);
        bdbEnvironment.checkpoint(chkptConfig);
        LOGGER.fine("Finished bdb checkpoint.");
        DbBackup dbBackup = new DbBackup(bdbEnvironment);
        try {
            dbBackup.startBackup();
            File envCpDir = new File(dir.getFile(), checkpointInProgress.getName());
            org.archive.util.FileUtils.ensureWriteableDirectory(envCpDir);
            File logfilesList = new File(envCpDir, "jdbfiles.manifest");
            String[] filedata = dbBackup.getLogFilesInBackupSet();
            for (int i = 0; i < filedata.length; i++) {
                File f = new File(dir.getFile(), filedata[i]);
                // each manifest entry becomes "<logfile name>,<length in bytes>"
                filedata[i] += "," + f.length();
                if (getUseHardLinkCheckpoints()) {
                    // NOTE(review): the link is named after the full manifest
                    // entry (including the ",<length>" suffix), not the bare
                    // log file name; presumably recovery looks it up by that
                    // name -- confirm before changing.
                    File hardLink = new File(envCpDir, filedata[i]);
                    try {
                        Files.createLink(hardLink.toPath(), f.toPath().toAbsolutePath());
                    } catch (IOException | UnsupportedOperationException e) {
                        // logged but not rethrown: the checkpoint continues
                        LOGGER.log(Level.SEVERE, "unable to create required checkpoint link " + hardLink, e);
                    }
                }
            }
            FileUtils.writeLines(logfilesList, Arrays.asList(filedata));
            LOGGER.fine("Finished processing bdb log files.");
        } finally {
            // always close the backup window, even if the manifest step failed
            dbBackup.endBackup();
        }
    } catch (DatabaseException e) {
        throw new IOException(e);
    }
    if (checkpointInProgress.getForgetAllButLatest()) {
        File[] oldEnvCpDirs = dir.getFile().listFiles(new FilenameFilter() {

            @Override
            public boolean accept(File dir, String name) {
                // every checkpoint-named entry except the one just written
                return !name.equals(checkpointInProgress.getName()) && TextUtils.matches("cp\\d{5}-\\d{14}", name);
            }
        });
        // listFiles() returns null (not an empty array) when the path is not
        // a directory or an I/O error occurs; report that as an IOException
        // rather than failing with a NullPointerException below.
        if (oldEnvCpDirs == null) {
            throw new IOException("unable to list " + dir.getFile() + " to remove old checkpoint directories");
        }
        for (File d : oldEnvCpDirs) {
            FileUtils.deleteDirectory(d);
        }
    }
}
Also used : CheckpointConfig(com.sleepycat.je.CheckpointConfig) IOException(java.io.IOException) Checkpoint(org.archive.checkpointing.Checkpoint) FilenameFilter(java.io.FilenameFilter) DbBackup(com.sleepycat.je.util.DbBackup) File(java.io.File) DatabaseException(com.sleepycat.je.DatabaseException) ObjectIdentityCache(org.archive.util.ObjectIdentityCache)

Example 2 with Checkpoint

use of org.archive.checkpointing.Checkpoint in project heritrix3 by internetarchive.

the class BdbModule method setup.

/**
 * Builds the BDB JE environment configuration from this module's settings
 * and opens the environment in the given directory.
 *
 * @param f directory holding the BDB environment; made writeable/created
 *        before the environment is opened
 * @param create whether the environment may be created; when {@code false}
 *        a start/end backup cycle is run right after opening
 * @throws DatabaseException if the environment cannot be opened
 * @throws IOException if the directory cannot be prepared
 */
protected void setup(File f, boolean create) throws DatabaseException, IOException {
    EnvironmentConfig config = new EnvironmentConfig();
    config.setAllowCreate(create);
    // set to max
    config.setLockTimeout(75, TimeUnit.MINUTES);
    if (getCacheSize() > 0) {
        config.setCacheSize(getCacheSize());
        if (getCachePercent() > 0) {
            LOGGER.warning("cachePercent and cacheSize are both set. Only cacheSize will be used.");
        }
    } else if (getCachePercent() > 0) {
        config.setCachePercent(getCachePercent());
    }
    config.setSharedCache(getUseSharedCache());
    // we take the advice literally from...
    // https://web.archive.org/web/20100727081707/http://www.oracle.com/technology/products/berkeley-db/faq/je_faq.html#33
    // i.e. use the largest prime below the expected concurrency.
    long nLockTables = getExpectedConcurrency() - 1;
    // Stop the downward search at 1: BigInteger.isProbablePrime() tests the
    // absolute value, so without the bound a small expected concurrency
    // would walk past zero and settle on -2, an invalid lock table count.
    while (nLockTables > 1 && !BigInteger.valueOf(nLockTables).isProbablePrime(Integer.MAX_VALUE)) {
        nLockTables--;
    }
    if (nLockTables < 1) {
        // at least one lock table is required
        nLockTables = 1;
    }
    config.setConfigParam("je.lock.nLockTables", Long.toString(nLockTables));
    // configure the number of cleaner threads, to speed up clearing out old state files:
    int cleaners = getCleanerThreads();
    if (cleaners > 0) {
        config.setConfigParam(EnvironmentConfig.CLEANER_THREADS, Integer.toString(cleaners));
    }
    // configure number of evictor threads, to avoid critical eviction slowdowns:
    int evictors = this.getEvictorCoreThreads();
    if (evictors > -1) {
        config.setConfigParam(EnvironmentConfig.EVICTOR_CORE_THREADS, Integer.toString(evictors));
    }
    int maxEvictors = this.getEvictorMaxThreads();
    if (maxEvictors > 0) {
        config.setConfigParam(EnvironmentConfig.EVICTOR_MAX_THREADS, Integer.toString(maxEvictors));
    }
    // triple this value to 6K because stats show many faults
    config.setConfigParam("je.log.faultReadSize", "6144");
    // set max bdb log file size. default 10M
    config.setConfigParam("je.log.fileMax", Long.toString(getMaxLogFileSize()));
    if (!getUseHardLinkCheckpoints()) {
        // to support checkpoints by textual manifest only,
        // prevent BDB's cleaner from deleting log files
        config.setConfigParam("je.cleaner.expunge", "false");
    }
    // else leave whatever other setting was already in place
    org.archive.util.FileUtils.ensureWriteableDirectory(f);
    this.bdbEnvironment = new EnhancedEnvironment(f, config);
    this.classCatalog = this.bdbEnvironment.getClassCatalog();
    if (!create) {
        // freeze last log file -- so that originating checkpoint isn't fouled
        DbBackup dbBackup = new DbBackup(bdbEnvironment);
        dbBackup.startBackup();
        dbBackup.endBackup();
    }
}
Also used : EnvironmentConfig(com.sleepycat.je.EnvironmentConfig) DbBackup(com.sleepycat.je.util.DbBackup) EnhancedEnvironment(org.archive.util.bdbje.EnhancedEnvironment) Checkpoint(org.archive.checkpointing.Checkpoint)

Example 3 with Checkpoint

use of org.archive.checkpointing.Checkpoint in project heritrix3 by internetarchive.

the class CheckpointService method requestCrawlCheckpoint.

/**
 * Run a checkpoint of the crawler.
 *
 * <p>Every {@link Checkpointable} bean in the application context is taken
 * through three phases against a fresh {@link Checkpoint}:
 * {@code startCheckpoint}, {@code doCheckpoint} and, always (from the
 * {@code finally} block), {@code finishCheckpoint}. Any exception raised
 * during the first two phases is handed to {@code checkpointFailed} rather
 * than propagated.
 *
 * @return the name of the new checkpoint if it was marked successful;
 *         {@code null} if the crawl has not started, if the crawl is
 *         paused/stopping with no progress since the last checkpoint, or if
 *         the checkpoint did not succeed
 * @throws IllegalStateException if a checkpoint is already in progress
 */
public synchronized String requestCrawlCheckpoint() throws IllegalStateException {
    if (!controller.hasStarted()) {
        LOGGER.info("crawl job has not started; ignoring");
        return null;
    }
    if (isCheckpointing()) {
        throw new IllegalStateException("Checkpoint already running.");
    }
    // prevent redundant auto-checkpoints when crawler paused or stopping
    if (controller.isPaused() || controller.getState().equals(CrawlController.State.STOPPING)) {
        if (controller.getStatisticsTracker().getSnapshot().sameProgressAs(lastCheckpointSnapshot)) {
            LOGGER.info("no progress since last checkpoint; ignoring");
            // NOTE(review): same message is also written to stderr in addition to the log
            System.err.println("no progress since last checkpoint; ignoring");
            return null;
        }
    }
    long checkpointStart = System.currentTimeMillis();
    // collect every bean that takes part in checkpointing
    Map<String, Checkpointable> toCheckpoint = appCtx.getBeansOfType(Checkpointable.class);
    if (LOGGER.isLoggable(Level.FINE)) {
        LOGGER.fine("checkpointing beans " + toCheckpoint);
    }
    checkpointInProgress = new Checkpoint();
    try {
        checkpointInProgress.setForgetAllButLatest(getForgetAllButLatest());
        checkpointInProgress.generateFrom(getCheckpointsDir(), getNextCheckpointNumber());
        // pre (incl. acquire necessary locks)
        long startStart = System.currentTimeMillis();
        for (Checkpointable c : toCheckpoint.values()) {
            c.startCheckpoint(checkpointInProgress);
        }
        LOGGER.info("all startCheckpoint() completed in " + (System.currentTimeMillis() - startStart) + "ms");
        // flush/write
        long doStart = System.currentTimeMillis();
        for (Checkpointable c : toCheckpoint.values()) {
            // time each bean individually for the FINE-level log line
            long doMs = System.currentTimeMillis();
            c.doCheckpoint(checkpointInProgress);
            long doDuration = System.currentTimeMillis() - doMs;
            LOGGER.fine("doCheckpoint() " + c + " in " + doDuration + "ms");
        }
        LOGGER.info("all doCheckpoint() completed in " + (System.currentTimeMillis() - doStart) + "ms");
        // when only the latest checkpoint is kept, remove the previous one's directory
        if (getForgetAllButLatest() && lastCheckpoint != null) {
            try {
                long deleteStart = System.currentTimeMillis();
                FileUtils.deleteDirectory(lastCheckpoint.getCheckpointDir().getFile());
                // cleared only after a successful delete, so the catch below
                // can still dereference lastCheckpoint for its message
                lastCheckpoint = null;
                LOGGER.info("deleted old checkpoint in " + (System.currentTimeMillis() - deleteStart) + "ms");
            } catch (IOException e) {
                // a failed delete is logged but does not fail the new checkpoint
                LOGGER.log(Level.SEVERE, "problem deleting last checkpoint directory " + lastCheckpoint.getCheckpointDir().getFile(), e);
            }
        }
        checkpointInProgress.setSuccess(true);
        appCtx.publishEvent(new CheckpointSuccessEvent(this, checkpointInProgress));
        // Record the stats associated with this successfully-completed checkpoint:
        lastCheckpointSnapshot = controller.getStatisticsTracker().getSnapshot();
    } catch (Exception e) {
        // success flag stays unset; failure handling is delegated
        checkpointFailed(e);
    } finally {
        // runs on both the success and the failure path
        checkpointInProgress.writeValidity(controller.getStatisticsTracker().getProgressStamp());
        // close (incl. release locks)
        long finishStart = System.currentTimeMillis();
        for (Checkpointable c : toCheckpoint.values()) {
            c.finishCheckpoint(checkpointInProgress);
        }
        LOGGER.info("all finishCheckpoint() completed in " + (System.currentTimeMillis() - finishStart) + "ms");
    }
    LOGGER.info("completed checkpoint " + checkpointInProgress.getName() + " in " + (System.currentTimeMillis() - checkpointStart) + "ms");
    // the counter advances whether or not the checkpoint succeeded
    this.nextCheckpointNumber++;
    // capture the result before the in-progress reference is cleared
    String nameToReport = checkpointInProgress.getSuccess() ? checkpointInProgress.getName() : null;
    this.lastCheckpoint = this.checkpointInProgress;
    this.checkpointInProgress = null;
    return nameToReport;
}
Also used : Checkpoint(org.archive.checkpointing.Checkpoint) Checkpointable(org.archive.checkpointing.Checkpointable) IOException(java.io.IOException) ParseException(java.text.ParseException) IOException(java.io.IOException) BeansException(org.springframework.beans.BeansException)

Example 4 with Checkpoint

use of org.archive.checkpointing.Checkpoint in project heritrix3 by internetarchive.

the class BdbModuleTest method testDoCheckpoint.

/**
 * Writes 1000 entries, checkpoints, writes 1000 more, checkpoints again,
 * then restarts a second module from the FIRST checkpoint and verifies that
 * only the first 1000 entries are visible.
 */
@SuppressWarnings("unchecked")
public void testDoCheckpoint() throws Exception {
    // start from an empty BDB directory under the temp base
    ConfigPath base = new ConfigPath("testBase", getTmpDir().getAbsolutePath());
    ConfigPath envPath = new ConfigPath("bdb", "bdb");
    envPath.setBase(base);
    FileUtils.deleteDirectory(envPath.getFile());

    BdbModule firstModule = new BdbModule();
    firstModule.setDir(envPath);
    firstModule.start();

    // avoid data from prior runs being mistaken for current run
    int salt = RandomUtils.nextInt();
    ObjectIdentityBdbManualCache<IdentityCacheableWrapper> cache = firstModule.getOIBCCache("testData", false, IdentityCacheableWrapper.class);

    // first batch: keys 0..999
    int n = 0;
    while (n < 1000) {
        String key = String.valueOf(n);
        final String value = String.valueOf(salt * n);
        Supplier<IdentityCacheableWrapper> supplier = new Supplier<IdentityCacheableWrapper>(new IdentityCacheableWrapper(key, value));
        String cached = (String) cache.getOrUse(key, supplier).get();
        assertSame("unexpected prior entry", value, cached);
        n++;
    }

    // checkpoint #998 captures the first batch only
    Checkpoint current = new Checkpoint();
    ConfigPath checkpointsPath = new ConfigPath("checkpoints", "checkpoints");
    checkpointsPath.setBase(base);
    current.generateFrom(checkpointsPath, 998);
    firstModule.doCheckpoint(current);
    String firstCheckpointName = current.getName();

    // second batch: keys 1000..1999 (n continues from 1000)
    while (n < 2000) {
        String key = String.valueOf(n);
        final String value = String.valueOf(salt * n);
        Supplier<IdentityCacheableWrapper> supplier = new Supplier<IdentityCacheableWrapper>(new IdentityCacheableWrapper(key, value));
        String cached = (String) cache.getOrUse(key, supplier).get();
        assertSame("unexpected prior entry", value, cached);
        n++;
    }

    // checkpoint #999 captures both batches
    current = new Checkpoint();
    current.generateFrom(checkpointsPath, 999);
    firstModule.doCheckpoint(current);
    firstModule.stop();
    firstModule.destroy();

    // bring up a second module recovering from the first checkpoint
    BdbModule secondModule = new BdbModule();
    secondModule.setDir(envPath);
    Checkpoint recovery = new Checkpoint();
    ConfigPath recoverPath = new ConfigPath("recover", firstCheckpointName);
    recoverPath.setBase(base);
    recovery.setCheckpointDir(recoverPath);
    recovery.afterPropertiesSet();
    secondModule.setRecoveryCheckpoint(recovery);
    secondModule.start();

    ObjectIdentityBdbManualCache<IdentityCacheableWrapper> restored = secondModule.getOIBCCache("testData", true, IdentityCacheableWrapper.class);
    assertEquals("unexpected size", 1000, restored.size());
    int expectedLast = salt * 999;
    int actualLast = Integer.parseInt((String) restored.get("999").get());
    assertEquals("unexpected value", expectedLast, actualLast);

    secondModule.stop();
    secondModule.destroy();
}
Also used : BdbModule(org.archive.bdb.BdbModule) Checkpoint(org.archive.checkpointing.Checkpoint) ConfigPath(org.archive.spring.ConfigPath) IdentityCacheableWrapper(org.archive.util.IdentityCacheableWrapper) Checkpoint(org.archive.checkpointing.Checkpoint)

Example 5 with Checkpoint

use of org.archive.checkpointing.Checkpoint in project heritrix3 by internetarchive.

the class CrawlerJournal method rotateForCheckpoint.

/**
 * Handle a checkpoint by rotating the current log to a checkpoint-named
 * file and starting a new log.
 *
 * <p>Does nothing when no journal is open or the journal file does not
 * exist. Otherwise the journal is closed and renamed to
 * {@code <journal name>.<checkpoint name>} (any existing file of that name
 * is first moved aside). When the checkpoint asks to forget all but the
 * latest, previously rotated {@code <journal name>.cpNNNNN-NNNNNNNNNNNNNN}
 * files and the current journal are first concatenated into one file, which
 * then becomes the new checkpoint file. A fresh journal is opened afterwards.
 * I/O problems are logged, not thrown.
 *
 * @param checkpointInProgress the checkpoint whose name is used as the
 *        suffix of the rotated file
 */
public synchronized void rotateForCheckpoint(Checkpoint checkpointInProgress) {
    if (this.out == null || !this.gzipFile.exists()) {
        return;
    }
    close();
    File newName = new File(this.gzipFile.getParentFile(), this.gzipFile.getName() + "." + checkpointInProgress.getName());
    try {
        FileUtils.moveAsideIfExists(newName);
        // by default the live journal itself becomes the checkpoint file
        File toRename = this.gzipFile;
        if (checkpointInProgress.getForgetAllButLatest()) {
            // merge any earlier checkpointed files into new checkpoint
            // file, taking advantage of the legality of concatenating gzips
            // (pattern built once here rather than once per directory entry)
            final String regex = "^" + Pattern.quote(gzipFile.getName()) + "\\.cp\\d{5}-\\d{14}$";
            File[] oldCheckpointeds = this.gzipFile.getParentFile().listFiles(new FilenameFilter() {

                @Override
                public boolean accept(File dir, String name) {
                    return TextUtils.matches(regex, name);
                }
            });
            // listFiles() returns null when the directory cannot be read;
            // treat that like "no earlier files" so the journal is still
            // rotated and reopened instead of failing with the log closed.
            if (oldCheckpointeds != null && oldCheckpointeds.length > 0) {
                // names sort by checkpoint number/timestamp, so [0] is the oldest
                Arrays.sort(oldCheckpointeds);
                for (int i = 1; i < oldCheckpointeds.length; i++) {
                    FileUtils.appendTo(oldCheckpointeds[0], oldCheckpointeds[i]);
                    oldCheckpointeds[i].delete();
                }
                FileUtils.appendTo(oldCheckpointeds[0], this.gzipFile);
                this.gzipFile.delete();
                toRename = oldCheckpointeds[0];
            }
        }
        if (!toRename.renameTo(newName)) {
            // renameTo() signals failure only via its return value
            LOGGER.log(Level.WARNING, "Unable to rename " + toRename + " to " + newName);
        }
        // Open new gzip file.
        this.out = initialize(this.gzipFile);
    } catch (IOException ioe) {
        LOGGER.log(Level.SEVERE, "Problem rotating recovery journal", ioe);
    }
}
Also used : FilenameFilter(java.io.FilenameFilter) IOException(java.io.IOException) File(java.io.File) Checkpoint(org.archive.checkpointing.Checkpoint)

Aggregations

Checkpoint (org.archive.checkpointing.Checkpoint)6 File (java.io.File)3 IOException (java.io.IOException)3 DbBackup (com.sleepycat.je.util.DbBackup)2 FilenameFilter (java.io.FilenameFilter)2 Checkpointable (org.archive.checkpointing.Checkpointable)2 CheckpointConfig (com.sleepycat.je.CheckpointConfig)1 DatabaseException (com.sleepycat.je.DatabaseException)1 EnvironmentConfig (com.sleepycat.je.EnvironmentConfig)1 ParseException (java.text.ParseException)1 BdbModule (org.archive.bdb.BdbModule)1 ConfigPath (org.archive.spring.ConfigPath)1 ConfigPathConfigurer (org.archive.spring.ConfigPathConfigurer)1 IdentityCacheableWrapper (org.archive.util.IdentityCacheableWrapper)1 ObjectIdentityCache (org.archive.util.ObjectIdentityCache)1 EnhancedEnvironment (org.archive.util.bdbje.EnhancedEnvironment)1 BeansException (org.springframework.beans.BeansException)1