Search in sources:

Example 1 with ConfigPath

use of org.archive.spring.ConfigPath in project heritrix3 by internetarchive.

the class CrawlControllerTest method makeTempCrawlController.

// TODO TESTME
/**
 * Builds a {@link CrawlController} on which {@code start()} has already been
 * called, using a scratch directory for its on-disk fixtures.
 *
 * <p>The scratch directory comes from the system property named by
 * {@code TEST_TMP_SYSTEM_PROPERTY_NAME}; when that property is unset,
 * {@code DEFAULT_TEST_TMP_DIR} is used instead. A {@code seeds.txt} file
 * holding a single URL is written there, and the {@code state} and
 * {@code checkpoints} subdirectories are passed through
 * {@code FileUtils.ensureWriteableDirectory}.
 *
 * @return the started controller
 * @throws Exception propagated from any of the setup steps
 */
public static CrawlController makeTempCrawlController() throws Exception {
    String configuredTmp = System.getProperty(TEST_TMP_SYSTEM_PROPERTY_NAME);
    File workDir = new File(configuredTmp != null ? configuredTmp : DEFAULT_TEST_TMP_DIR);
    FileUtils.ensureWriteableDirectory(workDir);

    // Seed file: one URL, no trailing newline.
    File seedsFile = new File(workDir, "seeds.txt");
    FileWriter seedsOut = null;
    try {
        seedsOut = new FileWriter(seedsFile);
        seedsOut.write("http://www.pandemoniummovie.com");
        seedsOut.close();
    } finally {
        // Covers the failure paths; a second close after success is quiet.
        ArchiveUtils.closeQuietly(seedsOut);
    }

    File stateDir = new File(workDir, "state");
    FileUtils.ensureWriteableDirectory(stateDir);
    File checkpointsDir = new File(workDir, "checkpoints");
    FileUtils.ensureWriteableDirectory(checkpointsDir);

    // NOTE(review): this BdbModule is started but not passed to the controller
    // or the server cache anywhere in this method - confirm that is intended.
    ConfigPath statePath = new ConfigPath("test", stateDir.getAbsolutePath());
    BdbModule bdbModule = new BdbModule();
    bdbModule.setDir(statePath);
    bdbModule.start();

    CrawlController crawlController = new CrawlController();
    crawlController.setServerCache(new BdbServerCache());
    crawlController.start();
    return crawlController;
}
Also used : BdbModule(org.archive.bdb.BdbModule) BdbServerCache(org.archive.modules.net.BdbServerCache) FileWriter(java.io.FileWriter) ConfigPath(org.archive.spring.ConfigPath) File(java.io.File)

Example 2 with ConfigPath

use of org.archive.spring.ConfigPath in project heritrix3 by internetarchive.

the class BdbModuleTest method testDoCheckpoint.

/**
 * Exercises {@code BdbModule.doCheckpoint} followed by a restart from the
 * first of two checkpoints.
 *
 * <p>Sequence: store keys 0..999, take a checkpoint, store keys 1000..1999,
 * take a second checkpoint, shut the module down, then start a second module
 * whose recovery checkpoint points at the first checkpoint's name. The test
 * asserts that the reopened cache reports 1000 entries and that key
 * {@code "999"} parses back to {@code randomFactor * 999}.
 *
 * @throws Exception propagated from any module, checkpoint, or file operation
 */
@SuppressWarnings("unchecked")
public void testDoCheckpoint() throws Exception {
    ConfigPath root = new ConfigPath("testBase", getTmpDir().getAbsolutePath());

    // Start from an empty bdb directory under the test root.
    ConfigPath envDir = new ConfigPath("bdb", "bdb");
    envDir.setBase(root);
    FileUtils.deleteDirectory(envDir.getFile());

    BdbModule firstModule = new BdbModule();
    firstModule.setDir(envDir);
    firstModule.start();

    // avoid data from prior runs being mistaken for current run
    int salt = RandomUtils.nextInt();
    ObjectIdentityBdbManualCache<IdentityCacheableWrapper> liveCache = firstModule.getOIBCCache("testData", false, IdentityCacheableWrapper.class);

    // First batch: keys 0..999. assertSame checks that the very String
    // instance supplied here is what comes back from the cache.
    for (int n = 0; n < 1000; n++) {
        String k = String.valueOf(n);
        final String v = String.valueOf(salt * n);
        IdentityCacheableWrapper fresh = new IdentityCacheableWrapper(k, v);
        String stored = (String) liveCache.getOrUse(k, new Supplier<IdentityCacheableWrapper>(fresh)).get();
        assertSame("unexpected prior entry", v, stored);
    }

    // First checkpoint; its name is needed later to recover from it.
    Checkpoint firstCheckpoint = new Checkpoint();
    ConfigPath checkpointsRoot = new ConfigPath("checkpoints", "checkpoints");
    checkpointsRoot.setBase(root);
    firstCheckpoint.generateFrom(checkpointsRoot, 998);
    firstModule.doCheckpoint(firstCheckpoint);
    String firstCheckpointName = firstCheckpoint.getName();

    // Second batch: keys 1000..1999, stored after the first checkpoint.
    for (int n = 1000; n < 2000; n++) {
        String k = String.valueOf(n);
        final String v = String.valueOf(salt * n);
        IdentityCacheableWrapper fresh = new IdentityCacheableWrapper(k, v);
        String stored = (String) liveCache.getOrUse(k, new Supplier<IdentityCacheableWrapper>(fresh)).get();
        assertSame("unexpected prior entry", v, stored);
    }

    Checkpoint secondCheckpoint = new Checkpoint();
    secondCheckpoint.generateFrom(checkpointsRoot, 999);
    firstModule.doCheckpoint(secondCheckpoint);

    firstModule.stop();
    firstModule.destroy();

    // Reopen the same directory, recovering from the FIRST checkpoint.
    BdbModule secondModule = new BdbModule();
    secondModule.setDir(envDir);
    Checkpoint recovery = new Checkpoint();
    ConfigPath recoveryDir = new ConfigPath("recover", firstCheckpointName);
    recoveryDir.setBase(root);
    recovery.setCheckpointDir(recoveryDir);
    recovery.afterPropertiesSet();
    secondModule.setRecoveryCheckpoint(recovery);
    secondModule.start();

    ObjectIdentityBdbManualCache<IdentityCacheableWrapper> restoredCache = secondModule.getOIBCCache("testData", true, IdentityCacheableWrapper.class);
    assertEquals("unexpected size", 1000, restoredCache.size());
    int expectedLast = salt * 999;
    String restoredLast = (String) restoredCache.get("" + 999).get();
    assertEquals("unexpected value", expectedLast, Integer.parseInt(restoredLast));

    secondModule.stop();
    secondModule.destroy();
}
Also used : BdbModule(org.archive.bdb.BdbModule) Checkpoint(org.archive.checkpointing.Checkpoint) ConfigPath(org.archive.spring.ConfigPath) IdentityCacheableWrapper(org.archive.util.IdentityCacheableWrapper)

Example 3 with ConfigPath

use of org.archive.spring.ConfigPath in project heritrix3 by internetarchive.

the class WARCWriterProcessorTest method newTestWarcWriter.

/**
 * Creates a {@link WARCWriterProcessor} whose directory is a subdirectory of
 * {@code TmpDirTestCase.tmpDir()} named after {@code name}.
 *
 * <p>The processor is also given a new {@link DefaultServerCache} and a
 * {@link CrawlMetadata} on which {@code afterPropertiesSet()} has been called.
 *
 * @param name name of the subdirectory to use under the test temp directory
 * @return the configured processor
 * @throws IOException propagated from the directory or metadata setup
 */
public static WARCWriterProcessor newTestWarcWriter(String name) throws IOException {
    File outputDir = new File(TmpDirTestCase.tmpDir(), name);
    FileUtils.ensureWriteableDirectory(outputDir);

    WARCWriterProcessor writer = new WARCWriterProcessor();
    ConfigPath outputPath = new ConfigPath("test", outputDir.getAbsolutePath());
    writer.setDirectory(outputPath);
    writer.setServerCache(new DefaultServerCache());

    CrawlMetadata crawlMetadata = new CrawlMetadata();
    crawlMetadata.afterPropertiesSet();
    writer.setMetadataProvider(crawlMetadata);

    return writer;
}
Also used : DefaultServerCache(org.archive.modules.fetcher.DefaultServerCache) ConfigPath(org.archive.spring.ConfigPath) File(java.io.File) CrawlMetadata(org.archive.modules.CrawlMetadata)

Example 4 with ConfigPath

use of org.archive.spring.ConfigPath in project heritrix3 by internetarchive.

the class WARCWriterChainProcessorTest method makeTestWARCWriterChainProcessor.

/**
 * Creates a {@link WARCWriterChainProcessor} whose directory is a
 * subdirectory of {@code TmpDirTestCase.tmpDir()} named after the simple
 * class name of {@code WARCWriterChainProcessorTest}.
 *
 * <p>The processor is also given a new {@link DefaultServerCache} and a
 * {@link CrawlMetadata} on which {@code afterPropertiesSet()} has been called.
 *
 * @return the configured processor
 * @throws IOException propagated from the directory or metadata setup
 */
public static WARCWriterChainProcessor makeTestWARCWriterChainProcessor() throws IOException {
    String dirName = WARCWriterChainProcessorTest.class.getSimpleName();
    File outputDir = new File(TmpDirTestCase.tmpDir(), dirName);
    FileUtils.ensureWriteableDirectory(outputDir);

    WARCWriterChainProcessor chainWriter = new WARCWriterChainProcessor();
    ConfigPath outputPath = new ConfigPath("test", outputDir.getAbsolutePath());
    chainWriter.setDirectory(outputPath);
    chainWriter.setServerCache(new DefaultServerCache());

    CrawlMetadata crawlMetadata = new CrawlMetadata();
    crawlMetadata.afterPropertiesSet();
    chainWriter.setMetadataProvider(crawlMetadata);

    return chainWriter;
}
Also used : DefaultServerCache(org.archive.modules.fetcher.DefaultServerCache) ConfigPath(org.archive.spring.ConfigPath) File(java.io.File) CrawlMetadata(org.archive.modules.CrawlMetadata)

Example 5 with ConfigPath

use of org.archive.spring.ConfigPath in project heritrix3 by internetarchive.

the class CrawlJobModel method generateConfigReferencedPaths.

/**
 * Describes each config path of the crawl job as a map for rendering.
 *
 * <p>Every returned map holds, in this insertion order, the entries
 * {@code key}, {@code name}, {@code path} (absolute path of the
 * canonicalized file), {@code url} ({@code baseRef + "engine/anypath/" + path})
 * and {@code editable} (result of {@code EDIT_FILTER.accept} on the file).
 *
 * @param baseRef prefix placed in front of {@code "engine/anypath/"} when
 *                building each {@code url} value
 * @return one map per config path, in the iteration order of
 *         {@code crawlJob.getConfigPaths()}
 */
private List<Map<String, Object>> generateConfigReferencedPaths(String baseRef) {
    List<Map<String, Object>> referencedPaths = new ArrayList<Map<String, Object>>();
    // Walk the entries of a single getConfigPaths() result instead of calling
    // the getter again and looking each key up on every iteration.
    for (Map.Entry<String, ConfigPath> entry : crawlJob.getConfigPaths().entrySet()) {
        ConfigPath cp = entry.getValue();
        String path = FileUtils.tryToCanonicalize(cp.getFile()).getAbsolutePath();

        Map<String, Object> configMap = new LinkedHashMap<String, Object>();
        configMap.put("key", entry.getKey());
        configMap.put("name", cp.getName());
        configMap.put("path", path);
        // NOTE(review): the path is appended to the URL verbatim - confirm
        // whether consumers expect it to be URL-encoded.
        configMap.put("url", baseRef + "engine/anypath/" + path);
        configMap.put("editable", EDIT_FILTER.accept(cp.getFile()));
        referencedPaths.add(configMap);
    }
    return referencedPaths;
}
Also used : ArrayList(java.util.ArrayList) ConfigPath(org.archive.spring.ConfigPath) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)

Aggregations

ConfigPath (org.archive.spring.ConfigPath): 14, File (java.io.File): 6, ArrayList (java.util.ArrayList): 4, BdbModule (org.archive.bdb.BdbModule): 4, CrawlMetadata (org.archive.modules.CrawlMetadata): 4, DefaultServerCache (org.archive.modules.fetcher.DefaultServerCache): 4, IOException (java.io.IOException): 2, ByteArrayInputStream (java.io.ByteArrayInputStream): 1, FileFilter (java.io.FileFilter): 1, FileWriter (java.io.FileWriter): 1, LinkedHashMap (java.util.LinkedHashMap): 1, Map (java.util.Map): 1, Formatter (java.util.logging.Formatter): 1, Logger (java.util.logging.Logger): 1, SimpleFormatter (java.util.logging.SimpleFormatter): 1, Checkpoint (org.archive.checkpointing.Checkpoint): 1, NonFatalErrorFormatter (org.archive.crawler.io.NonFatalErrorFormatter): 1, RuntimeErrorFormatter (org.archive.crawler.io.RuntimeErrorFormatter): 1, StatisticsLogFormatter (org.archive.crawler.io.StatisticsLogFormatter): 1, UriErrorFormatter (org.archive.crawler.io.UriErrorFormatter): 1