Search in sources :

Example 6 with StandardCrawlerContainer

use of org.codelibs.fess.crawler.container.StandardCrawlerContainer in project fess-crawler by codelibs.

the class HcHttpClientTest method setUp.

/**
 * Prepares the test fixture.
 *
 * <p>Registers the components used by the tests on a new
 * {@code StandardCrawlerContainer} via {@code singleton(...)} and then looks up
 * the {@code httpClient} and {@code urlFilter} components from it.
 *
 * @throws Exception if {@code super.setUp()} or the component setup fails
 */
@Override
protected void setUp() throws Exception {
    super.setUp();

    // One registration per line; the registration order matches the original chain.
    final StandardCrawlerContainer crawlerContainer = new StandardCrawlerContainer()
            .singleton("mimeTypeHelper", MimeTypeHelperImpl.class)
            .singleton("dataHelper", MemoryDataHelper.class)
            .singleton("urlFilterService", UrlFilterServiceImpl.class)
            .singleton("urlFilter", UrlFilterImpl.class)
            .singleton("robotsTxtHelper", RobotsTxtHelper.class)
            .singleton("httpClient", HcHttpClient.class);

    httpClient = crawlerContainer.getComponent("httpClient");
    urlFilter = crawlerContainer.getComponent("urlFilter");
}
Also used : UrlFilterServiceImpl(org.codelibs.fess.crawler.service.impl.UrlFilterServiceImpl) RobotsTxtHelper(org.codelibs.fess.crawler.helper.RobotsTxtHelper) StandardCrawlerContainer(org.codelibs.fess.crawler.container.StandardCrawlerContainer) MimeTypeHelperImpl(org.codelibs.fess.crawler.helper.impl.MimeTypeHelperImpl)

Example 7 with StandardCrawlerContainer

use of org.codelibs.fess.crawler.container.StandardCrawlerContainer in project fess-crawler by codelibs.

the class ExtractorFactoryTest method setUp.

/**
 * Prepares the test fixture.
 *
 * <p>Registers the Tika, PDF and LHA extractors together with the
 * {@code extractorFactory} on a new {@code StandardCrawlerContainer}, looks the
 * components up again, and maps MIME types to extractors on the factory.
 *
 * @throws Exception if {@code super.setUp()} or the component setup fails
 */
@Override
protected void setUp() throws Exception {
    super.setUp();

    final StandardCrawlerContainer crawlerContainer = new StandardCrawlerContainer()
            .singleton("tikaExtractor", TikaExtractor.class)
            .singleton("pdfExtractor", PdfExtractor.class)
            .singleton("lhaExtractor", LhaExtractor.class)
            .singleton("extractorFactory", ExtractorFactory.class);

    extractorFactory = crawlerContainer.getComponent("extractorFactory");
    final TikaExtractor tika = crawlerContainer.getComponent("tikaExtractor");
    final LhaExtractor lha = crawlerContainer.getComponent("lhaExtractor");
    final PasswordBasedExtractor pdf = crawlerContainer.getComponent("pdfExtractor");

    // MIME types handled by the Tika extractor, in the original registration order.
    final String[] tikaMimeTypes = {
        "application/msword",
        "application/vnd.ms-excel",
        "application/vnd.ms-powerpoint",
        "application/vnd.visio"
    };
    for (final String mimeType : tikaMimeTypes) {
        extractorFactory.addExtractor(mimeType, tika);
    }

    extractorFactory.addExtractor("application/pdf", pdf);

    // Both LHA MIME types are mapped to the same extractor instance.
    extractorFactory.addExtractor("application/x-lha", lha);
    extractorFactory.addExtractor("application/x-lharc", lha);
}
Also used : PasswordBasedExtractor(org.codelibs.fess.crawler.extractor.impl.PasswordBasedExtractor) LhaExtractor(org.codelibs.fess.crawler.extractor.impl.LhaExtractor) StandardCrawlerContainer(org.codelibs.fess.crawler.container.StandardCrawlerContainer) TikaExtractor(org.codelibs.fess.crawler.extractor.impl.TikaExtractor)

Example 8 with StandardCrawlerContainer

use of org.codelibs.fess.crawler.container.StandardCrawlerContainer in project fess-crawler by codelibs.

the class HtmlXpathExtractorTest method setUp.

/**
 * Prepares the test fixture.
 *
 * <p>Registers {@code HtmlXpathExtractor} under the name
 * {@code "htmlXpathExtractor"} on a new {@code StandardCrawlerContainer} and
 * stores the component looked up under that name in the
 * {@code htmlXpathExtractor} field.
 *
 * @throws Exception if {@code super.setUp()} or the component setup fails
 */
@Override
protected void setUp() throws Exception {
    super.setUp();

    // The container is only needed for this single lookup, so it is not kept in a local.
    htmlXpathExtractor = new StandardCrawlerContainer()
            .singleton("htmlXpathExtractor", HtmlXpathExtractor.class)
            .getComponent("htmlXpathExtractor");
}
Also used : StandardCrawlerContainer(org.codelibs.fess.crawler.container.StandardCrawlerContainer)

Example 9 with StandardCrawlerContainer

use of org.codelibs.fess.crawler.container.StandardCrawlerContainer in project fess-crawler by codelibs.

the class JodExtractorTest method setUp.

/**
 * Prepares the test fixture.
 *
 * <p>Registers the office manager configuration and the {@code JodExtractor}
 * on a new {@code StandardCrawlerContainer}, looks both up, and assigns the
 * extractor an office manager built from the configuration with port number
 * 12002.
 *
 * @throws Exception if {@code super.setUp()} or the component setup fails
 */
@Override
protected void setUp() throws Exception {
    super.setUp();

    final StandardCrawlerContainer crawlerContainer = new StandardCrawlerContainer()
            .singleton("officeManagerConfiguration", DefaultOfficeManagerConfiguration.class)
            .singleton("jodExtractor", JodExtractor.class);

    jodExtractor = crawlerContainer.getComponent("jodExtractor");

    final DefaultOfficeManagerConfiguration officeConfig =
            crawlerContainer.getComponent("officeManagerConfiguration");
    jodExtractor.officeManager = officeConfig.setPortNumber(12002).buildOfficeManager();
}
Also used : StandardCrawlerContainer(org.codelibs.fess.crawler.container.StandardCrawlerContainer) DefaultOfficeManagerConfiguration(org.artofsolving.jodconverter.office.DefaultOfficeManagerConfiguration)

Example 10 with StandardCrawlerContainer

use of org.codelibs.fess.crawler.container.StandardCrawlerContainer in project fess-crawler by codelibs.

the class LhaExtractorTest method setUp.

/**
 * Prepares the test fixture.
 *
 * <p>Registers the MIME type helper, the Tika and LHA extractors and an
 * {@code ExtractorFactory} on a new {@code StandardCrawlerContainer}. The
 * callback passed with the factory registration maps MIME types to the
 * extractors looked up from the same container. Finally the
 * {@code lhaExtractor} field is set from the container.
 *
 * @throws Exception if {@code super.setUp()} or the component setup fails
 */
@Override
protected void setUp() throws Exception {
    super.setUp();

    final StandardCrawlerContainer crawlerContainer = new StandardCrawlerContainer();
    crawlerContainer
            .singleton("mimeTypeHelper", MimeTypeHelperImpl.class)
            .singleton("tikaExtractor", TikaExtractor.class)
            .singleton("lhaExtractor", LhaExtractor.class)
            .<ExtractorFactory>singleton("extractorFactory", ExtractorFactory.class, extractorFactory -> {
                // Extractors are looked up inside the callback, from the captured container.
                final TikaExtractor tika = crawlerContainer.getComponent("tikaExtractor");
                final LhaExtractor lha = crawlerContainer.getComponent("lhaExtractor");
                extractorFactory.addExtractor("text/plain", tika);
                extractorFactory.addExtractor("text/html", tika);
                extractorFactory.addExtractor("application/x-lha", lha);
            });

    lhaExtractor = crawlerContainer.getComponent("lhaExtractor");
}
Also used : ExtractorFactory(org.codelibs.fess.crawler.extractor.ExtractorFactory) StandardCrawlerContainer(org.codelibs.fess.crawler.container.StandardCrawlerContainer) MimeTypeHelperImpl(org.codelibs.fess.crawler.helper.impl.MimeTypeHelperImpl)

Aggregations

StandardCrawlerContainer (org.codelibs.fess.crawler.container.StandardCrawlerContainer): 32; MimeTypeHelperImpl (org.codelibs.fess.crawler.helper.impl.MimeTypeHelperImpl): 9; ExtractorFactory (org.codelibs.fess.crawler.extractor.ExtractorFactory): 7; TikaExtractor (org.codelibs.fess.crawler.extractor.impl.TikaExtractor): 3; MemoryDataHelper (org.codelibs.fess.crawler.helper.MemoryDataHelper): 3; SitemapsHelper (org.codelibs.fess.crawler.helper.SitemapsHelper): 3; UrlFilterServiceImpl (org.codelibs.fess.crawler.service.impl.UrlFilterServiceImpl): 3; File (java.io.File): 2; ArchiveStreamFactory (org.apache.commons.compress.archivers.ArchiveStreamFactory): 2; ResourceUtil (org.codelibs.core.io.ResourceUtil): 2; HcHttpClient (org.codelibs.fess.crawler.client.http.HcHttpClient): 2; RobotsTxtHelper (org.codelibs.fess.crawler.helper.RobotsTxtHelper): 2; CrawlerWebServer (org.codelibs.fess.crawler.util.CrawlerWebServer): 2; PlainTestCase (org.dbflute.utflute.core.PlainTestCase): 2; Iterator (java.util.Iterator): 1; Map (java.util.Map): 1; Set (java.util.Set): 1; TimeUnit (java.util.concurrent.TimeUnit): 1; Resource (javax.annotation.Resource): 1; GenericObjectPool (org.apache.commons.pool2.impl.GenericObjectPool): 1