Search in sources :

Example 6 with CrawlerWebServer

Use of org.codelibs.fess.crawler.util.CrawlerWebServer in the fess-crawler project by codelibs.

From the class CrawlerTest, method test_executeTx.

/**
 * Crawls a local test web server with a transactional data service and
 * verifies that exactly {@code maxCount} access results are stored for the
 * session, then cleans the session data up.
 *
 * @throws Exception if the crawl or server lifecycle fails
 */
public void test_executeTx() throws Exception {
    final CrawlerWebServer server = new CrawlerWebServer(7070);
    server.start();
    final String url = "http://localhost:7070/";
    try {
        final int maxCount = 50;
        final int numOfThread = 10;
        // createTempFile yields a unique *file*; it must be swapped for a
        // directory before use. Fail fast if either step does not succeed
        // instead of silently writing crawl output to a bad path.
        final File file = File.createTempFile("crawler-", "");
        if (!file.delete() || !file.mkdirs()) {
            throw new IllegalStateException("Could not create temp directory: " + file.getAbsolutePath());
        }
        // NOTE(review): deleteOnExit() does not remove non-empty directories,
        // so this is best-effort only — TODO confirm cleanup strategy.
        file.deleteOnExit();
        fileTransformer.setPath(file.getAbsolutePath());
        crawler.addUrl(url);
        crawler.getCrawlerContext().setMaxAccessCount(maxCount);
        crawler.getCrawlerContext().setNumOfThread(numOfThread);
        // Restrict the crawl to URLs under the local test server.
        crawler.urlFilter.addInclude(url + ".*");
        final String sessionId = crawler.execute();
        assertEquals(maxCount, dataService.getCount(sessionId));
        dataService.delete(sessionId);
    } finally {
        server.stop();
    }
}
Also used : CrawlerWebServer(org.codelibs.fess.crawler.util.CrawlerWebServer) File(java.io.File)

Example 7 with CrawlerWebServer

Use of org.codelibs.fess.crawler.util.CrawlerWebServer in the fess-crawler project by codelibs.

From the class CrawlerTest, method test_execute_2instanceTx.

/**
 * Runs two independent crawler instances against two local test servers in
 * the background and verifies that their sessions, contexts, queues, and
 * stored results stay fully isolated from each other.
 *
 * @throws Exception if the crawl or server lifecycle fails
 */
public void test_execute_2instanceTx() throws Exception {
    final CrawlerWebServer server1 = new CrawlerWebServer(7070);
    server1.start();
    final CrawlerWebServer server2 = new CrawlerWebServer(7071);
    server2.start();
    final String url1 = "http://localhost:7070/";
    final String url2 = "http://localhost:7071/";
    try {
        final int maxCount = 10;
        final int numOfThread = 10;
        // createTempFile yields a unique *file*; replace it with a directory
        // and fail fast rather than ignoring the boolean results.
        final File file = File.createTempFile("crawler-", "");
        if (!file.delete() || !file.mkdirs()) {
            throw new IllegalStateException("Could not create temp directory: " + file.getAbsolutePath());
        }
        file.deleteOnExit();
        fileTransformer.setPath(file.getAbsolutePath());
        final Crawler crawler1 = getComponent(Crawler.class);
        crawler1.setBackground(true);
        ((UrlFilterImpl) crawler1.urlFilter).setIncludeFilteringPattern("$1$2$3.*");
        crawler1.addUrl(url1);
        crawler1.getCrawlerContext().setMaxAccessCount(maxCount);
        crawler1.getCrawlerContext().setNumOfThread(numOfThread);
        Thread.sleep(100);
        final Crawler crawler2 = getComponent(Crawler.class);
        crawler2.setBackground(true);
        ((UrlFilterImpl) crawler2.urlFilter).setIncludeFilteringPattern("$1$2$3.*");
        crawler2.addUrl(url2);
        crawler2.getCrawlerContext().setMaxAccessCount(maxCount);
        crawler2.getCrawlerContext().setNumOfThread(numOfThread);
        final String sessionId1 = crawler1.execute();
        final String sessionId2 = crawler2.execute();
        // Session ids must differ by *value*; assertNotSame only compares
        // references and would pass even for equal ids in distinct instances.
        assertFalse(sessionId1.equals(sessionId2));
        // The two crawlers must not share a context object (identity check is
        // the intent here, so assertNotSame is correct).
        assertNotSame(crawler1.crawlerContext, crawler2.crawlerContext);
        // Poll briefly for each background crawler to reach RUNNING before
        // asserting, to avoid a flaky fixed-delay check.
        for (int i = 0; i < 10; i++) {
            if (crawler1.crawlerContext.getStatus() == CrawlerStatus.RUNNING) {
                break;
            }
            Thread.sleep(500);
        }
        assertEquals(CrawlerStatus.RUNNING, crawler1.crawlerContext.getStatus());
        for (int i = 0; i < 10; i++) {
            if (crawler2.crawlerContext.getStatus() == CrawlerStatus.RUNNING) {
                break;
            }
            Thread.sleep(500);
        }
        assertEquals(CrawlerStatus.RUNNING, crawler2.crawlerContext.getStatus());
        crawler1.awaitTermination();
        crawler2.awaitTermination();
        assertEquals(maxCount, dataService.getCount(sessionId1));
        assertEquals(maxCount, dataService.getCount(sessionId2));
        // Every queued URL and stored result must belong to its own session's server.
        UrlQueue urlQueue;
        while ((urlQueue = urlQueueService.poll(sessionId1)) != null) {
            assertTrue(urlQueue.getUrl() + "=>" + url1, urlQueue.getUrl().startsWith(url1));
        }
        while ((urlQueue = urlQueueService.poll(sessionId2)) != null) {
            assertTrue(urlQueue.getUrl() + "=>" + url2, urlQueue.getUrl().startsWith(url2));
        }
        dataService.iterate(sessionId1, accessResult -> assertTrue(accessResult.getUrl().startsWith(url1)));
        dataService.iterate(sessionId2, accessResult -> assertTrue(accessResult.getUrl().startsWith(url2)));
        dataService.delete(sessionId1);
        dataService.delete(sessionId2);
    } finally {
        // Stop both servers even if the first stop throws.
        try {
            server1.stop();
        } finally {
            server2.stop();
        }
    }
}
Also used : UrlQueue(org.codelibs.fess.crawler.entity.UrlQueue) UrlFilterImpl(org.codelibs.fess.crawler.filter.impl.UrlFilterImpl) CrawlerWebServer(org.codelibs.fess.crawler.util.CrawlerWebServer) Crawler(org.codelibs.fess.crawler.Crawler) File(java.io.File)

Example 8 with CrawlerWebServer

Use of org.codelibs.fess.crawler.util.CrawlerWebServer in the fess-crawler project by codelibs.

From the class WebDriverClientTest, method test_doGet.

/**
 * Serves the prebuilt ajax test pages from a local server and verifies that
 * WebDriverClient renders the top page and discovers the expected six child
 * requests: five GETs to the menu anchors and one POST to the form page.
 */
public void test_doGet() {
    File docRootDir = new File(ResourceUtil.getBuildDir("ajax/index.html"), "ajax");
    final CrawlerWebServer server = new CrawlerWebServer(7070, docRootDir);
    final String url = "http://localhost:7070/";
    try {
        server.start();
        final ResponseData responseData = webDriverClient.execute(RequestDataBuilder.newRequestData().get().url(url).build());
        assertEquals(200, responseData.getHttpStatusCode());
        assertTrue(new String(InputStreamUtil.getBytes(responseData.getResponseBody()), Constants.UTF_8_CHARSET).contains("Ajax Test"));
        Set<RequestData> childUrlSet = responseData.getChildUrlSet();
        assertEquals(6, childUrlSet.size());
        // Expected URL and body fragment for each of the first five (GET)
        // child requests, in the set's iteration order.
        final String[] expectedUrls = { "http://localhost:7070/#menu-1-1.html", "http://localhost:7070/#menu-1-2.html",
                "http://localhost:7070/#menu-2-1.html", "http://localhost:7070/#menu-2-2.html", "http://localhost:7070/#" };
        final String[] expectedTexts = { "MENU 11", "MENU 12", "MENU 21", "MENU 22", "Ajax Test" };
        Iterator<RequestData> requestDataIter = childUrlSet.iterator();
        for (int i = 0; i < expectedUrls.length; i++) {
            final ResponseData childResponse = webDriverClient.execute(requestDataIter.next());
            assertEquals(Constants.GET_METHOD, childResponse.getMethod());
            assertEquals(expectedUrls[i], childResponse.getUrl());
            assertTrue(new String(InputStreamUtil.getBytes(childResponse.getResponseBody()), Constants.UTF_8_CHARSET).contains(expectedTexts[i]));
        }
        // The final child request is the form submission, issued as a POST.
        final ResponseData formResponse = webDriverClient.execute(requestDataIter.next());
        assertEquals(Constants.POST_METHOD, formResponse.getMethod());
        assertEquals("http://localhost:7070/form.html", formResponse.getUrl());
    } finally {
        server.stop();
    }
}
Also used : CrawlerWebServer(org.codelibs.fess.crawler.util.CrawlerWebServer) RequestData(org.codelibs.fess.crawler.entity.RequestData) ResponseData(org.codelibs.fess.crawler.entity.ResponseData) File(java.io.File)

Example 9 with CrawlerWebServer

Use of org.codelibs.fess.crawler.util.CrawlerWebServer in the fess-crawler project by codelibs.

From the class CrawlerTest, method test_execute_2instance.

/**
 * Runs two independent crawler instances (obtained from the container) against
 * two local test servers in the background and verifies that their sessions,
 * contexts, queues, and stored results stay fully isolated.
 *
 * @throws Exception if the crawl or server lifecycle fails
 */
public void test_execute_2instance() throws Exception {
    final CrawlerWebServer server1 = new CrawlerWebServer(7070);
    server1.start();
    final CrawlerWebServer server2 = new CrawlerWebServer(7071);
    server2.start();
    final String url1 = "http://localhost:7070/";
    final String url2 = "http://localhost:7071/";
    try {
        final int maxCount = 10;
        final int numOfThread = 10;
        // createTempFile yields a unique *file*; replace it with a directory
        // and fail fast rather than ignoring the boolean results.
        final File file = File.createTempFile("crawler-", "");
        if (!file.delete() || !file.mkdirs()) {
            throw new IllegalStateException("Could not create temp directory: " + file.getAbsolutePath());
        }
        file.deleteOnExit();
        fileTransformer.setPath(file.getAbsolutePath());
        final Crawler crawler1 = container.getComponent("crawler");
        // Distinct suffixes guarantee the two crawlers use different session ids.
        crawler1.setSessionId(crawler1.getSessionId() + "1");
        crawler1.setBackground(true);
        ((UrlFilterImpl) crawler1.urlFilter).setIncludeFilteringPattern("$1$2$3.*");
        crawler1.addUrl(url1);
        crawler1.getCrawlerContext().setMaxAccessCount(maxCount);
        crawler1.getCrawlerContext().setNumOfThread(numOfThread);
        final Crawler crawler2 = container.getComponent("crawler");
        crawler2.setSessionId(crawler2.getSessionId() + "2");
        crawler2.setBackground(true);
        ((UrlFilterImpl) crawler2.urlFilter).setIncludeFilteringPattern("$1$2$3.*");
        crawler2.addUrl(url2);
        crawler2.getCrawlerContext().setMaxAccessCount(maxCount);
        crawler2.getCrawlerContext().setNumOfThread(numOfThread);
        final String sessionId1 = crawler1.execute();
        final String sessionId2 = crawler2.execute();
        // Session ids must differ by *value*; assertNotSame only compares
        // references and would pass even for equal ids in distinct instances.
        assertFalse(sessionId1.equals(sessionId2));
        assertNotSame(crawler1.crawlerContext, crawler2.crawlerContext);
        // Poll for each background crawler to reach RUNNING instead of a
        // single fixed sleep, which is flaky on slow machines and matches the
        // pattern used by test_execute_2instanceTx.
        for (int i = 0; i < 10; i++) {
            if (crawler1.crawlerContext.getStatus() == CrawlerStatus.RUNNING) {
                break;
            }
            Thread.sleep(500);
        }
        assertEquals(CrawlerStatus.RUNNING, crawler1.crawlerContext.getStatus());
        for (int i = 0; i < 10; i++) {
            if (crawler2.crawlerContext.getStatus() == CrawlerStatus.RUNNING) {
                break;
            }
            Thread.sleep(500);
        }
        assertEquals(CrawlerStatus.RUNNING, crawler2.crawlerContext.getStatus());
        crawler1.awaitTermination();
        crawler2.awaitTermination();
        assertEquals(maxCount, dataService.getCount(sessionId1));
        assertEquals(maxCount, dataService.getCount(sessionId2));
        // Every queued URL and stored result must belong to its own session's server.
        UrlQueue urlQueue;
        while ((urlQueue = urlQueueService.poll(sessionId1)) != null) {
            assertTrue(urlQueue.getUrl() + "=>" + url1, urlQueue.getUrl().startsWith(url1));
        }
        while ((urlQueue = urlQueueService.poll(sessionId2)) != null) {
            assertTrue(urlQueue.getUrl() + "=>" + url2, urlQueue.getUrl().startsWith(url2));
        }
        dataService.iterate(sessionId1, accessResult -> {
            assertTrue(accessResult.getUrl().startsWith(url1));
            assertEquals(Constants.GET_METHOD, accessResult.getMethod());
        });
        dataService.iterate(sessionId2, accessResult -> {
            assertTrue(accessResult.getUrl().startsWith(url2));
            assertEquals(Constants.GET_METHOD, accessResult.getMethod());
        });
        dataService.delete(sessionId1);
        dataService.delete(sessionId2);
    } finally {
        // Stop both servers even if the first stop throws.
        try {
            server1.stop();
        } finally {
            server2.stop();
        }
    }
}
Also used : UrlQueue(org.codelibs.fess.crawler.entity.UrlQueue) UrlFilterImpl(org.codelibs.fess.crawler.filter.impl.UrlFilterImpl) CrawlerWebServer(org.codelibs.fess.crawler.util.CrawlerWebServer) File(java.io.File)

Example 10 with CrawlerWebServer

Use of org.codelibs.fess.crawler.util.CrawlerWebServer in the fess-crawler project by codelibs.

From the class CrawlerTest, method test_execute_textSitemaps.

/**
 * Seeds the crawl with a sitemap URL served by a local test server and
 * verifies that exactly {@code maxCount} access results are stored for the
 * session, then cleans the session data up.
 *
 * @throws Exception if the crawl or server lifecycle fails
 */
public void test_execute_textSitemaps() throws Exception {
    final CrawlerWebServer server = new CrawlerWebServer(7070);
    server.start();
    final String url = "http://localhost:7070/";
    try {
        final int maxCount = 50;
        final int numOfThread = 10;
        // createTempFile yields a unique *file*; replace it with a directory
        // and fail fast rather than ignoring the boolean results.
        final File file = File.createTempFile("crawler-", "");
        if (!file.delete() || !file.mkdirs()) {
            throw new IllegalStateException("Could not create temp directory: " + file.getAbsolutePath());
        }
        file.deleteOnExit();
        fileTransformer.setPath(file.getAbsolutePath());
        crawler.addUrl(url + "sitemaps.xml");
        // Use the accessor for the context, consistent with the other tests,
        // instead of reaching into the crawlerContext field directly.
        crawler.getCrawlerContext().setMaxAccessCount(maxCount);
        crawler.getCrawlerContext().setNumOfThread(numOfThread);
        crawler.urlFilter.addInclude(url + ".*");
        final String sessionId = crawler.execute();
        assertEquals(maxCount, dataService.getCount(sessionId));
        dataService.delete(sessionId);
    } finally {
        server.stop();
    }
}
Also used : CrawlerWebServer(org.codelibs.fess.crawler.util.CrawlerWebServer) File(java.io.File)

Aggregations

CrawlerWebServer (org.codelibs.fess.crawler.util.CrawlerWebServer)12 File (java.io.File)9 ResponseData (org.codelibs.fess.crawler.entity.ResponseData)4 UrlFilterImpl (org.codelibs.fess.crawler.filter.impl.UrlFilterImpl)3 UrlQueue (org.codelibs.fess.crawler.entity.UrlQueue)2 Date (java.util.Date)1 Crawler (org.codelibs.fess.crawler.Crawler)1 CrawlerContext (org.codelibs.fess.crawler.CrawlerContext)1 RequestData (org.codelibs.fess.crawler.entity.RequestData)1