Usage of org.codelibs.fess.crawler.util.CrawlerWebServer in the fess-crawler project by codelibs:
class CrawlerTest, method test_executeTx.
/**
 * Crawls a local test web server and verifies that the number of stored
 * access results equals the configured maximum access count.
 *
 * @throws Exception if the crawl or temp-directory setup fails
 */
public void test_executeTx() throws Exception {
    final CrawlerWebServer server = new CrawlerWebServer(7070);
    server.start();
    final String url = "http://localhost:7070/";
    try {
        final int maxCount = 50;
        final int numOfThread = 10;
        // Create the transformer output directory atomically. The previous
        // createTempFile()/delete()/mkdirs() sequence ignored the boolean
        // results and was racy between delete() and mkdirs().
        final File file = java.nio.file.Files.createTempDirectory("crawler-").toFile();
        file.deleteOnExit();
        fileTransformer.setPath(file.getAbsolutePath());
        crawler.addUrl(url);
        crawler.getCrawlerContext().setMaxAccessCount(maxCount);
        crawler.getCrawlerContext().setNumOfThread(numOfThread);
        // Restrict crawling to the local test server.
        crawler.urlFilter.addInclude(url + ".*");
        final String sessionId = crawler.execute();
        assertEquals(maxCount, dataService.getCount(sessionId));
        dataService.delete(sessionId);
    } finally {
        server.stop();
    }
}
Usage of org.codelibs.fess.crawler.util.CrawlerWebServer in the fess-crawler project by codelibs:
class CrawlerTest, method test_execute_2instanceTx.
/**
 * Runs two background Crawler instances against two local servers in
 * parallel and verifies that each session's queue and results contain only
 * URLs from its own server.
 *
 * @throws Exception if the crawl or temp-directory setup fails
 */
public void test_execute_2instanceTx() throws Exception {
    final CrawlerWebServer server1 = new CrawlerWebServer(7070);
    server1.start();
    final CrawlerWebServer server2 = new CrawlerWebServer(7071);
    server2.start();
    final String url1 = "http://localhost:7070/";
    final String url2 = "http://localhost:7071/";
    try {
        final int maxCount = 10;
        final int numOfThread = 10;
        // Atomic temp-directory creation (replaces racy delete()/mkdirs()
        // with ignored return values).
        final File file = java.nio.file.Files.createTempDirectory("crawler-").toFile();
        file.deleteOnExit();
        fileTransformer.setPath(file.getAbsolutePath());
        final Crawler crawler1 = getComponent(Crawler.class);
        crawler1.setBackground(true);
        ((UrlFilterImpl) crawler1.urlFilter).setIncludeFilteringPattern("$1$2$3.*");
        crawler1.addUrl(url1);
        crawler1.getCrawlerContext().setMaxAccessCount(maxCount);
        crawler1.getCrawlerContext().setNumOfThread(numOfThread);
        // Stagger the two crawlers slightly so they get distinct session IDs.
        Thread.sleep(100);
        final Crawler crawler2 = getComponent(Crawler.class);
        crawler2.setBackground(true);
        ((UrlFilterImpl) crawler2.urlFilter).setIncludeFilteringPattern("$1$2$3.*");
        crawler2.addUrl(url2);
        crawler2.getCrawlerContext().setMaxAccessCount(maxCount);
        crawler2.getCrawlerContext().setNumOfThread(numOfThread);
        final String sessionId1 = crawler1.execute();
        final String sessionId2 = crawler2.execute();
        // Session IDs must differ by VALUE. assertNotSame only compared
        // references, which two equal-but-distinct String instances would
        // have passed spuriously.
        assertFalse(sessionId1.equals(sessionId2));
        assertNotSame(crawler1.crawlerContext, crawler2.crawlerContext);
        waitUntilRunning(crawler1);
        assertEquals(CrawlerStatus.RUNNING, crawler1.crawlerContext.getStatus());
        waitUntilRunning(crawler2);
        assertEquals(CrawlerStatus.RUNNING, crawler2.crawlerContext.getStatus());
        crawler1.awaitTermination();
        crawler2.awaitTermination();
        assertEquals(maxCount, dataService.getCount(sessionId1));
        assertEquals(maxCount, dataService.getCount(sessionId2));
        UrlQueue urlQueue;
        while ((urlQueue = urlQueueService.poll(sessionId1)) != null) {
            assertTrue(urlQueue.getUrl() + "=>" + url1, urlQueue.getUrl().startsWith(url1));
        }
        while ((urlQueue = urlQueueService.poll(sessionId2)) != null) {
            assertTrue(urlQueue.getUrl() + "=>" + url2, urlQueue.getUrl().startsWith(url2));
        }
        dataService.iterate(sessionId1, accessResult -> assertTrue(accessResult.getUrl().startsWith(url1)));
        dataService.iterate(sessionId2, accessResult -> assertTrue(accessResult.getUrl().startsWith(url2)));
        dataService.delete(sessionId1);
        dataService.delete(sessionId2);
    } finally {
        // Nested try/finally guarantees server2 is stopped even if
        // server1.stop() throws.
        try {
            server1.stop();
        } finally {
            server2.stop();
        }
    }
}

/** Polls up to ~5 seconds (10 x 500ms) for the crawler to reach RUNNING. */
private void waitUntilRunning(final Crawler crawler) throws InterruptedException {
    for (int i = 0; i < 10; i++) {
        if (crawler.crawlerContext.getStatus() == CrawlerStatus.RUNNING) {
            return;
        }
        Thread.sleep(500);
    }
}
Usage of org.codelibs.fess.crawler.util.CrawlerWebServer in the fess-crawler project by codelibs:
class WebDriverClientTest, method test_doGet.
/**
 * Fetches an Ajax-driven page through the WebDriver client and verifies the
 * six extracted child requests: five GETs (four menu fragments plus the bare
 * fragment URL) followed by one form POST.
 */
public void test_doGet() {
    final File docRootDir = new File(ResourceUtil.getBuildDir("ajax/index.html"), "ajax");
    final CrawlerWebServer server = new CrawlerWebServer(7070, docRootDir);
    final String url = "http://localhost:7070/";
    try {
        server.start();
        final ResponseData responseData = webDriverClient.execute(RequestDataBuilder.newRequestData().get().url(url).build());
        assertEquals(200, responseData.getHttpStatusCode());
        assertTrue(new String(InputStreamUtil.getBytes(responseData.getResponseBody()), Constants.UTF_8_CHARSET).contains("Ajax Test"));
        final Set<RequestData> childUrlSet = responseData.getChildUrlSet();
        assertEquals(6, childUrlSet.size());
        // Expected {url, content-marker} pairs for the five GET children,
        // in the set's iteration order.
        final String[][] expectedGets = {
            { "http://localhost:7070/#menu-1-1.html", "MENU 11" },
            { "http://localhost:7070/#menu-1-2.html", "MENU 12" },
            { "http://localhost:7070/#menu-2-1.html", "MENU 21" },
            { "http://localhost:7070/#menu-2-2.html", "MENU 22" },
            { "http://localhost:7070/#", "Ajax Test" },
        };
        final Iterator<RequestData> requestDataIter = childUrlSet.iterator();
        for (final String[] expected : expectedGets) {
            final ResponseData child = webDriverClient.execute(requestDataIter.next());
            assertEquals(Constants.GET_METHOD, child.getMethod());
            assertEquals(expected[0], child.getUrl());
            assertTrue(new String(InputStreamUtil.getBytes(child.getResponseBody()), Constants.UTF_8_CHARSET).contains(expected[1]));
        }
        // The last child request is a form submission via POST.
        final ResponseData postChild = webDriverClient.execute(requestDataIter.next());
        assertEquals(Constants.POST_METHOD, postChild.getMethod());
        assertEquals("http://localhost:7070/form.html", postChild.getUrl());
    } finally {
        server.stop();
    }
}
Usage of org.codelibs.fess.crawler.util.CrawlerWebServer in the fess-crawler project by codelibs:
class CrawlerTest, method test_execute_2instance.
/**
 * Runs two container-resolved Crawler instances with explicit session IDs
 * against two local servers and verifies that each session's queue and
 * results contain only its own server's URLs, fetched via GET.
 *
 * @throws Exception if the crawl or temp-directory setup fails
 */
public void test_execute_2instance() throws Exception {
    final CrawlerWebServer server1 = new CrawlerWebServer(7070);
    server1.start();
    final CrawlerWebServer server2 = new CrawlerWebServer(7071);
    server2.start();
    final String url1 = "http://localhost:7070/";
    final String url2 = "http://localhost:7071/";
    try {
        final int maxCount = 10;
        final int numOfThread = 10;
        // Atomic temp-directory creation (replaces racy delete()/mkdirs()
        // with ignored return values).
        final File file = java.nio.file.Files.createTempDirectory("crawler-").toFile();
        file.deleteOnExit();
        fileTransformer.setPath(file.getAbsolutePath());
        final Crawler crawler1 = container.getComponent("crawler");
        crawler1.setSessionId(crawler1.getSessionId() + "1");
        crawler1.setBackground(true);
        ((UrlFilterImpl) crawler1.urlFilter).setIncludeFilteringPattern("$1$2$3.*");
        crawler1.addUrl(url1);
        crawler1.getCrawlerContext().setMaxAccessCount(maxCount);
        crawler1.getCrawlerContext().setNumOfThread(numOfThread);
        final Crawler crawler2 = container.getComponent("crawler");
        crawler2.setSessionId(crawler2.getSessionId() + "2");
        crawler2.setBackground(true);
        ((UrlFilterImpl) crawler2.urlFilter).setIncludeFilteringPattern("$1$2$3.*");
        crawler2.addUrl(url2);
        crawler2.getCrawlerContext().setMaxAccessCount(maxCount);
        crawler2.getCrawlerContext().setNumOfThread(numOfThread);
        final String sessionId1 = crawler1.execute();
        final String sessionId2 = crawler2.execute();
        // Session IDs must differ by VALUE; assertNotSame only compared
        // references, which equal-but-distinct Strings would have passed.
        assertFalse(sessionId1.equals(sessionId2));
        assertNotSame(crawler1.crawlerContext, crawler2.crawlerContext);
        // Poll for RUNNING instead of a fixed 1-second sleep: the fixed
        // sleep was flaky on slow machines and wasted time on fast ones.
        for (int i = 0; i < 10 && crawler1.crawlerContext.getStatus() != CrawlerStatus.RUNNING; i++) {
            Thread.sleep(500);
        }
        assertEquals(CrawlerStatus.RUNNING, crawler1.crawlerContext.getStatus());
        for (int i = 0; i < 10 && crawler2.crawlerContext.getStatus() != CrawlerStatus.RUNNING; i++) {
            Thread.sleep(500);
        }
        assertEquals(CrawlerStatus.RUNNING, crawler2.crawlerContext.getStatus());
        crawler1.awaitTermination();
        crawler2.awaitTermination();
        assertEquals(maxCount, dataService.getCount(sessionId1));
        assertEquals(maxCount, dataService.getCount(sessionId2));
        UrlQueue urlQueue;
        while ((urlQueue = urlQueueService.poll(sessionId1)) != null) {
            assertTrue(urlQueue.getUrl().startsWith(url1));
        }
        while ((urlQueue = urlQueueService.poll(sessionId2)) != null) {
            assertTrue(urlQueue.getUrl().startsWith(url2));
        }
        dataService.iterate(sessionId1, accessResult -> {
            assertTrue(accessResult.getUrl().startsWith(url1));
            assertEquals(Constants.GET_METHOD, accessResult.getMethod());
        });
        dataService.iterate(sessionId2, accessResult -> {
            assertTrue(accessResult.getUrl().startsWith(url2));
            assertEquals(Constants.GET_METHOD, accessResult.getMethod());
        });
        dataService.delete(sessionId1);
        dataService.delete(sessionId2);
    } finally {
        // Nested try/finally guarantees server2 is stopped even if
        // server1.stop() throws.
        try {
            server1.stop();
        } finally {
            server2.stop();
        }
    }
}
Usage of org.codelibs.fess.crawler.util.CrawlerWebServer in the fess-crawler project by codelibs:
class CrawlerTest, method test_execute_textSitemaps.
/**
 * Seeds the crawl with a sitemaps file from the local test server and
 * verifies the number of stored access results equals the configured
 * maximum access count.
 *
 * @throws Exception if the crawl or temp-directory setup fails
 */
public void test_execute_textSitemaps() throws Exception {
    final CrawlerWebServer server = new CrawlerWebServer(7070);
    server.start();
    final String url = "http://localhost:7070/";
    try {
        final int maxCount = 50;
        final int numOfThread = 10;
        // Atomic temp-directory creation (replaces racy delete()/mkdirs()
        // with ignored return values).
        final File file = java.nio.file.Files.createTempDirectory("crawler-").toFile();
        file.deleteOnExit();
        fileTransformer.setPath(file.getAbsolutePath());
        crawler.addUrl(url + "sitemaps.xml");
        // Use the accessor for consistency with the other tests in this
        // class rather than reaching into the field directly.
        crawler.getCrawlerContext().setMaxAccessCount(maxCount);
        crawler.getCrawlerContext().setNumOfThread(numOfThread);
        crawler.urlFilter.addInclude(url + ".*");
        final String sessionId = crawler.execute();
        assertEquals(maxCount, dataService.getCount(sessionId));
        dataService.delete(sessionId);
    } finally {
        server.stop();
    }
}
Aggregations