Use of org.codelibs.fess.crawler.filter.impl.UrlFilterImpl in the fess-crawler project by codelibs.
From the class CrawlerTest, method test_execute_bg.
/**
 * Starts the crawler in background mode against a local test server and
 * checks that it reports {@code RUNNING} after {@code execute()} returns,
 * then that exactly {@code maxCount} access results were stored for the
 * session once the crawl has terminated.
 *
 * @throws Exception if the server, the temp directory, or the crawl fails
 */
public void test_execute_bg() throws Exception {
    final CrawlerWebServer server = new CrawlerWebServer(7070);
    server.start();
    try {
        final String url = "http://localhost:7070/";
        final int maxCount = 50;
        final int numOfThread = 10;

        // Create the output directory atomically instead of the racy
        // createTempFile / delete / mkdirs sequence, whose return values
        // were silently ignored.
        final File file = java.nio.file.Files.createTempDirectory("crawler-").toFile();
        file.deleteOnExit();
        fileTransformer.setPath(file.getAbsolutePath());

        crawler.setBackground(true);
        ((UrlFilterImpl) crawler.urlFilter).setIncludeFilteringPattern("$1$2$3.*");
        crawler.addUrl(url);
        crawler.getCrawlerContext().setMaxAccessCount(maxCount);
        crawler.getCrawlerContext().setNumOfThread(numOfThread);

        final String sessionId = crawler.execute();

        // Poll (up to ~5s) for the RUNNING state rather than sleeping a fixed
        // interval, so the check does not depend on how fast the crawl is.
        for (int i = 0; i < 10; i++) {
            if (crawler.crawlerContext.getStatus() == CrawlerStatus.RUNNING) {
                break;
            }
            Thread.sleep(500);
        }
        assertEquals(CrawlerStatus.RUNNING, crawler.crawlerContext.getStatus());

        crawler.awaitTermination();
        assertEquals(maxCount, dataService.getCount(sessionId));
        dataService.delete(sessionId);
    } finally {
        server.stop();
    }
}
Use of org.codelibs.fess.crawler.filter.impl.UrlFilterImpl in the fess-crawler project by codelibs.
From the class CrawlerTest, method test_execute_2instanceTx.
/**
 * Runs two crawler instances (obtained via {@code getComponent(Crawler.class)})
 * in background mode, each against its own local test server, and checks
 * that their sessions stay separate: distinct session ids and contexts,
 * {@code maxCount} stored results per session, and every queued or stored
 * URL of a session starting with that session's server URL.
 *
 * @throws Exception if a server, the temp directory, or a crawl fails
 */
public void test_execute_2instanceTx() throws Exception {
    final String url1 = "http://localhost:7070/";
    final String url2 = "http://localhost:7071/";

    final CrawlerWebServer server1 = new CrawlerWebServer(7070);
    server1.start();
    try {
        // Start the second server inside the first server's try block so
        // that server1 is still stopped if server2 fails to start.
        final CrawlerWebServer server2 = new CrawlerWebServer(7071);
        server2.start();
        try {
            final int maxCount = 10;
            final int numOfThread = 10;

            // Create the output directory atomically instead of the racy
            // createTempFile / delete / mkdirs sequence.
            final File file = java.nio.file.Files.createTempDirectory("crawler-").toFile();
            file.deleteOnExit();
            fileTransformer.setPath(file.getAbsolutePath());

            final Crawler crawler1 = getComponent(Crawler.class);
            crawler1.setBackground(true);
            ((UrlFilterImpl) crawler1.urlFilter).setIncludeFilteringPattern("$1$2$3.*");
            crawler1.addUrl(url1);
            crawler1.getCrawlerContext().setMaxAccessCount(maxCount);
            crawler1.getCrawlerContext().setNumOfThread(numOfThread);

            // NOTE(review): presumably this pause keeps the two instances'
            // generated session ids distinct - confirm against Crawler.
            Thread.sleep(100);

            final Crawler crawler2 = getComponent(Crawler.class);
            crawler2.setBackground(true);
            ((UrlFilterImpl) crawler2.urlFilter).setIncludeFilteringPattern("$1$2$3.*");
            crawler2.addUrl(url2);
            crawler2.getCrawlerContext().setMaxAccessCount(maxCount);
            crawler2.getCrawlerContext().setNumOfThread(numOfThread);

            final String sessionId1 = crawler1.execute();
            final String sessionId2 = crawler2.execute();

            // Compare the ids by value: assertNotSame on Strings only checks
            // reference identity and would accept two equal ids.
            assertFalse(sessionId1.equals(sessionId2));
            assertNotSame(crawler1.crawlerContext, crawler2.crawlerContext);

            // Wait (up to ~5s each) for both crawlers to reach RUNNING.
            for (int i = 0; i < 10; i++) {
                if (crawler1.crawlerContext.getStatus() == CrawlerStatus.RUNNING) {
                    break;
                }
                Thread.sleep(500);
            }
            assertEquals(CrawlerStatus.RUNNING, crawler1.crawlerContext.getStatus());
            for (int i = 0; i < 10; i++) {
                if (crawler2.crawlerContext.getStatus() == CrawlerStatus.RUNNING) {
                    break;
                }
                Thread.sleep(500);
            }
            assertEquals(CrawlerStatus.RUNNING, crawler2.crawlerContext.getStatus());

            crawler1.awaitTermination();
            crawler2.awaitTermination();

            assertEquals(maxCount, dataService.getCount(sessionId1));
            assertEquals(maxCount, dataService.getCount(sessionId2));

            // Drain whatever is left in each session's queue; every entry
            // must belong to that session's server.
            UrlQueue urlQueue;
            while ((urlQueue = urlQueueService.poll(sessionId1)) != null) {
                assertTrue(urlQueue.getUrl() + "=>" + url1, urlQueue.getUrl().startsWith(url1));
            }
            while ((urlQueue = urlQueueService.poll(sessionId2)) != null) {
                assertTrue(urlQueue.getUrl() + "=>" + url2, urlQueue.getUrl().startsWith(url2));
            }

            dataService.iterate(sessionId1, accessResult -> assertTrue(accessResult.getUrl().startsWith(url1)));
            dataService.iterate(sessionId2, accessResult -> assertTrue(accessResult.getUrl().startsWith(url2)));

            dataService.delete(sessionId1);
            dataService.delete(sessionId2);
        } finally {
            server2.stop();
        }
    } finally {
        server1.stop();
    }
}
Use of org.codelibs.fess.crawler.filter.impl.UrlFilterImpl in the fess-crawler project by codelibs.
From the class CrawlerTest, method test_execute_2instance.
/**
 * Runs two crawler instances (looked up as {@code "crawler"} from the
 * container, with "1" / "2" appended to their session ids) in background
 * mode, each against its own local test server, and checks that their
 * sessions stay separate: distinct session ids and contexts,
 * {@code maxCount} stored results per session, every queued or stored URL
 * starting with that session's server URL, and every stored result using
 * {@code Constants.GET_METHOD}.
 *
 * @throws Exception if a server, the temp directory, or a crawl fails
 */
public void test_execute_2instance() throws Exception {
    final String url1 = "http://localhost:7070/";
    final String url2 = "http://localhost:7071/";

    final CrawlerWebServer server1 = new CrawlerWebServer(7070);
    server1.start();
    try {
        // Start the second server inside the first server's try block so
        // that server1 is still stopped if server2 fails to start.
        final CrawlerWebServer server2 = new CrawlerWebServer(7071);
        server2.start();
        try {
            final int maxCount = 10;
            final int numOfThread = 10;

            // Create the output directory atomically instead of the racy
            // createTempFile / delete / mkdirs sequence.
            final File file = java.nio.file.Files.createTempDirectory("crawler-").toFile();
            file.deleteOnExit();
            fileTransformer.setPath(file.getAbsolutePath());

            final Crawler crawler1 = container.getComponent("crawler");
            crawler1.setSessionId(crawler1.getSessionId() + "1");
            crawler1.setBackground(true);
            ((UrlFilterImpl) crawler1.urlFilter).setIncludeFilteringPattern("$1$2$3.*");
            crawler1.addUrl(url1);
            crawler1.getCrawlerContext().setMaxAccessCount(maxCount);
            crawler1.getCrawlerContext().setNumOfThread(numOfThread);

            final Crawler crawler2 = container.getComponent("crawler");
            crawler2.setSessionId(crawler2.getSessionId() + "2");
            crawler2.setBackground(true);
            ((UrlFilterImpl) crawler2.urlFilter).setIncludeFilteringPattern("$1$2$3.*");
            crawler2.addUrl(url2);
            crawler2.getCrawlerContext().setMaxAccessCount(maxCount);
            crawler2.getCrawlerContext().setNumOfThread(numOfThread);

            final String sessionId1 = crawler1.execute();
            final String sessionId2 = crawler2.execute();

            // Compare the ids by value: assertNotSame on Strings only checks
            // reference identity and would accept two equal ids.
            assertFalse(sessionId1.equals(sessionId2));
            assertNotSame(crawler1.crawlerContext, crawler2.crawlerContext);

            // Wait (up to ~5s each) for both crawlers to reach RUNNING
            // instead of relying on a fixed one-second sleep.
            for (int i = 0; i < 10; i++) {
                if (crawler1.crawlerContext.getStatus() == CrawlerStatus.RUNNING) {
                    break;
                }
                Thread.sleep(500);
            }
            assertEquals(CrawlerStatus.RUNNING, crawler1.crawlerContext.getStatus());
            for (int i = 0; i < 10; i++) {
                if (crawler2.crawlerContext.getStatus() == CrawlerStatus.RUNNING) {
                    break;
                }
                Thread.sleep(500);
            }
            assertEquals(CrawlerStatus.RUNNING, crawler2.crawlerContext.getStatus());

            crawler1.awaitTermination();
            crawler2.awaitTermination();

            assertEquals(maxCount, dataService.getCount(sessionId1));
            assertEquals(maxCount, dataService.getCount(sessionId2));

            // Drain whatever is left in each session's queue; every entry
            // must belong to that session's server.
            UrlQueue urlQueue;
            while ((urlQueue = urlQueueService.poll(sessionId1)) != null) {
                assertTrue(urlQueue.getUrl().startsWith(url1));
            }
            while ((urlQueue = urlQueueService.poll(sessionId2)) != null) {
                assertTrue(urlQueue.getUrl().startsWith(url2));
            }

            dataService.iterate(sessionId1, accessResult -> {
                assertTrue(accessResult.getUrl().startsWith(url1));
                assertEquals(Constants.GET_METHOD, accessResult.getMethod());
            });
            dataService.iterate(sessionId2, accessResult -> {
                assertTrue(accessResult.getUrl().startsWith(url2));
                assertEquals(Constants.GET_METHOD, accessResult.getMethod());
            });

            dataService.delete(sessionId1);
            dataService.delete(sessionId2);
        } finally {
            server2.stop();
        }
    } finally {
        server1.stop();
    }
}
Aggregations