use of org.codelibs.fess.crawler.entity.ResponseData in project fess by codelibs.
the class FessXpathTransformerTest method test_canonicalXpath.
/**
 * Exercises {@code putAdditionalData} against documents with and without a
 * canonical link element, for a response whose URL is {@code http://example.com/}.
 *
 * <p>Two groups of inputs are checked:
 * <ul>
 *   <li>no canonical link, or a canonical link equal to the response URL:
 *       the call is expected to end in a {@code ComponentNotFoundException};</li>
 *   <li>a canonical link pointing at a different URL (inside or outside
 *       {@code <head>}): the call is expected to throw a {@code ChildUrlsException}
 *       carrying exactly that one URL.</li>
 * </ul>
 */
public void test_canonicalXpath() throws Exception {
    final FessXpathTransformer xpathTransformer = new FessXpathTransformer();
    xpathTransformer.init();

    final Map<String, Object> additionalData = new HashMap<>();
    final ResponseData response = new ResponseData();
    response.setUrl("http://example.com/");

    // Inputs for which no redirect to a canonical URL is expected.
    // NOTE(review): the ComponentNotFoundException presumably comes from code that runs
    // after the canonical check and needs a component missing in this test — confirm.
    final String[] pagesWithoutRedirect = {
            "<html><body>aaa</body></html>",
            "<html><head><link rel=\"canonical\" href=\"http://example.com/\"></head><body>aaa</body></html>" };
    for (final String html : pagesWithoutRedirect) {
        final Document parsed = getDocument(html);
        try {
            xpathTransformer.putAdditionalData(additionalData, response, parsed);
            fail();
        } catch (final ComponentNotFoundException expected) {
            // ignore
        }
    }

    // Inputs whose canonical URL differs from the response URL.
    final String[] pagesWithRedirect = {
            "<html><head><link rel=\"canonical\" href=\"http://example.com/foo\"></head><body>aaa</body></html>",
            "<html><link rel=\"canonical\" href=\"http://example.com/foo\"><body>aaa</body></html>" };
    for (final String html : pagesWithRedirect) {
        final Document parsed = getDocument(html);
        try {
            xpathTransformer.putAdditionalData(additionalData, response, parsed);
            fail();
        } catch (final ChildUrlsException redirect) {
            final Set<RequestData> children = redirect.getChildUrlList();
            assertEquals(1, children.size());
            final RequestData onlyChild = children.iterator().next();
            assertEquals("http://example.com/foo", onlyChild.getUrl());
        }
    }
}
use of org.codelibs.fess.crawler.entity.ResponseData in project fess by codelibs.
the class DocumentHelperTest method test_getContent_maxSymbol.
/**
 * Checks {@code DocumentHelper.getContent} on a helper whose
 * {@code getMaxSymbolTermSize()} is overridden to return 2.
 *
 * <p>Each row of the table is {@code {expected, input}}. Besides the
 * whitespace cases, the table expects a run of three identical symbols to
 * come back as two, and the full ASCII punctuation sequence to come back as
 * its first two characters.
 */
public void test_getContent_maxSymbol() {
    final DocumentHelper helper = new DocumentHelper() {
        protected int getMaxSymbolTermSize() {
            return 2;
        }
    };
    final ResponseData response = new ResponseData();
    final Map<String, Object> extracted = new HashMap<>();

    final String[][] expectations = {
            // blank-like inputs yield an empty string
            { "", null },
            { "", "" },
            { "", " " },
            // NOTE(review): same input as the row above — confirm whether a longer
            // whitespace run was intended here.
            { "", " " },
            { "", "\t" },
            { "", "\t\t" },
            { "", "\t \t" },
            // surrounding whitespace is dropped, a newline between words becomes a space
            { "123 abc", " 123 abc " },
            { "123 あいう", " 123 あいう " },
            { "123 abc", " 123\nabc " },
            { "123abc", " 123abc " },
            // symbol runs are limited to two characters
            { "!!", "!!!" },
            { "//", "///" },
            { "::", ":::" },
            { "@@", "@@@" },
            { "[[", "[[[" },
            { "``", "```" },
            { "{{", "{{{" },
            { "~~", "~~~" },
            { "!\"", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" } };

    for (final String[] row : expectations) {
        final String expected = row[0];
        final String input = row[1];
        assertEquals(expected, helper.getContent(response, input, extracted));
    }
}
use of org.codelibs.fess.crawler.entity.ResponseData in project fess by codelibs.
the class DocumentHelperTest method test_getContent.
/**
 * Checks {@code DocumentHelper.getContent} with a default-constructed helper.
 *
 * <p>Each row of the table is {@code {expected, input}}: null, empty and
 * whitespace-only inputs are expected to produce an empty string, and text
 * with surrounding whitespace or an embedded newline is expected to come
 * back trimmed with single spaces between words.
 */
public void test_getContent() {
    final DocumentHelper helper = new DocumentHelper();
    final ResponseData response = new ResponseData();
    final Map<String, Object> extracted = new HashMap<>();

    final String[][] expectations = {
            { "", null },
            { "", "" },
            { "", " " },
            // NOTE(review): same input as the row above — confirm whether a longer
            // whitespace run was intended here.
            { "", " " },
            { "", "\t" },
            { "", "\t\t" },
            { "", "\t \t" },
            { "123 abc", " 123 abc " },
            { "123 あいう", " 123 あいう " },
            { "123 abc", " 123\nabc " } };

    for (final String[] row : expectations) {
        final String expected = row[0];
        final String input = row[1];
        assertEquals(expected, helper.getContent(response, input, extracted));
    }
}
use of org.codelibs.fess.crawler.entity.ResponseData in project fess by codelibs.
the class FessXpathTransformerTest method assertGetThumbnailUrl.
/**
 * Parses {@code data} into a document, asks a freshly initialised
 * {@code FessXpathTransformer} for its thumbnail URL, and asserts the result.
 * The response URL is fixed to {@code http://example.com/}.
 *
 * @param data     markup to parse via {@code getDocument}
 * @param expected value that {@code getThumbnailUrl} must return
 * @throws Exception propagated from parsing or from the transformer
 */
private void assertGetThumbnailUrl(String data, String expected) throws Exception {
    final Document parsed = getDocument(data);

    final FessXpathTransformer xpathTransformer = new FessXpathTransformer();
    xpathTransformer.init();

    final ResponseData response = new ResponseData();
    response.setUrl("http://example.com/");

    final String actual = xpathTransformer.getThumbnailUrl(response, parsed);
    assertEquals(expected, actual);
}
use of org.codelibs.fess.crawler.entity.ResponseData in project fess by codelibs.
the class FessXpathTransformerTest method test_processMetaRobots_no.
/**
 * Runs {@code processMetaRobots} on a document that contains no meta
 * elements and asserts that the response is not flagged as no-follow
 * afterwards.
 */
public void test_processMetaRobots_no() throws Exception {
    final Document parsed = getDocument("<html><body>foo</body></html>");

    // The transformer is used as constructed; init() is not called in this test.
    final FessXpathTransformer xpathTransformer = new FessXpathTransformer();

    final ResponseData response = new ResponseData();
    response.setUrl("http://example.com/");

    final ResultData result = new ResultData();
    xpathTransformer.processMetaRobots(response, result, parsed);

    assertFalse(response.isNoFollow());
}
Aggregations