use of org.codelibs.fess.crawler.exception.CrawlerSystemException in project fess-crawler by codelibs.
the class XmlTransformer method transform.
/**
 * Parses the response body as XML and assembles a {@link ResultData} from the
 * nodes selected by each rule in {@code fieldRuleMap}.
 *
 * <p>The parser factory is configured from {@code attributeMap},
 * {@code featureMap} and the boolean parser settings of this transformer.
 * A rule that cannot be evaluated is logged and contributes nothing to the
 * output. The generated text is encoded with {@code charsetName}; when that
 * charset is unsupported, {@code charsetName} is switched to UTF-8.
 *
 * @param responseData the crawled response; must carry a response body
 * @return the result data holding the encoded content and its encoding name
 * @throws CrawlingAccessException if {@code responseData} is {@code null} or has no body
 * @throws CrawlerSystemException if parsing or building the result fails
 * @see org.codelibs.fess.crawler.transformer.impl.AbstractTransformer#transform(org.codelibs.fess.crawler.entity.ResponseData)
 */
@Override
public ResultData transform(final ResponseData responseData) {
    final boolean bodyAvailable = responseData != null && responseData.hasResponseBody();
    if (!bodyAvailable) {
        throw new CrawlingAccessException("No response body.");
    }

    try (final InputStream bodyStream = responseData.getResponseBody()) {
        // Configure the parser factory from the transformer's settings.
        final DocumentBuilderFactory parserFactory = DocumentBuilderFactory.newInstance();
        for (final Map.Entry<String, Object> attribute : attributeMap.entrySet()) {
            parserFactory.setAttribute(attribute.getKey(), attribute.getValue());
        }
        for (final Map.Entry<String, String> feature : featureMap.entrySet()) {
            final boolean enabled = "true".equalsIgnoreCase(feature.getValue());
            parserFactory.setFeature(feature.getKey(), enabled);
        }
        parserFactory.setCoalescing(coalescing);
        parserFactory.setExpandEntityReferences(expandEntityRef);
        parserFactory.setIgnoringComments(ignoringComments);
        parserFactory.setIgnoringElementContentWhitespace(ignoringElementContentWhitespace);
        parserFactory.setNamespaceAware(namespaceAware);
        parserFactory.setValidating(validating);
        parserFactory.setXIncludeAware(includeAware);

        final DocumentBuilder parser = parserFactory.newDocumentBuilder();
        final Document doc = parser.parse(bodyStream);

        // Header, one body fragment per rule that matched, additional data, footer.
        final StringBuilder content = new StringBuilder(1000);
        content.append(getResultDataHeader());
        for (final Map.Entry<String, String> rule : fieldRuleMap.entrySet()) {
            final String fieldName = rule.getKey();
            final String expression = rule.getValue();
            final List<String> values = new ArrayList<>();
            try {
                final NodeList matches = getNodeList(doc, expression);
                int index = 0;
                while (index < matches.getLength()) {
                    final Node match = matches.item(index);
                    values.add(match.getTextContent());
                    index++;
                }
            } catch (final TransformerException e) {
                logger.warn("Could not parse a value of " + fieldName + ":" + expression, e);
            }

            // A single match is written as a scalar, several matches as a list,
            // and no match writes nothing.
            final int found = values.size();
            if (found == 1) {
                content.append(getResultDataBody(fieldName, values.get(0)));
            } else if (found > 1) {
                content.append(getResultDataBody(fieldName, values));
            }
        }
        content.append(getAdditionalData(responseData, doc));
        content.append(getResultDataFooter());

        final ResultData result = new ResultData();
        result.setTransformerName(getName());
        final String text = content.toString().trim();
        byte[] encoded;
        try {
            encoded = text.getBytes(charsetName);
        } catch (final UnsupportedEncodingException e) {
            if (logger.isInfoEnabled()) {
                logger.info("Invalid charsetName: " + charsetName + ". Changed to " + Constants.UTF_8, e);
            }
            // Remember the fallback so the reported encoding matches the bytes.
            charsetName = Constants.UTF_8_CHARSET.name();
            encoded = text.getBytes(Constants.UTF_8_CHARSET);
        }
        result.setData(encoded);
        result.setEncoding(charsetName);
        return result;
    } catch (final CrawlerSystemException e) {
        // Already the crawler's own exception type; do not wrap it again.
        throw e;
    } catch (final Exception e) {
        throw new CrawlerSystemException("Could not store data.", e);
    }
}
use of org.codelibs.fess.crawler.exception.CrawlerSystemException in project fess-crawler by codelibs.
the class XpathTransformer method getData.
/**
 * Returns the data of the given access result in the form selected by
 * {@code dataClass}.
 *
 * <ul>
 * <li>When {@code dataClass} is {@code null}, the superclass implementation
 * is used.</li>
 * <li>When {@code dataClass} is {@code Map.class}, the map produced by
 * {@code XmlUtil.getDataMap} is returned as is.</li>
 * <li>Otherwise a new {@code dataClass} instance is created through its
 * no-argument constructor and filled from that map with
 * {@code BeanUtil.copyMapToBean}.</li>
 * </ul>
 *
 * @param accessResultData the stored access result to read from
 * @return the superclass result, the data map, or a populated {@code dataClass} instance
 * @throws CrawlerSystemException if the instance cannot be created or populated
 */
@Override
public Object getData(final AccessResultData<?> accessResultData) {
    if (dataClass == null) {
        return super.getData(accessResultData);
    }
    final Map<String, Object> dataMap = XmlUtil.getDataMap(accessResultData);
    if (Map.class.equals(dataClass)) {
        return dataMap;
    }
    try {
        // Class.newInstance() is deprecated (Java 9+) because it rethrows checked
        // constructor exceptions unchecked; go through the constructor instead.
        final Object obj = dataClass.getDeclaredConstructor().newInstance();
        BeanUtil.copyMapToBean(dataMap, obj);
        return obj;
    } catch (final Exception e) {
        throw new CrawlerSystemException("Could not create/copy a data map to " + dataClass, e);
    }
}
use of org.codelibs.fess.crawler.exception.CrawlerSystemException in project fess-crawler by codelibs.
the class HtmlTransformerTest method test_getData_wrongName.
/**
 * Expects {@code htmlTransformer.getData} to throw a
 * {@link CrawlerSystemException} for access result data whose transformer
 * name is set to "transformer".
 */
public void test_getData_wrongName() throws Exception {
    final AccessResultDataImpl stored = new AccessResultDataImpl();
    stored.setData("<html><body>hoge</body></html>".getBytes());
    stored.setEncoding(Constants.UTF_8);
    stored.setTransformerName("transformer");

    boolean rejected = false;
    try {
        htmlTransformer.getData(stored);
    } catch (final CrawlerSystemException expected) {
        // This is the outcome the test is looking for.
        rejected = true;
    }
    if (!rejected) {
        fail();
    }
}
use of org.codelibs.fess.crawler.exception.CrawlerSystemException in project fess-crawler by codelibs.
the class TextTransformerTest method test_getData_wrongName.
/**
 * Expects {@code textTransformer.getData} to throw a
 * {@link CrawlerSystemException} for access result data whose transformer
 * name is set to "transformer".
 */
public void test_getData_wrongName() throws Exception {
    final AccessResultDataImpl stored = new AccessResultDataImpl();
    stored.setTransformerName("transformer");
    stored.setData("xyz".getBytes());

    boolean rejected = false;
    try {
        textTransformer.getData(stored);
    } catch (final CrawlerSystemException expected) {
        // This is the outcome the test is looking for.
        rejected = true;
    }
    if (!rejected) {
        fail();
    }
}
use of org.codelibs.fess.crawler.exception.CrawlerSystemException in project fess-crawler by codelibs.
the class XmlTransformerTest method test_getData_wrongName.
/**
 * Expects {@code xmlTransformer.getData} to throw a
 * {@link CrawlerSystemException} for access result data whose transformer
 * name is set to "transformer", even though the stored bytes are a
 * well-formed XML document.
 */
public void test_getData_wrongName() throws Exception {
    final String xml = "<?xml version=\"1.0\"?>\n"
            + "<doc>\n"
            + "<field name=\"title\">タイトル</field>\n"
            + "<field name=\"body\">第一章 第一節 ほげほげふがふが LINK 第2章 第2節</field>\n"
            + "</doc>";

    final AccessResultDataImpl stored = new AccessResultDataImpl();
    stored.setData(xml.getBytes(Constants.UTF_8));
    stored.setEncoding(Constants.UTF_8);
    stored.setTransformerName("transformer");

    boolean rejected = false;
    try {
        xmlTransformer.getData(stored);
    } catch (final CrawlerSystemException expected) {
        // This is the outcome the test is looking for.
        rejected = true;
    }
    if (!rejected) {
        fail();
    }
}
Aggregations