Use of org.asqatasun.crawler.exception.CrawlerException in project Asqatasun by Asqatasun.
In the class AsqatasunCrawlJob, method initializeCrawlContext.
/**
 * Prepares the crawl configuration file for the current job.
 * <p>
 * The XML file named {@code heritrixFileName} is read from
 * {@code crawlConfigFilePath}, passed through
 * {@code setOptionToDocument} together with the URLs and the crawl
 * parameters, and the resulting document is serialized under the same
 * file name into {@code currentJobOutputDir}.
 *
 * @param urlList the URLs handed to {@code setOptionToDocument}
 * @param crawlParameterSet the crawl parameters handed to
 *        {@code setOptionToDocument}
 * @param heritrixFileName the name of the configuration file, used both
 *        for the source (under {@code crawlConfigFilePath}) and for the
 *        written copy (under {@code currentJobOutputDir})
 * @return the file written in the current job output directory
 * @throws CrawlerException if the source file cannot be read or parsed,
 *         or if the resulting document cannot be written
 */
private File initializeCrawlContext(Collection<String> urlList, Set<Parameter> crawlParameterSet, String heritrixFileName) {
    buildOutputDirectory();
    // Single definition of the destination, shared by the writer and the return value.
    File outputFile = new File(currentJobOutputDir.getPath() + "/" + heritrixFileName);
    try {
        LOGGER.debug("crawlConfigFilePath: " + crawlConfigFilePath + " for copy");
        String filepath = crawlConfigFilePath + "/" + heritrixFileName;
        DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
        // NOTE(review): parse(String) interprets its argument as a URI, not a
        // plain file path - confirm crawlConfigFilePath is always URI-safe.
        Document doc = docBuilder.parse(filepath);
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("filepath : " + filepath);
            for (Parameter param : crawlParameterSet) {
                LOGGER.debug(param.getParameterElement().getParameterElementCode() + " " + param.getValue());
            }
        }
        doc = setOptionToDocument(urlList, crawlParameterSet, doc);

        // Write the modified document into the job output directory.
        TransformerFactory transformerFactory = TransformerFactory.newInstance();
        Transformer transformer = transformerFactory.newTransformer();
        transformer.transform(new DOMSource(doc), new StreamResult(outputFile));
    } catch (IOException | ParserConfigurationException | SAXException | TransformerException ex) {
        // TransformerConfigurationException is a TransformerException, so it
        // is covered by this clause as well.
        LOGGER.error(ex);
        throw new CrawlerException(ex);
    }
    return outputFile;
}
Aggregations