Use of org.asqatasun.contentadapter.html.HTMLCleanerImpl in project Asqatasun by Asqatasun.
Class ContentsAdapterImpl, method run.
/**
 * Adapts each fetched SSP of the given contents and returns the adapted ones.
 *
 * <p>An element is processed only when it is an {@code SSP} whose HTTP status
 * code is 200; every other element is skipped and is absent from the result.
 * For each processed SSP, in order:
 * <ol>
 *   <li>its source is run through {@code htmlCleaner} and the result is stored
 *       as the SSP's adapted content;</li>
 *   <li>{@code writeCleanDomInFile} is called with the SSP;</li>
 *   <li>{@code htmlParser} is run on it when
 *       {@code parseAndRetrievelRelatedContent} is set;</li>
 *   <li>when {@code xmlizeContent} is set, the adapted content is cleaned again
 *       with a new {@code HTMLCleanerImpl} and passed through
 *       {@code DocumentCaseInsensitiveAdapter.removeLowerCaseTags}.</li>
 * </ol>
 *
 * @param contentList the contents to examine
 * @return a new collection holding the SSPs that were adapted, in iteration order
 */
private Collection<Content> run(Collection<Content> contentList) {
    Collection<Content> adaptedContents = new ArrayList<>();

    for (Content candidate : contentList) {
        // Only fetched content (HttpStatus=200) carried by an SSP is handled here.
        if (!(candidate instanceof SSP) || candidate.getHttpStatusCode() != 200) {
            continue;
        }

        LOGGER.debug("Adapting " + candidate.getURI());
        SSP page = (SSP) candidate;

        // First cleaning pass with the cleaner held by this adapter.
        htmlCleaner.setDirtyHTML(page.getSource());
        htmlCleaner.run();
        page.setAdaptedContent(htmlCleaner.getResult());
        // Clear the cleaner's input once its result has been read.
        htmlCleaner.setDirtyHTML(null);

        writeCleanDomInFile(page);

        if (!parseAndRetrievelRelatedContent) {
            LOGGER.debug("no Html parse executed for the current audit");
        } else {
            htmlParser.setSSP(page);
            htmlParser.run();
        }

        if (xmlizeContent) {
            // Second cleaning pass on the already adapted content, using a fresh cleaner.
            AbstractHTMLCleaner xmlizer = new HTMLCleanerImpl();
            xmlizer.setDirtyHTML(page.getAdaptedContent());
            xmlizer.run();
            page.setAdaptedContent(
                    DocumentCaseInsensitiveAdapter.removeLowerCaseTags(xmlizer.getResult()));
        }

        adaptedContents.add(page);
    }

    return adaptedContents;
}
Aggregations