Use of org.apache.stanbol.enhancer.engines.htmlextractor.impl.ExtractorException in the Apache Stanbol project.
From the class HtmlExtractorEngine, method computeEnhancements.
/**
 * Runs the HTML extractor over the given content item and adds the extracted
 * triples to the content item's metadata.
 *
 * <p>The content is read while holding the content item's read lock. The
 * extracted triples are collected in a method-local graph, post-processed
 * without any lock held, and finally added to the metadata while holding the
 * write lock.
 *
 * @param ci the content item whose stream is parsed and whose metadata receives
 *           the extracted triples
 * @throws EngineException if the extractor fails with an {@link ExtractorException};
 *                         the original exception is preserved as the cause
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    HtmlExtractor extractor = new HtmlExtractor(htmlExtractorRegistry, htmlParser);
    // Extraction results are gathered in a private graph first, so the write
    // lock on the content item is only needed for the final addAll(..).
    Graph model = new SimpleGraph();
    ci.getLock().readLock().lock();
    try {
        // NOTE(review): the stream returned by ci.getStream() is not closed in
        // this method - confirm whether HtmlExtractor.extract(..) closes it.
        extractor.extract(ci.getUri().getUnicodeString(), ci.getStream(), null, ci.getMimeType(), model);
    } catch (ExtractorException e) {
        throw new EngineException("Error while processing ContentItem " + ci.getUri() + " with HtmlExtractor", e);
    } finally {
        ci.getLock().readLock().unlock();
    }
    ClerezzaRDFUtils.urifyBlankNodes(model);
    // make the model single rooted
    if (singleRootRdf) {
        ClerezzaRDFUtils.makeConnected(model, ci.getUri(), new IRI(NIE_NS + "contains"));
    }
    // Log before taking the write lock: 'model' is local to this call, so
    // rendering it for the log message does not need to block other users of
    // the content item.
    LOG.info("Model: {}", model);
    // add the extracted triples to the metadata of the ContentItem
    ci.getLock().writeLock().lock();
    try {
        ci.getMetadata().addAll(model);
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
Aggregations