Use of org.ambraproject.rhino.service.taxonomy.TaxonomyRemoteServiceNotAvailableException in project rhino by PLOS.
From the class TaxonomyClassificationServiceImpl, method getRawTerms.
/**
 * {@inheritDoc}
 * <p>
 * Builds an XML request from the article's categorization content plus a header describing the
 * latest revision's ingestion (publication date, journal title, article type) and the article
 * DOI, POSTs it to the configured taxonomy server, and collects the text of the returned
 * {@code VectorElement} nodes, skipping the first and the last one.
 *
 * @param articleXml     the article document from which the categorization content is extracted
 * @param article        the article whose latest revision and DOI are used to build the header
 * @param isTextRequired if {@code true}, the (XML-unescaped) text sent for categorization is added
 *                       as the first element of the returned list
 * @return the raw terms returned by the taxonomy server, optionally preceded by the submitted text
 * @throws TaxonomyRemoteServiceNotAvailableException    if the request fails with an I/O error or
 *                                                       the server answers without a response body
 * @throws TaxonomyRemoteServiceInvalidBehaviorException if the response body is not parseable XML
 */
@Override
public List<String> getRawTerms(Document articleXml, Article article, boolean isTextRequired) {
  RuntimeConfiguration.TaxonomyConfiguration configuration = getTaxonomyConfiguration();
  String toCategorize = getCategorizationContent(articleXml);
  ArticleIngestion latest = articleCrudService.readLatestRevision(article).getIngestion();

  String header = String.format(MESSAGE_HEADER,
      new SimpleDateFormat("yyyy-MM-dd").format(latest.getPublicationDate()),
      latest.getJournal().getTitle(),
      latest.getArticleType(),
      article.getDoi());

  // The document element is XML-escaped before being embedded between MESSAGE_BEGIN and MESSAGE_END.
  String aiMessage = String.format(MESSAGE_BEGIN, configuration.getThesaurus())
      + StringEscapeUtils.escapeXml10(String.format(MESSAGE_DOC_ELEMENT, header, toCategorize))
      + MESSAGE_END;

  HttpPost post = new HttpPost(configuration.getServer().toString());
  post.setEntity(new StringEntity(aiMessage, APPLICATION_XML_UTF_8));

  DocumentBuilder documentBuilder = newDocumentBuilder();
  Document response;
  try (CloseableHttpResponse httpResponse = httpClient.execute(post)) {
    // A response without a body has a null entity; report it as a service failure
    // instead of letting a NullPointerException escape.
    if (httpResponse.getEntity() == null) {
      throw new IOException("Taxonomy server returned no response body: " + configuration.getServer());
    }
    try (InputStream stream = httpResponse.getEntity().getContent()) {
      response = documentBuilder.parse(stream);
    }
  } catch (IOException e) {
    throw new TaxonomyRemoteServiceNotAvailableException(e);
  } catch (SAXException e) {
    throw new TaxonomyRemoteServiceInvalidBehaviorException("Invalid XML returned from " + configuration.getServer(), e);
  }

  // Parse the result.
  NodeList vectorElements = response.getElementsByTagName("VectorElement");
  List<String> results = new ArrayList<>(vectorElements.getLength());

  // Add the text that is sent to the taxonomy server if isTextRequired is true.
  if (isTextRequired) {
    toCategorize = StringEscapeUtils.unescapeXml(toCategorize);
    results.add(toCategorize);
  }

  // The first and last elements of the vector response are just MAITERMS, so skip them.
  for (int i = 1; i < vectorElements.getLength() - 1; i++) {
    results.add(vectorElements.item(i).getTextContent());
  }

  // When the submitted text was added, a size of 1 means no terms came back.
  if ((isTextRequired && results.size() == 1) || results.isEmpty()) {
    log.error("Taxonomy server returned 0 terms. " + article.getDoi());
  }
  return results;
}
Aggregations