Use of org.ambraproject.rhino.model.ArticleIngestion in project rhino by PLOS.
The class ArticleCrudServiceImpl, method buildOverview.
/**
 * Assembles an {@link ArticleOverview} for the given article.
 * <p>
 * Inside a single Hibernate callback, loads every {@link ArticleIngestion} that belongs to the
 * article and every {@link ArticleRevision} whose ingestion belongs to the article, then hands
 * both lists to {@link ArticleOverview#build} together with an identifier created from the
 * article's DOI.
 *
 * @param article the article to summarize
 * @return the overview built from the article's ingestions and revisions
 */
@Override
public ArticleOverview buildOverview(Article article) {
  return hibernateTemplate.execute(session -> {
    // All ingestions of this article.
    List<ArticleIngestion> allIngestions = session
        .createQuery("FROM ArticleIngestion WHERE article = :article")
        .setParameter("article", article)
        .list();

    // All revisions that point at any ingestion of this article.
    List<ArticleRevision> allRevisions = session
        .createQuery("FROM ArticleRevision WHERE ingestion IN "
            + " (FROM ArticleIngestion WHERE article = :article)")
        .setParameter("article", article)
        .list();

    return ArticleOverview.build(ArticleIdentifier.create(article.getDoi()), allIngestions, allRevisions);
  });
}
Use of org.ambraproject.rhino.model.ArticleIngestion in project rhino by PLOS.
The class IngestionServiceTest, method createStubArticleItem.
/**
 * Creates a minimal {@link ArticleItem} fixture: an item linked to a fresh ingestion, which in
 * turn is linked to a fresh article whose DOI is {@code "test"}.
 *
 * @return the stub item, wired to its ingestion and article
 */
private static ArticleItem createStubArticleItem() {
  Article stubArticle = new Article();
  stubArticle.setDoi("test");

  ArticleIngestion stubIngestion = new ArticleIngestion();
  ArticleItem stubItem = new ArticleItem();

  // Wire item -> ingestion -> article.
  stubItem.setIngestion(stubIngestion);
  stubIngestion.setArticle(stubArticle);
  return stubItem;
}
Use of org.ambraproject.rhino.model.ArticleIngestion in project rhino by PLOS.
The class TaxonomyClassificationServiceImpl, method getRawTerms.
/**
 * {@inheritDoc}
 * <p>
 * Builds a request from the article's categorization content plus a header describing the
 * ingestion of the article's latest revision (publication date, journal title, article type and
 * DOI), posts it to the configured taxonomy server, and collects the text of the
 * {@code VectorElement} nodes in the XML response. The first and last {@code VectorElement}
 * nodes are skipped.
 *
 * @param articleXml     the article document from which the categorization content is extracted
 * @param article        the article being categorized
 * @param isTextRequired if {@code true}, the (XML-unescaped) text that was sent for
 *                       categorization is placed first in the returned list, ahead of the terms
 * @return the raw terms from the response, optionally preceded by the categorized text
 * @throws TaxonomyRemoteServiceNotAvailableException    if an {@link IOException} occurs while
 *                                                       executing the request or reading the reply
 * @throws TaxonomyRemoteServiceInvalidBehaviorException if the reply cannot be parsed as XML
 */
@Override
public List<String> getRawTerms(Document articleXml, Article article, boolean isTextRequired) {
  RuntimeConfiguration.TaxonomyConfiguration taxonomyConfig = getTaxonomyConfiguration();
  String content = getCategorizationContent(articleXml);
  ArticleIngestion latestIngestion = articleCrudService.readLatestRevision(article).getIngestion();

  // Header line describing the article to the taxonomy server.
  String publicationDay = new SimpleDateFormat("yyyy-MM-dd").format(latestIngestion.getPublicationDate());
  String header = String.format(MESSAGE_HEADER,
      publicationDay,
      latestIngestion.getJournal().getTitle(),
      latestIngestion.getArticleType(),
      article.getDoi());

  // Only the document element is XML-escaped; the surrounding envelope is sent as-is.
  String envelopeStart = String.format(MESSAGE_BEGIN, taxonomyConfig.getThesaurus());
  String escapedDocument = StringEscapeUtils.escapeXml10(String.format(MESSAGE_DOC_ELEMENT, header, content));
  String requestBody = envelopeStart + escapedDocument + MESSAGE_END;

  HttpPost request = new HttpPost(taxonomyConfig.getServer().toString());
  request.setEntity(new StringEntity(requestBody, APPLICATION_XML_UTF_8));

  DocumentBuilder parser = newDocumentBuilder();
  Document reply;
  try (CloseableHttpResponse httpResponse = httpClient.execute(request);
       InputStream replyStream = httpResponse.getEntity().getContent()) {
    reply = parser.parse(replyStream);
  } catch (IOException e) {
    throw new TaxonomyRemoteServiceNotAvailableException(e);
  } catch (SAXException e) {
    throw new TaxonomyRemoteServiceInvalidBehaviorException("Invalid XML returned from " + taxonomyConfig.getServer(), e);
  }

  // Parse the result.
  NodeList vectorElements = reply.getElementsByTagName("VectorElement");
  int elementCount = vectorElements.getLength();
  List<String> rawTerms = new ArrayList<>(elementCount);

  // Number of entries in the list that are not terms (the categorized text, if requested).
  int nonTermEntries = 0;
  if (isTextRequired) {
    rawTerms.add(StringEscapeUtils.unescapeXml(content));
    nonTermEntries = 1;
  }

  // The first and last elements of the vector response are just MAITERMS.
  for (int index = 1; index < elementCount - 1; index++) {
    rawTerms.add(vectorElements.item(index).getTextContent());
  }

  // Nothing beyond the optional text entry means the server produced no terms.
  if (rawTerms.size() == nonTermEntries) {
    log.error("Taxonomy server returned 0 terms. " + article.getDoi());
  }
  return rawTerms;
}
Use of org.ambraproject.rhino.model.ArticleIngestion in project rhino by PLOS.
The class IssueOutputView, method getIssueImageFigureDoi.
/**
 * Finds the DOI of the single figure image belonging to an issue's image article.
 * <p>
 * Looks up the article's latest revision, fetches all items of that revision's ingestion, and
 * keeps those whose item type is one of {@code FIGURE_IMAGE_TYPES}.
 *
 * @param articleCrudService service used to read the latest revision and its items
 * @param imageArticle       the article that supplies the issue image
 * @return the DOI of the one matching figure image item
 * @throws RuntimeException if the article has no latest revision, or if the number of matching
 *                          figure image items is not exactly one
 */
private static String getIssueImageFigureDoi(ArticleCrudService articleCrudService, Article imageArticle) {
  ArticleRevision latestRevision = articleCrudService.getLatestRevision(imageArticle)
      .orElseThrow(() -> new RuntimeException("Image article has no published revisions. " + imageArticle.getDoi()));

  Collection<ArticleItem> ingestionItems = articleCrudService.getAllArticleItems(latestRevision.getIngestion());
  List<ArticleItem> figureImages = ingestionItems.stream()
      .filter(candidate -> FIGURE_IMAGE_TYPES.contains(candidate.getItemType()))
      .collect(Collectors.toList());

  if (figureImages.size() == 1) {
    return figureImages.get(0).getDoi();
  }
  throw new RuntimeException("Image article does not contain exactly one image file. " + imageArticle.getDoi());
}
Use of org.ambraproject.rhino.model.ArticleIngestion in project rhino by PLOS.
The class TaxonomyClassificationServiceImpl, method populateCategories.
/**
 * {@inheritDoc}
 * <p>
 * Classifies the article behind the revision's ingestion using its manuscript XML. When the
 * classification yields terms, the distinct leaf nodes (limited by {@code CATEGORY_COUNT}) are
 * persisted for the article; when it yields {@code null} or no terms, an error is logged and
 * nothing is persisted.
 *
 * @param revision the revision whose article should have its categories populated
 */
@Override
public void populateCategories(ArticleRevision revision) {
  ArticleIngestion ingestion = revision.getIngestion();
  Article article = ingestion.getArticle();
  Document manuscript = articleCrudService.getManuscriptXml(ingestion);
  String doi = article.getDoi();

  // todo: fix or remove this when we find a home for article types.
  // Amendments are not distinguished yet, so every article is classified.
  List<WeightedTerm> classifiedTerms = classifyArticle(article, manuscript);
  if (classifiedTerms == null || classifiedTerms.isEmpty()) {
    log.error("Taxonomy server returned 0 terms. Cannot populate Categories. " + doi);
    return;
  }

  List<WeightedTerm> distinctLeaves = getDistinctLeafNodes(CATEGORY_COUNT, classifiedTerms);
  persistCategories(distinctLeaves, article);
}
Aggregations