Use of org.ambraproject.wombat.model.Reference in project wombat by PLOS.
Class ArticleController, method getXmlContent.
/**
 * Reads the article manuscript through {@code corpusContentApi.readManuscript}, parses its reference
 * list and transforms the manuscript XML into HTML.
 * <p>
 * The manuscript stream is read fully into memory because it is consumed twice: once to build the DOM
 * used for reference parsing and once as input to the HTML transform.
 * <p>
 * NOTE(review): any caching of the result is presumably handled inside {@code readManuscript}
 * (keyed with the {@code "html"} argument) -- it is not visible in this method; confirm there.
 *
 * @param site           the site the article is rendered for; passed to the content API, the link-text
 *                       lookup and the transform
 * @param articlePointer identifies the article whose manuscript is read and transformed
 * @param request        the current request, used when resolving link text for referenced DOIs
 * @return an XmlContent containing the article HTML and the list of parsed references
 * @throws IOException if reading, parsing or transforming the manuscript fails with an I/O error
 */
private XmlContent getXmlContent(Site site, ArticlePointer articlePointer, HttpServletRequest request) throws IOException {
  return corpusContentApi.readManuscript(articlePointer, site, "html", (InputStream manuscript) -> {
    // Buffer the whole manuscript so it can be replayed for both parsing and transforming.
    final byte[] manuscriptBytes = ByteStreams.toByteArray(manuscript);

    final Document manuscriptDom = parseXmlService.getDocument(new ByteArrayInputStream(manuscriptBytes));
    final List<Reference> parsedReferences =
        parseXmlService.parseArticleReferences(manuscriptDom, doi -> getLinkText(site, request, doi));

    final StringWriter htmlSink = new StringWriter(XFORM_BUFFER_SIZE);
    // Closing the adapter stream flushes any buffered bytes into the writer before it is read back.
    try (OutputStream htmlBytes = new WriterOutputStream(htmlSink, charset)) {
      articleTransformService.transformArticle(
          site, articlePointer, parsedReferences, new ByteArrayInputStream(manuscriptBytes), htmlBytes);
    }

    return new XmlContent(htmlSink.toString(), parsedReferences);
  });
}
Use of org.ambraproject.wombat.model.Reference in project wombat by PLOS.
Class ParseReferenceService, method buildReferences.
/**
 * Builds one {@link Reference} for each citation element found under a {@code <ref>} element.
 * <p>
 * The citation tag names are tried in {@code CitationElement.values()} order; the first tag name that
 * yields at least one element wins and only elements with that tag name are processed.
 * <p>
 * For each citation, the first {@code <ext-link>} with {@code ext-link-type="uri"} supplies the
 * reference URI. Its text is additionally used as the DOI only when the node immediately preceding the
 * link has the text "doi" or "doi:" (case-insensitive, trimmed). When a DOI is found, the full-article
 * link is resolved through {@code linkService}.
 *
 * @param refElement  a {@code <ref>} node which can contain
 *                    (label?, (element-citation | mixed-citation | nlm-citation)+)
 * @param linkService resolves a DOI to a journal link; only called for citations that carry a DOI
 * @return list of Reference objects, one per citation element
 * @throws XMLParseException if the {@code <ref>} contains no citation element, or a citation node is
 *                           not an element
 * @throws IOException       declared for failures in the helpers and link service this method calls
 */
public List<Reference> buildReferences(Element refElement, DoiToJournalLinkService linkService) throws XMLParseException, IOException {
  List<Node> citationElements = null;
  for (CitationElement elementType : CitationElement.values()) {
    NodeList nodes = refElement.getElementsByTagName(elementType.getValue());
    if (!ParseXmlUtil.isNullOrEmpty(nodes)) {
      citationElements = NodeListAdapter.wrap(nodes);
      break;
    }
  }
  // A <ref> without any citation element used to fall through and fail with a bare
  // NullPointerException in the loop below; report it as the parse error it is.
  if (citationElements == null) {
    throw new XMLParseException("<ref> does not contain an <element-citation>, <mixed-citation> or <nlm-citation> element.");
  }

  List<Reference> references = new ArrayList<>();
  // a <ref></ref> element can have one or more of citation elements
  for (Node citationNode : citationElements) {
    if (citationNode.getNodeType() != Node.ELEMENT_NODE) {
      throw new XMLParseException("<element-citation>, <mixed-citation>, <nlm-citation> is not an element.");
    }
    Element element = (Element) citationNode;

    String unstructuredReference = null;
    String uri = null;
    String doi = null;
    String year = ParseXmlUtil.getElementSingleValue(element, "year");
    String volume = ParseXmlUtil.getElementSingleValue(element, "volume");
    String title = buildTitle(element);
    // Only fall back to the unstructured citation text when no title could be built.
    if (Strings.isNullOrEmpty(title)) {
      unstructuredReference = getUnstructuredCitation(element);
    }

    NodeList extLinkList = element.getElementsByTagName("ext-link");
    if (!ParseXmlUtil.isNullOrEmpty(extLinkList)) {
      // Only the first <ext-link> of the citation is considered.
      Element extLink = (Element) extLinkList.item(0);

      // use the ext-link as doi, only if ext-link previous text is "doi:" or "doi"
      boolean useDoi = false;
      Node previousNode = extLink.getPreviousSibling();
      if (previousNode != null) {
        String previousText = previousNode.getTextContent();
        if (!Strings.isNullOrEmpty(previousText)) {
          previousText = previousText.trim();
          useDoi = previousText.equalsIgnoreCase("doi:") || previousText.equalsIgnoreCase("doi");
        }
      }

      String linkType = ParseXmlUtil.getElementAttributeValue(extLink, "ext-link-type");
      // Constant-first comparison: a missing ext-link-type attribute must not cause a
      // NullPointerException if the helper reports it as null.
      if ("uri".equals(linkType)) {
        uri = ParseXmlUtil.getElementAttributeValue(extLink, "xlink:href");
        // TODO: add a validation check for the doi and add it to the meta-tags
        if (useDoi) {
          doi = extLink.getFirstChild() == null ? null : extLink.getFirstChild().getNodeValue();
        }
      }
    }

    PageRange pages = buildPages(element);

    String fullArticleLink = null;
    if (doi != null) {
      fullArticleLink = linkService.getLink(doi);
    }

    Reference reference = Reference.build()
        .setJournal(parseJournal(element))
        .setFullArticleLink(fullArticleLink)
        .setTitle(title)
        .setChapterTitle(buildChapterTitle(element))
        .setUnStructuredReference(unstructuredReference)
        .setAuthors(parseAuthors(element))
        .setCollabAuthors(parseCollabAuthors(element))
        .setYear(parseYear(year))
        .setVolume(volume)
        .setVolumeNumber(parseVolumeNumber(volume))
        .setIssue(ParseXmlUtil.getElementSingleValue(element, "issue"))
        .setPublisherName(ParseXmlUtil.getElementSingleValue(element, "publisher-name"))
        .setIsbn(ParseXmlUtil.getElementSingleValue(element, "isbn"))
        .setUri(uri)
        .setDoi(doi)
        .setfPage(pages.firstPage)
        .setlPage(pages.lastPage)
        .build();
    references.add(reference);
  }
  return references;
}
Use of org.ambraproject.wombat.model.Reference in project wombat by PLOS.
Class ParseXmlServiceImpl, method parseArticleReferences.
/**
 * {@inheritDoc}
 * <p>
 * Collects every {@code <ref-list>} element in the document and, for each {@code <ref>} found beneath
 * it, appends the references produced by {@code parseReferenceService.buildReferences}. Returns an
 * empty list (and logs at info level) when the document has no {@code <ref-list>}.
 * <p>
 * An {@link IOException} raised while building a reference is propagated unchanged, as declared by
 * this method, instead of being wrapped in a {@link RuntimeException}. An {@code XMLParseException}
 * is still rethrown wrapped in a {@link RuntimeException} with the original exception as its cause,
 * because the signature does not allow it to propagate directly.
 * <p>
 * NOTE(review): {@code getElementsByTagName} matches all descendants, so if a {@code <ref-list>} can
 * be nested inside another one, the inner {@code <ref>} elements would be visited twice -- confirm
 * whether nested lists occur in the input.
 *
 * @param doc         the parsed article XML
 * @param linkService passed through to {@code buildReferences} to resolve DOIs to journal links
 * @return the references of all {@code <ref>} elements, in document order per {@code <ref-list>}
 * @throws IOException if building a reference fails with an I/O error
 */
@Override
public List<Reference> parseArticleReferences(Document doc, ParseReferenceService.DoiToJournalLinkService linkService) throws IOException {
  List<Reference> references = new ArrayList<>();
  List<Node> refListNodes = NodeListAdapter.wrap(doc.getElementsByTagName("ref-list"));
  if (ParseXmlUtil.isNullOrEmpty(refListNodes)) {
    log.info("No <ref-list> element was found in the xml.");
    return references;
  }

  for (Node refListNode : refListNodes) {
    if (refListNode.getNodeType() != Node.ELEMENT_NODE) {
      throw new XmlContentException("<ref-list> is not an element.");
    }
    Element refListElement = (Element) refListNode;

    // A plain loop (rather than a stream) lets the checked IOException propagate as declared.
    for (Node refNode : NodeListAdapter.wrap(refListElement.getElementsByTagName("ref"))) {
      try {
        references.addAll(parseReferenceService.buildReferences((Element) refNode, linkService));
      } catch (XMLParseException e) {
        throw new RuntimeException(e);
      }
    }
  }
  return references;
}
Aggregations