Search in sources :

Example 1 with XMLParseException

use of javax.management.modelmbean.XMLParseException in project wombat by PLOS.

In the class ParseReferenceService, the method buildReferences.

/**
 * Builds a list of Reference objects for a single {@code <ref></ref>} element.
 *
 * <p>The citation tag names from {@code CitationElement} are tried in declaration order; the
 * first one that matches any node under {@code refElement} decides which nodes are parsed.
 * One Reference is built per matching citation node.
 *
 * @param refElement a {@code <ref></ref>} node which can contain
 *                   (label?, (element-citation | mixed-citation | nlm-citation)+)
 * @param linkService resolves the full-article link for a DOI; only consulted when a DOI was
 *                    extracted from the citation's first {@code <ext-link>}
 * @return list of Reference objects; empty when {@code refElement} contains no citation element
 * @throws XMLParseException if a matched citation node is not an element
 * @throws IOException propagated from the helpers called here (presumably the link lookup;
 *                     confirm against {@code DoiToJournalLinkService})
 */
public List<Reference> buildReferences(Element refElement, DoiToJournalLinkService linkService) throws XMLParseException, IOException {
    List<Reference> references = new ArrayList<>();
    List<Node> citationElements = null;
    for (CitationElement elementType : CitationElement.values()) {
        NodeList nodes = refElement.getElementsByTagName(elementType.getValue());
        if (!ParseXmlUtil.isNullOrEmpty(nodes)) {
            citationElements = NodeListAdapter.wrap(nodes);
            break;
        }
    }
    if (citationElements == null) {
        // No citation element under this <ref>: iterating a null list would throw a
        // NullPointerException, so report "no references" instead.
        return references;
    }
    // a <ref></ref> element can have one or more of citation elements
    for (Node citationNode : citationElements) {
        if (citationNode.getNodeType() != Node.ELEMENT_NODE) {
            throw new XMLParseException("<element-citation>, <mixed-citation>, <nlm-citation> is not an element.");
        }
        Element element = (Element) citationNode;
        String unstructuredReference = null;
        String uri = null;
        String doi = null;
        String year = ParseXmlUtil.getElementSingleValue(element, "year");
        String volume = ParseXmlUtil.getElementSingleValue(element, "volume");
        String title = buildTitle(element);
        // the unstructured citation text is only captured when no title could be built
        if (Strings.isNullOrEmpty(title)) {
            unstructuredReference = getUnstructuredCitation(element);
        }
        NodeList extLinkList = element.getElementsByTagName("ext-link");
        if (!ParseXmlUtil.isNullOrEmpty(extLinkList)) {
            // only the first <ext-link> of the citation is considered
            Element extLink = (Element) extLinkList.item(0);
            // use the ext-link as doi, only if ext-link previous text is "doi:" or "doi"
            boolean useDoi = false;
            Node previousNode = extLink.getPreviousSibling();
            if (previousNode != null) {
                String previousText = previousNode.getTextContent();
                if (!Strings.isNullOrEmpty(previousText)) {
                    previousText = previousText.trim();
                    useDoi = previousText.equalsIgnoreCase("doi:") || previousText.equalsIgnoreCase("doi");
                }
            }
            String linkType = ParseXmlUtil.getElementAttributeValue(extLink, "ext-link-type");
            // constant-first comparison: stays safe if the attribute lookup yields null
            if ("uri".equals(linkType)) {
                uri = ParseXmlUtil.getElementAttributeValue(extLink, "xlink:href");
                // TODO: add a validation check for the doi and add it to the meta-tags
                if (useDoi) {
                    doi = extLink.getFirstChild() == null ? null : extLink.getFirstChild().getNodeValue();
                }
            }
        }
        PageRange pages = buildPages(element);
        String fullArticleLink = null;
        if (doi != null) {
            fullArticleLink = linkService.getLink(doi);
        }
        Reference reference = Reference.build()
                .setJournal(parseJournal(element))
                .setFullArticleLink(fullArticleLink)
                .setTitle(title)
                .setChapterTitle(buildChapterTitle(element))
                .setUnStructuredReference(unstructuredReference)
                .setAuthors(parseAuthors(element))
                .setCollabAuthors(parseCollabAuthors(element))
                .setYear(parseYear(year))
                .setVolume(volume)
                .setVolumeNumber(parseVolumeNumber(volume))
                .setIssue(ParseXmlUtil.getElementSingleValue(element, "issue"))
                .setPublisherName(ParseXmlUtil.getElementSingleValue(element, "publisher-name"))
                .setIsbn(ParseXmlUtil.getElementSingleValue(element, "isbn"))
                .setUri(uri)
                .setDoi(doi)
                .setfPage(pages.firstPage)
                .setlPage(pages.lastPage)
                .build();
        references.add(reference);
    }
    return references;
}
Also used : Reference(org.ambraproject.wombat.model.Reference) Node(org.w3c.dom.Node) NodeList(org.w3c.dom.NodeList) Element(org.w3c.dom.Element) ArrayList(java.util.ArrayList) XMLParseException(javax.management.modelmbean.XMLParseException)

Example 2 with XMLParseException

use of javax.management.modelmbean.XMLParseException in project wombat by PLOS.

In the class ParseXmlServiceImpl, the method parseArticleReferences.

/**
 * {@inheritDoc}
 *
 * <p>Walks every {@code <ref-list>} in the document and, for each {@code <ref>} beneath it,
 * appends the references produced by {@code parseReferenceService.buildReferences}. Returns an
 * empty list (after logging at info level) when the document has no {@code <ref-list>}.
 * Failures raised while building an individual reference are rethrown wrapped in a
 * {@link RuntimeException}.
 */
@Override
public List<Reference> parseArticleReferences(Document doc, ParseReferenceService.DoiToJournalLinkService linkService) throws IOException {
    List<Reference> collected = new ArrayList<>();
    List<Node> refLists = NodeListAdapter.wrap(doc.getElementsByTagName("ref-list"));
    if (ParseXmlUtil.isNullOrEmpty(refLists)) {
        log.info("No <ref-list> element was found in the xml.");
        return collected;
    }
    for (Node candidate : refLists) {
        boolean isElement = candidate.getNodeType() == Node.ELEMENT_NODE;
        if (!isElement) {
            throw new XmlContentException("<ref-list> is not an element.");
        }
        List<Node> refs = NodeListAdapter.wrap(((Element) candidate).getElementsByTagName("ref"));
        // visit the <ref> nodes in document order, one buildReferences call per node
        for (Node refNode : refs) {
            List<Reference> built;
            try {
                built = parseReferenceService.buildReferences((Element) refNode, linkService);
            } catch (XMLParseException | IOException e) {
                throw new RuntimeException(e);
            }
            collected.addAll(built);
        }
    }
    return collected;
}
Also used : Reference(org.ambraproject.wombat.model.Reference) NodeListAdapter(org.ambraproject.wombat.util.NodeListAdapter) Logger(org.slf4j.Logger) ParseXmlUtil(org.ambraproject.wombat.util.ParseXmlUtil) Collection(java.util.Collection) LoggerFactory(org.slf4j.LoggerFactory) Autowired(org.springframework.beans.factory.annotation.Autowired) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) ArrayList(java.util.ArrayList) Objects(java.util.Objects) XMLParseException(javax.management.modelmbean.XMLParseException) List(java.util.List) Element(org.w3c.dom.Element) Document(org.w3c.dom.Document) Node(org.w3c.dom.Node) SAXException(org.xml.sax.SAXException) DocumentBuilder(javax.xml.parsers.DocumentBuilder) InputStream(java.io.InputStream) Reference(org.ambraproject.wombat.model.Reference) Node(org.w3c.dom.Node) Element(org.w3c.dom.Element) ArrayList(java.util.ArrayList) Collection(java.util.Collection)

Aggregations

ArrayList (java.util.ArrayList)2 XMLParseException (javax.management.modelmbean.XMLParseException)2 Reference (org.ambraproject.wombat.model.Reference)2 Element (org.w3c.dom.Element)2 Node (org.w3c.dom.Node)2 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 Collection (java.util.Collection)1 List (java.util.List)1 Objects (java.util.Objects)1 Collectors (java.util.stream.Collectors)1 DocumentBuilder (javax.xml.parsers.DocumentBuilder)1 NodeListAdapter (org.ambraproject.wombat.util.NodeListAdapter)1 ParseXmlUtil (org.ambraproject.wombat.util.ParseXmlUtil)1 Logger (org.slf4j.Logger)1 LoggerFactory (org.slf4j.LoggerFactory)1 Autowired (org.springframework.beans.factory.annotation.Autowired)1 Document (org.w3c.dom.Document)1 NodeList (org.w3c.dom.NodeList)1 SAXException (org.xml.sax.SAXException)1