Search in sources :

Example 1 with TextTypeSimpleType

use of eu.transkribus.core.model.beans.pagecontent.TextTypeSimpleType in project TranskribusCore by Transkribus.

The method setStructure of the class CustomTagUtil.

/**
 * Sets (or clears) the structure type of a shape and notifies the shape's observers.
 * <p>
 * Nothing happens when {@code shape} is {@code null} or when
 * {@code isTextregionOrLineOrWord(shape)} is false. Otherwise:
 * <ul>
 * <li>if the shape is a {@link TrpTextRegionType}, its type is set to the result of
 * {@code StructureTag.parseTextType(structureType)};</li>
 * <li>the shape's custom tag list has all {@code StructureTag.TAG_NAME} tags removed when
 * {@code structureType} is {@code null} or empty, and otherwise gets a new
 * {@link StructureTag} added or merged;</li>
 * <li>when {@code recursive} is set, {@code setStructure} is invoked on every shape
 * returned by {@code shape.getChildren(true)};</li>
 * <li>finally a {@link TrpStructureChangedEvent} built from {@code who} is sent to the
 * shape's observers.</li>
 * </ul>
 *
 * @param shape         the shape to modify; may be {@code null}
 * @param structureType the structure type to set; {@code null} or {@code ""} removes the structure tag
 * @param recursive     whether to apply the same structure type to the shape's children as well
 * @param who           passed to the {@link TrpStructureChangedEvent} that is fired
 */
public static void setStructure(ITrpShapeType shape, String structureType, boolean recursive, Object who) {
    if (shape == null) {
        return;
    }

    final String traceMessage = "setting structure: " + structureType + " id: " + shape.getId() + " type: " + shape.getClass().getSimpleName() + " recursive: " + recursive;
    logger.trace(traceMessage);

    if (!isTextregionOrLineOrWord(shape)) {
        return;
    }

    // text regions additionally carry the structure in their PAGE type field
    if (shape instanceof TrpTextRegionType) {
        final TrpTextRegionType textRegion = (TrpTextRegionType) shape;
        textRegion.setType(StructureTag.parseTextType(structureType));
    }

    // keep the custom tag in sync: a blank type clears it, anything else adds/merges it
    final boolean hasStructureType = structureType != null && !structureType.isEmpty();
    if (hasStructureType) {
        shape.getCustomTagList().addOrMergeTag(new StructureTag(structureType), null);
    } else {
        shape.getCustomTagList().removeTags(StructureTag.TAG_NAME);
    }

    if (recursive) {
        for (ITrpShapeType child : shape.getChildren(true)) {
            child.setStructure(structureType, true, who);
        }
    }

    final TrpStructureChangedEvent changeEvent = new TrpStructureChangedEvent(who);
    shape.getObservable().setChangedAndNotifyObservers(changeEvent);
}
Also used : TextTypeSimpleType(eu.transkribus.core.model.beans.pagecontent.TextTypeSimpleType) TrpStructureChangedEvent(eu.transkribus.core.model.beans.pagecontent_trp.observable.TrpObserveEvent.TrpStructureChangedEvent) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) ITrpShapeType(eu.transkribus.core.model.beans.pagecontent_trp.ITrpShapeType)

Example 2 with TextTypeSimpleType

use of eu.transkribus.core.model.beans.pagecontent.TextTypeSimpleType in project TranskribusCore by Transkribus.

The method applyLogicalStructFromMetsToPageFile of the class FEPLocalDocReader.

/**
 * Applies the logical structure types that {@code getLogicalStructuresForPage(mets, pageNr)}
 * returns for a page to the matching elements of that page's XML file and writes the
 * modified DOM back to the same file.
 * <p>
 * For every (block id, type) pair:
 * <ol>
 * <li>the element with that {@code id} attribute is looked up in the page DOM; exactly one
 * match is required;</li>
 * <li>if the lower-cased type is a key of {@code STRUCT_NAME_REGION_CLASS_MAP} and the mapped
 * element name differs from the current one, the element is replaced by a new element of
 * the mapped name that carries only the {@code id} and a shallow copy of the region's
 * direct {@code Coords} child (all other content of the old element is dropped);</li>
 * <li>if the resulting element is a text region, the structure type is written to the
 * custom tag attribute and, when {@code StructureTag.parseTextType} recognizes it, also
 * to the region type attribute.</li>
 * </ol>
 *
 * @param mets     the METS document the logical structures are read from
 * @param pageNr   the page number passed to {@code getLogicalStructuresForPage}; also used in messages
 * @param pageFile the page XML file that is parsed, modified and overwritten
 * @throws IOException if a block id does not match exactly one element, or if a region that
 *                     has to be converted has no direct {@code Coords} child
 * @throws XPathExpressionException if the id lookup expression cannot be compiled or evaluated
 * @throws TransformerException         declared by the signature; presumably raised by the XML helpers
 * @throws SAXException                 declared by the signature; presumably raised by the XML helpers
 * @throws ParserConfigurationException declared by the signature; presumably raised by the XML helpers
 */
static void applyLogicalStructFromMetsToPageFile(Mets mets, int pageNr, File pageFile) throws TransformerException, IOException, SAXException, ParserConfigurationException, XPathExpressionException {
    HashMap<String, String> structs4Page = getLogicalStructuresForPage(mets, pageNr);
    long t = System.currentTimeMillis();
    Document pageDom = XmlUtils.parseDomFromFile(pageFile, true);
    logger.debug("time to read dom: " + (System.currentTimeMillis() - t));
    // the factory lookup is loop-invariant and comparatively expensive, so do it only once
    final XPathFactory xpathFactory = XPathFactory.newInstance();
    for (java.util.Map.Entry<String, String> struct : structs4Page.entrySet()) {
        String blockId = struct.getKey();
        String type = struct.getValue();
        // locale-independent lower-casing: with the default locale e.g. "TITLE" becomes
        // "tıtle" on a Turkish system and would no longer match the lookup keys
        String typeLc = type.toLowerCase(java.util.Locale.ROOT);
        logger.debug("block: " + blockId + " type: " + type);
        // NOTE(review): blockId is concatenated into the expression unescaped; an id containing
        // a single quote would break the lookup - confirm ids from METS cannot contain one
        final XPathExpression expr = xpathFactory.newXPath().compile("//*[@id='" + blockId + "']");
        // NODESET results are org.w3c.dom.NodeList for DOM input; casting to a provider-specific
        // class would fail with a ClassCastException under any other XPath implementation
        NodeList result = (NodeList) expr.evaluate(pageDom, XPathConstants.NODESET);
        if (result.getLength() != 1) {
            throw new IOException("Could not find region with ID '" + blockId + "' in page file " + pageNr);
        }
        Element regionElement = (Element) result.item(0);
        // convert a logical struct region type to corresponding PAGE region element if not same:
        if (STRUCT_NAME_REGION_CLASS_MAP.containsKey(typeLc)) {
            String newElementName = STRUCT_NAME_REGION_CLASS_MAP.get(typeLc);
            if (!newElementName.equals(regionElement.getNodeName())) {
                logger.debug("Converting a region of type " + regionElement.getNodeName() + " to type " + newElementName);
                Element newElement = pageDom.createElementNS(XmlFormat.PAGE_2013.namespace, newElementName);
                // getElementsByTagName also returns nested Coords (e.g. of child elements),
                // so pick the one that is a direct child of this region
                NodeList coordsList = regionElement.getElementsByTagName("Coords");
                Node coordsCopy = null;
                for (int i = 0; i < coordsList.getLength(); ++i) {
                    Node coords = coordsList.item(i);
                    if (coords.getParentNode().equals(regionElement)) {
                        // shallow clone: keeps the element's attributes, not its child nodes
                        coordsCopy = coords.cloneNode(false);
                        break;
                    }
                }
                if (coordsCopy == null)
                    throw new IOException("Could not find coordinates for region: " + blockId + ", pageNr: " + pageNr);
                newElement.appendChild(coordsCopy);
                newElement.setAttribute("id", blockId);
                // swap the old element for the new one at the same position in the document
                Node parent = regionElement.getParentNode();
                parent.insertBefore(newElement, regionElement);
                parent.removeChild(regionElement);
                regionElement = newElement;
            }
        }
        // if this is a text region -> set logical struct from mets as type attribute and custom tag
        if (regionElement.getTagName().equals(RegionType.TextRegion.getName())) {
            logger.debug("Text region");
            TextTypeSimpleType ts = StructureTag.parseTextType(typeLc);
            if (ts != null) {
                logger.debug("successfully parsed text type: " + typeLc);
                regionElement.setAttribute(REGION_TYPE_ATTRIBUTE, ts.value());
                regionElement.setAttribute(CUSTOM_TAG_ATTRIBUTE, "structure {type:" + ts.value() + "};");
            } else {
                // unknown text type: keep it in the custom tag only
                regionElement.setAttribute(CUSTOM_TAG_ATTRIBUTE, "structure {type:" + typeLc + "};");
            }
        }
    }
    XmlUtils.writeDomToFile(pageFile, pageDom);
    logger.debug("time to apply logical structs: " + (System.currentTimeMillis() - t));
}
Also used : XPathExpression(javax.xml.xpath.XPathExpression) TextTypeSimpleType(eu.transkribus.core.model.beans.pagecontent.TextTypeSimpleType) DOMNodeList(net.sf.saxon.dom.DOMNodeList) Element(org.w3c.dom.Element) NodeList(org.w3c.dom.NodeList) Node(org.w3c.dom.Node) IOException(java.io.IOException) Document(org.w3c.dom.Document)

Aggregations

TextTypeSimpleType (eu.transkribus.core.model.beans.pagecontent.TextTypeSimpleType)2 ITrpShapeType (eu.transkribus.core.model.beans.pagecontent_trp.ITrpShapeType)1 TrpTextRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType)1 TrpStructureChangedEvent (eu.transkribus.core.model.beans.pagecontent_trp.observable.TrpObserveEvent.TrpStructureChangedEvent)1 IOException (java.io.IOException)1 XPathExpression (javax.xml.xpath.XPathExpression)1 DOMNodeList (net.sf.saxon.dom.DOMNodeList)1 Document (org.w3c.dom.Document)1 Element (org.w3c.dom.Element)1 Node (org.w3c.dom.Node)1 NodeList (org.w3c.dom.NodeList)1