Use of eu.transkribus.core.model.beans.pagecontent.TextTypeSimpleType in project TranskribusCore by Transkribus.
Class CustomTagUtil, method setStructure.
/**
 * Assigns a structure type to a shape and notifies the shape's observers afterwards.
 * <p>
 * Nothing happens when {@code shape} is {@code null} or when
 * {@code isTextregionOrLineOrWord(shape)} rejects it. For a {@code TrpTextRegionType}
 * the value returned by {@code StructureTag.parseTextType} is additionally stored via
 * {@code setType}. A {@code null} or empty {@code structureType} removes the tags named
 * {@code StructureTag.TAG_NAME}; any other value is added or merged as a new
 * {@code StructureTag}.
 *
 * @param shape         the shape to modify; may be {@code null}
 * @param structureType the structure type to set; {@code null} or {@code ""} removes the structure tag
 * @param recursive     if {@code true}, {@code setStructure} is also invoked on every shape
 *                      returned by {@code shape.getChildren(recursive)}
 * @param who           forwarded to the child calls and to the {@code TrpStructureChangedEvent}
 */
public static void setStructure(ITrpShapeType shape, String structureType, boolean recursive, Object who) {
    if (shape == null) {
        return;
    }

    final String traceMessage = "setting structure: " + structureType + " id: " + shape.getId() + " type: " + shape.getClass().getSimpleName() + " recursive: " + recursive;
    logger.trace(traceMessage);

    if (!isTextregionOrLineOrWord(shape)) {
        return;
    }

    // text regions additionally carry the structure in their PAGE type field
    if (shape instanceof TrpTextRegionType) {
        final TrpTextRegionType textRegion = (TrpTextRegionType) shape;
        final TextTypeSimpleType parsedType = StructureTag.parseTextType(structureType);
        textRegion.setType(parsedType);
    }

    // keep the custom tag in sync: a real value is added/merged, an absent one clears the tag
    if (structureType != null && !structureType.equals("")) {
        shape.getCustomTagList().addOrMergeTag(new StructureTag(structureType), null);
    } else {
        shape.getCustomTagList().removeTags(StructureTag.TAG_NAME);
    }

    if (recursive) {
        for (ITrpShapeType child : shape.getChildren(recursive)) {
            child.setStructure(structureType, recursive, who);
        }
    }

    shape.getObservable().setChangedAndNotifyObservers(new TrpStructureChangedEvent(who));
}
Use of eu.transkribus.core.model.beans.pagecontent.TextTypeSimpleType in project TranskribusCore by Transkribus.
Class FEPLocalDocReader, method applyLogicalStructFromMetsToPageFile.
/**
 * Applies the logical structure types of one page to the matching elements of its PAGE XML
 * file and writes the modified document back to the same file.
 * <p>
 * For every (block id, type) pair returned by {@code getLogicalStructuresForPage}:
 * <ol>
 * <li>the element with that {@code id} attribute is looked up in the page DOM; exactly one
 * match is required;</li>
 * <li>if {@code STRUCT_NAME_REGION_CLASS_MAP} maps the lower-cased type to an element name
 * different from the current one, the element is replaced by a new element of that name that
 * keeps only the id and a shallow copy of the direct {@code Coords} child;</li>
 * <li>if the resulting element is a text region, the type is written to its custom tag
 * attribute and, when {@code StructureTag.parseTextType} recognises it, to its type attribute
 * as well.</li>
 * </ol>
 *
 * @param mets     the METS document the logical structures are read from
 * @param pageNr   the page number passed to {@code getLogicalStructuresForPage}; also used in messages
 * @param pageFile the PAGE XML file that is read, modified and overwritten
 * @throws IOException if a block id does not match exactly one element, or a region that has
 *                     to be converted has no direct {@code Coords} child
 * @throws XPathExpressionException if the id lookup expression cannot be compiled or evaluated
 * @throws TransformerException declared by the signature; presumably raised by the XML helpers
 * @throws SAXException declared by the signature; presumably raised while parsing the page file
 * @throws ParserConfigurationException declared by the signature; presumably raised while parsing the page file
 */
static void applyLogicalStructFromMetsToPageFile(Mets mets, int pageNr, File pageFile) throws TransformerException, IOException, SAXException, ParserConfigurationException, XPathExpressionException {
    HashMap<String, String> structs4Page = getLogicalStructuresForPage(mets, pageNr);
    long t = System.currentTimeMillis();
    Document pageDom = XmlUtils.parseDomFromFile(pageFile, true);
    logger.debug("time to read dom: " + (System.currentTimeMillis() - t));

    // One XPath instance serves all lookups of this page; a new factory per block is needless work.
    final javax.xml.xpath.XPath xpath = XPathFactory.newInstance().newXPath();

    for (java.util.Map.Entry<String, String> struct : structs4Page.entrySet()) {
        String blockId = struct.getKey();
        String type = struct.getValue();
        // Locale.ROOT keeps the lookup keys stable regardless of the JVM's default locale
        // (e.g. the Turkish locale lower-cases 'I' to a dotless 'ı').
        String typeLc = type.toLowerCase(java.util.Locale.ROOT);
        logger.debug("block: " + blockId + " type: " + type);

        // NOTE: blockId is interpolated verbatim; an id containing a single quote would break the expression.
        final XPathExpression expr = xpath.compile("//*[@id='" + blockId + "']");
        // NODESET results are specified as org.w3c.dom.NodeList; do not cast to an implementation class.
        NodeList result = (NodeList) expr.evaluate(pageDom, XPathConstants.NODESET);
        if (result.getLength() != 1) {
            throw new IOException("Could not find region with ID '" + blockId + "' in page file " + pageNr);
        }
        Element regionElement = (Element) result.item(0);

        // convert a logical struct region type to corresponding PAGE region element if not same:
        if (STRUCT_NAME_REGION_CLASS_MAP.containsKey(typeLc)) {
            String newElementName = STRUCT_NAME_REGION_CLASS_MAP.get(typeLc);
            if (!newElementName.equals(regionElement.getNodeName())) {
                logger.debug("Converting a region of type " + regionElement.getNodeName() + " to type " + newElementName);
                Element newElement = pageDom.createElementNS(XmlFormat.PAGE_2013.namespace, newElementName);

                // getElementsByTagName also returns nested Coords (e.g. of lines), so pick the direct child only
                NodeList coordsList = regionElement.getElementsByTagName("Coords");
                Node coordsCopy = null;
                for (int i = 0; i < coordsList.getLength(); ++i) {
                    if (coordsList.item(i).getParentNode().equals(regionElement)) {
                        coordsCopy = coordsList.item(i).cloneNode(false);
                        break;
                    }
                }
                if (coordsCopy == null) {
                    throw new IOException("Could not find coordinates for region: " + blockId + ", pageNr: " + pageNr);
                }
                newElement.appendChild(coordsCopy);
                newElement.setAttribute("id", blockId);

                // swap the old element for the new one at the same position in the document
                regionElement.getParentNode().insertBefore(newElement, regionElement);
                regionElement.getParentNode().removeChild(regionElement);
                regionElement = newElement;
            }
        }

        // if this is a text region -> set logical struct from mets as type attribute and custom tag
        if (regionElement.getTagName().equals(RegionType.TextRegion.getName())) {
            logger.debug("Text region");
            TextTypeSimpleType ts = StructureTag.parseTextType(typeLc);
            if (ts != null) {
                logger.debug("successfully parsed text type: " + typeLc);
                regionElement.setAttribute(REGION_TYPE_ATTRIBUTE, ts.value());
                regionElement.setAttribute(CUSTOM_TAG_ATTRIBUTE, "structure {type:" + ts.value() + "};");
            } else {
                // unknown text type: keep it in the custom tag only
                regionElement.setAttribute(CUSTOM_TAG_ATTRIBUTE, "structure {type:" + typeLc + "};");
            }
        }
    }

    XmlUtils.writeDomToFile(pageFile, pageDom);
    logger.debug("time to apply logical structs: " + (System.currentTimeMillis() - t));
}
Aggregations