use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType in project TranskribusCore by Transkribus.
the class KlosterTeiToPageParser method parsePage.
/**
 * Converts the content that follows one TEI {@code pb} (page break) node into a PAGE XML
 * document containing a single text region ({@code region_1}) and, optionally, writes it out.
 *
 * <p>The page attributes are read from {@code pbNode}: {@code facs} (image file name),
 * {@code n} (page number, only used for console output), {@code xmlns:w} and
 * {@code xmlns:h} (page width/height). Every following sibling element named {@code lb}
 * becomes one text line, up to the next sibling named {@code pb}. The text of a line is
 * taken from the node directly following the {@code lb} element, with trailing blanks and
 * line breaks stripped. The baseline of each line is a horizontal segment placed at 70% of
 * the line height.
 *
 * <p>The region gets the bounding box of all its lines, or the full page rectangle if no
 * line was found.
 *
 * @param pbNode the {@code pb} node; it is cast to {@link Element}
 * @param save if {@code true} and at least one line was found, the PAGE XML is marshalled to
 *        {@code PAGE_DIR + <image base name> + ".xml"} and the image is copied from
 *        {@code DIR} to {@code DST_DIR}
 * @throws IOException declared for the file operations performed when saving
 * @throws JAXBException declared for marshalling the PAGE XML
 * @throws NumberFormatException if one of the numeric attributes is missing or not an integer
 */
static void parsePage(Node pbNode, boolean save) throws IOException, JAXBException {
    Element pb = (Element) pbNode;
    String imgFn = pb.getAttribute("facs");
    int pageN = Integer.parseInt(pb.getAttribute("n"));
    int pageHeight = Integer.parseInt(pb.getAttribute("xmlns:h"));
    int pageWidth = Integer.parseInt(pb.getAttribute("xmlns:w"));

    // Fix: the string literal "imgfn" used to be passed here instead of the image file name
    // that was just read from the 'facs' attribute.
    PcGtsType page = PageXmlUtils.createEmptyPcGtsType(imgFn, pageWidth, pageHeight);
    TrpTextRegionType region = new TrpTextRegionType();
    region.setId("region_1");
    System.out.println("page data: imgFn = " + imgFn + " n = " + pageN + " pageWidth = " + pageWidth + " pageHeight = " + pageHeight);

    // Running bounding box over all lines; only evaluated if at least one line was added.
    int minX = Integer.MAX_VALUE;
    int minY = Integer.MAX_VALUE;
    int maxX = Integer.MIN_VALUE;
    int maxY = Integer.MIN_VALUE;
    int lineCount = 0;

    for (Node sibling = pbNode.getNextSibling(); sibling != null; sibling = sibling.getNextSibling()) {
        // The next page break ends the current page.
        if ("pb".equals(sibling.getNodeName())) {
            break;
        }
        if (sibling.getNodeType() != Node.ELEMENT_NODE || !"lb".equals(sibling.getNodeName())) {
            continue;
        }

        Element lb = (Element) sibling;
        int n = Integer.parseInt(lb.getAttribute("n"));
        int x = Integer.parseInt(lb.getAttribute("xmlns:x"));
        int y = Integer.parseInt(lb.getAttribute("xmlns:y"));
        int w = Integer.parseInt(lb.getAttribute("xmlns:w"));
        int h = Integer.parseInt(lb.getAttribute("xmlns:h"));

        minX = Math.min(minX, x);
        minY = Math.min(minY, y);
        maxX = Math.max(maxX, x + w);
        maxY = Math.max(maxY, y + h);

        // Fix: an 'lb' that is the last sibling has no following node; treat its text as
        // empty instead of failing with a NullPointerException.
        Node textNode = sibling.getNextSibling();
        String txt = (textNode == null) ? "" : textNode.getTextContent();
        txt = StringUtils.stripEnd(txt, " \r\n");
        System.out.format("line: n = %d, txt = %s, coords = [%d,%d,%d,%d]\n", n, txt, x, y, w, h);

        TrpTextLineType line = new TrpTextLineType();
        line.setCoords(bbToCoords(x, y, w, h));
        TextEquivType te = new TextEquivType();
        te.setUnicode(txt);
        line.setTextEquiv(te);
        line.setId("line_" + (++lineCount));

        // create baseline: horizontal segment at 70% of the line height
        TrpBaselineType bl = new TrpBaselineType();
        int yBl = (int) (y + 0.7 * h);
        bl.setPoints(x + "," + yBl + " " + (x + w) + "," + yBl);
        line.setBaseline(bl);

        region.getTextLine().add(line);
    }

    boolean hasLines = !region.getTextLine().isEmpty();
    if (hasLines) {
        region.setCoords(bbToCoords(minX, minY, maxX - minX, maxY - minY));
    } else {
        // No lines found: let the region cover the whole page.
        region.setCoords(bbToCoords(0, 0, pageWidth, pageHeight));
    }
    page.getPage().getTextRegionOrImageRegionOrLineDrawingRegion().add(region);

    // Pages without any line are never written, even if saving was requested.
    if (save && hasLines) {
        File xmlFile = new File(PAGE_DIR + FilenameUtils.getBaseName(imgFn) + ".xml");
        PageXmlUtils.marshalToFile(page, xmlFile);
        FileUtils.copyFile(new File(DIR + imgFn), new File(DST_DIR + imgFn));
        System.out.println("written page to: " + xmlFile.getAbsolutePath());
    }
}
use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType in project TranskribusCore by Transkribus.
the class PageXmlUtils method removeAllIndexedTags.
/**
 * Calls {@code removeIndexedTags()} on the custom tag list of every text region of the given
 * page, of every line inside those regions and of every word inside those lines.
 *
 * <p>The page of {@code pc} is cast to {@code TrpPageType}; lines and words are cast to
 * {@code TrpTextLineType} and {@code TrpWordType} respectively, so other implementations
 * lead to a {@link ClassCastException}.
 *
 * @param pc the PAGE document whose tag lists are modified in place
 */
public static void removeAllIndexedTags(PcGtsType pc) {
    TrpPageType trpPage = (TrpPageType) pc.getPage();
    // NOTE(review): the 'true' flag presumably requests nested regions as well - confirm
    // against TrpPageType.getTextRegions.
    for (TrpTextRegionType region : trpPage.getTextRegions(true)) {
        region.getCustomTagList().removeIndexedTags();
        for (TextLineType rawLine : region.getTextLine()) {
            TrpTextLineType line = (TrpTextLineType) rawLine;
            line.getCustomTagList().removeIndexedTags();
            for (WordType rawWord : line.getWord()) {
                TrpWordType word = (TrpWordType) rawWord;
                word.getCustomTagList().removeIndexedTags();
            }
        }
    }
}
use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType in project TranskribusCore by Transkribus.
the class ExportCache method storeCustomTagMapForDoc.
/**
 * Clears the cached tags and re-collects them from the selected pages of the given document
 * by passing every line (and, if requested, every word) to {@code getTagsForShapeElement}.
 * Nothing is returned; the result ends up in the state maintained by this object.
 *
 * @param doc the document whose pages are scanned
 * @param wordBased if {@code true}, the words of each line are scanned in addition to the
 *        line itself
 * @param pageIndices zero-based indices of the pages to scan; {@code null} scans all pages
 * @param monitor optional progress monitor (may be {@code null}); it is polled for
 *        cancellation before each page and updated after each page
 * @param blackening value stored in the {@code doBlackening} field before scanning
 * @throws JAXBException declared by this method; presumably raised while building a page
 *         transcript - confirm against JAXBPageTranscript
 * @throws IOException declared by this method; presumably raised while loading a page
 *         transcript - confirm against JAXBPageTranscript
 * @throws InterruptedException if the monitor reports that the user canceled
 */
public void storeCustomTagMapForDoc(TrpDoc doc, boolean wordBased, Set<Integer> pageIndices, IProgressMonitor monitor, boolean blackening) throws JAXBException, IOException, InterruptedException {
    doBlackening = blackening;
    tags.clear();

    List<TrpPage> docPages = doc.getPages();
    int nPages = docPages.size();
    int processed = 0;

    for (int pageIdx = 0; pageIdx < nPages; pageIdx++) {
        boolean selected = pageIndices == null || pageIndices.contains(pageIdx);
        if (!selected) {
            continue;
        }
        if (monitor != null && monitor.isCanceled()) {
            throw new InterruptedException("User canceled the export");
        }

        // pageTranscripts get fetched before the custom tag map is stored - so normally
        // a cached transcript exists for this index
        JAXBPageTranscript transcript = (pageTranscripts == null) ? null : pageTranscripts.get(pageIdx);
        if (transcript == null) {
            // Nothing cached: create the transcript from the page's current transcript metadata.
            TrpTranscriptMetadata md = docPages.get(pageIdx).getCurrentTranscript();
            transcript = new JAXBPageTranscript(md);
        } else {
            transcript.getPageData();
        }
        transcript.build();

        TrpPageType trpPage = transcript.getPage();
        logger.debug("get tags for page " + (pageIdx + 1) + "/" + doc.getNPages());

        for (TrpTextRegionType region : trpPage.getTextRegions(true)) {
            for (TextLineType rawLine : region.getTextLine()) {
                TrpTextLineType line = (TrpTextLineType) rawLine;
                List<WordType> lineWords = line.getWord();
                // The line itself is always scanned; words only in word-based mode.
                getTagsForShapeElement(line);
                if (!wordBased) {
                    continue;
                }
                for (WordType rawWord : lineWords) {
                    TrpWordType word = (TrpWordType) rawWord;
                    getTagsForShapeElement(word);
                }
            }
        }

        if (monitor != null) {
            monitor.setTaskName("Loaded tags for page " + (pageIdx + 1));
            processed++;
            monitor.worked(processed);
        }
    }
}
use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType in project TranskribusCore by Transkribus.
the class CustomTagUtil method writeReadingOrderCustomTagsToPageFormat.
// public static void createReadingOrderOrderedGroupIndexed(TrpRegionType r, OrderedGroupIndexedType g) {
//
//
//
//
// if (s instanceof TrpRegionType) {
// s.getChildren(recursive)
//
//
// TrpRegionType r = (TrpRegionType) s;
// for (int i=0; i<r.getTextRegionOrImageRegionOrLineDrawingRegion().size(); ++i) {
// TrpRegionType cr = r.getTextRegionOrImageRegionOrLineDrawingRegion().get(i);
//
// if (cr.hasChildren()) {
// OrderedGroupIndexedType cg = PAGETypeFactory.createOrderedGroupIndexed(i, "r_"+CoreUtils.uniqueCurrentTimeMS(), null);
//
//
//
// RegionRefIndexedType rr = PAGETypeFactory.createRegionRefIndexed(index, refObject)
//
// }
//
//
// }
//
// }
//
//
//
// }
//
// public static void writeReadingOrderCustomTagsToPageFormat(TrpPageType page) {
// logger.trace("converting reading order from custom tags to page format... NEW");
//
// ReadingOrderType ro = new ReadingOrderType();
//
// // 1st: create parent group for all reading order elements
// OrderedGroupType group = PAGETypeFactory.createOrderedGroup("ro_"+CoreUtils.uniqueCurrentTimeMS(), "Reading order");
//
// // 2nd: create either a region ref
// for (TrpRegionType r : page.getTextRegionOrImageRegionOrLineDrawingRegion()) {
// xxx
//
//
//
//
// }
//
// OrderedGroupType group = createReadingOrderOrderedGroup(page.getTextRegionOrImageRegionOrLineDrawingRegion(), "Regions reading order");
//
//
//
//
//
//
// OrderedGroupType group = new OrderedGroupType();
// group.setCaption("Regions reading order");
// group.setId("ro_"+CoreUtils.uniqueCurrentTimeMS());
// ro.setOrderedGroup(group);
// boolean readingOrderSet=false;
//
// for (TrpTextRegionType r : page.getTextRegions(false)) {
// if (r.getReadingOrder() != null) {
// readingOrderSet=true;
// RegionRefIndexedType rr = new RegionRefIndexedType();
// rr.setRegionRef(r);
// rr.setIndex(r.getReadingOrder());
// group.getRegionRefIndexedOrOrderedGroupIndexedOrUnorderedGroupIndexed().add(rr);
// readingOrderSet = true;
// }
// }
//
// if (readingOrderSet)
// page.setReadingOrder(ro);
// }
/**
 * Builds a PAGE reading order from the reading order values stored on the text regions of
 * the given page.
 *
 * <p>Every region returned by {@code page.getTextRegions(false)} that has a non-null reading
 * order gets an indexed region reference inside one ordered group captioned
 * "Regions reading order". The reading order is set on the page only if at least one such
 * region exists; otherwise the page is left untouched.
 *
 * @param page the page to read the regions from and to set the reading order on
 */
public static void writeReadingOrderCustomTagsToPageFormat(TrpPageType page) {
    logger.trace("converting reading order from custom tags to page format...");

    OrderedGroupType orderedGroup = new OrderedGroupType();
    orderedGroup.setCaption("Regions reading order");
    orderedGroup.setId("ro_" + CoreUtils.uniqueCurrentTimeMS());

    ReadingOrderType readingOrder = new ReadingOrderType();
    readingOrder.setOrderedGroup(orderedGroup);

    boolean anyRegionIndexed = false;
    for (TrpTextRegionType region : page.getTextRegions(false)) {
        // Regions without a reading order are not referenced.
        if (region.getReadingOrder() == null) {
            continue;
        }
        RegionRefIndexedType ref = new RegionRefIndexedType();
        ref.setRegionRef(region);
        ref.setIndex(region.getReadingOrder());
        orderedGroup.getRegionRefIndexedOrOrderedGroupIndexedOrUnorderedGroupIndexed().add(ref);
        anyRegionIndexed = true;
    }

    if (anyRegionIndexed) {
        page.setReadingOrder(readingOrder);
    }
}
use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType in project TranskribusCore by Transkribus.
the class CustomTagUtil method writeReadingOrderFromPageFormatToCustomTags.
/**
 * Copies the indices of the page's PAGE-format reading order onto the referenced text
 * regions.
 *
 * <p>Only the direct entries of the ordered group are looked at, and only those that are
 * {@code RegionRefIndexedType} instances referencing a {@code TrpTextRegionType}; for each of
 * them {@code region.setReadingOrder(index, region)} is called. All other entries are
 * ignored. The method does nothing if the page has no reading order or if the reading order
 * has no ordered group.
 *
 * @param page the page whose reading order is transferred to its regions
 */
public static void writeReadingOrderFromPageFormatToCustomTags(TrpPageType page) {
    logger.trace("converting reading order from page format to custom tags...");
    ReadingOrderType ro = page.getReadingOrder();
    // Fix: a reading order without an ordered group used to cause a NullPointerException
    // in the loop header below; there is nothing to convert in that case.
    if (ro == null || ro.getOrderedGroup() == null) {
        return;
    }

    for (Object o : ro.getOrderedGroup().getRegionRefIndexedOrOrderedGroupIndexedOrUnorderedGroupIndexed()) {
        logger.trace("ref: " + o);
        if (!(o instanceof RegionRefIndexedType)) {
            continue;
        }
        RegionRefIndexedType rr = (RegionRefIndexedType) o;
        logger.trace("region ref: " + rr + " ref = " + rr.getRegionRef());
        if (rr.getRegionRef() instanceof TrpTextRegionType) {
            TrpTextRegionType region = (TrpTextRegionType) rr.getRegionRef();
            logger.trace("region: " + region.getId() + " index: " + rr.getIndex());
            region.setReadingOrder(rr.getIndex(), region);
        }
    }
}
Aggregations