Search in sources :

Example 16 with TrpTextRegionType

use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType in project TranskribusCore by Transkribus.

the class KlosterTeiToPageParser method parsePage.

/**
 * Converts the content that follows one TEI {@code pb} (page break) node into a PAGE document
 * containing a single text region with one text line per {@code lb} (line break) element.
 *
 * <p>The siblings of {@code pbNode} are walked until the next {@code pb} node or the end of the
 * sibling list. Every {@code lb} element supplies a bounding box through its {@code xmlns:x},
 * {@code xmlns:y}, {@code xmlns:w} and {@code xmlns:h} attributes; the text of the line is taken
 * from the node directly following the {@code lb} element. The region's bounding box is the
 * union of all line boxes, or the whole page if no line was found.
 *
 * @param pbNode the {@code pb} element; its {@code facs}, {@code n}, {@code xmlns:w} and
 *               {@code xmlns:h} attributes are read
 * @param save   if {@code true} and at least one line was found, the PAGE XML is written below
 *               {@code PAGE_DIR} and the image is copied from {@code DIR} to {@code DST_DIR}
 * @throws IOException           propagated from marshalling the page or copying the image
 * @throws JAXBException         propagated from marshalling the page
 * @throws NumberFormatException if one of the numeric attributes is missing or not an integer
 */
static void parsePage(Node pbNode, boolean save) throws IOException, JAXBException {
    Element pb = (Element) pbNode;
    String imgFn = pb.getAttribute("facs");
    int pageN = Integer.parseInt(pb.getAttribute("n"));
    int pageHeight = Integer.parseInt(pb.getAttribute("xmlns:h"));
    int pageWidth = Integer.parseInt(pb.getAttribute("xmlns:w"));
    // Use the real image file name; the previous code passed the literal string "imgfn".
    PcGtsType page = PageXmlUtils.createEmptyPcGtsType(imgFn, pageWidth, pageHeight);
    TrpTextRegionType region = new TrpTextRegionType();
    region.setId("region_1");
    System.out.println("page data: imgFn = " + imgFn + " n = " + pageN + " pageWidth = " + pageWidth + " pageHeight = " + pageHeight);
    // Running bounding box over all lines of this page.
    int minX = Integer.MAX_VALUE, minY = Integer.MAX_VALUE, maxX = -1, maxY = -1;
    int lineCount = 0;
    for (Node sibling = pbNode.getNextSibling(); sibling != null; sibling = sibling.getNextSibling()) {
        if ("pb".equals(sibling.getNodeName())) {
            // The next page starts here.
            break;
        }
        if (sibling.getNodeType() != Node.ELEMENT_NODE || !"lb".equals(sibling.getNodeName())) {
            continue;
        }
        Element lb = (Element) sibling;
        int n = Integer.parseInt(lb.getAttribute("n"));
        int x = Integer.parseInt(lb.getAttribute("xmlns:x"));
        int y = Integer.parseInt(lb.getAttribute("xmlns:y"));
        int w = Integer.parseInt(lb.getAttribute("xmlns:w"));
        int h = Integer.parseInt(lb.getAttribute("xmlns:h"));
        minX = Math.min(minX, x);
        minY = Math.min(minY, y);
        maxX = Math.max(maxX, x + w);
        maxY = Math.max(maxY, y + h);
        // The line text lives in the node right after the <lb/>; an <lb/> at the very end of
        // the sibling list has no such node and yields an empty line instead of an NPE.
        Node textNode = sibling.getNextSibling();
        String txt = textNode == null ? "" : textNode.getTextContent();
        txt = StringUtils.stripEnd(txt, " \r\n");
        System.out.format("line: n = %d, txt = %s, coords = [%d,%d,%d,%d]\n", n, txt, x, y, w, h);
        TrpTextLineType line = new TrpTextLineType();
        line.setCoords(bbToCoords(x, y, w, h));
        TextEquivType te = new TextEquivType();
        te.setUnicode(txt);
        line.setTextEquiv(te);
        line.setId("line_" + (++lineCount));
        // Synthesize a horizontal baseline at 70% of the line height, spanning the full line width.
        TrpBaselineType bl = new TrpBaselineType();
        int yBl = (int) (y + 0.7 * h);
        bl.setPoints(x + "," + yBl + " " + (x + w) + "," + yBl);
        line.setBaseline(bl);
        region.getTextLine().add(line);
    }
    boolean hasLines = !region.getTextLine().isEmpty();
    if (hasLines) {
        region.setCoords(bbToCoords(minX, minY, maxX - minX, maxY - minY));
    } else {
        // No lines found: let the region cover the whole page.
        region.setCoords(bbToCoords(0, 0, pageWidth, pageHeight));
    }
    page.getPage().getTextRegionOrImageRegionOrLineDrawingRegion().add(region);
    if (save && hasLines) {
        File xmlFile = new File(PAGE_DIR + FilenameUtils.getBaseName(imgFn) + ".xml");
        PageXmlUtils.marshalToFile(page, xmlFile);
        FileUtils.copyFile(new File(DIR + imgFn), new File(DST_DIR + imgFn));
        System.out.println("written page to: " + xmlFile.getAbsolutePath());
    }
}
Also used : TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TrpBaselineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpBaselineType) TextEquivType(eu.transkribus.core.model.beans.pagecontent.TextEquivType) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) Element(org.w3c.dom.Element) Node(org.w3c.dom.Node) PcGtsType(eu.transkribus.core.model.beans.pagecontent.PcGtsType) File(java.io.File)

Example 17 with TrpTextRegionType

use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType in project TranskribusCore by Transkribus.

the class PageXmlUtils method removeAllIndexedTags.

/**
 * Removes the indexed tags from the custom tag list of every text region returned by
 * {@code getTextRegions(true)}, and from every text line and word contained in those regions.
 *
 * @param pc the PAGE document; its page is cast to {@link TrpPageType}, and its lines and words
 *           are cast to {@link TrpTextLineType} and {@link TrpWordType} respectively
 */
public static void removeAllIndexedTags(PcGtsType pc) {
    TrpPageType page = (TrpPageType) pc.getPage();
    for (TrpTextRegionType region : page.getTextRegions(true)) {
        // Region level first, then descend into its lines.
        region.getCustomTagList().removeIndexedTags();
        for (TextLineType rawLine : region.getTextLine()) {
            TrpTextLineType line = (TrpTextLineType) rawLine;
            line.getCustomTagList().removeIndexedTags();
            for (WordType rawWord : line.getWord()) {
                ((TrpWordType) rawWord).getCustomTagList().removeIndexedTags();
            }
        }
    }
}
Also used : TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TextLineType(eu.transkribus.core.model.beans.pagecontent.TextLineType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType) TrpPageType(eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType) WordType(eu.transkribus.core.model.beans.pagecontent.WordType) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType)

Example 18 with TrpTextRegionType

use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType in project TranskribusCore by Transkribus.

the class ExportCache method storeCustomTagMapForDoc.

/**
 * Clears the stored tags and re-collects them from the selected pages of the given document.
 * Tags are gathered by calling {@code getTagsForShapeElement} on every text line and, if
 * {@code wordBased} is set, additionally on every word of each line.
 *
 * @param doc         the document whose pages are processed
 * @param wordBased   if {@code true}, the words of each line are processed in addition to the line
 * @param pageIndices zero-based indices of the pages to process; {@code null} processes all pages
 * @param monitor     optional progress monitor (may be {@code null}); it is polled for
 *                    cancellation before each page and updated after each page
 * @param blackening  stored in the {@code doBlackening} field
 * @throws JAXBException        propagated from the transcript handling
 * @throws IOException          propagated from the transcript handling
 * @throws InterruptedException if the monitor reports that the user canceled
 */
public void storeCustomTagMapForDoc(TrpDoc doc, boolean wordBased, Set<Integer> pageIndices, IProgressMonitor monitor, boolean blackening) throws JAXBException, IOException, InterruptedException {
    doBlackening = blackening;
    tags.clear();
    List<TrpPage> pages = doc.getPages();
    int totalPages = pages.size();
    int processed = 0;
    for (int pageIdx = 0; pageIdx < totalPages; ++pageIdx) {
        boolean selected = pageIndices == null || pageIndices.contains(pageIdx);
        if (!selected) {
            continue;
        }
        if (monitor != null && monitor.isCanceled()) {
            throw new InterruptedException("User canceled the export");
        }
        // Page transcripts are normally fetched before this method runs, so a cached entry
        // is expected here; otherwise fall back to the page's current transcript.
        JAXBPageTranscript transcript = null;
        if (pageTranscripts != null) {
            transcript = pageTranscripts.get(pageIdx);
        }
        if (transcript == null) {
            TrpTranscriptMetadata md = pages.get(pageIdx).getCurrentTranscript();
            transcript = new JAXBPageTranscript(md);
        } else {
            transcript.getPageData();
        }
        transcript.build();
        TrpPageType trpPage = transcript.getPage();
        logger.debug("get tags for page " + (pageIdx + 1) + "/" + doc.getNPages());
        for (TrpTextRegionType region : trpPage.getTextRegions(true)) {
            for (TextLineType rawLine : region.getTextLine()) {
                TrpTextLineType line = (TrpTextLineType) rawLine;
                List<WordType> words = line.getWord();
                // The line itself is always processed; its words only in word-based mode.
                getTagsForShapeElement(line);
                if (!wordBased) {
                    continue;
                }
                for (WordType rawWord : words) {
                    getTagsForShapeElement((TrpWordType) rawWord);
                }
            }
        }
        if (monitor != null) {
            monitor.setTaskName("Loaded tags for page " + (pageIdx + 1));
            monitor.worked(++processed);
        }
    }
}
Also used : JAXBPageTranscript(eu.transkribus.core.model.beans.JAXBPageTranscript) TrpPage(eu.transkribus.core.model.beans.TrpPage) TrpTranscriptMetadata(eu.transkribus.core.model.beans.TrpTranscriptMetadata) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType) WordType(eu.transkribus.core.model.beans.pagecontent.WordType) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TextLineType(eu.transkribus.core.model.beans.pagecontent.TextLineType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) TrpPageType(eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)

Example 19 with TrpTextRegionType

use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType in project TranskribusCore by Transkribus.

the class CustomTagUtil method writeReadingOrderCustomTagsToPageFormat.

// public static void createReadingOrderOrderedGroupIndexed(TrpRegionType r, OrderedGroupIndexedType g) {
// 
// 
// 
// 
// if (s instanceof TrpRegionType) {
// s.getChildren(recursive)
// 
// 
// TrpRegionType r = (TrpRegionType) s;
// for (int i=0; i<r.getTextRegionOrImageRegionOrLineDrawingRegion().size(); ++i) {
// TrpRegionType cr = r.getTextRegionOrImageRegionOrLineDrawingRegion().get(i);
// 
// if (cr.hasChildren()) {
// OrderedGroupIndexedType cg = PAGETypeFactory.createOrderedGroupIndexed(i, "r_"+CoreUtils.uniqueCurrentTimeMS(), null);
// 
// 
// 
// RegionRefIndexedType rr = PAGETypeFactory.createRegionRefIndexed(index, refObject)
// 
// }
// 
// 
// }
// 
// }
// 
// 
// 
// }
// 
// public static void writeReadingOrderCustomTagsToPageFormat(TrpPageType page) {
// logger.trace("converting reading order from custom tags to page format... NEW");
// 
// ReadingOrderType ro = new ReadingOrderType();
// 
// // 1st: create parent group for all reading order elements
// OrderedGroupType group = PAGETypeFactory.createOrderedGroup("ro_"+CoreUtils.uniqueCurrentTimeMS(), "Reading order");
// 
// // 2nd: create either a region ref
// for (TrpRegionType r : page.getTextRegionOrImageRegionOrLineDrawingRegion()) {
// xxx
// 
// 
// 
// 
// }
// 
// OrderedGroupType group = createReadingOrderOrderedGroup(page.getTextRegionOrImageRegionOrLineDrawingRegion(), "Regions reading order");
// 
// 
// 
// 
// 
// 
// OrderedGroupType group = new OrderedGroupType();
// group.setCaption("Regions reading order");
// group.setId("ro_"+CoreUtils.uniqueCurrentTimeMS());
// ro.setOrderedGroup(group);
// boolean readingOrderSet=false;
// 
// for (TrpTextRegionType r : page.getTextRegions(false)) {
// if (r.getReadingOrder() != null) {
// readingOrderSet=true;
// RegionRefIndexedType rr = new RegionRefIndexedType();
// rr.setRegionRef(r);
// rr.setIndex(r.getReadingOrder());
// group.getRegionRefIndexedOrOrderedGroupIndexedOrUnorderedGroupIndexed().add(rr);
// readingOrderSet = true;
// }
// }
// 
// if (readingOrderSet)
// page.setReadingOrder(ro);
// }
/**
 * Builds a PAGE reading order from the reading-order values of the page's text regions.
 *
 * <p>Every region returned by {@code page.getTextRegions(false)} that has a non-null reading
 * order is referenced from a new ordered group, using that reading order as the index. The
 * reading order is set on the page only if at least one such region exists.
 *
 * @param page the page whose text regions are inspected and whose reading order may be replaced
 */
public static void writeReadingOrderCustomTagsToPageFormat(TrpPageType page) {
    logger.trace("converting reading order from custom tags to page format...");
    ReadingOrderType readingOrder = new ReadingOrderType();
    OrderedGroupType orderedGroup = new OrderedGroupType();
    orderedGroup.setCaption("Regions reading order");
    orderedGroup.setId("ro_" + CoreUtils.uniqueCurrentTimeMS());
    readingOrder.setOrderedGroup(orderedGroup);

    boolean anyRegionOrdered = false;
    for (TrpTextRegionType region : page.getTextRegions(false)) {
        if (region.getReadingOrder() == null) {
            // Regions without a reading order are not referenced from the group.
            continue;
        }
        RegionRefIndexedType ref = new RegionRefIndexedType();
        ref.setRegionRef(region);
        ref.setIndex(region.getReadingOrder());
        orderedGroup.getRegionRefIndexedOrOrderedGroupIndexedOrUnorderedGroupIndexed().add(ref);
        anyRegionOrdered = true;
    }

    if (anyRegionOrdered) {
        page.setReadingOrder(readingOrder);
    }
}
Also used : RegionRefIndexedType(eu.transkribus.core.model.beans.pagecontent.RegionRefIndexedType) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) ReadingOrderType(eu.transkribus.core.model.beans.pagecontent.ReadingOrderType) OrderedGroupType(eu.transkribus.core.model.beans.pagecontent.OrderedGroupType)

Example 20 with TrpTextRegionType

use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType in project TranskribusCore by Transkribus.

the class CustomTagUtil method writeReadingOrderFromPageFormatToCustomTags.

/**
 * Copies the indices stored in the page's PAGE reading order onto the referenced text regions.
 *
 * <p>Only the direct {@link RegionRefIndexedType} entries of the ordered group are considered,
 * and only those whose referenced object is a {@link TrpTextRegionType}; for each of them
 * {@code setReadingOrder(index, region)} is called on the region. Other entries are only traced.
 *
 * <p>The method returns without doing anything if the page has no reading order, or if the
 * reading order has no ordered group (previously the latter case caused a
 * {@link NullPointerException}).
 *
 * @param page the page whose reading order is read and whose text regions are updated
 */
public static void writeReadingOrderFromPageFormatToCustomTags(TrpPageType page) {
    logger.trace("converting reading order from page format to custom tags...");
    ReadingOrderType ro = page.getReadingOrder();
    // A reading order may exist without an ordered group; there is nothing to convert then.
    if (ro == null || ro.getOrderedGroup() == null) {
        return;
    }
    for (Object o : ro.getOrderedGroup().getRegionRefIndexedOrOrderedGroupIndexedOrUnorderedGroupIndexed()) {
        logger.trace("ref: " + o);
        if (!(o instanceof RegionRefIndexedType)) {
            continue;
        }
        RegionRefIndexedType rr = (RegionRefIndexedType) o;
        Object target = rr.getRegionRef();
        logger.trace("region ref: " + rr + " ref = " + target);
        if (target instanceof TrpTextRegionType) {
            TrpTextRegionType region = (TrpTextRegionType) target;
            logger.trace("region: " + region.getId() + " index: " + rr.getIndex());
            region.setReadingOrder(rr.getIndex(), region);
        }
    }
}
Also used : RegionRefIndexedType(eu.transkribus.core.model.beans.pagecontent.RegionRefIndexedType) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) ReadingOrderType(eu.transkribus.core.model.beans.pagecontent.ReadingOrderType)

Aggregations

TrpTextRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType)20 TrpTextLineType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType)14 TrpPageType (eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)9 TrpWordType (eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType)7 CustomTagList (eu.transkribus.core.model.beans.customtags.CustomTagList)5 TextStyleTag (eu.transkribus.core.model.beans.customtags.TextStyleTag)5 TextLineType (eu.transkribus.core.model.beans.pagecontent.TextLineType)5 WordType (eu.transkribus.core.model.beans.pagecontent.WordType)5 PcGtsType (eu.transkribus.core.model.beans.pagecontent.PcGtsType)4 RegionType (eu.transkribus.core.model.beans.pagecontent.RegionType)4 TrpRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType)4 IOException (java.io.IOException)4 Test (org.junit.Test)4 TrpPage (eu.transkribus.core.model.beans.TrpPage)3 TrpTableRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTableRegionType)3 Point (java.awt.Point)3 Rectangle (java.awt.Rectangle)3 ArrayList (java.util.ArrayList)3 JAXBPageTranscript (eu.transkribus.core.model.beans.JAXBPageTranscript)2 TrpTranscriptMetadata (eu.transkribus.core.model.beans.TrpTranscriptMetadata)2