Search in sources :

Example 11 with TextRegionType

Use of eu.transkribus.core.model.beans.pagecontent.TextRegionType in project TranskribusCore by Transkribus.

In class TrpTeiStringBuilder, method setContent.

/**
 * Writes the TEI facsimile and text sections for the given pages and appends
 * both (facsimile first, then text) to {@code sbTotal}.
 *
 * <p>Pages are visited in list order; when {@code pageIndices} is set, only the
 * indices it contains are processed. For each processed page a page break is
 * written into the text section, followed by the text of every text region in
 * reading order. Zone output goes to the facsimile section only while
 * {@code pars.hasZones()} is true.
 *
 * @param pages the pages to export
 * @throws JAXBException declared by this method; presumably raised while the
 *         page XML is obtained (the helper is not visible here)
 * @throws InterruptedException if the progress monitor reports cancellation
 */
@Override
protected void setContent(List<TrpPage> pages) throws JAXBException, InterruptedException {
    final SebisStringBuilder facsimileOut = new SebisStringBuilder();
    final SebisStringBuilder textOut = new SebisStringBuilder();

    // Open the <text><body> wrapper; it is closed again after the page loop.
    textOut.incIndent();
    textOut.addLine("<text>");
    textOut.incIndent();
    textOut.addLine("<body>");

    final int totalPages = (pageIndices != null) ? pageIndices.size() : pages.size();
    if (monitor != null) {
        monitor.beginTask("Creating TEI", totalPages);
    }

    int processed = 0;
    for (int pageIdx = 0; pageIdx < pages.size(); pageIdx++) {
        // A null pageIndices means "export everything".
        final boolean selected = (pageIndices == null) || pageIndices.contains(pageIdx);
        if (!selected) {
            continue;
        }

        if (monitor != null) {
            if (monitor.isCanceled()) {
                throw new InterruptedException("Export was canceled by user");
            }
            monitor.subTask("Processing page " + (processed + 1));
        }

        final TrpPage page = pages.get(pageIdx);
        logger.debug("1Processing page " + page.getPageNr() + ": " + page.getUrl() + " - XML=" + page.getCurrentTranscript().getUrl());

        // Page content comes from the helper (buffered transcript or freshly unmarshalled XML).
        final PcGtsType pageContent = this.getPcGtsTypeForPage(page);

        if (pars.hasZones()) {
            // One facsimile element per page, closed again after its regions are written.
            openFacsimileElement(facsimileOut, page, pageContent);
        }

        // Every page starts with a page-break element in the body.
        writePageBreak(textOut, page, pageContent);

        // Note: this sorts the page's own region list in place.
        final List<TrpRegionType> pageRegions = pageContent.getPage().getTextRegionOrImageRegionOrLineDrawingRegion();
        final TrpElementReadingOrderComparator<RegionType> readingOrder = new TrpElementReadingOrderComparator<RegionType>(true);
        Collections.sort(pageRegions, readingOrder);

        for (TrpRegionType region : pageRegions) {
            if (!(region instanceof TextRegionType)) {
                // Non-text regions contribute a zone only, never text.
                if (pars.hasZones()) {
                    final String facsId = FACS_ID_PREFIX + page.getPageNr();
                    writeZoneForShape(facsimileOut, region, facsId, true);
                }
                continue;
            }

            if (pars.hasZones()) {
                writeZonesForTextRegion(facsimileOut, (TrpTextRegionType) region, page.getPageNr());
            }
            writeTextForTextRegion(textOut, (TrpTextRegionType) region, page.getPageNr());
        }

        if (pars.hasZones()) {
            closeFacsimilieElement(facsimileOut);
        }

        processed++;
        if (monitor != null) {
            // NOTE(review): the running total is passed here; if the monitor expects
            // per-step increments this over-reports progress - confirm against the monitor type.
            monitor.worked(processed);
        }
    }

    textOut.addLine("</body>");
    textOut.decIndent();
    textOut.addLine("</text>");
    textOut.decIndent();

    sbTotal.sb.append(facsimileOut.toString());
    sbTotal.sb.append(textOut.toString());
}
Also used : TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) TextRegionType(eu.transkribus.core.model.beans.pagecontent.TextRegionType) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) TrpRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType) RegionType(eu.transkribus.core.model.beans.pagecontent.RegionType) TextRegionType(eu.transkribus.core.model.beans.pagecontent.TextRegionType) TrpPage(eu.transkribus.core.model.beans.TrpPage) TrpRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType) SebisStringBuilder(eu.transkribus.core.util.SebisStringBuilder) PcGtsType(eu.transkribus.core.model.beans.pagecontent.PcGtsType) Point(java.awt.Point)

Example 12 with TextRegionType

Use of eu.transkribus.core.model.beans.pagecontent.TextRegionType in project TranskribusCore by Transkribus.

In class PageXmlUtils, method findTextRegion.

/**
 * Looks up the region with the given ID via {@code findRegion} and returns it
 * only if it is a text region.
 *
 * @param regId ID of the region to look for
 * @param pc    page content to search in
 * @return the matching text region, or {@code null} if no region was found or
 *         the region found is not a {@code TextRegionType}
 */
private static TextRegionType findTextRegion(String regId, PcGtsType pc) {
    final RegionType candidate = findRegion(regId, pc);
    // instanceof is false for null, so this guard also covers "nothing found".
    if (!(candidate instanceof TextRegionType)) {
        return null;
    }
    final TextRegionType match = (TextRegionType) candidate;
    logger.debug("Found textRegion: " + match.getId());
    return match;
}
Also used : TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) TextRegionType(eu.transkribus.core.model.beans.pagecontent.TextRegionType) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) TrpRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType) RegionType(eu.transkribus.core.model.beans.pagecontent.RegionType) TextRegionType(eu.transkribus.core.model.beans.pagecontent.TextRegionType) TableRegionType(eu.transkribus.core.model.beans.pagecontent.TableRegionType)

Example 13 with TextRegionType

Use of eu.transkribus.core.model.beans.pagecontent.TextRegionType in project TranskribusCore by Transkribus.

In class PageXmlUtils, method getFulltextFromLines.

/**
 * Concatenates the Unicode text of all lines in all text regions of a page.
 *
 * <p>Regions are visited in the order returned by
 * {@code PageXmlUtils.getTextRegions(pc)}; within each region the lines are
 * ordered with a {@code TrpElementCoordinatesComparator}. Lines without a text
 * equivalent or without Unicode content are skipped. Each emitted line is
 * followed by a single space, so a non-empty result ends with a space.
 *
 * <p>The ordering is applied to a copy of each region's line list, so the page
 * model passed in is left untouched by this read-only operation.
 *
 * @param pc the page content to read from
 * @return the concatenated line text; empty if no line carries text
 */
public static String getFulltextFromLines(PcGtsType pc) {
    List<TextRegionType> regions = PageXmlUtils.getTextRegions(pc);
    TrpElementCoordinatesComparator<TextLineType> comp = new TrpElementCoordinatesComparator<>();
    StringBuilder sb = new StringBuilder();
    for (TextRegionType r : regions) {
        List<TextLineType> lines = r.getTextLine();
        if (lines == null || lines.isEmpty()) {
            continue;
        }
        // Sort a copy: sorting the list returned by the region in place would
        // reorder the lines of the caller's page as a hidden side effect.
        List<TextLineType> sortedLines = new java.util.ArrayList<>(lines);
        Collections.sort(sortedLines, comp);
        for (TextLineType l : sortedLines) {
            if (l.getTextEquiv() != null && l.getTextEquiv().getUnicode() != null) {
                sb.append(l.getTextEquiv().getUnicode()).append(' ');
            }
        }
    }
    return sb.toString();
}
Also used : TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) TextRegionType(eu.transkribus.core.model.beans.pagecontent.TextRegionType) TextLineType(eu.transkribus.core.model.beans.pagecontent.TextLineType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TrpElementCoordinatesComparator(eu.transkribus.core.model.beans.pagecontent_trp.TrpElementCoordinatesComparator)

Example 14 with TextRegionType

Use of eu.transkribus.core.model.beans.pagecontent.TextRegionType in project TranskribusCore by Transkribus.

In class PageXmlUtils, method moveTextRegion.

/**
 * Puts the text region with the given ID from {@code sourcePc} into
 * {@code targetPc} by calling {@code setTextRegion(regId, targetPc, source)}.
 *
 * <p>A text region with this ID must exist in both pages. Nothing visible here
 * removes the region from {@code sourcePc}.
 *
 * @param regId    ID of the text region
 * @param sourcePc page content the region is taken from
 * @param targetPc page content the region is set on
 * @throws IllegalArgumentException if no text region with {@code regId} exists
 *         in the source page or in the target page
 */
public static void moveTextRegion(final String regId, PcGtsType sourcePc, PcGtsType targetPc) {
    TextRegionType source = findTextRegion(regId, sourcePc);
    if (source == null) {
        throw new IllegalArgumentException("TextRegion ID=" + regId + " could not be found in source page!");
    }
    // The target is only looked up to verify that the region exists there;
    // the actual replacement is done by setTextRegion.
    if (findTextRegion(regId, targetPc) == null) {
        throw new IllegalArgumentException("TextRegion ID=" + regId + " could not be found in target page!");
    }
    setTextRegion(regId, targetPc, source);
}
Also used : TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) TextRegionType(eu.transkribus.core.model.beans.pagecontent.TextRegionType)

Example 15 with TextRegionType

Use of eu.transkribus.core.model.beans.pagecontent.TextRegionType in project TranskribusCore by Transkribus.

In class PageXmlUtils, method removeAllLines.

/**
 * Empties the line list of every text region on the page.
 *
 * <p>Does nothing when {@code hasRegions(pc)} is false. Regions that are not
 * text regions are left as they are.
 *
 * @param pc the page content whose text regions are cleared of lines
 */
public static void removeAllLines(PcGtsType pc) {
    if (hasRegions(pc)) {
        final List<TrpRegionType> pageRegions = pc.getPage().getTextRegionOrImageRegionOrLineDrawingRegion();
        for (RegionType region : pageRegions) {
            if (!(region instanceof TextRegionType)) {
                continue;
            }
            final TextRegionType textRegion = (TextRegionType) region;
            logger.debug("Clearing text region: " + textRegion.getId());
            textRegion.getTextLine().clear();
        }
    }
}
Also used : TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) TextRegionType(eu.transkribus.core.model.beans.pagecontent.TextRegionType) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) TrpRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType) RegionType(eu.transkribus.core.model.beans.pagecontent.RegionType) TextRegionType(eu.transkribus.core.model.beans.pagecontent.TextRegionType) TableRegionType(eu.transkribus.core.model.beans.pagecontent.TableRegionType) TrpRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType)

Aggregations

TextRegionType (eu.transkribus.core.model.beans.pagecontent.TextRegionType)19 TrpTextRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType)16 RegionType (eu.transkribus.core.model.beans.pagecontent.RegionType)13 TrpRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType)12 TextLineType (eu.transkribus.core.model.beans.pagecontent.TextLineType)9 TrpTextLineType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType)6 PcGtsType (eu.transkribus.core.model.beans.pagecontent.PcGtsType)5 TableRegionType (eu.transkribus.core.model.beans.pagecontent.TableRegionType)5 WordType (eu.transkribus.core.model.beans.pagecontent.WordType)5 UnknownRegionType (eu.transkribus.core.model.beans.pagecontent.UnknownRegionType)4 TrpTableRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTableRegionType)4 Rectangle (java.awt.Rectangle)4 URL (java.net.URL)4 ArrayList (java.util.ArrayList)4 TrpPage (eu.transkribus.core.model.beans.TrpPage)3 TrpElementCoordinatesComparator (eu.transkribus.core.model.beans.pagecontent_trp.TrpElementCoordinatesComparator)3 TrpTranscriptMetadata (eu.transkribus.core.model.beans.TrpTranscriptMetadata)2 TrpTranscriptStatistics (eu.transkribus.core.model.beans.TrpTranscriptStatistics)2 TrpWordType (eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType)2 Point (java.awt.Point)2