Search in sources :

Example 11 with RegionType

use of eu.transkribus.core.model.beans.pagecontent.RegionType in project TranskribusCore by Transkribus.

the class DocxBuilder method writeDocxForTranscriptWithTables.

/**
 * Writes the regions of a page into the given docx main document part.
 * <p>
 * The page's region list is sorted in place with a
 * {@code TrpElementReadingOrderComparator} before it is processed. Table regions
 * are converted with {@code getDocxTable} and followed by a paragraph holding a
 * single break; text regions are handed to {@code exportTextRegion} when they
 * contain any text besides newlines. Other region types are ignored.
 * <p>
 * A failure while building or adding a table is logged and the export continues
 * with the next region.
 *
 * @param mdp the main document part that receives the generated content
 * @param trpPage the page whose regions are exported
 * @param wordBased passed through to {@code getDocxTable} and {@code exportTextRegion}
 * @param preserveLineBreaks not read by this method
 */
private static void writeDocxForTranscriptWithTables(MainDocumentPart mdp, TrpPageType trpPage, boolean wordBased, boolean preserveLineBreaks) {
    // TrpTableRegionType is contained in the regions too
    List<TrpRegionType> regions = trpPage.getRegions();
    Collections.sort(regions, new TrpElementReadingOrderComparator<RegionType>(true));
    for (int j = 0; j < regions.size(); ++j) {
        TrpRegionType r = regions.get(j);
        if (r instanceof TrpTableRegionType) {
            logger.debug("is table");
            TrpTableRegionType table = (TrpTableRegionType) r;
            int cols = table.getNCols();
            int rows = table.getNRows();
            // horizontal extent of the table's bounding box, handed to getDocxTable as the table size
            double maxX = table.getBoundingBox().getMaxX();
            double minX = table.getBoundingBox().getMinX();
            int tablesize = (int) (maxX - minX);
            List<List<TrpTableCellType>> allRowCells = new ArrayList<List<TrpTableCellType>>();
            for (int k = 0; k < rows; k++) {
                allRowCells.add(table.getRowCells(k));
            }
            List<HashMap<Integer, TrpTableCellType>> allRows = new ArrayList<HashMap<Integer, TrpTableCellType>>();
            HashMap<Integer, TrpTableCellType> nextRowMap = new HashMap<Integer, TrpTableCellType>();
            for (List<TrpTableCellType> rowCells : allRowCells) {
                HashMap<Integer, TrpTableCellType> currRowMap = new HashMap<Integer, TrpTableCellType>();
                /*
                 * Fill up all cells which are not set in TRP (needed for vertical cell merge).
                 * nextRowMap already contains a null placeholder for every column whose cell in
                 * the row above spans more than one row; those cells got merged in the table
                 * but still have to be considered here.
                 */
                currRowMap.putAll(nextRowMap);
                nextRowMap.clear();
                for (TrpTableCellType cell : rowCells) {
                    currRowMap.put(cell.getCol(), cell);
                    if (cell.getRowSpan() > 1) {
                        // reserve this column in the following row
                        nextRowMap.put(cell.getCol(), null);
                    }
                }
                allRows.add(currRowMap);
            }
            try {
                Tbl thisTable = getDocxTable(wordMLPackage, wordBased, rows, cols, allRows, tablesize, mdp);
                mdp.addObject(thisTable);
            } catch (Exception e) {
                // Report through the logger instead of stderr; the remaining regions are still exported.
                logger.error("Could not create docx table: " + e.getMessage(), e);
            }
            // this Br element is used to break the current line and go to the next one
            Br br = factory.createBr();
            org.docx4j.wml.P p = factory.createP();
            mdp.addObject(p);
            p.getContent().add(br);
        } else if (r instanceof TrpTextRegionType) {
            TrpTextRegionType tr = (TrpTextRegionType) r;
            /*
             * create one paragraph for each text region
             * but only if there is some text in it
             */
            String helper = tr.getUnicodeText().replaceAll("\n", "");
            if (!helper.isEmpty()) {
                exportTextRegion(tr, wordBased, null, mdp);
            }
        }
    }
}
Also used : TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) TrpRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType) TrpTableRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTableRegionType) RegionType(eu.transkribus.core.model.beans.pagecontent.RegionType) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) TrpTableRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTableRegionType) TrpRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType) CustomTagList(eu.transkribus.core.model.beans.customtags.CustomTagList) List(java.util.List) ArrayList(java.util.ArrayList) Tbl(org.docx4j.wml.Tbl) P(org.docx4j.wml.P) TrpTableCellType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTableCellType) JAXBException(javax.xml.bind.JAXBException) IOException(java.io.IOException) Docx4JException(org.docx4j.openpackaging.exceptions.Docx4JException) BigInteger(java.math.BigInteger) Br(org.docx4j.wml.Br) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType)

Example 12 with RegionType

use of eu.transkribus.core.model.beans.pagecontent.RegionType in project TranskribusCore by Transkribus.

the class TrpTxtBuilder method writeTxtForSinglePage.

/**
 * Appends the text of a single page to the given text file.
 * <p>
 * The page's region list is sorted in place with a
 * {@code TrpElementReadingOrderComparator}. For every text region the text of
 * each line is collected, or the text of each word when {@code wordBased} is set
 * and the line has words; a line separator entry is added after each region that
 * has at least one line. Table regions are skipped. The collected entries are
 * written with {@code Files.write} using CREATE and APPEND, so existing file
 * content is kept.
 * <p>
 * An {@link IOException} while writing is logged and not propagated.
 *
 * @param file the target file
 * @param trpPage the page whose text regions are exported
 * @param wordBased if true, lines that contain words are exported word by word
 * @param preserveLineBreaks not read by this method
 */
private static void writeTxtForSinglePage(File file, TrpPageType trpPage, boolean wordBased, boolean preserveLineBreaks) {
    // TrpTableRegionType is contained in the regions too
    List<TrpRegionType> regions = trpPage.getRegions();
    Collections.sort(regions, new TrpElementReadingOrderComparator<RegionType>(true));
    List<String> content = new ArrayList<String>();
    for (int j = 0; j < regions.size(); ++j) {
        TrpRegionType r = regions.get(j);
        if (r instanceof TrpTableRegionType) {
            /*
             * TODO: for simple txt export: how to handle tables
             */
            continue;
        } else if (r instanceof TrpTextRegionType) {
            TrpTextRegionType tr = (TrpTextRegionType) r;
            List<TextLineType> lines = tr.getTextLine();
            for (int i = 0; i < lines.size(); ++i) {
                TrpTextLineType trpL = (TrpTextLineType) lines.get(i);
                String textOfCurrLine = trpL.getUnicodeText();
                if (wordBased && trpL.getWord().size() > 0) {
                    for (WordType word : trpL.getWord()) {
                        content.add(((ITrpShapeType) word).getUnicodeText());
                    }
                } else if (textOfCurrLine != null && !textOfCurrLine.isEmpty()) {
                    // Compare by content: a reference comparison against "" does not
                    // reliably detect empty strings and lets null through.
                    content.add(textOfCurrLine);
                }
            }
            if (lines.size() > 0) {
                // separate regions from each other in the output
                content.add(System.lineSeparator());
            }
        }
    }
    try {
        logger.debug("path " + Paths.get(file.getAbsolutePath()));
        Files.write(Paths.get(file.getAbsolutePath()), content, utf8, StandardOpenOption.CREATE, StandardOpenOption.APPEND);
    } catch (IOException e) {
        // Report through the logger instead of stderr so the failure shows up in the application log.
        logger.error("Could not write text export to " + file.getAbsolutePath() + ": " + e.getMessage(), e);
    }
}
Also used : TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) TrpRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType) RegionType(eu.transkribus.core.model.beans.pagecontent.RegionType) TrpTableRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTableRegionType) ArrayList(java.util.ArrayList) IOException(java.io.IOException) ITrpShapeType(eu.transkribus.core.model.beans.pagecontent_trp.ITrpShapeType) WordType(eu.transkribus.core.model.beans.pagecontent.WordType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TrpTableRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTableRegionType) TrpRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) ArrayList(java.util.ArrayList) List(java.util.List)

Example 13 with RegionType

use of eu.transkribus.core.model.beans.pagecontent.RegionType in project TranskribusCore by Transkribus.

the class PageXmlUtils method findTextRegion.

/**
 * Looks up a region by id via {@code findRegion} and returns it as a text region.
 *
 * @param regId the id handed to {@code findRegion}
 * @param pc the document handed to {@code findRegion}
 * @return the region cast to {@code TextRegionType}, or {@code null} if no region
 *         was found or the found region is not a text region
 */
private static TextRegionType findTextRegion(String regId, PcGtsType pc) {
    final RegionType candidate = findRegion(regId, pc);
    // instanceof is false for null, so this also covers the "nothing found" case
    if (!(candidate instanceof TextRegionType)) {
        return null;
    }
    final TextRegionType match = (TextRegionType) candidate;
    logger.debug("Found textRegion: " + match.getId());
    return match;
}
Also used : TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) TextRegionType(eu.transkribus.core.model.beans.pagecontent.TextRegionType) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) TrpRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType) RegionType(eu.transkribus.core.model.beans.pagecontent.RegionType) TextRegionType(eu.transkribus.core.model.beans.pagecontent.TextRegionType) TableRegionType(eu.transkribus.core.model.beans.pagecontent.TableRegionType)

Example 14 with RegionType

use of eu.transkribus.core.model.beans.pagecontent.RegionType in project TranskribusCore by Transkribus.

the class PageXmlUtils method removeAllLines.

/**
 * Clears the text line list of every text region on the page.
 * Does nothing when {@code hasRegions(pc)} is false; regions that are not
 * text regions are left untouched.
 *
 * @param pc the document whose text regions are emptied
 */
public static void removeAllLines(PcGtsType pc) {
    if (hasRegions(pc)) {
        for (RegionType region : pc.getPage().getTextRegionOrImageRegionOrLineDrawingRegion()) {
            if (!(region instanceof TextRegionType)) {
                continue;
            }
            final TextRegionType textRegion = (TextRegionType) region;
            logger.debug("Clearing text region: " + textRegion.getId());
            textRegion.getTextLine().clear();
        }
    }
}
Also used : TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) TextRegionType(eu.transkribus.core.model.beans.pagecontent.TextRegionType) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) TrpRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType) RegionType(eu.transkribus.core.model.beans.pagecontent.RegionType) TextRegionType(eu.transkribus.core.model.beans.pagecontent.TextRegionType) TableRegionType(eu.transkribus.core.model.beans.pagecontent.TableRegionType) TrpRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType)

Example 15 with RegionType

use of eu.transkribus.core.model.beans.pagecontent.RegionType in project TranskribusCore by Transkribus.

the class PageXmlUtils method cutPolysAtImgBorder.

/**
 * Clamps the polygon coordinates of every region on the page to the image bounds:
 * x values are limited to {@code [0, imageWidth]} and y values to
 * {@code [0, imageHeight]}. The points string is read as space-separated
 * {@code x,y} pairs and written back in the same format. Regions whose points
 * string is null or empty are skipped.
 *
 * @param pc the document whose region coordinates are adjusted in place
 */
public static void cutPolysAtImgBorder(PcGtsType pc) {
    final int maxX = pc.getPage().getImageWidth();
    final int maxY = pc.getPage().getImageHeight();
    List<TrpRegionType> regions = pc.getPage().getTextRegionOrImageRegionOrLineDrawingRegion();
    if (regions == null || regions.isEmpty()) {
        return;
    }
    for (RegionType region : regions) {
        final CoordsType coords = region.getCoords();
        final String rawPoints = coords.getPoints();
        if (rawPoints == null || rawPoints.isEmpty()) {
            continue;
        }
        final StringBuilder clipped = new StringBuilder();
        for (String pair : rawPoints.split(" ")) {
            final String[] xy = pair.split(",");
            int x = Integer.parseInt(xy[0]);
            int y = Integer.parseInt(xy[1]);
            // pull off-limit values back onto the image border
            if (x < 0) {
                x = 0;
            } else if (x > maxX) {
                x = maxX;
            }
            if (y < 0) {
                y = 0;
            } else if (y > maxY) {
                y = maxY;
            }
            // separator goes between pairs only, so no trailing blank has to be trimmed
            if (clipped.length() > 0) {
                clipped.append(" ");
            }
            clipped.append(x).append(",").append(y);
        }
        coords.setPoints(clipped.toString());
    }
}
Also used : TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) TrpRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType) RegionType(eu.transkribus.core.model.beans.pagecontent.RegionType) TextRegionType(eu.transkribus.core.model.beans.pagecontent.TextRegionType) TableRegionType(eu.transkribus.core.model.beans.pagecontent.TableRegionType) TrpRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType) CoordsType(eu.transkribus.core.model.beans.pagecontent.CoordsType)

Aggregations

RegionType (eu.transkribus.core.model.beans.pagecontent.RegionType)18 TrpRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType)16 TextRegionType (eu.transkribus.core.model.beans.pagecontent.TextRegionType)15 TrpTextRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType)15 TrpTableRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTableRegionType)7 TableRegionType (eu.transkribus.core.model.beans.pagecontent.TableRegionType)6 TextLineType (eu.transkribus.core.model.beans.pagecontent.TextLineType)5 UnknownRegionType (eu.transkribus.core.model.beans.pagecontent.UnknownRegionType)5 WordType (eu.transkribus.core.model.beans.pagecontent.WordType)4 Point (java.awt.Point)4 Rectangle (java.awt.Rectangle)4 TrpTextLineType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType)3 IOException (java.io.IOException)3 ArrayList (java.util.ArrayList)3 PdfContentByte (com.itextpdf.text.pdf.PdfContentByte)2 PcGtsType (eu.transkribus.core.model.beans.pagecontent.PcGtsType)2 ITrpShapeType (eu.transkribus.core.model.beans.pagecontent_trp.ITrpShapeType)2 HashMap (java.util.HashMap)2 List (java.util.List)2 Image (com.itextpdf.text.Image)1