use of eu.transkribus.core.model.beans.pagecontent.RegionType in project TranskribusCore by Transkribus.
the class DocxBuilder method writeDocxForTranscriptWithTables.
/**
 * Writes all regions of a page into the given main document part, in reading order.
 * <p>
 * Table regions are converted with {@code getDocxTable} and followed by a paragraph
 * holding a single break element. Text regions are exported with
 * {@code exportTextRegion}, but only when they contain text other than newlines.
 * Regions of any other type are skipped.
 * <p>
 * Note: the region list returned by {@code trpPage.getRegions()} is sorted in place.
 *
 * @param mdp the document part that receives the generated content
 * @param trpPage the page whose regions are exported
 * @param wordBased passed on to {@code getDocxTable} and {@code exportTextRegion}
 * @param preserveLineBreaks currently not used by this method
 */
private static void writeDocxForTranscriptWithTables(MainDocumentPart mdp, TrpPageType trpPage, boolean wordBased, boolean preserveLineBreaks) {
    // TrpTableRegionType is contained in the regions too
    List<TrpRegionType> regions = trpPage.getRegions();
    Collections.sort(regions, new TrpElementReadingOrderComparator<RegionType>(true));
    for (int j = 0; j < regions.size(); ++j) {
        TrpRegionType r = regions.get(j);
        if (r instanceof TrpTableRegionType) {
            logger.debug("is table");
            TrpTableRegionType table = (TrpTableRegionType) r;
            int cols = table.getNCols();
            int rows = table.getNRows();
            // The horizontal extent of the table's bounding box is used as the table size.
            double maxX = table.getBoundingBox().getMaxX();
            double minX = table.getBoundingBox().getMinX();
            int tablesize = (int) (maxX - minX);

            List<List<TrpTableCellType>> allRowCells = new ArrayList<List<TrpTableCellType>>();
            for (int k = 0; k < rows; k++) {
                allRowCells.add(table.getRowCells(k));
            }

            List<HashMap<Integer, TrpTableCellType>> allRows = new ArrayList<HashMap<Integer, TrpTableCellType>>();
            HashMap<Integer, TrpTableCellType> nextRowMap = new HashMap<Integer, TrpTableCellType>();
            for (List<TrpTableCellType> rowCells : allRowCells) {
                HashMap<Integer, TrpTableCellType> currRowMap = new HashMap<Integer, TrpTableCellType>();
                /*
                 * Fill up all cells which are not set in TRP (needed for vertical cell merge).
                 * nextRowMap already contains the columns that are covered by a vertically
                 * spanning cell of the row above; they are merged in the table but still
                 * have to be represented here (as null placeholders).
                 */
                currRowMap.putAll(nextRowMap);
                nextRowMap.clear();
                for (TrpTableCellType cell : rowCells) {
                    currRowMap.put(cell.getCol(), cell);
                    if (cell.getRowSpan() > 1) {
                        // NOTE(review): the placeholder is only carried into the directly
                        // following row; confirm that spans over more than two rows are
                        // handled elsewhere (e.g. in getDocxTable).
                        nextRowMap.put(cell.getCol(), null);
                    }
                }
                allRows.add(currRowMap);
            }

            try {
                Tbl thisTable = getDocxTable(wordMLPackage, wordBased, rows, cols, allRows, tablesize, mdp);
                mdp.addObject(thisTable);
            } catch (Exception e) {
                // Best effort: a table that cannot be built is skipped, the export continues.
                logger.error("Could not create docx table for table region", e);
            }

            // this Br element is used to break the current line and continue on the next one
            Br br = factory.createBr();
            org.docx4j.wml.P p = factory.createP();
            mdp.addObject(p);
            p.getContent().add(br);
        } else if (r instanceof TrpTextRegionType) {
            TrpTextRegionType tr = (TrpTextRegionType) r;
            /*
             * create one paragraph for each text region
             * but only if there is some text in it
             */
            String helper = tr.getUnicodeText().replace("\n", "");
            if (!helper.isEmpty()) {
                exportTextRegion(tr, wordBased, null, mdp);
            }
        }
    }
}
use of eu.transkribus.core.model.beans.pagecontent.RegionType in project TranskribusCore by Transkribus.
the class TrpTxtBuilder method writeTxtForSinglePage.
/**
 * Appends the text of all text regions of a page to the given file.
 * <p>
 * Regions are processed in reading order (the list returned by
 * {@code trpPage.getRegions()} is sorted in place). Table regions are skipped.
 * For each text line either the text of every word (when {@code wordBased} is set
 * and the line has words) or the non-empty line text is collected. After every text
 * region that has at least one line, an entry holding a line separator is added.
 * The collected entries are written with {@code Files.write} using the
 * {@code CREATE} and {@code APPEND} options.
 *
 * @param file the target text file; created if missing, otherwise appended to
 * @param trpPage the page whose text is exported
 * @param wordBased if true, lines that contain words are exported word by word
 * @param preserveLineBreaks currently not used by this method
 */
private static void writeTxtForSinglePage(File file, TrpPageType trpPage, boolean wordBased, boolean preserveLineBreaks) {
    // TrpTableRegionType is contained in the regions too
    List<TrpRegionType> regions = trpPage.getRegions();
    Collections.sort(regions, new TrpElementReadingOrderComparator<RegionType>(true));
    List<String> content = new ArrayList<String>();
    for (int j = 0; j < regions.size(); ++j) {
        TrpRegionType r = regions.get(j);
        if (r instanceof TrpTableRegionType) {
            /*
             * TODO: for simple txt export: how to handle tables
             */
            continue;
        } else if (r instanceof TrpTextRegionType) {
            TrpTextRegionType tr = (TrpTextRegionType) r;
            List<TextLineType> lines = tr.getTextLine();
            for (int i = 0; i < lines.size(); ++i) {
                TrpTextLineType trpL = (TrpTextLineType) lines.get(i);
                String textOfCurrLine = trpL.getUnicodeText();
                if (wordBased && trpL.getWord().size() > 0) {
                    for (WordType word : trpL.getWord()) {
                        content.add(((ITrpShapeType) word).getUnicodeText());
                    }
                } else if (textOfCurrLine != null && !textOfCurrLine.isEmpty()) {
                    // Compare by content: a reference comparison against "" would let
                    // empty (and null) line texts through.
                    content.add(textOfCurrLine);
                }
            }
            if (lines.size() > 0) {
                // separator entry after each region
                content.add(System.lineSeparator());
            }
        }
    }
    try {
        logger.debug("path " + Paths.get(file.getAbsolutePath()));
        Files.write(Paths.get(file.getAbsolutePath()), content, utf8, StandardOpenOption.CREATE, StandardOpenOption.APPEND);
    } catch (IOException e) {
        // Keep the best-effort behaviour (no rethrow), but report through the logger.
        logger.error("Could not write text export to " + file.getAbsolutePath(), e);
    }
}
use of eu.transkribus.core.model.beans.pagecontent.RegionType in project TranskribusCore by Transkribus.
the class PageXmlUtils method findTextRegion.
/**
 * Looks up a region by id via {@code findRegion} and returns it if it is a text region.
 *
 * @param regId the id handed to {@code findRegion}
 * @param pc the document handed to {@code findRegion}
 * @return the region as {@code TextRegionType}, or {@code null} when no region was
 *         found or the found region is not a text region
 */
private static TextRegionType findTextRegion(String regId, PcGtsType pc) {
    final RegionType candidate = findRegion(regId, pc);
    // instanceof is false for null, so this also covers "nothing found"
    if (!(candidate instanceof TextRegionType)) {
        return null;
    }
    final TextRegionType match = (TextRegionType) candidate;
    logger.debug("Found textRegion: " + match.getId());
    return match;
}
use of eu.transkribus.core.model.beans.pagecontent.RegionType in project TranskribusCore by Transkribus.
the class PageXmlUtils method removeAllLines.
/**
 * Removes the text lines from every text region of the page.
 * Does nothing when {@code hasRegions(pc)} is false; regions that are not text
 * regions are left untouched.
 *
 * @param pc the document whose text regions are cleared
 */
public static void removeAllLines(PcGtsType pc) {
    if (hasRegions(pc)) {
        final List<TrpRegionType> pageRegions = pc.getPage().getTextRegionOrImageRegionOrLineDrawingRegion();
        for (final RegionType region : pageRegions) {
            if (!(region instanceof TextRegionType)) {
                continue;
            }
            final TextRegionType textRegion = (TextRegionType) region;
            logger.debug("Clearing text region: " + textRegion.getId());
            textRegion.getTextLine().clear();
        }
    }
}
use of eu.transkribus.core.model.beans.pagecontent.RegionType in project TranskribusCore by Transkribus.
the class PageXmlUtils method cutPolysAtImgBorder.
/**
 * If regions overlap the image border, reset off-limit coordinates to min/max.
 * <p>
 * Every point of each region's coordinate string ({@code "x,y x,y ..."}) is clamped
 * to {@code [0, imageWidth]} for x and {@code [0, imageHeight]} for y, and the
 * rewritten string is stored back on the region's coords object. Regions without a
 * coords object, or whose points string is null or blank, are left unchanged.
 *
 * @param pc the document whose page regions are adjusted
 * @throws NumberFormatException if a coordinate is not a valid integer
 */
public static void cutPolysAtImgBorder(PcGtsType pc) {
    final int maxX = pc.getPage().getImageWidth();
    final int maxY = pc.getPage().getImageHeight();
    List<TrpRegionType> regions = pc.getPage().getTextRegionOrImageRegionOrLineDrawingRegion();
    if (regions == null || regions.isEmpty()) {
        return;
    }
    for (RegionType r : regions) {
        CoordsType c = r.getCoords();
        if (c == null) {
            // region without coordinates: nothing to clamp
            continue;
        }
        final String pointsStr = c.getPoints();
        if (pointsStr == null) {
            continue;
        }
        final String trimmed = pointsStr.trim();
        if (trimmed.isEmpty()) {
            continue;
        }
        final StringBuilder sb = new StringBuilder(trimmed.length());
        // Split on any run of whitespace so that repeated or leading blanks do not
        // produce empty tokens, which Integer.parseInt would reject.
        for (String point : trimmed.split("\\s+")) {
            final String[] xy = point.split(",");
            final int x = Integer.parseInt(xy[0]);
            final int y = Integer.parseInt(xy[1]);
            if (sb.length() > 0) {
                sb.append(' ');
            }
            sb.append(x < 0 ? 0 : (x > maxX ? maxX : x));
            sb.append(',');
            sb.append(y < 0 ? 0 : (y > maxY ? maxY : y));
        }
        c.setPoints(sb.toString());
    }
}
Aggregations