use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTableCellType in project TranskribusCore by Transkribus.
the class DocxBuilder method getDocxTable.
/**
 * Builds a docx4j table from the pre-computed row maps of a table region.
 *
 * <p>Each map in {@code allRows} describes one table row: the key is the column index and the
 * value is the cell starting in that column, or {@code null} for a column that is covered by a
 * vertically merged cell from a row above. One {@code Tc} is emitted per map entry.
 *
 * @param wPMLpackage package whose first section supplies the writable page width (in twips)
 * @param isWordBased passed through unchanged to {@code exportTextRegion}
 * @param rows        number of table rows (not used by this method)
 * @param cols        number of grid columns; used to derive the default column width
 * @param allRows     one column-index-to-cell map per table row
 * @param tablesize   width of the table's bounding box; cell widths are scaled relative to it
 * @param mdp         main document part, passed through to {@code exportTextRegion}
 * @return the populated table
 * @throws Exception declared by the original signature; propagated from the docx4j / export helpers
 */
private static Tbl getDocxTable(WordprocessingMLPackage wPMLpackage, boolean isWordBased, int rows, int cols, List<HashMap<Integer, TrpTableCellType>> allRows, int tablesize, MainDocumentPart mdp) throws Exception {
    int writableWidthTwips = wPMLpackage.getDocumentModel().getSections().get(0).getPageDimensions().getWritableWidthTwips();
    // Integer division already truncates, so no floor() round trip is needed.
    // A table reporting no columns falls back to the full writable width instead of dividing by zero.
    int cellWidthTwips = cols > 0 ? writableWidthTwips / cols : writableWidthTwips;
    Tbl table = TblFactory.createTable(0, 0, cellWidthTwips);
    TblGrid tblGrid = factory.createTblGrid();
    for (int i = 0; i < cols; i++) {
        TblGridCol col = factory.createTblGridCol();
        col.setW(BigInteger.valueOf(cellWidthTwips));
        tblGrid.getGridCol().add(col);
    }
    table.setTblGrid(tblGrid);
    for (HashMap<Integer, TrpTableCellType> entry : allRows) {
        Tr row = factory.createTr();
        table.getContent().add(row);
        if (entry.size() != cols) {
            logger.debug("size of entries does not match columns ");
        }
        // HashMap iteration order is unspecified, but the cells of a row have to be appended
        // from left to right, so walk the entries sorted by column index.
        // NOTE(review): assumes the maps never contain a null key - confirm against the callers.
        for (TrpTableCellType tableCell : new java.util.TreeMap<Integer, TrpTableCellType>(entry).values()) {
            Tc cell = factory.createTc();
            row.getContent().add(cell);
            String rowSpan = null;
            int colSpan = 1;
            boolean mergedVertical = false;
            int colsize = cellWidthTwips;
            if (tableCell != null) {
                if (tableCell.getRowSpan() > 1) {
                    mergedVertical = true;
                    rowSpan = "restart";
                }
                // Scale the cell width by its share of the table width. With an empty table
                // bounding box the ratio is undefined, so keep the uniform default width.
                if (tablesize > 0) {
                    double maxX = tableCell.getBoundingBox().getMaxX();
                    double minX = tableCell.getBoundingBox().getMinX();
                    double relativeWidth = (maxX - minX) / (double) tablesize;
                    colsize = (int) (writableWidthTwips * relativeWidth);
                }
                colSpan = tableCell.getColSpan();
            } else {
                // no cell for this column: placeholder below a vertically merged cell
                mergedVertical = true;
            }
            applyGridSpan(cell, colSpan, rowSpan, colsize, mergedVertical);
            P columnPara = factory.createP();
            cell.getContent().add(columnPara);
            if (tableCell != null) {
                // Compare string content, not references: the former '!= ""' check was
                // true for practically every string, including empty ones.
                String cellText = tableCell.getUnicodeTextFromLines();
                if (cellText != null && !cellText.isEmpty()) {
                    exportTextRegion(tableCell, isWordBased, columnPara, mdp);
                }
            }
            // An (empty) trailing run is always appended to the cell paragraph, as before.
            Text tx = factory.createText();
            R run = factory.createR();
            run.getContent().add(tx);
            columnPara.getContent().add(run);
        }
    }
    return table;
}
use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTableCellType in project TranskribusCore by Transkribus.
the class TrpPdfDocument method exportTable.
/**
 * Writes the text of every cell of a table region to the PDF content layer.
 *
 * <p>The cells are first grouped into one column-index-to-cell map per row. Columns that are
 * covered by a vertically merged cell of the previous row are represented by a {@code null}
 * value; these placeholders carry no text and are skipped during export.
 *
 * @param r              region to export; cast to {@code TrpTableRegionType}
 * @param cb             PDF content target handed to the text helpers
 * @param cutoffLeft     passed through to the text helpers
 * @param cutoffTop      passed through to the text helpers
 * @param addUniformText if {@code true} the uniform-text helper is used, otherwise the plain one
 * @param cache          export cache handed to the text helpers
 * @throws IOException       propagated from the text helpers
 * @throws DocumentException propagated from the text helpers
 */
private void exportTable(RegionType r, PdfContentByte cb, int cutoffLeft, int cutoffTop, boolean addUniformText, ExportCache cache) throws IOException, DocumentException {
    logger.debug("is table");
    TrpTableRegionType table = (TrpTableRegionType) r;
    int rows = table.getNRows();
    List<List<TrpTableCellType>> allRowCells = new ArrayList<List<TrpTableCellType>>();
    for (int k = 0; k < rows; k++) {
        allRowCells.add(table.getRowCells(k));
    }
    List<HashMap<Integer, TrpTableCellType>> allRows = new ArrayList<HashMap<Integer, TrpTableCellType>>();
    HashMap<Integer, TrpTableCellType> nextRowMap = new HashMap<Integer, TrpTableCellType>();
    for (List<TrpTableCellType> rowCells : allRowCells) {
        HashMap<Integer, TrpTableCellType> currRowMap = new HashMap<Integer, TrpTableCellType>();
        /*
         * fill up all cells which are not set in TRP (needed for vertical cell merge)
         * the nextRowMap already contains all columns which are spanned vertically by cells of the
         * row above - they are merged in the table but have to be considered here
         */
        currRowMap.putAll(nextRowMap);
        nextRowMap.clear();
        for (TrpTableCellType cell : rowCells) {
            currRowMap.put(cell.getCol(), cell);
            if (cell.getRowSpan() > 1) {
                nextRowMap.put(cell.getCol(), null);
            }
        }
        allRows.add(currRowMap);
    }
    for (HashMap<Integer, TrpTableCellType> entry : allRows) {
        for (TrpTableCellType cell : entry.values()) {
            if (cell == null) {
                // placeholder for a vertically merged column: there is no region to read text from
                continue;
            }
            if (addUniformText) {
                float textBlockXStart = getAverageBeginningOfBaselines(cell);
                textBlockXStart += 40;
                addUniformTextFromTextRegion(cell, cb, cutoffLeft, cutoffTop, bfArial, textBlockXStart, cache);
            } else {
                addTextFromTextRegion(cell, cb, cutoffLeft, cutoffTop, bfArial, cache);
            }
        }
    }
}
use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTableCellType in project TranskribusCore by Transkribus.
the class DocxBuilder method writeDocxForTranscriptWithTables.
/**
 * Exports all regions of a page to the docx main document part: table regions become docx
 * tables, text regions with non-empty text become paragraphs.
 *
 * <p>Note that the page's region list is sorted in place by reading order.
 *
 * @param mdp                target document part
 * @param trpPage            page whose regions are exported
 * @param wordBased          passed through to the table / text region export
 * @param preserveLineBreaks not used by this method
 */
private static void writeDocxForTranscriptWithTables(MainDocumentPart mdp, TrpPageType trpPage, boolean wordBased, boolean preserveLineBreaks) {
    // TrpTableRegionType is contained in the regions too
    List<TrpRegionType> regions = trpPage.getRegions();
    Collections.sort(regions, new TrpElementReadingOrderComparator<RegionType>(true));
    for (int j = 0; j < regions.size(); ++j) {
        TrpRegionType r = regions.get(j);
        if (r instanceof TrpTableRegionType) {
            logger.debug("is table");
            TrpTableRegionType table = (TrpTableRegionType) r;
            int cols = table.getNCols();
            int rows = table.getNRows();
            double maxX = table.getBoundingBox().getMaxX();
            double minX = table.getBoundingBox().getMinX();
            int tablesize = (int) (maxX - minX);
            List<HashMap<Integer, TrpTableCellType>> allRows = buildDocxRowMaps(table, rows);
            try {
                Tbl thisTable = getDocxTable(wordMLPackage, wordBased, rows, cols, allRows, tablesize, mdp);
                mdp.addObject(thisTable);
            } catch (Exception e) {
                // Best effort: a table that cannot be built is reported and skipped, the export goes on.
                logger.error("Could not export table to docx: " + e.getMessage(), e);
            }
            // this Br element is used to break the current line and continue on the next one
            Br br = factory.createBr();
            org.docx4j.wml.P p = factory.createP();
            mdp.addObject(p);
            p.getContent().add(br);
        } else if (r instanceof TrpTextRegionType) {
            TrpTextRegionType tr = (TrpTextRegionType) r;
            /*
             * create one paragraph for each text region
             * but only if there is some text in it
             */
            String regionText = tr.getUnicodeText();
            if (regionText != null && !regionText.replace("\n", "").isEmpty()) {
                exportTextRegion(tr, wordBased, null, mdp);
            }
        }
    }
}

/**
 * Groups the cells of a table into one column-index-to-cell map per row. A column covered by a
 * vertically merged cell from a row above is represented by a {@code null} value in every row
 * the merge extends over, not only in the directly following one.
 */
private static List<HashMap<Integer, TrpTableCellType>> buildDocxRowMaps(TrpTableRegionType table, int rows) {
    List<HashMap<Integer, TrpTableCellType>> allRows = new ArrayList<HashMap<Integer, TrpTableCellType>>();
    // column index -> number of following rows that are still covered by a vertical merge
    HashMap<Integer, Integer> openRowSpans = new HashMap<Integer, Integer>();
    for (int k = 0; k < rows; k++) {
        HashMap<Integer, TrpTableCellType> currRowMap = new HashMap<Integer, TrpTableCellType>();
        HashMap<Integer, Integer> stillOpen = new HashMap<Integer, Integer>();
        for (Integer col : openRowSpans.keySet()) {
            currRowMap.put(col, null);
            int remaining = openRowSpans.get(col) - 1;
            if (remaining > 0) {
                stillOpen.put(col, remaining);
            }
        }
        openRowSpans = stillOpen;
        for (TrpTableCellType cell : table.getRowCells(k)) {
            // a real cell always wins over a placeholder in the same column
            currRowMap.put(cell.getCol(), cell);
            if (cell.getRowSpan() > 1) {
                openRowSpans.put(cell.getCol(), cell.getRowSpan() - 1);
            } else {
                openRowSpans.remove(cell.getCol());
            }
        }
        allRows.add(currRowMap);
    }
    return allRows;
}
use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTableCellType in project TranskribusCore by Transkribus.
the class TrpXlsxTableBuilder method createTable.
/**
 * Creates one worksheet named {@code table_<tableID>} in the shared workbook and fills it with
 * the given table rows.
 *
 * <p>Each map in {@code allRows} is one sheet row: the key is the column index, the value the
 * cell starting there, or {@code null} for a column covered by a merged cell. Placeholder
 * columns only get an empty, bordered cell.
 *
 * @param rows    number of table rows (not used by this method)
 * @param cols    expected number of columns; only used for a debug consistency message
 * @param allRows one column-index-to-cell map per table row
 * @param tableID running number used for the sheet name
 */
private static void createTable(int rows, int cols, List<HashMap<Integer, TrpTableCellType>> allRows, int tableID) {
    String tableName = "table_" + tableID;
    Sheet currSheet = wb.createSheet(WorkbookUtil.createSafeSheetName(tableName));
    CellStyle style = wb.createCellStyle();
    style.setBorderBottom(HSSFCellStyle.BORDER_THIN);
    style.setBorderTop(HSSFCellStyle.BORDER_THIN);
    style.setBorderRight(HSSFCellStyle.BORDER_THIN);
    style.setBorderLeft(HSSFCellStyle.BORDER_THIN);
    CellStyle rowStyle = wb.createCellStyle();
    rowStyle.setWrapText(true);
    int i = 0;
    for (HashMap<Integer, TrpTableCellType> entry : allRows) {
        if (entry.size() != cols) {
            logger.debug("size of entries does not match columns ");
        }
        Row nextRow = currSheet.createRow(i);
        nextRow.setRowStyle(rowStyle);
        i++;
        for (Integer key : entry.keySet()) {
            TrpTableCellType tableCell = entry.get(key);
            if (tableCell == null) {
                // placeholder below / beside a merged cell: empty cell that only carries the border
                Cell currCell = nextRow.createCell(key);
                currCell.setCellStyle(style);
                continue;
            }
            int colSpan = tableCell.getColSpan();
            int rowSpan = tableCell.getRowSpan();
            int colID = tableCell.getCol();
            int rowID = tableCell.getRow();
            Cell currCell = nextRow.createCell(colID);
            currCell.setCellStyle(style);
            currCell.setCellValue(tableCell.getUnicodeTextFromLines());
            if (rowSpan > 1 || colSpan > 1) {
                // Register ONE rectangular region. Adding a vertical and a horizontal region
                // separately makes them overlap in the anchor cell when a cell spans in both
                // directions, and does not cover the full rectangle.
                int lastRow = rowID + Math.max(rowSpan, 1) - 1;
                int lastCol = colID + Math.max(colSpan, 1) - 1;
                currSheet.addMergedRegion(new CellRangeAddress(rowID, lastRow, colID, lastCol));
            }
        }
    }
}
use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTableCellType in project TranskribusCore by Transkribus.
the class TrpXlsxTableBuilder method writeXlsxForTables.
/**
 * Exports every table region of the selected pages into an XLSX workbook, one sheet per table,
 * and writes the workbook to {@code exportFile}.
 *
 * @param doc         document whose pages are scanned for table regions
 * @param exportFile  target file
 * @param pageIndices zero-based indices of the pages to export, or {@code null} for all pages
 * @param monitor     optional progress monitor; may be {@code null}
 * @param cache       optional cache of already loaded transcripts; may be {@code null}
 * @throws NoTablesException    if none of the processed pages contains a table
 * @throws IOException          if the workbook cannot be written
 * @throws InterruptedException if the monitor reports that the export was cancelled
 */
public static void writeXlsxForTables(TrpDoc doc, File exportFile, Set<Integer> pageIndices, IProgressMonitor monitor, ExportCache cache) throws NoTablesException, IOException, InterruptedException {
    // TrpTableRegionType is contained in the regions too
    List<TrpPage> pages = doc.getPages();
    String exportPath = exportFile.getPath();
    int totalPages = pageIndices == null ? pages.size() : pageIndices.size();
    if (monitor != null) {
        monitor.beginTask("Exporting tables to Excel", totalPages);
    }
    wb = new XSSFWorkbook();
    int c = 0;
    int tableId = 0;
    for (int i = 0; i < pages.size(); ++i) {
        if (pageIndices != null && !pageIndices.contains(i)) {
            continue;
        }
        if (monitor != null) {
            if (monitor.isCanceled()) {
                throw new InterruptedException("Export was canceled by user");
            }
            monitor.subTask("Processing page " + (c + 1));
        }
        TrpPage page = pages.get(i);
        // try to get previously loaded JAXB transcript
        JAXBPageTranscript tr = null;
        if (cache != null) {
            tr = cache.getPageTranscriptAtIndex(i);
        }
        if (tr == null) {
            TrpTranscriptMetadata md = page.getCurrentTranscript();
            tr = new JAXBPageTranscript(md);
            tr.build();
        }
        TrpPageType trpPage = tr.getPage();
        List<TrpRegionType> regions = trpPage.getRegions();
        for (int j = 0; j < regions.size(); ++j) {
            TrpRegionType r = regions.get(j);
            if (r instanceof TrpTableRegionType) {
                tableId++;
                logger.debug("is table");
                TrpTableRegionType table = (TrpTableRegionType) r;
                int cols = table.getNCols();
                int rows = table.getNRows();
                createTable(rows, cols, buildXlsxRowMaps(table, rows), tableId);
            }
        }
        // Progress is counted once per page (the task was sized in pages), not once per region.
        logger.debug("writing xlsx for page " + (i + 1) + "/" + doc.getNPages());
        ++c;
        if (monitor != null) {
            // NOTE(review): the cumulative count is reported, as before - confirm whether this
            // monitor implementation expects an increment instead.
            monitor.worked(c);
        }
    }
    /*
     * auto size the columns, based on the number of cells in the first row of each sheet
     */
    for (int i = 0; i < wb.getNumberOfSheets(); i++) {
        Sheet sheet = wb.getSheetAt(i);
        Iterator<Row> rowIterator = sheet.rowIterator();
        if (rowIterator.hasNext()) {
            int numberOfCells = rowIterator.next().getPhysicalNumberOfCells();
            for (int j = 0; j < numberOfCells; j++) {
                sheet.autoSizeColumn(j, true);
            }
        }
    }
    // means no tables at all - nothing is written and no file is created
    if (wb.getNumberOfSheets() == 0) {
        throw new NoTablesException("Sorry - No tables available for export");
    }
    try {
        FileOutputStream fOut = new FileOutputStream(exportPath);
        try {
            wb.write(fOut);
        } finally {
            // release the file handle even when writing fails
            fOut.close();
        }
    } catch (IOException e) {
        logger.error(e.getMessage(), e);
        throw e;
    }
    logger.info("wrote xlsx to: " + exportPath);
}

/**
 * Groups the cells of a table into one column-index-to-cell map per row. Every column and row
 * that is covered by a spanning cell, apart from the cell's own position, is represented by a
 * {@code null} value, for the full extent of the column and row span.
 */
private static List<HashMap<Integer, TrpTableCellType>> buildXlsxRowMaps(TrpTableRegionType table, int rows) {
    List<HashMap<Integer, TrpTableCellType>> allRows = new ArrayList<HashMap<Integer, TrpTableCellType>>();
    // column index -> number of following rows that are still covered by a vertical merge
    HashMap<Integer, Integer> openRowSpans = new HashMap<Integer, Integer>();
    for (int k = 0; k < rows; k++) {
        HashMap<Integer, TrpTableCellType> currRowMap = new HashMap<Integer, TrpTableCellType>();
        HashMap<Integer, Integer> stillOpen = new HashMap<Integer, Integer>();
        for (Integer col : openRowSpans.keySet()) {
            currRowMap.put(col, null);
            int remaining = openRowSpans.get(col) - 1;
            if (remaining > 0) {
                stillOpen.put(col, remaining);
            }
        }
        openRowSpans = stillOpen;
        for (TrpTableCellType cell : table.getRowCells(k)) {
            int firstCol = cell.getCol();
            int colSpan = Math.max(cell.getColSpan(), 1);
            int rowSpan = cell.getRowSpan();
            currRowMap.put(firstCol, cell);
            // placeholders for the further columns of a horizontal span in this row
            for (int offset = 1; offset < colSpan; offset++) {
                currRowMap.put(firstCol + offset, null);
            }
            // remember (or close) the vertical span for every column the cell covers
            for (int offset = 0; offset < colSpan; offset++) {
                if (rowSpan > 1) {
                    openRowSpans.put(firstCol + offset, rowSpan - 1);
                } else {
                    openRowSpans.remove(firstCol + offset);
                }
            }
        }
        allRows.add(currRowMap);
    }
    return allRows;
}
Aggregations