Search in sources :

Example 1 with TrpTableCellType

use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTableCellType in project TranskribusCore by Transkribus.

the class DocxBuilder method getDocxTable.

/**
 * Builds a docx4j table from the prepared row maps of one table region.
 *
 * <p>Each map in {@code allRows} describes one table row (column index to cell). A
 * {@code null} value marks a column that has no cell of its own in that row; such a
 * column is emitted as an empty, vertically merged cell.
 *
 * @param wPMLpackage package whose first section supplies the writable page width
 * @param isWordBased forwarded unchanged to {@code exportTextRegion}
 * @param rows        number of table rows (not read by this method)
 * @param cols        number of grid columns; used as divisor for the default cell width
 * @param allRows     one map per table row, column index to cell (value may be {@code null})
 * @param tablesize   width of the table in source coordinates; cell widths are scaled against it
 * @param mdp         forwarded unchanged to {@code exportTextRegion}
 * @return the populated table
 * @throws Exception declared by the original signature and kept for callers
 */
private static Tbl getDocxTable(WordprocessingMLPackage wPMLpackage, boolean isWordBased, int rows, int cols, List<HashMap<Integer, TrpTableCellType>> allRows, int tablesize, MainDocumentPart mdp) throws Exception {
    int writableWidthTwips = wPMLpackage.getDocumentModel().getSections().get(0).getPageDimensions().getWritableWidthTwips();
    // Integer division already truncates, so no floating-point round trip is needed.
    int cellWidthTwips = writableWidthTwips / cols;
    Tbl table = TblFactory.createTable(0, 0, cellWidthTwips);
    TblGrid tblGrid = factory.createTblGrid();
    for (int i = 0; i < cols; i++) {
        TblGridCol col = factory.createTblGridCol();
        col.setW(BigInteger.valueOf(cellWidthTwips));
        tblGrid.getGridCol().add(col);
    }
    table.setTblGrid(tblGrid);
    for (HashMap<Integer, TrpTableCellType> entry : allRows) {
        Tr row = factory.createTr();
        table.getContent().add(row);
        if (entry.keySet().size() != cols) {
            logger.debug("size of entries does not match columns ");
        }
        for (Integer key : entry.keySet()) {
            // Look the cell up once; null means "no cell for this column in this row".
            TrpTableCellType cellData = entry.get(key);
            Tc cell = factory.createTc();
            row.getContent().add(cell);
            String rowSpan = null;
            int colSpan = 1;
            boolean mergedVertical = false;
            int colsize = cellWidthTwips;
            if (cellData != null) {
                if (cellData.getRowSpan() > 1) {
                    // First cell of a vertical merge.
                    mergedVertical = true;
                    rowSpan = "restart";
                }
                // Scale the cell's width in source coordinates to the writable page width.
                double maxX = cellData.getBoundingBox().getMaxX();
                double minX = cellData.getBoundingBox().getMinX();
                double colsizeRel = maxX - minX;
                double colsizetmp = colsizeRel / (double) tablesize;
                colsize = (int) (writableWidthTwips * colsizetmp);
                colSpan = cellData.getColSpan();
            } else {
                // Placeholder column: continue the vertical merge from the row above.
                mergedVertical = true;
            }
            applyGridSpan(cell, colSpan, rowSpan, colsize, mergedVertical);
            P columnPara = factory.createP();
            cell.getContent().add(columnPara);
            Text tx = factory.createText();
            R run = factory.createR();
            // Compare by value: the former `!= ""` was a reference comparison and let
            // empty, non-interned strings through to the text export.
            if (cellData != null && !"".equals(cellData.getUnicodeTextFromLines())) {
                exportTextRegion(cellData, isWordBased, columnPara, mdp);
            }
            run.getContent().add(tx);
            columnPara.getContent().add(run);
        }
    }
    return table;
}
Also used : TrpTableCellType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTableCellType) Text(org.docx4j.wml.Text) Tc(org.docx4j.wml.Tc) BigInteger(java.math.BigInteger) P(org.docx4j.wml.P) R(org.docx4j.wml.R) TblGrid(org.docx4j.wml.TblGrid) Tr(org.docx4j.wml.Tr) Tbl(org.docx4j.wml.Tbl) TblGridCol(org.docx4j.wml.TblGridCol)

Example 2 with TrpTableCellType

use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTableCellType in project TranskribusCore by Transkribus.

the class TrpPdfDocument method exportTable.

/**
 * Writes the text of every cell of a table region into the PDF content.
 *
 * <p>The rows are first turned into per-row maps (column index to cell). A cell with a
 * row span greater than one leaves a {@code null} placeholder for its column in the
 * following row. Placeholders carry no text and are skipped when the text is written.
 *
 * @param r              the region to export; cast to {@code TrpTableRegionType}
 * @param cb             forwarded to the text helpers
 * @param cutoffLeft     forwarded to the text helpers
 * @param cutoffTop      forwarded to the text helpers
 * @param addUniformText selects {@code addUniformTextFromTextRegion} over {@code addTextFromTextRegion}
 * @param cache          forwarded to the text helpers
 * @throws IOException       declared by the original signature
 * @throws DocumentException declared by the original signature
 */
private void exportTable(RegionType r, PdfContentByte cb, int cutoffLeft, int cutoffTop, boolean addUniformText, ExportCache cache) throws IOException, DocumentException {
    logger.debug("is table");
    TrpTableRegionType table = (TrpTableRegionType) r;
    int rows = table.getNRows();
    List<HashMap<Integer, TrpTableCellType>> allRows = new ArrayList<HashMap<Integer, TrpTableCellType>>();
    HashMap<Integer, TrpTableCellType> nextRowMap = new HashMap<Integer, TrpTableCellType>();
    for (int k = 0; k < rows; k++) {
        HashMap<Integer, TrpTableCellType> currRowMap = new HashMap<Integer, TrpTableCellType>();
        /*
         * Start with the placeholders inherited from the row above: columns that are
         * covered by a vertically merged cell and therefore have no cell of their own.
         */
        currRowMap.putAll(nextRowMap);
        nextRowMap.clear();
        for (TrpTableCellType cell : table.getRowCells(k)) {
            currRowMap.put(cell.getCol(), cell);
            if (cell.getRowSpan() > 1) {
                nextRowMap.put(cell.getCol(), null);
            }
        }
        allRows.add(currRowMap);
    }
    for (HashMap<Integer, TrpTableCellType> entry : allRows) {
        for (TrpTableCellType cell : entry.values()) {
            if (cell == null) {
                // Merge placeholder: there is no region to draw, so do not hand null to the helpers.
                continue;
            }
            if (addUniformText) {
                float textBlockXStart = getAverageBeginningOfBaselines(cell);
                textBlockXStart += 40;
                addUniformTextFromTextRegion(cell, cb, cutoffLeft, cutoffTop, bfArial, textBlockXStart, cache);
            } else {
                addTextFromTextRegion(cell, cb, cutoffLeft, cutoffTop, bfArial, cache);
            }
        }
    }
}
Also used : TrpTableCellType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTableCellType) TrpTableRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTableRegionType) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) List(java.util.List) ArrayList(java.util.ArrayList) Point(java.awt.Point)

Example 3 with TrpTableCellType

use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTableCellType in project TranskribusCore by Transkribus.

the class DocxBuilder method writeDocxForTranscriptWithTables.

/**
 * Exports the regions of one page to the document part, in reading order.
 *
 * <p>Table regions are converted with {@code getDocxTable} and followed by a paragraph
 * holding a break; text regions are exported with {@code exportTextRegion} when they
 * contain anything besides line feeds. Other region types are ignored.
 *
 * <p>Note that the region list obtained from {@code trpPage} is sorted in place.
 *
 * @param mdp                target document part
 * @param trpPage            page whose regions are exported
 * @param wordBased          forwarded to {@code getDocxTable} and {@code exportTextRegion}
 * @param preserveLineBreaks not read by this method
 */
private static void writeDocxForTranscriptWithTables(MainDocumentPart mdp, TrpPageType trpPage, boolean wordBased, boolean preserveLineBreaks) {
    // TrpTableRegionType is contained in the regions too
    List<TrpRegionType> regions = trpPage.getRegions();
    Collections.sort(regions, new TrpElementReadingOrderComparator<RegionType>(true));
    for (int j = 0; j < regions.size(); ++j) {
        TrpRegionType r = regions.get(j);
        if (r instanceof TrpTableRegionType) {
            logger.debug("is table");
            TrpTableRegionType table = (TrpTableRegionType) r;
            int cols = table.getNCols();
            int rows = table.getNRows();
            // Table width in source coordinates; cell widths are scaled against it.
            double maxX = table.getBoundingBox().getMaxX();
            double minX = table.getBoundingBox().getMinX();
            int tablesize = (int) (maxX - minX);
            List<List<TrpTableCellType>> allRowCells = new ArrayList<List<TrpTableCellType>>();
            for (int k = 0; k < rows; k++) {
                allRowCells.add(table.getRowCells(k));
            }
            List<HashMap<Integer, TrpTableCellType>> allRows = new ArrayList<HashMap<Integer, TrpTableCellType>>();
            HashMap<Integer, TrpTableCellType> nextRowMap = new HashMap<Integer, TrpTableCellType>();
            for (List<TrpTableCellType> rowCells : allRowCells) {
                HashMap<Integer, TrpTableCellType> currRowMap = new HashMap<Integer, TrpTableCellType>();
                /*
                 * Start with the placeholders inherited from the row above: columns covered
                 * by a vertically merged cell have no cell of their own in this row but
                 * still need an entry so the merge can be continued.
                 */
                currRowMap.putAll(nextRowMap);
                nextRowMap.clear();
                for (TrpTableCellType cell : rowCells) {
                    currRowMap.put(cell.getCol(), cell);
                    if (cell.getRowSpan() > 1) {
                        nextRowMap.put(cell.getCol(), null);
                    }
                }
                allRows.add(currRowMap);
            }
            try {
                Tbl thisTable = getDocxTable(wordMLPackage, wordBased, rows, cols, allRows, tablesize, mdp);
                mdp.addObject(thisTable);
            } catch (Exception e) {
                // Keep exporting the remaining regions, but report the failure through the
                // logger instead of printing the stack trace to stderr.
                logger.error("Could not export table region to docx: " + e.getMessage(), e);
            }
            // Paragraph with a break so that following content starts on a new line.
            Br br = factory.createBr();
            org.docx4j.wml.P p = factory.createP();
            mdp.addObject(p);
            p.getContent().add(br);
        } else if (r instanceof TrpTextRegionType) {
            TrpTextRegionType tr = (TrpTextRegionType) r;
            /*
             * create one paragraph for each text region
             * but only if there is some text in it
             */
            String helper = tr.getUnicodeText().replaceAll("\n", "");
            if (!helper.equals("")) {
                exportTextRegion(tr, wordBased, null, mdp);
            }
        }
    }
}
Also used : TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) TrpRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType) TrpTableRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTableRegionType) RegionType(eu.transkribus.core.model.beans.pagecontent.RegionType) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) TrpTableRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTableRegionType) TrpRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType) CustomTagList(eu.transkribus.core.model.beans.customtags.CustomTagList) List(java.util.List) ArrayList(java.util.ArrayList) Tbl(org.docx4j.wml.Tbl) P(org.docx4j.wml.P) TrpTableCellType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTableCellType) JAXBException(javax.xml.bind.JAXBException) IOException(java.io.IOException) Docx4JException(org.docx4j.openpackaging.exceptions.Docx4JException) BigInteger(java.math.BigInteger) Br(org.docx4j.wml.Br) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType)

Example 4 with TrpTableCellType

use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTableCellType in project TranskribusCore by Transkribus.

the class TrpXlsxTableBuilder method createTable.

/**
 * Creates one sheet named {@code table_<tableID>} in the shared workbook and fills it
 * from the prepared row maps.
 *
 * <p>Each map describes one table row (column index to cell). A {@code null} value is a
 * placeholder for a column covered by a merged cell; it only gets an empty bordered cell.
 * A cell spanning several rows and/or columns is registered as a single rectangular
 * merged region.
 *
 * @param rows    number of table rows (not read by this method)
 * @param cols    expected number of entries per row; a mismatch is only logged
 * @param allRows one map per table row, column index to cell (value may be {@code null})
 * @param tableID number used to build the sheet name
 */
private static void createTable(int rows, int cols, List<HashMap<Integer, TrpTableCellType>> allRows, int tableID) {
    String tableName = "table_" + tableID;
    Sheet currSheet = wb.createSheet(WorkbookUtil.createSafeSheetName(tableName));
    CellStyle style = wb.createCellStyle();
    style.setBorderBottom(HSSFCellStyle.BORDER_THIN);
    style.setBorderTop(HSSFCellStyle.BORDER_THIN);
    style.setBorderRight(HSSFCellStyle.BORDER_THIN);
    style.setBorderLeft(HSSFCellStyle.BORDER_THIN);
    CellStyle rowStyle = wb.createCellStyle();
    rowStyle.setWrapText(true);
    int i = 0;
    for (HashMap<Integer, TrpTableCellType> entry : allRows) {
        if (entry.keySet().size() != cols) {
            logger.debug("size of entries does not match columns ");
        }
        Row nextRow = currSheet.createRow(i);
        nextRow.setRowStyle(rowStyle);
        i++;
        for (Integer key : entry.keySet()) {
            TrpTableCellType tableCell = entry.get(key);
            if (tableCell == null) {
                // Placeholder for a column covered by a merge: bordered, but without content.
                Cell currCell = nextRow.createCell(key);
                currCell.setCellStyle(style);
                continue;
            }
            int colSpan = tableCell.getColSpan();
            int rowSpan = tableCell.getRowSpan();
            int colID = tableCell.getCol();
            int rowID = tableCell.getRow();
            Cell currCell = nextRow.createCell(colID);
            currCell.setCellStyle(style);
            currCell.setCellValue(tableCell.getUnicodeTextFromLines());
            if (rowSpan > 1 || colSpan > 1) {
                // One rectangle for the whole span. Registering a vertical and a horizontal
                // strip separately yields two regions that overlap in the anchor cell.
                int lastRow = rowID + Math.max(rowSpan, 1) - 1;
                int lastCol = colID + Math.max(colSpan, 1) - 1;
                // sheet.addMergedRegion(rowFrom,rowTo,colFrom,colTo);
                currSheet.addMergedRegion(new CellRangeAddress(rowID, lastRow, colID, lastCol));
            }
        }
    }
}
Also used : TrpTableCellType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTableCellType) HSSFCellStyle(org.apache.poi.hssf.usermodel.HSSFCellStyle) CellStyle(org.apache.poi.ss.usermodel.CellStyle) Row(org.apache.poi.ss.usermodel.Row) CellRangeAddress(org.apache.poi.ss.util.CellRangeAddress) Sheet(org.apache.poi.ss.usermodel.Sheet) Cell(org.apache.poi.ss.usermodel.Cell)

Example 5 with TrpTableCellType

use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTableCellType in project TranskribusCore by Transkribus.

the class TrpXlsxTableBuilder method writeXlsxForTables.

/**
 * Exports every table region of the selected pages into one xlsx workbook, one sheet
 * per table, and writes the workbook to {@code exportFile}.
 *
 * <p>Known limitation (carried over from the original code): a vertical merge only
 * leaves a placeholder in the directly following row, so cells spanning more than two
 * rows are not fully represented in the row maps.
 *
 * @param doc         document whose pages are scanned for table regions
 * @param exportFile  target file; only its path is used
 * @param pageIndices indices of the pages to export, or {@code null} for all pages
 * @param monitor     optional progress monitor, may be {@code null}
 * @param cache       optional source of already loaded transcripts, may be {@code null}
 * @throws NoTablesException    if no sheet was created, i.e. no table was found
 * @throws IOException          if writing the workbook fails
 * @throws InterruptedException if the monitor reports cancellation
 */
public static void writeXlsxForTables(TrpDoc doc, File exportFile, Set<Integer> pageIndices, IProgressMonitor monitor, ExportCache cache) throws NoTablesException, IOException, InterruptedException {
    // TrpTableRegionType is contained in the regions too
    List<TrpPage> pages = doc.getPages();
    String exportPath = exportFile.getPath();
    int totalPages = pageIndices == null ? pages.size() : pageIndices.size();
    if (monitor != null) {
        monitor.beginTask("Exporting tables to Excel", totalPages);
    }
    wb = new XSSFWorkbook();
    int c = 0;
    int tableId = 0;
    for (int i = 0; i < pages.size(); ++i) {
        if (pageIndices != null && !pageIndices.contains(i)) {
            continue;
        }
        if (monitor != null) {
            if (monitor.isCanceled()) {
                throw new InterruptedException("Export was canceled by user");
            }
            monitor.subTask("Processing page " + (c + 1));
        }
        TrpPage page = pages.get(i);
        // try to get previously loaded JAXB transcript
        JAXBPageTranscript tr = null;
        if (cache != null) {
            tr = cache.getPageTranscriptAtIndex(i);
        }
        if (tr == null) {
            TrpTranscriptMetadata md = page.getCurrentTranscript();
            tr = new JAXBPageTranscript(md);
            tr.build();
        }
        TrpPageType trpPage = tr.getPage();
        List<TrpRegionType> regions = trpPage.getRegions();
        for (int j = 0; j < regions.size(); ++j) {
            TrpRegionType r = regions.get(j);
            if (!(r instanceof TrpTableRegionType)) {
                continue;
            }
            tableId++;
            logger.debug("is table");
            TrpTableRegionType table = (TrpTableRegionType) r;
            int cols = table.getNCols();
            int rows = table.getNRows();
            List<HashMap<Integer, TrpTableCellType>> allRows = new ArrayList<HashMap<Integer, TrpTableCellType>>();
            HashMap<Integer, TrpTableCellType> nextRowMap = new HashMap<Integer, TrpTableCellType>();
            for (int k = 0; k < rows; k++) {
                HashMap<Integer, TrpTableCellType> currRowMap = new HashMap<Integer, TrpTableCellType>();
                /*
                 * Start with the placeholders inherited from the row above: columns covered
                 * by a vertically merged cell have no cell of their own in this row.
                 */
                currRowMap.putAll(nextRowMap);
                nextRowMap.clear();
                for (TrpTableCellType cell : table.getRowCells(k)) {
                    currRowMap.put(cell.getCol(), cell);
                    if (cell.getRowSpan() > 1) {
                        nextRowMap.put(cell.getCol(), null);
                    }
                    // Placeholder for every further column covered by a horizontal span,
                    // not just the first one.
                    for (int covered = 1; covered < cell.getColSpan(); covered++) {
                        currRowMap.put(cell.getCol() + covered, null);
                    }
                }
                allRows.add(currRowMap);
            }
            createTable(rows, cols, allRows, tableId);
        }
        // Progress is counted once per page: the task total and the sub-task label above
        // are both expressed in pages, not in regions.
        logger.debug("writing xlsx for page " + (i + 1) + "/" + doc.getNPages());
        ++c;
        if (monitor != null) {
            monitor.worked(c);
        }
    }
    /*
     * auto size the columns, using the first row of each sheet to determine how many
     */
    for (int i = 0; i < wb.getNumberOfSheets(); i++) {
        Iterator<Row> rowIterator = wb.getSheetAt(i).rowIterator();
        if (rowIterator.hasNext()) {
            Row headerRow = rowIterator.next();
            int numberOfCells = headerRow.getPhysicalNumberOfCells();
            for (int j = 0; j < numberOfCells; j++) {
                wb.getSheetAt(i).autoSizeColumn(j, true);
            }
        }
    }
    // means no tables at all; checked before the target file is created
    if (wb.getNumberOfSheets() == 0) {
        throw new NoTablesException("Sorry - No tables available for export");
    }
    FileOutputStream fOut = null;
    try {
        fOut = new FileOutputStream(exportPath);
        wb.write(fOut);
        // Close on the success path inside the try, so a failing close is reported like
        // any other write error.
        fOut.close();
        fOut = null;
    } catch (IOException e) {
        logger.error(e.getMessage(), e);
        throw e;
    } finally {
        if (fOut != null) {
            // Reached only after a failure: release the file handle without masking the
            // exception that is already propagating.
            try {
                fOut.close();
            } catch (IOException closeFailure) {
                logger.debug("could not close " + exportPath, closeFailure);
            }
        }
    }
    logger.info("wrote xlsx to: " + exportPath);
}
Also used : JAXBPageTranscript(eu.transkribus.core.model.beans.JAXBPageTranscript) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) TrpTranscriptMetadata(eu.transkribus.core.model.beans.TrpTranscriptMetadata) TrpTableRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTableRegionType) NoTablesException(eu.transkribus.core.model.builder.NoTablesException) TrpRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType) Iterator(java.util.Iterator) XSSFWorkbook(org.apache.poi.xssf.usermodel.XSSFWorkbook) ArrayList(java.util.ArrayList) List(java.util.List) TrpPageType(eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType) TrpTableCellType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTableCellType) TrpPage(eu.transkribus.core.model.beans.TrpPage) IOException(java.io.IOException) FileOutputStream(java.io.FileOutputStream) Row(org.apache.poi.ss.usermodel.Row)

Aggregations

TrpTableCellType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTableCellType)5 TrpTableRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTableRegionType)3 ArrayList (java.util.ArrayList)3 HashMap (java.util.HashMap)3 List (java.util.List)3 TrpRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType)2 IOException (java.io.IOException)2 BigInteger (java.math.BigInteger)2 Row (org.apache.poi.ss.usermodel.Row)2 P (org.docx4j.wml.P)2 Tbl (org.docx4j.wml.Tbl)2 JAXBPageTranscript (eu.transkribus.core.model.beans.JAXBPageTranscript)1 TrpPage (eu.transkribus.core.model.beans.TrpPage)1 TrpTranscriptMetadata (eu.transkribus.core.model.beans.TrpTranscriptMetadata)1 CustomTagList (eu.transkribus.core.model.beans.customtags.CustomTagList)1 RegionType (eu.transkribus.core.model.beans.pagecontent.RegionType)1 TrpPageType (eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)1 TrpTextRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType)1 NoTablesException (eu.transkribus.core.model.builder.NoTablesException)1 Point (java.awt.Point)1