Search in sources :

Example 1 with NoTablesException

use of eu.transkribus.core.model.builder.NoTablesException in project TranskribusCore by Transkribus.

The example is the method writeXlsxForTables of the class TrpXlsxTableBuilder.

/**
 * Collects all table regions of the selected pages of a document into a fresh
 * XLSX workbook and writes that workbook to {@code exportFile}.
 *
 * <p>For every page the transcript is taken from {@code cache} when available,
 * otherwise it is built from the page's current transcript metadata. Each region
 * that is a {@link TrpTableRegionType} is converted into a list of per-row maps
 * (column index -> cell) and handed to {@code createTable}. Columns are auto-sized
 * afterwards, based on the number of cells in the first row of each sheet.
 *
 * @param doc         document whose pages are scanned for tables
 * @param exportFile  target file; its path is opened for writing
 * @param pageIndices zero-based indices of the pages to export, or {@code null} for all pages
 * @param monitor     optional progress monitor (may be {@code null}); also polled for cancellation
 * @param cache       optional cache of already loaded transcripts (may be {@code null})
 * @throws NoTablesException    if the workbook contains no sheet after all pages were processed
 * @throws IOException          if the workbook cannot be written (logged before being rethrown)
 * @throws InterruptedException if the monitor reports cancellation
 */
public static void writeXlsxForTables(TrpDoc doc, File exportFile, Set<Integer> pageIndices, IProgressMonitor monitor, ExportCache cache) throws NoTablesException, IOException, InterruptedException {
    // TrpTableRegionType is contained in the regions too
    List<TrpPage> pages = doc.getPages();
    String exportPath = exportFile.getPath();
    int totalPages = pageIndices == null ? pages.size() : pageIndices.size();
    if (monitor != null) {
        monitor.beginTask("Exporting tables to Excel", totalPages);
    }
    wb = new XSSFWorkbook();
    // number of pages processed so far
    int c = 0;
    int tableId = 0;
    for (int i = 0; i < pages.size(); ++i) {
        if (pageIndices != null && !pageIndices.contains(i)) {
            continue;
        }
        if (monitor != null) {
            if (monitor.isCanceled()) {
                throw new InterruptedException("Export was canceled by user");
            }
            monitor.subTask("Processing page " + (c + 1));
        }
        TrpPage page = pages.get(i);
        // try to get previously loaded JAXB transcript
        JAXBPageTranscript tr = null;
        if (cache != null) {
            tr = cache.getPageTranscriptAtIndex(i);
        }
        if (tr == null) {
            TrpTranscriptMetadata md = page.getCurrentTranscript();
            tr = new JAXBPageTranscript(md);
            tr.build();
        }
        TrpPageType trpPage = tr.getPage();
        List<TrpRegionType> regions = trpPage.getRegions();
        for (TrpRegionType r : regions) {
            if (!(r instanceof TrpTableRegionType)) {
                continue;
            }
            tableId++;
            logger.debug("is table");
            TrpTableRegionType table = (TrpTableRegionType) r;
            int cols = table.getNCols();
            int rows = table.getNRows();
            List<HashMap<Integer, TrpTableCellType>> allRows = new ArrayList<HashMap<Integer, TrpTableCellType>>();
            HashMap<Integer, TrpTableCellType> nextRowMap = new HashMap<Integer, TrpTableCellType>();
            for (int k = 0; k < rows; k++) {
                List<TrpTableCellType> rowCells = table.getRowCells(k);
                HashMap<Integer, TrpTableCellType> currRowMap = new HashMap<Integer, TrpTableCellType>();
                /*
                 * fill up all cells which are not set in TRP (needed for vertical cell merge)
                 * the nextRowMap already contains all cells which span vertically with the cells above - means they got merged
                 * in the table but have to be considered here
                 */
                currRowMap.putAll(nextRowMap);
                nextRowMap.clear();
                for (TrpTableCellType cell : rowCells) {
                    currRowMap.put(cell.getCol(), cell);
                    // only one row or col span is considered -> FIXME: do it for all spans, but may never happen?
                    if (cell.getRowSpan() > 1) {
                        // placeholder for the merged cell directly below
                        nextRowMap.put(cell.getCol(), null);
                    }
                    if (cell.getColSpan() > 1) {
                        // placeholder for the merged cell directly to the right
                        currRowMap.put(cell.getCol() + 1, null);
                    }
                }
                allRows.add(currRowMap);
            }
            createTable(rows, cols, allRows, tableId);
        }
        // Progress is tracked per page (the task was sized with totalPages), so this
        // bookkeeping belongs to the page loop, not to the region loop.
        logger.debug("writing xlsx for page " + (i + 1) + "/" + doc.getNPages());
        ++c;
        if (monitor != null) {
            // NOTE(review): the running total is passed here; if this monitor follows the
            // Eclipse IProgressMonitor contract, worked() expects an increment - confirm.
            monitor.worked(c);
        }
    }
    // means no tables at all - fail before touching the file system
    if (wb.getNumberOfSheets() == 0) {
        throw new NoTablesException("Sorry - No tables available for export");
    }
    /*
     * auto size the columns
     */
    for (int i = 0; i < wb.getNumberOfSheets(); i++) {
        Iterator<Row> rowIterator = wb.getSheetAt(i).rowIterator();
        if (rowIterator.hasNext()) {
            // the first row determines how many columns get auto-sized
            Row headerRow = rowIterator.next();
            int numberOfCells = headerRow.getPhysicalNumberOfCells();
            for (int j = 0; j < numberOfCells; j++) {
                wb.getSheetAt(i).autoSizeColumn(j, true);
            }
        }
    }
    // try-with-resources closes the stream even if writing the workbook fails
    try (FileOutputStream fOut = new FileOutputStream(exportPath)) {
        wb.write(fOut);
    } catch (IOException e) {
        logger.error(e.getMessage(), e);
        throw e;
    }
    logger.info("wrote xlsx to: " + exportPath);
}
Also used : JAXBPageTranscript(eu.transkribus.core.model.beans.JAXBPageTranscript) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) TrpTranscriptMetadata(eu.transkribus.core.model.beans.TrpTranscriptMetadata) TrpTableRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTableRegionType) NoTablesException(eu.transkribus.core.model.builder.NoTablesException) TrpRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType) Iterator(java.util.Iterator) XSSFWorkbook(org.apache.poi.xssf.usermodel.XSSFWorkbook) ArrayList(java.util.ArrayList) List(java.util.List) TrpPageType(eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType) TrpTableCellType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTableCellType) TrpPage(eu.transkribus.core.model.beans.TrpPage) IOException(java.io.IOException) FileOutputStream(java.io.FileOutputStream) Row(org.apache.poi.ss.usermodel.Row)

Aggregations

JAXBPageTranscript (eu.transkribus.core.model.beans.JAXBPageTranscript)1 TrpPage (eu.transkribus.core.model.beans.TrpPage)1 TrpTranscriptMetadata (eu.transkribus.core.model.beans.TrpTranscriptMetadata)1 TrpPageType (eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)1 TrpRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType)1 TrpTableCellType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTableCellType)1 TrpTableRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTableRegionType)1 NoTablesException (eu.transkribus.core.model.builder.NoTablesException)1 FileOutputStream (java.io.FileOutputStream)1 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 Iterator (java.util.Iterator)1 List (java.util.List)1 Row (org.apache.poi.ss.usermodel.Row)1 XSSFWorkbook (org.apache.poi.xssf.usermodel.XSSFWorkbook)1