Search in sources :

Example 1 with NoTagsException

Use of eu.transkribus.core.model.builder.NoTagsException in project TranskribusCore by Transkribus.

From the class TrpXlsxBuilder, method writeXlsxForDoc.

/**
 * Writes the tags of the selected pages of a document into an Excel (.xlsx) file.
 *
 * <p>For every selected page the transcript is taken from the export cache or, if the
 * cache has none for that index, built from the page's current transcript. Each text
 * line (or each word, when {@code wordBased} is set) is handed to
 * {@code writeTagsForShapeElement}; afterwards the columns of every sheet are auto-sized
 * and the workbook is written to {@code exportFile}.
 *
 * <p>NOTE(review): the workbook is stored in the shared field {@code wb}, which is
 * reassigned on every call, so concurrent invocations look unsafe - confirm with callers.
 *
 * @param doc         document whose pages are exported
 * @param wordBased   {@code true} to process every word of a line, {@code false} to
 *                    process whole lines
 * @param exportFile  target file of the export
 * @param pageIndices zero-based indices of the pages to export; {@code null} exports all pages
 * @param monitor     optional progress monitor; may be {@code null}
 * @param cache       export cache providing the tag map, the selected tag names and
 *                    possibly preloaded transcripts; must not be {@code null}
 * @throws IllegalArgumentException if {@code cache} is {@code null}
 * @throws NoTagsException          if the cache's custom tag map for the document is empty
 * @throws InterruptedException     if the monitor reports cancellation
 * @throws IOException              if no sheet was created or the file cannot be written
 * @throws Exception                any other failure raised while building a transcript
 */
public static void writeXlsxForDoc(TrpDoc doc, boolean wordBased, File exportFile, Set<Integer> pageIndices, IProgressMonitor monitor, ExportCache cache) throws NoTagsException, Exception {
    if (cache == null) {
        throw new IllegalArgumentException("ExportCache must not be null.");
    }
    if (cache.getCustomTagMapForDoc().isEmpty()) {
        logger.info("No tags to store -> Xlsx export cancelled");
        throw new NoTagsException("No tags available to store into Xlsx");
    }
    List<TrpPage> pages = doc.getPages();
    String exportPath = exportFile.getPath();
    Set<String> selectedTags = cache.getOnlySelectedTagnames(ExportUtils.getOnlyWantedTagnames(CustomTagFactory.getRegisteredTagNames()));
    int totalPages = pageIndices == null ? pages.size() : pageIndices.size();
    if (monitor != null) {
        monitor.beginTask("Exporting to Excel", totalPages);
    }
    wb = new XSSFWorkbook();
    // the document id is the same for every row, so convert it only once
    String docId = String.valueOf(doc.getId());
    int c = 0;
    for (int i = 0; i < pages.size(); ++i) {
        if (pageIndices != null && !pageIndices.contains(i)) {
            continue;
        }
        if (monitor != null) {
            if (monitor.isCanceled()) {
                throw new InterruptedException("Export was canceled by user");
            }
            monitor.subTask("Processing page " + (c + 1));
        }
        TrpPage page = pages.get(i);
        // try to get previously loaded JAXB transcript; build it only when the cache has none
        JAXBPageTranscript tr = cache.getPageTranscriptAtIndex(i);
        if (tr == null) {
            TrpTranscriptMetadata md = page.getCurrentTranscript();
            tr = new JAXBPageTranscript(md);
            tr.build();
        }
        TrpPageType trpPage = tr.getPage();
        logger.debug("writing xlsx for page " + (i + 1) + "/" + doc.getNPages());
        String pageNr = String.valueOf(page.getPageNr());
        List<TrpTextRegionType> textRegions = trpPage.getTextRegions(true);
        for (TrpTextRegionType r : textRegions) {
            for (TextLineType line : r.getTextLine()) {
                TrpTextLineType trpL = (TrpTextLineType) line;
                if (wordBased) {
                    // one call per word; the text of the surrounding line is passed along
                    for (WordType word : trpL.getWord()) {
                        TrpWordType w = (TrpWordType) word;
                        writeTagsForShapeElement(w, trpL.getUnicodeText(), docId, pageNr, r.getId(), trpL.getId(), w.getId(), selectedTags);
                    }
                } else {
                    // line mode: the word id is passed as an empty string
                    writeTagsForShapeElement(trpL, trpL.getUnicodeText(), docId, pageNr, r.getId(), trpL.getId(), "", selectedTags);
                }
            }
        }
        ++c;
        if (monitor != null) {
            // NOTE(review): c is the running total of processed pages; if this monitor
            // expects the increment since the last call, this should be worked(1) - confirm.
            monitor.worked(c);
        }
    }
    // means no tags at all; checked before the target file is opened so that no empty file is created
    if (wb.getNumberOfSheets() == 0) {
        logger.error("No sheets were created -> nothing to write to: " + exportPath);
        throw new IOException("Sorry - No tags available for export");
    }
    /*
     * auto size the columns
     */
    for (int i = 0; i < wb.getNumberOfSheets(); i++) {
        Iterator<Row> rowIterator = wb.getSheetAt(i).rowIterator();
        // the first row is taken as the header row; its cell count decides how many columns get resized
        if (rowIterator.hasNext()) {
            Row headerRow = rowIterator.next();
            int numberOfCells = headerRow.getPhysicalNumberOfCells();
            for (int j = 0; j < numberOfCells; j++) {
                wb.getSheetAt(i).autoSizeColumn(j);
            }
        }
    }
    // try-with-resources closes the stream even when writing fails
    try (FileOutputStream fOut = new FileOutputStream(exportPath)) {
        wb.write(fOut);
    } catch (IOException e) {
        logger.error("Could not write xlsx to: " + exportPath, e);
        throw e;
    }
    logger.info("wrote xlsx to: " + exportPath);
}
Also used : NoTagsException(eu.transkribus.core.model.builder.NoTagsException) JAXBPageTranscript(eu.transkribus.core.model.beans.JAXBPageTranscript) TrpPage(eu.transkribus.core.model.beans.TrpPage) TrpTranscriptMetadata(eu.transkribus.core.model.beans.TrpTranscriptMetadata) IOException(java.io.IOException) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType) WordType(eu.transkribus.core.model.beans.pagecontent.WordType) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TextLineType(eu.transkribus.core.model.beans.pagecontent.TextLineType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) FileOutputStream(java.io.FileOutputStream) Iterator(java.util.Iterator) XSSFWorkbook(org.apache.poi.xssf.usermodel.XSSFWorkbook) Row(org.apache.poi.ss.usermodel.Row) TrpPageType(eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)

Aggregations

JAXBPageTranscript (eu.transkribus.core.model.beans.JAXBPageTranscript)1 TrpPage (eu.transkribus.core.model.beans.TrpPage)1 TrpTranscriptMetadata (eu.transkribus.core.model.beans.TrpTranscriptMetadata)1 TextLineType (eu.transkribus.core.model.beans.pagecontent.TextLineType)1 WordType (eu.transkribus.core.model.beans.pagecontent.WordType)1 TrpPageType (eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)1 TrpTextLineType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType)1 TrpTextRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType)1 TrpWordType (eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType)1 NoTagsException (eu.transkribus.core.model.builder.NoTagsException)1 FileOutputStream (java.io.FileOutputStream)1 IOException (java.io.IOException)1 Iterator (java.util.Iterator)1 Row (org.apache.poi.ss.usermodel.Row)1 XSSFWorkbook (org.apache.poi.xssf.usermodel.XSSFWorkbook)1