Search in sources :

Example 11 with JAXBPageTranscript

use of eu.transkribus.core.model.beans.JAXBPageTranscript in project TranskribusCore by Transkribus.

The method writeTxtForDoc of the class TrpTxtBuilder.

/**
 * Writes the selected pages of a document into one text file.
 * <p>
 * An existing file at the target location is deleted first. When {@code addTitle} is set,
 * {@code addTitlePage(doc, file)} is called before any page is processed. Every selected page
 * is then passed, in page-list order, to {@code writeTxtForSinglePage}.
 *
 * @param doc the document whose pages are exported
 * @param addTitle whether {@code addTitlePage} is called before the pages are written
 * @param wordBased forwarded unchanged to {@code writeTxtForSinglePage}
 * @param preserveLineBreaks forwarded unchanged to {@code writeTxtForSinglePage}
 * @param file the output file
 * @param pageIndices zero-based indices into {@code doc.getPages()} of the pages to export;
 *        {@code null} selects every page
 * @param monitor progress monitor to report to and to poll for cancellation; may be {@code null}
 * @param cache source of page transcripts that is consulted before a transcript is built from
 *        the page's current transcript metadata; may be {@code null}
 * @throws JAXBException declared by this method; presumably raised while building a transcript
 * @throws IOException declared by this method; e.g. from deleting the existing file
 * @throws Docx4JException declared by this method; origin not visible here
 * @throws InterruptedException if the monitor reports cancellation before a page is processed
 */
public static void writeTxtForDoc(TrpDoc doc, boolean addTitle, boolean wordBased, boolean preserveLineBreaks, File file, Set<Integer> pageIndices, IProgressMonitor monitor, ExportCache cache) throws JAXBException, IOException, Docx4JException, InterruptedException {
    // start from a clean slate: drop whatever a previous export left at this location
    Files.deleteIfExists(Paths.get(file.getAbsolutePath()));
    if (addTitle) {
        addTitlePage(doc, file);
    }

    List<TrpPage> pages = doc.getPages();
    final boolean exportAll = pageIndices == null;
    int pagesToExport = exportAll ? pages.size() : pageIndices.size();
    if (monitor != null) {
        monitor.beginTask("Exporting to text file", pagesToExport);
    }

    int exported = 0;
    for (int pageIdx = 0; pageIdx < pages.size(); pageIdx++) {
        boolean selected = exportAll || pageIndices.contains(pageIdx);
        if (!selected) {
            continue;
        }

        if (monitor != null) {
            if (monitor.isCanceled()) {
                throw new InterruptedException("Export canceled by the user");
            }
            // the sub task label counts exported pages, not the page's position in the document
            monitor.subTask("Processing page " + (exported + 1));
        }

        // prefer a transcript from the cache; build one from the page metadata only as a fallback
        JAXBPageTranscript transcript = (cache == null) ? null : cache.getPageTranscriptAtIndex(pageIdx);
        if (transcript == null) {
            transcript = new JAXBPageTranscript(pages.get(pageIdx).getCurrentTranscript());
            transcript.build();
        }

        TrpPageType pageContent = transcript.getPage();
        logger.debug("writing text file for the page " + (pageIdx + 1) + "/" + pages.size());
        writeTxtForSinglePage(file, pageContent, wordBased, preserveLineBreaks);

        exported++;
        if (monitor != null) {
            // NOTE(review): this passes the running count of exported pages, not a per-page
            // increment - confirm that is what the monitor implementation expects
            monitor.worked(exported);
        }
    }
    logger.debug("Saved " + file.getAbsolutePath());
}
Also used : JAXBPageTranscript(eu.transkribus.core.model.beans.JAXBPageTranscript) TrpPage(eu.transkribus.core.model.beans.TrpPage) TrpTranscriptMetadata(eu.transkribus.core.model.beans.TrpTranscriptMetadata) TrpPageType(eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)

Example 12 with JAXBPageTranscript

use of eu.transkribus.core.model.beans.JAXBPageTranscript in project TranskribusCore by Transkribus.

The method storeCustomTagMapForDoc of the class ExportCache.

/**
 * Walks the selected pages of a document and passes every text line - and, if requested,
 * every word - to {@code getTagsForShapeElement}.
 * <p>
 * Before the walk the {@code tags} collection is cleared and {@code blackening} is stored in
 * the {@code doBlackening} field. This method returns nothing; presumably the collected tags
 * end up in {@code tags} via {@code getTagsForShapeElement} - confirm against that helper.
 *
 * @param doc the document whose pages are inspected
 * @param wordBased if {@code true}, the words of each line are inspected in addition to the
 *        line itself
 * @param pageIndices zero-based indices into {@code doc.getPages()} of the pages to inspect;
 *        {@code null} selects every page
 * @param monitor progress monitor to report to and to poll for cancellation; may be {@code null}
 * @param blackening value assigned to the {@code doBlackening} field
 * @throws JAXBException declared by this method; presumably raised while building a transcript
 * @throws IOException declared by this method; origin not visible here
 * @throws InterruptedException if the monitor reports cancellation before a page is processed
 */
public void storeCustomTagMapForDoc(TrpDoc doc, boolean wordBased, Set<Integer> pageIndices, IProgressMonitor monitor, boolean blackening) throws JAXBException, IOException, InterruptedException {
    doBlackening = blackening;
    tags.clear();

    List<TrpPage> pages = doc.getPages();
    int pageCount = pages.size();
    int processed = 0;
    for (int pageIdx = 0; pageIdx < pageCount; pageIdx++) {
        boolean selected = pageIndices == null || pageIndices.contains(pageIdx);
        if (!selected) {
            continue;
        }
        if (monitor != null && monitor.isCanceled()) {
            throw new InterruptedException("User canceled the export");
        }

        // page transcripts are normally fetched before this method runs, so the lookup usually hits;
        // the page's current transcript metadata is only the fallback
        JAXBPageTranscript transcript = (pageTranscripts == null) ? null : pageTranscripts.get(pageIdx);
        if (transcript == null) {
            transcript = new JAXBPageTranscript(pages.get(pageIdx).getCurrentTranscript());
        } else {
            // result intentionally unused, exactly as before
            // NOTE(review): confirm whether this call is needed for a side effect
            transcript.getPageData();
        }
        transcript.build();

        TrpPageType pageContent = transcript.getPage();
        logger.debug("get tags for page " + (pageIdx + 1) + "/" + doc.getNPages());

        for (TrpTextRegionType region : pageContent.getTextRegions(true)) {
            for (TextLineType rawLine : region.getTextLine()) {
                TrpTextLineType line = (TrpTextLineType) rawLine;
                List<WordType> words = line.getWord();
                // the line itself is always inspected; its words only on top of that when wordBased is set
                getTagsForShapeElement(line);
                if (wordBased) {
                    for (WordType rawWord : words) {
                        getTagsForShapeElement((TrpWordType) rawWord);
                    }
                }
            }
        }

        if (monitor != null) {
            monitor.setTaskName("Loaded tags for page " + (pageIdx + 1));
            // NOTE(review): this passes the running count of processed pages, not a per-page
            // increment - confirm that is what the monitor implementation expects
            processed++;
            monitor.worked(processed);
        }
    }
}
Also used : JAXBPageTranscript(eu.transkribus.core.model.beans.JAXBPageTranscript) TrpPage(eu.transkribus.core.model.beans.TrpPage) TrpTranscriptMetadata(eu.transkribus.core.model.beans.TrpTranscriptMetadata) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType) WordType(eu.transkribus.core.model.beans.pagecontent.WordType) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TextLineType(eu.transkribus.core.model.beans.pagecontent.TextLineType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) TrpPageType(eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)

Aggregations

JAXBPageTranscript (eu.transkribus.core.model.beans.JAXBPageTranscript)12 TrpTranscriptMetadata (eu.transkribus.core.model.beans.TrpTranscriptMetadata)11 TrpPage (eu.transkribus.core.model.beans.TrpPage)10 TrpPageType (eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)7 IOException (java.io.IOException)7 PcGtsType (eu.transkribus.core.model.beans.pagecontent.PcGtsType)3 JAXBException (javax.xml.bind.JAXBException)3 Rtf (com.tutego.jrtf.Rtf)2 TrpDoc (eu.transkribus.core.model.beans.TrpDoc)2 CustomTag (eu.transkribus.core.model.beans.customtags.CustomTag)2 TextLineType (eu.transkribus.core.model.beans.pagecontent.TextLineType)2 WordType (eu.transkribus.core.model.beans.pagecontent.WordType)2 TrpRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType)2 TrpTextLineType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType)2 TrpTextRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType)2 TrpWordType (eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType)2 File (java.io.File)2 FileOutputStream (java.io.FileOutputStream)2 FileWriter (java.io.FileWriter)2 URL (java.net.URL)2