Search in sources :

Example 26 with TrpPage

use of eu.transkribus.core.model.beans.TrpPage in project TranskribusCore by Transkribus.

In class TrpTxtBuilder, method writeTxtForDoc:

/**
 * Writes the text of the selected pages of a document into one text file.
 * <p>
 * Any file already present at the target location is deleted first. Pages are
 * processed in document order; for each page the transcript is taken from the
 * cache when available, otherwise it is built from the page's current transcript.
 *
 * @param doc                the document to export
 * @param addTitle           if {@code true}, {@code addTitlePage} is invoked before any page is written
 * @param wordBased          forwarded unchanged to {@code writeTxtForSinglePage}
 * @param preserveLineBreaks forwarded unchanged to {@code writeTxtForSinglePage}
 * @param file               the target file
 * @param pageIndices        zero-based indices of the pages to export; {@code null} exports every page
 * @param monitor            optional progress monitor, may be {@code null}
 * @param cache              optional source of already loaded page transcripts, may be {@code null}
 * @throws InterruptedException if the monitor reports that the export was canceled
 * @throws IOException          declared; raised e.g. when the old file cannot be deleted
 */
public static void writeTxtForDoc(TrpDoc doc, boolean addTitle, boolean wordBased, boolean preserveLineBreaks, File file, Set<Integer> pageIndices, IProgressMonitor monitor, ExportCache cache) throws JAXBException, IOException, Docx4JException, InterruptedException {
    // start from a clean slate: remove a previous export at the same location
    Files.deleteIfExists(Paths.get(file.getAbsolutePath()));
    if (addTitle) {
        addTitlePage(doc, file);
    }

    final List<TrpPage> pages = doc.getPages();
    final int pagesToExport;
    if (pageIndices != null) {
        pagesToExport = pageIndices.size();
    } else {
        pagesToExport = pages.size();
    }
    final boolean reportProgress = monitor != null;
    if (reportProgress) {
        monitor.beginTask("Exporting to text file", pagesToExport);
    }

    int exported = 0;
    for (int pageIdx = 0; pageIdx < pages.size(); ++pageIdx) {
        final boolean selected = pageIndices == null || pageIndices.contains(pageIdx);
        if (!selected) {
            continue;
        }

        if (reportProgress) {
            if (monitor.isCanceled()) {
                throw new InterruptedException("Export canceled by the user");
            }
            monitor.subTask("Processing page " + (exported + 1));
        }

        // prefer the cached transcript; build one from the page only as a fallback
        JAXBPageTranscript transcript = (cache == null) ? null : cache.getPageTranscriptAtIndex(pageIdx);
        if (transcript == null) {
            final TrpTranscriptMetadata currentMd = pages.get(pageIdx).getCurrentTranscript();
            transcript = new JAXBPageTranscript(currentMd);
            transcript.build();
        }

        final TrpPageType pageContent = transcript.getPage();
        logger.debug("writing text file for the page " + (pageIdx + 1) + "/" + pages.size());
        writeTxtForSinglePage(file, pageContent, wordBased, preserveLineBreaks);

        exported++;
        if (reportProgress) {
            // NOTE(review): this passes the running total, not an increment --
            // confirm this matches the contract of the IProgressMonitor in use
            monitor.worked(exported);
        }
    }
    logger.debug("Saved " + file.getAbsolutePath());
}
Also used : JAXBPageTranscript(eu.transkribus.core.model.beans.JAXBPageTranscript) TrpPage(eu.transkribus.core.model.beans.TrpPage) TrpTranscriptMetadata(eu.transkribus.core.model.beans.TrpTranscriptMetadata) TrpPageType(eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)

Example 27 with TrpPage

use of eu.transkribus.core.model.beans.TrpPage in project TranskribusCore by Transkribus.

In class FakeDocProvider, method createPage:

/**
 * Creates a single fake page together with one transcript metadata entry.
 * <p>
 * With {@code useLocalFiles} the image and transcript URLs are read from
 * {@code localFileUrls}; otherwise the keys are read from {@code fileKeys} and
 * the URLs are derived from those keys. In both tables index 0 of an entry is
 * used for the image and index 1 for the transcript.
 *
 * @param docid         id of the document the page belongs to
 * @param pageNum       page number; also used as index into {@code fileKeys} / {@code localFileUrls}
 * @param useLocalFiles whether to reference local files instead of key-based URLs
 * @return the page, holding exactly the one transcript added here
 */
private static TrpPage createPage(int docid, int pageNum, boolean useLocalFiles) {
    final TrpPage fakePage = new TrpPage();
    fakePage.setDocId(docid);
    fakePage.setPageNr(pageNum);
    if (useLocalFiles) {
        fakePage.setUrl(localFileUrls.get(pageNum)[0]);
    } else {
        final String imageKey = fileKeys.get(pageNum)[0];
        fakePage.setKey(imageKey);
        try {
            fakePage.setUrl((new FimgStoreUriBuilder()).getImgUri(imageKey, ImgType.view).toURL());
        } catch (MalformedURLException | IllegalArgumentException e) {
            // best effort: the page is still returned, just without an image URL
            e.printStackTrace();
        }
    }

    final TrpTranscriptMetadata transcriptMd = new TrpTranscriptMetadata();
    transcriptMd.setPageReferenceForLocalDocs(fakePage);
    transcriptMd.setStatus(EditStatus.NEW);

    // 2013-10-30 16:43:00 in the default time zone, shifted by one minute per page number;
    // the millisecond field is not reset and keeps the value from getInstance()
    final Calendar stamp = Calendar.getInstance();
    stamp.set(2013, Calendar.OCTOBER, 30, 16, 43 + pageNum, 0);
    transcriptMd.setTimestamp(stamp.getTimeInMillis());
    transcriptMd.setUserName("Schorsch");

    if (useLocalFiles) {
        transcriptMd.setUrl(localFileUrls.get(pageNum)[1]);
    } else {
        final String transcriptKey = fileKeys.get(pageNum)[1];
        transcriptMd.setKey(transcriptKey);
        try {
            transcriptMd.setUrl(builder.getFileUri(transcriptKey).toURL());
        } catch (IllegalArgumentException | MalformedURLException e) {
            // best effort: the transcript metadata is kept without a URL
            e.printStackTrace();
        }
    }

    fakePage.getTranscripts().add(transcriptMd);
    return fakePage;
}
Also used : MalformedURLException(java.net.MalformedURLException) TrpPage(eu.transkribus.core.model.beans.TrpPage) Calendar(java.util.Calendar) TrpTranscriptMetadata(eu.transkribus.core.model.beans.TrpTranscriptMetadata) FimgStoreUriBuilder(org.dea.fimgstoreclient.utils.FimgStoreUriBuilder)

Example 28 with TrpPage

use of eu.transkribus.core.model.beans.TrpPage in project TranskribusCore by Transkribus.

In class FakeDocProvider, method create:

/**
 * Assembles a fake document with id 1, its metadata and all of its pages.
 *
 * @param useLocalFiles if {@code true} the page count comes from {@code localFileUrls},
 *                      otherwise from {@code fileKeys}; the flag is also passed on to
 *                      {@code createPage}
 * @return the populated document
 */
public static TrpDoc create(boolean useLocalFiles) {
    final int docid = 1;

    // the page count is assigned before createDocMd is called so that it can be
    // set in the document metadata
    if (useLocalFiles) {
        nrOfPages = localFileUrls.size();
    } else {
        nrOfPages = fileKeys.size();
    }

    final TrpDoc fakeDoc = new TrpDoc();
    fakeDoc.setMd(createDocMd(docid));

    for (int pageIdx = 0; pageIdx < nrOfPages; pageIdx++) {
        fakeDoc.addPage(createPage(docid, pageIdx, useLocalFiles));
    }
    return fakeDoc;
}
Also used : TrpPage(eu.transkribus.core.model.beans.TrpPage) TrpDoc(eu.transkribus.core.model.beans.TrpDoc) TrpDocMetadata(eu.transkribus.core.model.beans.TrpDocMetadata)

Example 29 with TrpPage

use of eu.transkribus.core.model.beans.TrpPage in project TranskribusCore by Transkribus.

In class ScalePageCoordinatesToImageDimension, method fixAltoMmToPx:

/**
 * Rescales the coordinates in every page's current transcript file so that they
 * match the dimensions recorded on the page, and writes the file back in place.
 * <p>
 * For each page the horizontal and vertical scale factors are the ratio of the
 * page's width/height to the image width/height stored in the transcript file.
 *
 * @param doc the document whose transcript files are rewritten
 * @throws IOException   declared; presumably raised when a file cannot be read or written
 * @throws JAXBException declared; presumably raised when (un)marshalling fails
 */
private static void fixAltoMmToPx(final TrpDoc doc) throws IOException, JAXBException {
    for (TrpPage page : doc.getPages()) {
        final double imgWidth = page.getWidth();
        final double imgHeight = page.getHeight();

        final File transcriptFile = FileUtils.toFile(page.getCurrentTranscript().getUrl());
        final PcGtsType pcGts = PageXmlUtils.unmarshal(transcriptFile);
        final double altoWidth = pcGts.getPage().getImageWidth();
        final double altoHeight = pcGts.getPage().getImageHeight();
        logger.info("Img: " + imgWidth + "x" + imgHeight + " | ALTO: " + altoWidth + "x" + altoHeight);

        // NOTE(review): a zero ALTO dimension yields an infinite/NaN factor that is
        // applied and saved as-is -- confirm the inputs never contain one
        final double altoWidthHundredth = altoWidth / 100f;
        final double altoHeightHundredth = altoHeight / 100f;
        final double scaleX = (imgWidth / altoWidthHundredth) / 100f;
        final double scaleY = (imgHeight / altoHeightHundredth) / 100f;
        logger.info("Scale factor X: " + scaleX);
        logger.info("Scale factor Y: " + scaleY);

        TrpPageTypeUtils.applyAffineTransformation(pcGts.getPage(), 0, 0, scaleX, scaleY, 0);
        PageXmlUtils.marshalToFile(pcGts, transcriptFile);
    }
}
Also used : TrpPage(eu.transkribus.core.model.beans.TrpPage) File(java.io.File) PcGtsType(eu.transkribus.core.model.beans.pagecontent.PcGtsType)

Example 30 with TrpPage

use of eu.transkribus.core.model.beans.TrpPage in project TranskribusCore by Transkribus.

In class ExportCache, method storeCustomTagMapForDoc:

/**
 * Walks the selected pages of a document and hands every text line (and, if
 * requested, every word) to {@code getTagsForShapeElement}.
 * <p>
 * The {@code tags} collection is cleared before the walk starts. Nothing is
 * returned; presumably {@code getTagsForShapeElement} records the tags it finds
 * in this cache.
 *
 * @param doc         the document whose pages are scanned
 * @param wordBased   if {@code true}, the words of each line are processed in addition to the line itself
 * @param pageIndices zero-based indices of the pages to scan; {@code null} scans every page
 * @param monitor     optional progress monitor, may be {@code null}
 * @param blackening  value stored in the {@code doBlackening} field
 * @throws JAXBException        declared; presumably raised while building a page transcript
 * @throws IOException          declared; presumably raised while building a page transcript
 * @throws InterruptedException if the monitor reports that the user canceled
 */
public void storeCustomTagMapForDoc(TrpDoc doc, boolean wordBased, Set<Integer> pageIndices, IProgressMonitor monitor, boolean blackening) throws JAXBException, IOException, InterruptedException {
    doBlackening = blackening;
    tags.clear();

    final List<TrpPage> pages = doc.getPages();
    final int pageCount = pages.size();
    int processed = 0;
    for (int pageIdx = 0; pageIdx < pageCount; ++pageIdx) {
        final boolean selected = pageIndices == null || pageIndices.contains(pageIdx);
        if (!selected) {
            continue;
        }
        if (monitor != null && monitor.isCanceled()) {
            throw new InterruptedException("User canceled the export");
        }

        // page transcripts are normally fetched before this method runs, so the
        // cached entry is expected to exist; the page itself is only a fallback
        JAXBPageTranscript transcript = (pageTranscripts == null) ? null : pageTranscripts.get(pageIdx);
        if (transcript == null) {
            final TrpTranscriptMetadata currentMd = pages.get(pageIdx).getCurrentTranscript();
            transcript = new JAXBPageTranscript(currentMd);
        } else {
            // result intentionally unused, call kept as in the original
            transcript.getPageData();
        }
        transcript.build();

        final TrpPageType pageContent = transcript.getPage();
        logger.debug("get tags for page " + (pageIdx + 1) + "/" + doc.getNPages());
        final List<TrpTextRegionType> regions = pageContent.getTextRegions(true);
        for (TrpTextRegionType region : regions) {
            for (TextLineType rawLine : region.getTextLine()) {
                final TrpTextLineType line = (TrpTextLineType) rawLine;
                final List<WordType> words = line.getWord();
                // the line is always processed, words only on request
                getTagsForShapeElement(line);
                if (!wordBased) {
                    continue;
                }
                for (WordType rawWord : words) {
                    getTagsForShapeElement((TrpWordType) rawWord);
                }
            }
        }

        if (monitor != null) {
            monitor.setTaskName("Loaded tags for page " + (pageIdx + 1));
            processed++;
            monitor.worked(processed);
        }
    }
}
Also used : JAXBPageTranscript(eu.transkribus.core.model.beans.JAXBPageTranscript) TrpPage(eu.transkribus.core.model.beans.TrpPage) TrpTranscriptMetadata(eu.transkribus.core.model.beans.TrpTranscriptMetadata) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType) WordType(eu.transkribus.core.model.beans.pagecontent.WordType) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TextLineType(eu.transkribus.core.model.beans.pagecontent.TextLineType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) TrpPageType(eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)

Aggregations

TrpPage (eu.transkribus.core.model.beans.TrpPage)32 TrpTranscriptMetadata (eu.transkribus.core.model.beans.TrpTranscriptMetadata)14 File (java.io.File)14 IOException (java.io.IOException)14 JAXBPageTranscript (eu.transkribus.core.model.beans.JAXBPageTranscript)10 PcGtsType (eu.transkribus.core.model.beans.pagecontent.PcGtsType)7 TrpPageType (eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)7 URL (java.net.URL)7 ArrayList (java.util.ArrayList)7 TrpDoc (eu.transkribus.core.model.beans.TrpDoc)6 TrpDocMetadata (eu.transkribus.core.model.beans.TrpDocMetadata)5 FileType (eu.transkribus.core.model.beans.mets.FileType)5 JAXBException (javax.xml.bind.JAXBException)5 TrpTextRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType)4 Dimension (java.awt.Dimension)4 FileNotFoundException (java.io.FileNotFoundException)4 CorruptImageException (eu.transkribus.core.exceptions.CorruptImageException)3 DivType (eu.transkribus.core.model.beans.mets.DivType)3 Fptr (eu.transkribus.core.model.beans.mets.DivType.Fptr)3 FileGrpType (eu.transkribus.core.model.beans.mets.FileGrpType)3