Search in sources :

Example 11 with TrpPage

use of eu.transkribus.core.model.beans.TrpPage in project TranskribusCore by Transkribus.

the class AltoExporter method export.

/**
 * Exports every page of the given document as an ALTO file.
 * <p>
 * The output directory is obtained from {@code createAltoOuputDir(doc, path)}; each page is then
 * written with {@code exportAltoFile}, with line-to-word splitting switched off. Observers receive
 * a status message before and after the export and the 1-based page number for each page.
 *
 * @param doc the document whose pages are exported
 * @param path the target path handed to {@code createAltoOuputDir}
 * @throws DocumentException propagated from the helpers called here
 * @throws MalformedURLException propagated from the helpers called here
 * @throws IOException propagated from the helpers called here
 * @throws JAXBException propagated from the helpers called here
 * @throws TransformerException propagated from the helpers called here
 */
public void export(final TrpDoc doc, final String path) throws DocumentException, MalformedURLException, IOException, JAXBException, TransformerException {
    final File altoOutputDir = createAltoOuputDir(doc, path);
    // With java.util.Observable semantics a notification is only delivered while the changed
    // flag is set, and delivering it clears the flag. setChanged() therefore has to come
    // first; the reverse order drops the first message and leaves a stale flag behind.
    setChanged();
    notifyObservers("Exporting Altos...");
    for (int i = 0; i < doc.getPages().size(); i++) {
        logger.info("Processing page " + (i + 1));
        setChanged();
        notifyObservers(Integer.valueOf(i + 1));
        final TrpPage p = doc.getPages().get(i);
        // 3rd parameter says 'splitLineIntoWords'
        exportAltoFile(p, altoOutputDir, false);
    }
    setChanged();
    notifyObservers("Alto written at: " + path);
    logger.info("ALTO files written at: " + path);
}
Also used : TrpPage(eu.transkribus.core.model.beans.TrpPage) File(java.io.File)

Example 12 with TrpPage

use of eu.transkribus.core.model.beans.TrpPage in project TranskribusCore by Transkribus.

the class MetsUtil method getTrpPages.

/**
 * Builds the list of TrpPage objects with
 * local file references from the mets master file group and structmap.
 * The method is strict regarding PAGE XML existence! Each image file must have a correspondent PAGE XML.
 *
 * @param mets the METS document whose master file groups and struct map are read
 * @param parentDir directory that is passed on to {@code buildPage} for each page div
 * @return one TrpPage per page div, in the order the divs are returned by
 *         {@code getPageDivsFromStructMap}
 * @throws IOException if the image file group, the xml file group or a valid struct map is missing
 */
public static List<TrpPage> getTrpPages(Mets mets, File parentDir) throws IOException {
    // check filesection. needs img group and xml group to distinguish them without going for mimetypes
    List<FileType> xmlGrp = null;
    List<FileType> imgGrp = null;
    for (FileGrpType grp : getMasterFileGrp(mets)) {
        final String grpId = grp.getID();
        // Constant-first equals: a file group without an ID is ignored like any other
        // non-matching group, whereas a String switch would throw a NullPointerException.
        if (TrpMetsBuilder.IMG_GROUP_ID.equals(grpId)) {
            imgGrp = grp.getFile();
        } else if (TrpMetsBuilder.PAGE_GROUP_ID.equals(grpId)) {
            xmlGrp = grp.getFile();
        }
    }
    if (imgGrp == null) {
        throw new IOException("METS file has no image file list!");
    }
    if (xmlGrp == null) {
        throw new IOException("METS file has no xml file list!");
    }
    final List<DivType> pageDivs = getPageDivsFromStructMap(mets);
    if (pageDivs == null) {
        throw new IOException("No valid StructMap was found!");
    }
    final List<TrpPage> pages = new ArrayList<>(pageDivs.size());
    for (DivType div : pageDivs) {
        pages.add(buildPage(div, imgGrp, xmlGrp, parentDir));
    }
    return pages;
}
Also used : FileGrpType(eu.transkribus.core.model.beans.mets.FileGrpType) DivType(eu.transkribus.core.model.beans.mets.DivType) FileType(eu.transkribus.core.model.beans.mets.FileType) TrpPage(eu.transkribus.core.model.beans.TrpPage) ArrayList(java.util.ArrayList) IOException(java.io.IOException)

Example 13 with TrpPage

use of eu.transkribus.core.model.beans.TrpPage in project TranskribusCore by Transkribus.

the class MetsUtil method buildPage.

/**
 * Builds a TrpPage for a single page div of the struct map.
 * <p>
 * The div's file pointers are matched against the image and xml file groups; the matching
 * entries are resolved with {@code MetsUtil.getFile(type, parentDir)}. The page gets the div's
 * ORDER as page number, the image file's URL and name, and one new transcript metadata entry
 * pointing at the xml file.
 *
 * @param div the page div to convert
 * @param imgGrp files of the image file group
 * @param xmlGrp files of the xml file group
 * @param parentDir directory handed to {@code MetsUtil.getFile} when resolving files
 * @return the populated page
 * @throws IOException if the div maps no image file or no xml file from the given groups
 */
private static TrpPage buildPage(DivType div, List<FileType> imgGrp, List<FileType> xmlGrp, File parentDir) throws IOException {
    final int nr = div.getORDER().intValue();
    File imgFile = null;
    File xmlFile = null;
    // FIXME this will only work for local files
    for (Fptr ptr : div.getFptr()) {
        if (ptr.getArea() == null) {
            // no area means no FILEID to look up; skip instead of dereferencing null
            continue;
        }
        final FileType type = (FileType) ptr.getArea().getFILEID();
        if (imgGrp.contains(type)) {
            imgFile = MetsUtil.getFile(type, parentDir);
        } else if (xmlGrp.contains(type)) {
            xmlFile = MetsUtil.getFile(type, parentDir);
        }
    }
    // Fail with a descriptive checked exception rather than logging and then running into a
    // NullPointerException on the missing file a few lines later.
    if (imgFile == null) {
        throw new IOException("No master image mapped for page " + nr + " in the structmap!");
    }
    if (xmlFile == null) {
        throw new IOException("No master xml mapped for page " + nr + " in the structmap!");
    }

    final TrpPage page = new TrpPage();
    page.setPageNr(nr);
    logger.info("Page " + page.getPageNr() + " image: " + imgFile.getAbsolutePath());
    page.setUrl(imgFile.toURI().toURL());
    page.setKey(null);
    page.setDocId(-1);
    page.setImgFileName(imgFile.getName());

    logger.info("Page " + page.getPageNr() + " xml: " + xmlFile.getAbsolutePath());
    final TrpTranscriptMetadata tmd = new TrpTranscriptMetadata();
    tmd.setPageReferenceForLocalDocs(page);
    tmd.setPageId(page.getPageId());
    tmd.setUrl(xmlFile.toURI().toURL());
    tmd.setKey(null);
    tmd.setStatus(EditStatus.NEW);
    tmd.setTimestamp(new Date().getTime());
    tmd.setUserName("LocalDocReader");
    page.getTranscripts().add(tmd);
    return page;
}
Also used : TrpPage(eu.transkribus.core.model.beans.TrpPage) FileType(eu.transkribus.core.model.beans.mets.FileType) Fptr(eu.transkribus.core.model.beans.mets.DivType.Fptr) TrpTranscriptMetadata(eu.transkribus.core.model.beans.TrpTranscriptMetadata) File(java.io.File) Date(java.util.Date)

Example 14 with TrpPage

use of eu.transkribus.core.model.beans.TrpPage in project TranskribusCore by Transkribus.

the class PdfExporter method export.

/**
 * Writes the selected pages of a document into one PDF file at {@code path}.
 * <p>
 * Observers are notified with a status message at the start and end and with the 1-based
 * page number after each exported page. If the {@code cancel} flag is set after a page has
 * been added, the PDF is closed, the file is deleted and an InterruptedException is thrown.
 *
 * @param doc the document to export; must not be null
 * @param path location of the PDF file; must not be null
 * @param pageIndices 0-based indices of the pages to export, or null to export all pages
 * @param useWordLevel passed to the TrpPdfDocument constructor and to {@code addTags}
 * @param addTextPages passed to {@code addPage}
 * @param imagesOnly passed to {@code addPage}
 * @param highlightTags passed to the TrpPdfDocument constructor; if true, {@code addTags} is
 *        called after the last page
 * @param doBlackening passed to the TrpPdfDocument constructor and to {@code addPage}
 * @param createTitle passed to the TrpPdfDocument constructor
 * @param cache source of already loaded page transcripts; a new ExportCache is used if null
 * @return the PDF file
 * @throws IllegalArgumentException if {@code doc} or {@code path} is null
 * @throws IOException if the incomplete file cannot be deleted on cancel, or propagated from
 *         the calls made here
 * @throws InterruptedException if the export was cancelled
 */
public File export(final TrpDoc doc, final String path, Set<Integer> pageIndices, final boolean useWordLevel, final boolean addTextPages, final boolean imagesOnly, final boolean highlightTags, final boolean doBlackening, boolean createTitle, ExportCache cache) throws DocumentException, MalformedURLException, IOException, JAXBException, URISyntaxException, InterruptedException {
    if (doc == null) {
        throw new IllegalArgumentException("TrpDoc is null!");
    }
    if (path == null) {
        throw new IllegalArgumentException("path is null!");
    }
    // fall back to a fresh cache so it can be queried unconditionally below
    if (cache == null) {
        cache = new ExportCache();
    }

    final File pdfFile = new File(path);
    final TrpPdfDocument pdf = new TrpPdfDocument(pdfFile, useWordLevel, highlightTags, doBlackening, createTitle);
    setChanged();
    notifyObservers("Creating PDF document...");

    // the document itself is handed to addPage only together with the first exported page
    boolean firstPagePending = true;
    for (int pageIdx = 0; pageIdx < doc.getPages().size(); ++pageIdx) {
        final boolean selected = pageIndices == null || pageIndices.contains(pageIdx);
        if (!selected) {
            continue;
        }
        final int pageNr = pageIdx + 1;
        logger.info("Processing page " + pageNr);

        final TrpPage page = doc.getPages().get(pageIdx);
        final URL imgUrl = page.getUrl();

        // md is only needed for getting the resolution because it may be missing in the image.
        // For a local doc it stays null and the resolution has to come from the image.
        FimgStoreImgMd md = null;
        if (doc.isRemoteDoc()) {
            md = (FimgStoreImgMd) new FimgStoreGetClient(page.getUrl()).getFileMd(page.getKey());
        }

        final URL xmlUrl = page.getCurrentTranscript().getUrl();
        logger.debug("output with tags " + highlightTags);

        // prefer the transcript held by the cache; unmarshal from the URL otherwise
        final JAXBPageTranscript cachedTranscript = cache.getPageTranscriptAtIndex(pageIdx);
        final PcGtsType pc = cachedTranscript == null
                ? PageXmlUtils.unmarshal(xmlUrl)
                : cachedTranscript.getPageData();

        final TrpDoc docForFirstPage = firstPagePending ? doc : null;
        pdf.addPage(imgUrl, docForFirstPage, pc, addTextPages, imagesOnly, md, doBlackening, cache);
        firstPagePending = false;

        setChanged();
        notifyObservers(Integer.valueOf(pageNr));

        if (cancel) {
            pdf.close();
            if (!pdfFile.delete()) {
                throw new IOException("Could not delete the incomplete PDF file during export cancel");
            }
            throw new InterruptedException("Export canceled by the user");
        }
    }

    if (highlightTags) {
        pdf.addTags(doc, pageIndices, useWordLevel, cache);
    }
    pdf.close();

    setChanged();
    notifyObservers("PDF written at: " + path);
    logger.info("PDF written at: " + path);
    return pdfFile;
}
Also used : FimgStoreImgMd(org.dea.fimgstoreclient.beans.FimgStoreImgMd) JAXBPageTranscript(eu.transkribus.core.model.beans.JAXBPageTranscript) TrpPage(eu.transkribus.core.model.beans.TrpPage) IOException(java.io.IOException) PcGtsType(eu.transkribus.core.model.beans.pagecontent.PcGtsType) URL(java.net.URL) FimgStoreGetClient(org.dea.fimgstoreclient.FimgStoreGetClient) ExportCache(eu.transkribus.core.model.builder.ExportCache) File(java.io.File)

Example 15 with TrpPage

use of eu.transkribus.core.model.beans.TrpPage in project TranskribusCore by Transkribus.

the class TrpRtfBuilder method writeRtfForDoc.

/**
 * Writes the selected pages of a document, optionally followed by a tag summary, as RTF.
 * <p>
 * The static fields {@code exportTags}, {@code tagnames} and {@code doBlackening} are
 * overwritten from the arguments before anything else happens. If the monitor reports
 * cancellation, the method returns without writing the file.
 *
 * @param doc the document to export
 * @param wordBased passed to {@code getRtfParagraphsForTranscript}
 * @param writeTags if true, a section listing the selected tags is appended after the pages
 * @param doBlackening stored in the static field of the same name
 * @param file the output file
 * @param pageIndices 0-based indices of the pages to export, or null to export all pages
 * @param monitor progress monitor, may be null
 * @param cache supplies the selected tag names and the tags per tag name
 * @throws JAXBException propagated from the calls made here
 * @throws IOException propagated from the calls made here
 */
public static void writeRtfForDoc(TrpDoc doc, boolean wordBased, boolean writeTags, boolean doBlackening, File file, Set<Integer> pageIndices, IProgressMonitor monitor, ExportCache cache) throws JAXBException, IOException {
    exportTags = writeTags;
    // tagnames = all tags the user selected for export
    tagnames = cache.getSelectedTags();
    TrpRtfBuilder.doBlackening = doBlackening;

    final Rtf rtf = Rtf.rtf();
    final List<TrpPage> pages = doc.getPages();

    final int totalPages;
    if (pageIndices == null) {
        totalPages = pages.size();
    } else {
        totalPages = pageIndices.size();
    }
    if (monitor != null) {
        monitor.beginTask("Exporting to RTF", totalPages);
    }

    // number of pages exported so far (the page index also counts skipped pages)
    int c = 0;
    for (int i = 0; i < pages.size(); ++i) {
        final boolean selected = pageIndices == null || pageIndices.contains(i);
        if (!selected) {
            continue;
        }
        if (monitor != null) {
            if (monitor.isCanceled()) {
                logger.debug("RTF export cancelled!");
                return;
            }
            monitor.subTask("Processing page " + (c + 1));
        }

        final TrpTranscriptMetadata md = pages.get(i).getCurrentTranscript();
        final JAXBPageTranscript tr = new JAXBPageTranscript(md);
        tr.build();
        final TrpPageType trpPage = tr.getPage();
        logger.debug("writing rtf for page " + (i + 1) + "/" + doc.getNPages());

        rtf.header(color(204, 0, 0).at(0), color(0, 0xff, 0).at(1)).section(getRtfParagraphsForTranscript(trpPage, wordBased));

        ++c;
        if (monitor != null) {
            // NOTE(review): c is the cumulative count; if worked() expects an increment
            // (as Eclipse's IProgressMonitor does) this over-reports progress - confirm.
            monitor.worked(c);
        }
    }

    // write tags at end of last page
    if (exportTags) {
        final ArrayList<RtfPara> tagParas = new ArrayList<RtfPara>();
        for (String tagName : tagnames) {
            // all custom tags with this tag name, mapped to their text
            final HashMap<CustomTag, String> tagsForName = cache.getTags(tagName);
            if (tagsForName.isEmpty()) {
                continue;
            }
            // underlined headline with the number of tags, then one line per tag value
            tagParas.add(RtfPara.p(RtfText.text(RtfText.underline(tagName + " tags in this document: " + tagsForName.size()))));

            final Collection<String> tagValues = tagsForName.values();
            final RtfText[] valueTexts = new RtfText[tagValues.size()];
            int pos = 0;
            for (String value : tagValues) {
                valueTexts[pos] = RtfText.text(value.concat("\n"));
                pos++;
            }
            tagParas.add(RtfPara.p(valueTexts));
        }
        rtf.header(color(204, 0, 0).at(0)).section(tagParas);
    }

    rtf.out(new FileWriter(file));
    logger.info("wrote rtf to: " + file.getAbsolutePath());
}
Also used : JAXBPageTranscript(eu.transkribus.core.model.beans.JAXBPageTranscript) Rtf(com.tutego.jrtf.Rtf) TrpPage(eu.transkribus.core.model.beans.TrpPage) RtfText(com.tutego.jrtf.RtfText) FileWriter(java.io.FileWriter) ArrayList(java.util.ArrayList) TrpTranscriptMetadata(eu.transkribus.core.model.beans.TrpTranscriptMetadata) CustomTag(eu.transkribus.core.model.beans.customtags.CustomTag) RtfPara(com.tutego.jrtf.RtfPara) TrpPageType(eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)

Aggregations

TrpPage (eu.transkribus.core.model.beans.TrpPage)32 TrpTranscriptMetadata (eu.transkribus.core.model.beans.TrpTranscriptMetadata)14 File (java.io.File)14 IOException (java.io.IOException)14 JAXBPageTranscript (eu.transkribus.core.model.beans.JAXBPageTranscript)10 PcGtsType (eu.transkribus.core.model.beans.pagecontent.PcGtsType)7 TrpPageType (eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)7 URL (java.net.URL)7 ArrayList (java.util.ArrayList)7 TrpDoc (eu.transkribus.core.model.beans.TrpDoc)6 TrpDocMetadata (eu.transkribus.core.model.beans.TrpDocMetadata)5 FileType (eu.transkribus.core.model.beans.mets.FileType)5 JAXBException (javax.xml.bind.JAXBException)5 TrpTextRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType)4 Dimension (java.awt.Dimension)4 FileNotFoundException (java.io.FileNotFoundException)4 CorruptImageException (eu.transkribus.core.exceptions.CorruptImageException)3 DivType (eu.transkribus.core.model.beans.mets.DivType)3 Fptr (eu.transkribus.core.model.beans.mets.DivType.Fptr)3 FileGrpType (eu.transkribus.core.model.beans.mets.FileGrpType)3