Search in sources :

Example 6 with TrpTranscriptMetadata

use of eu.transkribus.core.model.beans.TrpTranscriptMetadata in project TranskribusCore by Transkribus.

the class TrpRtfBuilder method writeRtfForDoc.

/**
 * Exports the selected pages of a document into one RTF file and, if requested, appends a
 * section listing the tags selected in the export cache.
 * <p>
 * Side effects: the static fields {@code exportTags}, {@code tagnames} and {@code doBlackening}
 * are overwritten with the values passed to this call.
 *
 * @param doc          the document whose pages are exported
 * @param wordBased    forwarded to {@code getRtfParagraphsForTranscript} for every page
 * @param writeTags    if {@code true}, a tag overview section is appended after the last page
 * @param doBlackening stored in the static {@code doBlackening} field
 * @param file         destination RTF file
 * @param pageIndices  zero-based indices of the pages to export, or {@code null} for all pages
 * @param monitor      optional progress monitor; may be {@code null}. If it reports cancellation
 *                     the method returns without writing the file
 * @param cache        export cache providing the selected tag names and the tag values per name
 * @throws JAXBException declared for transcript loading
 * @throws IOException   if the transcript cannot be read or the RTF file cannot be written
 */
public static void writeRtfForDoc(TrpDoc doc, boolean wordBased, boolean writeTags, boolean doBlackening, File file, Set<Integer> pageIndices, IProgressMonitor monitor, ExportCache cache) throws JAXBException, IOException {
    exportTags = writeTags;
    // tagnames = all tags chosen by the user in the export dialog
    tagnames = cache.getSelectedTags();
    TrpRtfBuilder.doBlackening = doBlackening;
    Rtf rtf = Rtf.rtf();
    List<TrpPage> pages = doc.getPages();
    int totalPages = pageIndices == null ? pages.size() : pageIndices.size();
    if (monitor != null) {
        monitor.beginTask("Exporting to RTF", totalPages);
    }
    int c = 0;
    for (int i = 0; i < pages.size(); ++i) {
        if (pageIndices != null && !pageIndices.contains(i)) {
            continue;
        }
        if (monitor != null) {
            if (monitor.isCanceled()) {
                logger.debug("RTF export cancelled!");
                return;
            }
            monitor.subTask("Processing page " + (c + 1));
        }
        TrpPage page = pages.get(i);
        TrpTranscriptMetadata md = page.getCurrentTranscript();
        JAXBPageTranscript tr = new JAXBPageTranscript(md);
        tr.build();
        TrpPageType trpPage = tr.getPage();
        logger.debug("writing rtf for page " + (i + 1) + "/" + doc.getNPages());
        // colour table entries 0 and 1 are registered together with each page section
        rtf.header(color(204, 0, 0).at(0), color(0, 0xff, 0).at(1)).section(getRtfParagraphsForTranscript(trpPage, wordBased));
        ++c;
        if (monitor != null) {
            // NOTE(review): passes the cumulative page count; if this is Eclipse's
            // IProgressMonitor, worked() expects an increment - confirm the monitor's contract.
            monitor.worked(c);
        }
    }
    // write tags at end of last page
    if (exportTags) {
        ArrayList<RtfPara> tagParas = new ArrayList<RtfPara>();
        for (String currTagname : tagnames) {
            // all custom tags with this name, mapped to their text
            HashMap<CustomTag, String> allTagsOfThisTagname = cache.getTags(currTagname);
            if (allTagsOfThisTagname == null || allTagsOfThisTagname.isEmpty()) {
                continue;
            }
            // underlined headline with the number of tags, followed by one line per tag value
            tagParas.add(RtfPara.p(RtfText.text(RtfText.underline(currTagname + " tags in this document: " + allTagsOfThisTagname.size()))));
            Collection<String> valueSet = allTagsOfThisTagname.values();
            RtfText[] tagTexts = new RtfText[valueSet.size()];
            int l = 0;
            for (String currEntry : valueSet) {
                tagTexts[l++] = RtfText.text(currEntry.concat("\n"));
            }
            tagParas.add(RtfPara.p(tagTexts));
        }
        rtf.header(color(204, 0, 0).at(0)).section(tagParas);
    }
    // try-with-resources guarantees the file handle is released even if writing fails;
    // closing a Writer that is already closed has no effect
    try (FileWriter writer = new FileWriter(file)) {
        rtf.out(writer);
    }
    logger.info("wrote rtf to: " + file.getAbsolutePath());
}
Also used : JAXBPageTranscript(eu.transkribus.core.model.beans.JAXBPageTranscript) Rtf(com.tutego.jrtf.Rtf) TrpPage(eu.transkribus.core.model.beans.TrpPage) RtfText(com.tutego.jrtf.RtfText) FileWriter(java.io.FileWriter) ArrayList(java.util.ArrayList) TrpTranscriptMetadata(eu.transkribus.core.model.beans.TrpTranscriptMetadata) CustomTag(eu.transkribus.core.model.beans.customtags.CustomTag) RtfPara(com.tutego.jrtf.RtfPara) TrpPageType(eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)

Example 7 with TrpTranscriptMetadata

use of eu.transkribus.core.model.beans.TrpTranscriptMetadata in project TranskribusCore by Transkribus.

the class TrpRtfBuilder method writeRtfForDoc.

/**
 * Exports the selected pages of a document into one RTF file, one RTF section per page.
 *
 * @param doc         the document whose pages are exported
 * @param wordBased   forwarded to {@code getRtfParagraphsForTranscript} for every page
 * @param file        destination RTF file
 * @param pageIndices zero-based indices of the pages to export, or {@code null} for all pages
 * @param monitor     optional progress monitor; may be {@code null}. If it reports cancellation
 *                    the method returns without writing the file
 * @throws JAXBException declared for transcript loading
 * @throws IOException   if the transcript cannot be read or the RTF file cannot be written
 */
public static void writeRtfForDoc(TrpDoc doc, boolean wordBased, File file, Set<Integer> pageIndices, IProgressMonitor monitor) throws JAXBException, IOException {
    Rtf rtf = Rtf.rtf();
    List<TrpPage> pages = doc.getPages();
    int totalPages = pageIndices == null ? pages.size() : pageIndices.size();
    if (monitor != null) {
        monitor.beginTask("Exporting to RTF", totalPages);
    }
    int c = 0;
    for (int i = 0; i < pages.size(); ++i) {
        if (pageIndices != null && !pageIndices.contains(i)) {
            continue;
        }
        if (monitor != null) {
            if (monitor.isCanceled()) {
                logger.debug("RTF export cancelled!");
                return;
            }
            monitor.subTask("Processing page " + (c + 1));
        }
        TrpPage page = pages.get(i);
        TrpTranscriptMetadata md = page.getCurrentTranscript();
        JAXBPageTranscript tr = new JAXBPageTranscript(md);
        tr.build();
        TrpPageType trpPage = tr.getPage();
        logger.debug("writing rtf for page " + (i + 1) + "/" + doc.getNPages());
        rtf.section(getRtfParagraphsForTranscript(trpPage, wordBased));
        ++c;
        if (monitor != null) {
            // NOTE(review): passes the cumulative page count; if this is Eclipse's
            // IProgressMonitor, worked() expects an increment - confirm the monitor's contract.
            monitor.worked(c);
        }
    }
    // try-with-resources guarantees the file handle is released even if writing fails;
    // closing a Writer that is already closed has no effect
    try (FileWriter writer = new FileWriter(file)) {
        rtf.out(writer);
    }
    logger.info("wrote rtf to: " + file.getAbsolutePath());
}
Also used : JAXBPageTranscript(eu.transkribus.core.model.beans.JAXBPageTranscript) Rtf(com.tutego.jrtf.Rtf) TrpPage(eu.transkribus.core.model.beans.TrpPage) FileWriter(java.io.FileWriter) TrpTranscriptMetadata(eu.transkribus.core.model.beans.TrpTranscriptMetadata) TrpPageType(eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)

Example 8 with TrpTranscriptMetadata

use of eu.transkribus.core.model.beans.TrpTranscriptMetadata in project TranskribusCore by Transkribus.

the class ExportCache method storePageTranscripts4Export.

/**
 * Loads the transcripts that are to be exported and stores them in {@code pageTranscripts},
 * one entry per page of the document so that list index and page index stay aligned.
 * Pages that are not selected, or that have no transcript matching the requested version,
 * are represented by a {@code null} entry.
 * <p>
 * Which transcript is used per page depends on {@code versionStatus}:
 * <ul>
 * <li>contains {@code "Latest"}: the page's current transcript</li>
 * <li>contains {@code "Loaded"}: {@code loadedTranscript} for the page at {@code pageIdx}
 *     (if not {@code null}), the current transcript for all other pages</li>
 * <li>anything else: the result of {@code page.getTranscriptWithStatusOrNull(versionStatus)}</li>
 * </ul>
 *
 * @param doc              the document whose page transcripts are loaded
 * @param pageIndices      zero-based indices of the selected pages, or {@code null} for all
 *                         pages. This set is modified: indices of pages without a matching
 *                         transcript are removed from it
 * @param monitor          optional progress monitor; may be {@code null}
 * @param versionStatus    version selector, see above
 * @param pageIdx          index of the page that {@code loadedTranscript} belongs to
 * @param loadedTranscript the transcript currently loaded by the caller; may be {@code null}
 * @throws Exception if the monitor reports cancellation or a transcript cannot be built
 */
public void storePageTranscripts4Export(TrpDoc doc, Set<Integer> pageIndices, IProgressMonitor monitor, String versionStatus, int pageIdx, TrpTranscriptMetadata loadedTranscript) throws Exception {
    pageTranscripts = new ArrayList<JAXBPageTranscript>();
    List<TrpPage> pages = doc.getPages();
    int totalPages = pages.size();
    int c = 0;
    for (int i = 0; i < totalPages; ++i) {
        if (pageIndices != null && !pageIndices.contains(i)) {
            // fill up with null to have the proper index of each page later on
            pageTranscripts.add(null);
            continue;
        }
        if (monitor != null && monitor.isCanceled()) {
            throw new Exception("User canceled the export");
        }
        TrpPage page = pages.get(i);
        TrpTranscriptMetadata md;
        if (versionStatus.contains("Latest")) {
            // current transcript
            md = page.getCurrentTranscript();
        } else if (versionStatus.contains("Loaded")) {
            // the loaded version is exported for the loaded page, the latest for all other pages
            if (i == pageIdx && loadedTranscript != null) {
                md = loadedTranscript;
            } else {
                md = page.getCurrentTranscript();
            }
        } else {
            md = page.getTranscriptWithStatusOrNull(versionStatus);
        }
        if (md == null) {
            // No version with the requested status: drop the page from the selection so it is
            // not exported ...
            if (pageIndices != null) {
                pageIndices.remove(Integer.valueOf(i));
            }
            // ... and always keep a placeholder, also when all pages are selected
            // (pageIndices == null); otherwise every later entry would shift by one and no
            // longer match its page index.
            pageTranscripts.add(null);
            continue;
        }
        JAXBPageTranscript tr = new JAXBPageTranscript(md);
        tr.build();
        pageTranscripts.add(tr);
        logger.debug("Loaded Transcript from page " + (i + 1));
        if (monitor != null) {
            monitor.setTaskName("Loaded Transcript from page " + (i + 1));
            // NOTE(review): passes the cumulative count; if this is Eclipse's IProgressMonitor,
            // worked() expects an increment - confirm the monitor's contract.
            monitor.worked(++c);
        }
    }
}
Also used : JAXBPageTranscript(eu.transkribus.core.model.beans.JAXBPageTranscript) TrpPage(eu.transkribus.core.model.beans.TrpPage) TrpTranscriptMetadata(eu.transkribus.core.model.beans.TrpTranscriptMetadata) IOException(java.io.IOException) JAXBException(javax.xml.bind.JAXBException)

Example 9 with TrpTranscriptMetadata

use of eu.transkribus.core.model.beans.TrpTranscriptMetadata in project TranskribusCore by Transkribus.

the class PageXmlDaoTest method main.

/**
 * Manual smoke test: builds the first transcript of the first page of a fake document,
 * prints its metadata, renumbers the ids of its text regions and prints the ids of all
 * text regions found in the page's region list.
 *
 * @param args unused
 */
public static void main(String[] args) {
    TrpDoc fakeDoc = FakeDocProvider.create(false);
    TrpTranscriptMetadata firstTranscriptMd = fakeDoc.getPages().get(0).getTranscripts().get(0);
    try {
        JAXBPageTranscript pageTranscript = new JAXBPageTranscript(firstTranscriptMd);
        pageTranscript.build();

        // check the unmarshalled JAXB root element
        PcGtsType pcGts = pageTranscript.getPageData();
        if (pcGts == null) {
            System.out.println("page XML is null");
            System.exit(0);
        }
        PageType pageElement = pcGts.getPage();
        System.out.println(pcGts.getMetadata());
        if (pageElement == null) {
            System.out.println("pagetype element is null");
            System.exit(0);
        }
        if (pageElement.getTextRegionOrImageRegionOrLineDrawingRegion() == null) {
            System.out.println("Region list is null");
            System.exit(0);
        }

        // give every text region a running number as id
        int nextId = 0;
        for (TextRegionType textRegion : pageTranscript.getPage().getTextRegions(true)) {
            textRegion.setId("" + nextId);
            nextId++;
        }

        // print the ids of all text regions contained in the region list
        List<TrpRegionType> allRegions = pageElement.getTextRegionOrImageRegionOrLineDrawingRegion();
        for (RegionType region : allRegions) {
            if (!(region instanceof TextRegionType)) {
                continue;
            }
            System.out.println(((TextRegionType) region).getId());
        }
    } catch (IllegalArgumentException | IOException e) {
        e.printStackTrace();
    }
}
Also used : TrpRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType) RegionType(eu.transkribus.core.model.beans.pagecontent.RegionType) TextRegionType(eu.transkribus.core.model.beans.pagecontent.TextRegionType) JAXBPageTranscript(eu.transkribus.core.model.beans.JAXBPageTranscript) TrpTranscriptMetadata(eu.transkribus.core.model.beans.TrpTranscriptMetadata) IOException(java.io.IOException) PcGtsType(eu.transkribus.core.model.beans.pagecontent.PcGtsType) TextRegionType(eu.transkribus.core.model.beans.pagecontent.TextRegionType) TrpRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType) TrpDoc(eu.transkribus.core.model.beans.TrpDoc) PageType(eu.transkribus.core.model.beans.pagecontent.PageType)

Example 10 with TrpTranscriptMetadata

use of eu.transkribus.core.model.beans.TrpTranscriptMetadata in project TranskribusCore by Transkribus.

the class LocalDocReader method buildPage.

// private static void startThumbCreationThread(final TrpDoc doc) {
// Runnable thumbCreator = new Runnable(){
// @Override
// public void run() {
// try{
// LocalDocWriter.createThumbsForDoc(doc, false);
// }catch (Exception e){
// logger.error(e);
// return;
// }
// }
// };
// new Thread(thumbCreator).start();
// }
/**
 * Builds a {@link TrpPage} for a page of a local document with its file URLs set.
 * The page key is set to {@code null} and the document id to {@code -1}.
 *
 * @param inputDir
 *            the folder of the local document; stored as local folder on the transcript
 * @param pageNr
 *            number of the page to be built
 * @param img
 *            the image file to include; if {@code null} the image file name is set to
 *            {@code LocalDocConst.NO_IMAGE_FILENAME} and no image URL is derived from it
 * @param pageXml
 *            the corresponding PAGE XML; if {@code null} no transcript is added
 * @param thumb
 *            the thumbnail file for this image; may be {@code null}
 * @param dim
 *            the image dimension used for width and height of the page; may be {@code null}
 * @param missingImageRemark
 *            if not empty, the page URL is replaced by the dummy image URL and the remark
 *            is stored as image file problem
 * @return the page; it gets one transcript entry with status {@code EditStatus.NEW} if
 *         {@code pageXml} is not {@code null}
 * @throws IOException
 *             declared by this method; URLs are constructed from the given files
 */
protected static TrpPage buildPage(File inputDir, int pageNr, File img, File pageXml, File thumb, Dimension dim, final String missingImageRemark) throws IOException {
    final String xmlName = pageXml == null ? "null" : pageXml.getName();
    final String imgName = img == null ? "null" : img.getName();
    logger.debug(pageNr + ": XML = " + xmlName + " - IMG = " + imgName);

    // FIXME handle broken images
    final TrpPage result = new TrpPage();
    result.setPageNr(pageNr);
    result.setKey(null);
    result.setDocId(-1);

    if (img == null) {
        result.setImgFileName(LocalDocConst.NO_IMAGE_FILENAME);
    } else {
        result.setImgFileName(img.getName());
        result.setUrl(img.toURI().toURL());
    }

    // a remark about a missing image overrides any image URL set above
    final boolean hasImageProblem = !StringUtils.isEmpty(missingImageRemark);
    if (hasImageProblem) {
        result.setUrl(LocalDocConst.getDummyImageUrl());
        result.setImgFileProblem(missingImageRemark);
    }

    if (thumb != null) {
        result.setThumbUrl(thumb.toURI().toURL());
    }

    if (dim != null) {
        result.setWidth(dim.width);
        result.setHeight(dim.height);
    }

    if (pageXml == null) {
        return result;
    }

    final URL pageXmlUrl = pageXml.toURI().toURL();
    final TrpTranscriptMetadata transcriptMd = new TrpTranscriptMetadata();
    transcriptMd.setPageReferenceForLocalDocs(result);
    transcriptMd.setPageNr(pageNr);
    transcriptMd.setKey(null);
    transcriptMd.setUrl(pageXmlUrl);
    // TODO real status, time and user parsed from PageXML?
    transcriptMd.setStatus(EditStatus.NEW);
    transcriptMd.setLocalFolder(inputDir);
    transcriptMd.setTimestamp(new Date().getTime());
    transcriptMd.setUserName("LocalDocReader");
    result.getTranscripts().add(transcriptMd);
    return result;
}
Also used : TrpPage(eu.transkribus.core.model.beans.TrpPage) TrpTranscriptMetadata(eu.transkribus.core.model.beans.TrpTranscriptMetadata) URL(java.net.URL) Date(java.util.Date)

Aggregations

TrpTranscriptMetadata (eu.transkribus.core.model.beans.TrpTranscriptMetadata)21 TrpPage (eu.transkribus.core.model.beans.TrpPage)14 JAXBPageTranscript (eu.transkribus.core.model.beans.JAXBPageTranscript)11 TrpPageType (eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)7 IOException (java.io.IOException)7 File (java.io.File)6 URL (java.net.URL)4 PcGtsType (eu.transkribus.core.model.beans.pagecontent.PcGtsType)3 FileOutputStream (java.io.FileOutputStream)3 ArrayList (java.util.ArrayList)3 Date (java.util.Date)3 JAXBException (javax.xml.bind.JAXBException)3 Rtf (com.tutego.jrtf.Rtf)2 TrpDoc (eu.transkribus.core.model.beans.TrpDoc)2 CustomTag (eu.transkribus.core.model.beans.customtags.CustomTag)2 Fptr (eu.transkribus.core.model.beans.mets.DivType.Fptr)2 FileType (eu.transkribus.core.model.beans.mets.FileType)2 Mets (eu.transkribus.core.model.beans.mets.Mets)2 TextLineType (eu.transkribus.core.model.beans.pagecontent.TextLineType)2 WordType (eu.transkribus.core.model.beans.pagecontent.WordType)2