Search in sources :

Example 16 with CustomTag

use of eu.transkribus.core.model.beans.customtags.CustomTag in project TranskribusCore by Transkribus.

the class DocxBuilder method writeDocxForDoc.

/**
 * Exports the selected pages of a document into a single .docx file.
 *
 * <p>The export options are first copied into the builder's static fields, then a new
 * {@code WordprocessingMLPackage} is created, every selected page is rendered through
 * {@code writeDocxForTranscriptWithTables}, an INDEX field is appended and updated, and the
 * package is saved to {@code file}.
 *
 * @param doc the document whose pages are exported
 * @param wordBased forwarded unchanged to {@code writeDocxForTranscriptWithTables}
 * @param writeTags stored in the static field {@code exportTags}
 * @param doBlackeningSensibleData stored in the static field {@code doBlackening}
 * @param file destination the package is saved to
 * @param pageIndices zero-based indices into {@code doc.getPages()} to export; {@code null} exports all pages
 * @param monitor optional progress monitor (may be {@code null}); also polled for cancellation
 * @param createTitle if {@code true}, a title page followed by a page break precedes the first exported page
 * @param markUnclear stored in the static field {@code markUnclearWords}
 * @param expandAbbreviations stored in the static field {@code expandAbbrevs}
 * @param replaceAbbrevs stored in the static field {@code substituteAbbrevs}
 * @param keepLineBreaks stored in {@code preserveLineBreaks} and forwarded to the page renderer
 * @param showSuppliedInBrackets stored in the static field {@code showSuppliedWithBrackets}
 * @param ignoreSuppliedTags stored in the static field {@code ignoreSupplied}
 * @param cache supplies the selected tag names and, if available, pre-built page transcripts
 * @throws InterruptedException if the monitor reports that the export was cancelled
 * @throws JAXBException propagated from the transcript-building and docx4j calls
 * @throws IOException propagated from the transcript-building and docx4j calls
 * @throws Docx4JException propagated from the docx4j calls
 */
public static void writeDocxForDoc(TrpDoc doc, boolean wordBased, boolean writeTags, boolean doBlackeningSensibleData, File file, Set<Integer> pageIndices, IProgressMonitor monitor, boolean createTitle, boolean markUnclear, boolean expandAbbreviations, boolean replaceAbbrevs, boolean keepLineBreaks, boolean showSuppliedInBrackets, boolean ignoreSuppliedTags, ExportCache cache) throws JAXBException, IOException, Docx4JException, InterruptedException {
    // Raise the log level to DEBUG for the export, but only when the SLF4J binding really is
    // logback: an unconditional cast throws ClassCastException with any other binding.
    if (logger instanceof ch.qos.logback.classic.Logger) {
        ((ch.qos.logback.classic.Logger) logger).setLevel(ch.qos.logback.classic.Level.DEBUG);
    }

    // Publish the export options to the static state read by the rendering helpers.
    exportTags = writeTags;
    doBlackening = doBlackeningSensibleData;
    // NOTE(review): cache is dereferenced here although it is null-checked further down when
    // transcripts are loaded; a null cache therefore still fails at this line - confirm whether
    // callers may pass null.
    tagnames = cache.getOnlySelectedTagnames(ExportUtils.getOnlyWantedTagnames(CustomTagFactory.getRegisteredTagNames()));
    markUnclearWords = markUnclear;
    expandAbbrevs = expandAbbreviations;
    preserveLineBreaks = keepLineBreaks;
    substituteAbbrevs = replaceAbbrevs;
    showSuppliedWithBrackets = showSuppliedInBrackets;
    ignoreSupplied = ignoreSuppliedTags;

    // main document part
    wordMLPackage = WordprocessingMLPackage.createPackage();
    MainDocumentPart mdp = wordMLPackage.getMainDocumentPart();
    org.docx4j.wml.ObjectFactory factory = Context.getWmlObjectFactory();

    List<TrpPage> pages = doc.getPages();
    int totalPages = pageIndices == null ? pages.size() : pageIndices.size();
    if (monitor != null) {
        monitor.beginTask("Exporting to docx", totalPages);
    }

    // paragraph holding a page break, inserted after the title page
    Br objBr = new Br();
    objBr.setType(STBrType.PAGE);
    P pageBreakP = factory.createP();
    pageBreakP.getContent().add(objBr);

    // number of pages exported so far (differs from i when pageIndices filters pages)
    int c = 0;
    boolean atLeastOnePageWritten = false;
    for (int i = 0; i < pages.size(); ++i) {
        if (pageIndices != null && !pageIndices.contains(i)) {
            continue;
        }
        // The title page is written lazily, right before the first page that is actually exported.
        if (!atLeastOnePageWritten && createTitle) {
            addTitlePage(doc, mdp);
            mdp.addObject(pageBreakP);
        }
        if (monitor != null) {
            if (monitor.isCanceled()) {
                throw new InterruptedException("Export canceled by the user");
            }
            monitor.subTask("Processing page " + (c + 1));
        }

        // Prefer the transcript already held by the cache; otherwise build it from the page's
        // current transcript metadata.
        JAXBPageTranscript tr = null;
        if (cache != null) {
            tr = cache.getPageTranscriptAtIndex(i);
        }
        if (tr == null) {
            TrpPage page = pages.get(i);
            TrpTranscriptMetadata md = page.getCurrentTranscript();
            tr = new JAXBPageTranscript(md);
            tr.build();
        }
        TrpPageType trpPage = tr.getPage();

        logger.debug("writing docx for the page {}/{}", i + 1, doc.getNPages());
        writeDocxForTranscriptWithTables(mdp, trpPage, wordBased, preserveLineBreaks);
        atLeastOnePageWritten = true;
        ++c;
        if (monitor != null) {
            // worked() expects the increment since the last call, not the running total;
            // passing the cumulative count made the monitor advance by 1 + 2 + 3 + ...
            monitor.worked(1);
        }
    }

    // Append the index paragraph and let docx4j update the fields.
    P p = factory.createP();
    mdp.getContent().add(p);
    addComplexField(p, " INDEX \\e \"", "\" \\c \"1\" \\z \"1031\"");
    FieldUpdater updater = new FieldUpdater(wordMLPackage);
    updater.update(true);

    // The former per-tag summary appended after the last page was wrapped in `if (false)` and
    // could never run; that unreachable block has been removed.

    // finally save the file
    wordMLPackage.save(file);
    logger.info("Saved " + file.getAbsolutePath());
}
Also used : JAXBPageTranscript(eu.transkribus.core.model.beans.JAXBPageTranscript) TrpTranscriptMetadata(eu.transkribus.core.model.beans.TrpTranscriptMetadata) CustomTag(eu.transkribus.core.model.beans.customtags.CustomTag) Logger(org.slf4j.Logger) RPr(org.docx4j.wml.RPr) P(org.docx4j.wml.P) U(org.docx4j.wml.U) R(org.docx4j.wml.R) Text(org.docx4j.wml.Text) TrpPageType(eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType) P(org.docx4j.wml.P) FieldUpdater(org.docx4j.model.fields.FieldUpdater) TrpPage(eu.transkribus.core.model.beans.TrpPage) MainDocumentPart(org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart) Br(org.docx4j.wml.Br)

Example 17 with CustomTag

use of eu.transkribus.core.model.beans.customtags.CustomTag in project TranskribusCore by Transkribus.

the class DocxBuilder method addIndexEntry.

/**
 * Adds one index-entry ({@code XE}) complex field to {@code p} for every custom tag that
 * {@code idxList} holds for position {@code idx}.
 *
 * <p>The entry text is the slice of {@code textStr} from the tag's offset up to {@code idx}.
 * For right-to-left text the slice is reversed. For an {@link AbbrevTag} with a non-empty
 * expansion, the expansion is attached in square brackets (after the text for left-to-right,
 * before it for right-to-left). Entries whose text consists only of {@code '*'} characters are
 * skipped - presumably blackened text; confirm against {@code ExportUtils.blackenString}.
 *
 * @param idx position in {@code textStr} used both as lookup key in {@code idxList} and as the
 *            exclusive end of the indexed text
 * @param p paragraph that receives the index fields
 * @param textStr text the tags refer to
 * @param rtl whether the text is written right-to-left
 */
private static void addIndexEntry(int idx, P p, String textStr, boolean rtl) {
    List<CustomTag> tagsAtThisPlace = idxList.get(idx);
    if (tagsAtThisPlace == null) {
        // nothing registered for this position
        return;
    }
    for (CustomTag ct : tagsAtThisPlace) {
        String idxText = textStr.substring(ct.getOffset(), idx);
        if (rtl) {
            idxText = reverseString(idxText);
        }
        if (ct instanceof AbbrevTag) {
            String expansion = ((AbbrevTag) ct).getExpansion();
            // A missing expansion is treated like an empty one instead of failing with an NPE.
            if (expansion != null && !expansion.isEmpty()) {
                if (rtl) {
                    idxText = "[" + reverseString(expansion) + "] " + idxText;
                } else {
                    idxText = idxText + " [" + expansion + "]";
                }
            }
        }
        if (!idxText.matches("[*]+")) {
            addComplexField(p, "XE\"" + ct.getTagName() + ":" + idxText + "\"", "");
        }
    }
}
Also used : CustomTag(eu.transkribus.core.model.beans.customtags.CustomTag) AbbrevTag(eu.transkribus.core.model.beans.customtags.AbbrevTag)

Example 18 with CustomTag

use of eu.transkribus.core.model.beans.customtags.CustomTag in project TranskribusCore by Transkribus.

the class TrpRtfBuilder method getRtfTextForShapeElement.

/**
 * Builds the formatted RTF text for a single shape element.
 *
 * <p>If blackening is enabled, every tag (non-indexed and indexed) whose name equals the
 * lower-cased {@code RegionTypeUtil.BLACKENING_REGION} is applied to the text via
 * {@code ExportUtils.blackenString}. The text is then split into one {@link RtfText} per
 * {@code char}; each one is formatted with the element's overall text style and afterwards with
 * every {@link TextStyleTag} whose range covers that position.
 *
 * @param element shape element providing the text, custom tags and text styles
 * @return the concatenation of all per-character RTF fragments
 * @throws IOException if the element has no text or no custom tag list
 */
private static RtfText getRtfTextForShapeElement(ITrpShapeType element) throws IOException {
    String textStr = element.getUnicodeText();
    CustomTagList cl = element.getCustomTagList();
    if (textStr == null || cl == null)
        throw new IOException("Element has no text or custom tag list: " + element + ", class: " + element.getClass().getName());

    if (doBlackening) {
        // Locale.ROOT keeps the lower-casing independent of the JVM default locale (the
        // no-argument variant maps 'I' differently under e.g. a Turkish locale, which would
        // make the comparison below fail). Computed once instead of once per tag.
        final String blackeningTagName = RegionTypeUtil.BLACKENING_REGION.toLowerCase(java.util.Locale.ROOT);
        for (CustomTag nonIndexedTag : cl.getNonIndexedTags()) {
            if (nonIndexedTag.getTagName().equals(blackeningTagName)) {
                textStr = ExportUtils.blackenString(nonIndexedTag, textStr);
            }
        }
        for (CustomTag indexedTag : cl.getIndexedTags()) {
            if (indexedTag.getTagName().equals(blackeningTagName)) {
                textStr = ExportUtils.blackenString(indexedTag, textStr);
            }
        }
    }

    List<TextStyleTag> textStylesTags = element.getTextStyleTags();

    // One fragment per UTF-16 char: the style tag offsets below are compared against the same
    // char index, so the split must stay char-based.
    RtfText[] chars = new RtfText[textStr.length()];
    for (int i = 0; i < textStr.length(); ++i) {
        chars[i] = RtfText.text(textStr.substring(i, i + 1));
        // format according to "global" text style
        chars[i] = formatRtfText(chars[i], element.getTextStyle());
        // format according to custom style tags: apply every tag whose range covers this char
        for (TextStyleTag styleTag : textStylesTags) {
            if (i >= styleTag.getOffset() && i < (styleTag.getOffset() + styleTag.getLength())) {
                chars[i] = formatRtfText(chars[i], styleTag.getTextStyle());
            }
        }
        // TODO: format according to the remaining (non-style) indexed and non-indexed tags
        // TODO: include structure types!! (also possible in custom tags!!)
        // TODO: include reading order!!!
    }
    return RtfText.text(false, (Object[]) chars);
}
Also used : TextStyleTag(eu.transkribus.core.model.beans.customtags.TextStyleTag) RtfText(com.tutego.jrtf.RtfText) CustomTag(eu.transkribus.core.model.beans.customtags.CustomTag) CustomTagList(eu.transkribus.core.model.beans.customtags.CustomTagList) IOException(java.io.IOException)

Example 19 with CustomTag

use of eu.transkribus.core.model.beans.customtags.CustomTag in project TranskribusCore by Transkribus.

the class ExportCache method getTagsForShapeElement.

/**
 * Stores the custom tags of a shape element together with the element's text.
 *
 * <p>Order of work: all non-indexed tags except {@code readingOrder} are stored with the
 * original text; then, if blackening is enabled, each indexed {@link BlackeningTag} is applied
 * to the text; finally every indexed tag is stored with the (possibly blackened) text.
 *
 * @param element shape element whose text and custom tag list are read
 * @throws IOException if the element has no text or no custom tag list
 */
private void getTagsForShapeElement(ITrpShapeType element) throws IOException {
    String text = element.getUnicodeText();
    CustomTagList tagList = element.getCustomTagList();
    if (text == null || tagList == null) {
        throw new IOException("Element has no text or custom tag list: " + element + ", class: " + element.getClass().getName());
    }

    // non-indexed tags: everything but the reading order is stored
    for (CustomTag tag : tagList.getNonIndexedTags()) {
        if (tag.getTagName().equals("readingOrder")) {
            continue;
        }
        storeCustomTag(tag, text);
    }

    // blacken the text first, so that the indexed tags stored below see the blackened version
    if (doBlackening) {
        for (CustomTag tag : tagList.getIndexedTags()) {
            if (!(tag instanceof BlackeningTag)) {
                continue;
            }
            text = ExportUtils.blackenString(tag, text);
        }
    }

    // indexed tags: all of them are stored
    for (CustomTag tag : tagList.getIndexedTags()) {
        storeCustomTag(tag, text);
    }
}
Also used : BlackeningTag(eu.transkribus.core.model.beans.customtags.BlackeningTag) CustomTag(eu.transkribus.core.model.beans.customtags.CustomTag) CustomTagList(eu.transkribus.core.model.beans.customtags.CustomTagList) IOException(java.io.IOException)

Aggregations

CustomTag (eu.transkribus.core.model.beans.customtags.CustomTag)19 CustomTagList (eu.transkribus.core.model.beans.customtags.CustomTagList)9 TextStyleTag (eu.transkribus.core.model.beans.customtags.TextStyleTag)5 IOException (java.io.IOException)5 TrpPageType (eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)4 TrpTextLineType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType)4 Point (java.awt.Point)4 ArrayList (java.util.ArrayList)4 AbbrevTag (eu.transkribus.core.model.beans.customtags.AbbrevTag)3 HashMap (java.util.HashMap)3 Map (java.util.Map)3 Entry (java.util.Map.Entry)3 RtfText (com.tutego.jrtf.RtfText)2 JAXBPageTranscript (eu.transkribus.core.model.beans.JAXBPageTranscript)2 TrpPage (eu.transkribus.core.model.beans.TrpPage)2 TrpTranscriptMetadata (eu.transkribus.core.model.beans.TrpTranscriptMetadata)2 CommentTag (eu.transkribus.core.model.beans.customtags.CommentTag)2 GapTag (eu.transkribus.core.model.beans.customtags.GapTag)2 TrpBaselineType (eu.transkribus.core.model.beans.pagecontent_trp.TrpBaselineType)2 TrpTextRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType)2