Examples with TextStyleTag - eu.transkribus.core.model.beans.customtags.TextStyleTag

Example 11 with TextStyleTag

use of eu.transkribus.core.model.beans.customtags.TextStyleTag in project TranskribusCore by Transkribus.

From the class TrpTeiStringBuilder, method createTagStart:

/**
 * Builds the opening markup that is written for a custom tag.
 * <p>
 * Known tag types are mapped to dedicated elements ({@code hi}, {@code choice/expan/abbr},
 * {@code persName}, {@code placeName}, {@code orgName}, {@code sp}, {@code gap}); any other tag is
 * written as an element named after the tag with its attributes. Attribute and element values
 * taken from the tag are XML-escaped.
 *
 * @param t the custom tag to open
 * @return the opening markup, or an empty string for a {@link CommentTag}
 */
String createTagStart(CustomTag t) {
    String ts = "";
    if (t instanceof TextStyleTag) {
        // TODO!!
        // NOTE(review): the css string is inserted unescaped into a single-quoted attribute -
        // confirm getAttributeCssStr() can never contain quotes or markup characters.
        TextStyleTag tst = (TextStyleTag) t;
        ts = "<hi rend='" + tst.getAttributeCssStr() + "'>";
    } else if (t instanceof AbbrevTag) {
        // the <abbr> element stays open here; it wraps the tagged text that follows
        AbbrevTag at = (AbbrevTag) t;
        ts = "<choice><expan>" + StringEscapeUtils.escapeXml(at.getExpansion()) + "</expan><abbr>";
    } else if (t instanceof PersonTag) {
        PersonTag pt = (PersonTag) t;
        ts = "<persName>";
        if (!StringUtils.isEmpty(pt.getFirstname())) {
            ts += "<forename>" + StringEscapeUtils.escapeXml(pt.getFirstname()) + "</forename>";
        }
        if (!StringUtils.isEmpty(pt.getLastname())) {
            ts += "<surname>" + StringEscapeUtils.escapeXml(pt.getLastname()) + "</surname>";
        }
        if (!StringUtils.isEmpty(pt.getDateOfBirth())) {
            ts += "<birth>" + StringEscapeUtils.escapeXml(pt.getDateOfBirth()) + "</birth>";
        }
        // guard on the date of death itself (the date of birth was checked here before, so a
        // death date without a birth date was dropped and vice versa an empty <death> was written)
        if (!StringUtils.isEmpty(pt.getDateOfDeath())) {
            ts += "<death>" + StringEscapeUtils.escapeXml(pt.getDateOfDeath()) + "</death>";
        }
        if (!StringUtils.isEmpty(pt.getNotice())) {
            ts += "<notice>" + StringEscapeUtils.escapeXml(pt.getNotice()) + "</notice>";
        }
    } else if (t instanceof PlaceTag) {
        PlaceTag pt = (PlaceTag) t;
        ts = "<placeName>";
        if (!StringUtils.isEmpty(pt.getCountry())) {
            ts += "<country>" + StringEscapeUtils.escapeXml(pt.getCountry()) + "</country>";
        }
    } else if (t instanceof OrganizationTag) {
        ts = "<orgName>";
    } else if (t instanceof SpeechTag) {
        SpeechTag st = (SpeechTag) t;
        ts = "<sp>";
        if (!StringUtils.isEmpty(st.getSpeaker())) {
            ts += "<speaker>" + StringEscapeUtils.escapeXml(st.getSpeaker()) + "</speaker>";
        }
    } else if (t instanceof GapTag) {
        // a gap has no content, so it is written as a self-closed element
        ts = "<gap />";
    } else if (t instanceof CommentTag) {
        // do nothing because comment tag is added at the end of the tag entry as note in the createTagEnd method
        ts = "";
    } else {
        // general tag: element named after the tag, attributes copied except the positional ones
        ts = "<" + t.getTagName();
        for (String an : t.getAttributeNames()) {
            if (CustomTag.isOffsetOrLengthOrContinuedProperty(an)) {
                continue;
            }
            Object v = t.getAttributeValue(an);
            if (v != null) {
                ts += " " + StringEscapeUtils.escapeXml(an) + "='" + StringEscapeUtils.escapeXml(v.toString()) + "'";
            }
        }
        ts += ">";
    }
    return ts;
}

Also used : TextStyleTag(eu.transkribus.core.model.beans.customtags.TextStyleTag) CommentTag(eu.transkribus.core.model.beans.customtags.CommentTag) PlaceTag(eu.transkribus.core.model.beans.customtags.PlaceTag) OrganizationTag(eu.transkribus.core.model.beans.customtags.OrganizationTag) GapTag(eu.transkribus.core.model.beans.customtags.GapTag) PersonTag(eu.transkribus.core.model.beans.customtags.PersonTag) AbbrevTag(eu.transkribus.core.model.beans.customtags.AbbrevTag) SpeechTag(eu.transkribus.core.model.beans.customtags.SpeechTag)

Example 12 with TextStyleTag

use of eu.transkribus.core.model.beans.customtags.TextStyleTag in project TranskribusCore by Transkribus.

From the class DocxBuilder, method getFormattedTextForShapeElement:

/**
 * Converts the text of one shape element into docx runs and appends them to the given paragraph.
 * <p>
 * The method works in three steps:
 * <ol>
 * <li>The custom tags of the element are scanned once; depending on the export settings the
 * positions of gaps, comments, unclear words, abbreviations, supplied text and index entries are
 * remembered in the class-level lookup maps (and blackened text is replaced).</li>
 * <li>The text is walked character by character. At each index the remembered extras are emitted
 * as additional runs, then one run for the character itself is created and styled according to the
 * text style tags covering that index.</li>
 * <li>All runs are added to the paragraph - in reverse order and right-aligned if the text starts
 * with a right-to-left character - and the lookup maps are cleared.</li>
 * </ol>
 *
 * @param element shape whose unicode text and custom tags are exported
 * @param p paragraph that receives the runs
 * @param mdp main document part, handed on to {@code createFootnote} for comments
 * @throws IOException if the element has no text or no custom tag list
 * @throws Exception declared by the signature; presumably raised by the footnote/index helpers
 */
private static void getFormattedTextForShapeElement(ITrpShapeType element, P p, MainDocumentPart mdp) throws Exception {
    List<R> listOfallRuns = new ArrayList<R>();
    String textStr = element.getUnicodeText();
    CustomTagList cl = element.getCustomTagList();
    if (textStr == null || cl == null) {
        throw new IOException("Element has no text or custom tag list: " + element + ", class: " + element.getClass().getName());
    }
    // NOTE: returns without clearAllLists(), exactly like before; nothing has been collected yet
    if (textStr.isEmpty()) {
        return;
    }

    // the direction of the whole text is decided by its first character
    byte firstCharDirection = Character.getDirectionality(textStr.charAt(0));
    boolean rtl = firstCharDirection == Character.DIRECTIONALITY_RIGHT_TO_LEFT
            || firstCharDirection == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
            || firstCharDirection == Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
            || firstCharDirection == Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE;
    if (rtl) {
        logger.debug("&&&&&&&& STRING IS RTL : ");
        // NOTE(review): the result of this call is discarded and Strings are immutable, so textStr
        // is presumably unchanged. Left as is on purpose: actually dropping the first char would
        // shift every tag offset used below - confirm the intended behaviour before changing it.
        deleteCharAtIndex(0, textStr);
    }

    // ---- step 1a: non-indexed tags (blackening and gaps only; unclear and comments can not be non-indexed)
    for (CustomTag nonIndexedTag : cl.getNonIndexedTags()) {
        // exchange chars with * if wished to be blackened
        if (doBlackening && nonIndexedTag.getTagName().equals(RegionTypeUtil.BLACKENING_REGION.toLowerCase())) {
            textStr = ExportUtils.blackenString(nonIndexedTag, textStr);
        }
        // remember the gap position to add it to its corresponding run later on
        if (nonIndexedTag.getTagName().equals("gap")) {
            gapList.put(nonIndexedTag.getOffset(), (GapTag) nonIndexedTag);
        }
    }

    // ---- step 1b: indexed tags
    for (CustomTag indexedTag : cl.getIndexedTags()) {
        String tagName = indexedTag.getTagName();
        if (doBlackening && tagName.equals(RegionTypeUtil.BLACKENING_REGION.toLowerCase())) {
            textStr = ExportUtils.blackenString(indexedTag, textStr);
        }
        // gaps are keyed by their start offset
        if (tagName.equals("gap")) {
            gapList.put(indexedTag.getOffset(), (GapTag) indexedTag);
        }
        // comments are keyed by the index of the last tagged character (footnote goes behind it)
        if (tagName.equals("comment")) {
            CommentTag ct = (CommentTag) indexedTag;
            commentList.put(indexedTag.getEnd() - 1, ct.getComment());
        }
        // unclear words: start offset -> index of last character
        if (markUnclearWords && tagName.equals("unclear")) {
            unclearList.put(indexedTag.getOffset(), indexedTag.getEnd() - 1);
        }
        if ((expandAbbrevs || substituteAbbrevs) && tagName.equals("abbrev")) {
            AbbrevTag abbrev = (AbbrevTag) indexedTag;
            String expansion = abbrev.getExpansion();
            // only add if an expansion was typed (null-safe: a missing expansion is treated like an empty one)
            boolean hasExpansion = expansion != null && !expansion.isEmpty();
            if (expandAbbrevs) {
                logger.debug("abbrev tag found ");
                if (hasExpansion) {
                    // key is the end of the abbrev: the expansion is written behind it
                    expandAbbrevList.put(indexedTag.getEnd(), expansion);
                }
            }
            if (substituteAbbrevs && hasExpansion) {
                // key is the start of the abbrev: the expansion replaces it
                substituteAbbrevList.put(indexedTag.getOffset(), abbrev);
            }
        }
        if ((showSuppliedWithBrackets || ignoreSupplied) && tagName.equals("supplied")) {
            String suppliedText = ((SuppliedTag) indexedTag).getContainedText();
            // only add if the supplied tag contains text (null-safe)
            if (suppliedText != null && !suppliedText.isEmpty()) {
                if (showSuppliedWithBrackets) {
                    showSuppliedList.put(indexedTag.getOffset(), suppliedText);
                }
                if (ignoreSupplied) {
                    ignoreSuppliedList.put(indexedTag.getOffset(), suppliedText);
                }
            }
        }
        // create index for all choosen tagnames
        if (exportTags && tagnames.contains(tagName) && !tagName.equals("gap")) {
            addValuesToIdxList(idxList, indexedTag.getEnd(), indexedTag);
        }
    }

    List<TextStyleTag> textStylesTags = element.getTextStyleTags();

    // ---- step 2: walk the text; i runs up to and including length() so that extras keyed at the
    // very end of the text (e.g. an abbrev expansion at the end of a line) are still emitted
    for (int i = 0; i <= textStr.length(); ++i) {
        // true for the last character and for the position behind it
        boolean shapeEnded = i + 1 >= textStr.length();

        // the abbrev gets totally replaced by its expansion: write the expansion and skip the abbrev
        if (substituteAbbrevList.containsKey(i)) {
            AbbrevTag abbrev = substituteAbbrevList.get(i);
            String exp = abbrev.getExpansion();
            if (rtl) {
                exp = reverseString(exp);
            }
            listOfallRuns.add(newTextRun(exp, false));
            // go to end of the abbreviation and proceed with remaining text
            i += abbrev.getLength();
            shapeEnded = i == textStr.length();
        }

        // add expansion in brackets behind the abbrev (map key is the end index of the abbrev)
        if (expandAbbrevList.containsKey(i)) {
            String exp = expandAbbrevList.get(i);
            if (rtl) {
                exp = reverseString(exp);
            }
            listOfallRuns.add(newTextRun("[" + exp + "]", false));
        }

        // supplied text is written in brackets and the original characters are skipped
        if (showSuppliedList.containsKey(i)) {
            String supplied = showSuppliedList.get(i);
            String exp = rtl ? reverseString(supplied) : supplied;
            listOfallRuns.add(newTextRun("[" + exp + "]", false));
            // supplied is handled now - so set i to the end of supplied
            i += supplied.length();
            shapeEnded = i == textStr.length();
        }

        // supplied text gets ignored: just skip its characters
        if (ignoreSuppliedList.containsKey(i)) {
            i += ignoreSuppliedList.get(i).length();
            shapeEnded = i == textStr.length();
        }

        // gap at this position: extra run with [...] or, if the gap carries a 'supplied'
        // attribute, with that value handled as set in the export settings
        if (gapList.containsKey(i)) {
            GapTag gt = gapList.get(i);
            String cta = (String) gt.getAttributeValue("supplied");
            org.docx4j.wml.Text gapText = factory.createText();
            if (cta != null && !cta.equals("")) {
                // the supplied value is only shown when it is not ignored and brackets are wished;
                // otherwise the text node keeps no value (supplied is not shown by default)
                if (!ignoreSupplied && showSuppliedWithBrackets) {
                    gapText.setValue("[" + cta + "]");
                }
            } else {
                // nothing supplied, so show [...] for the gap tag
                gapText.setValue("[...]");
                gapText.setSpace("preserve");
            }
            R gapRun = factory.createR();
            gapRun.getContent().add(gapText);
            listOfallRuns.add(gapRun);
        }

        // begin of unclear word is marked with [ (mirrored for rtl because runs get reversed)
        if (unclearList.containsKey(i)) {
            listOfallRuns.add(newTextRun(rtl ? "]" : "[", false));
        }

        // if so we create an index entry for this text string in the docx
        if (idxList.containsKey(i)) {
            addIndexEntry(i, p, textStr, rtl);
        }

        // current single character; stays empty for the position behind the last character
        String currText = "";
        if (i + 1 <= textStr.length()) {
            currText = textStr.substring(i, i + 1);
        }

        /*
         * Hyphenation signs at the line end are dropped if line breaks are not preserved:
         * the not sign (initially used for soft hyphen by Diggitexx), the soft hyphen U+00AD and
         * the plain hyphen. The empty string covers the position behind the last character.
         * NOTE(review): the second literal read as empty in the original although its comment
         * names U+00AD; the soft hyphen is therefore checked explicitly - confirm.
         */
        boolean lineEndSign = currText.equals("¬") || currText.isEmpty() || currText.equals("\u00AD") || currText.equals("-");
        if (lineEndSign && !preserveLineBreaks && shapeEnded) {
            break;
        }

        R run = newTextRun(currText, true);
        listOfallRuns.add(run);

        // end of unclear tag
        if (unclearList.containsValue(i)) {
            listOfallRuns.add(newTextRun(rtl ? "[" : "]", false));
        }

        // the properties of this text section: check for each char if a special style should be set
        org.docx4j.wml.RPr rpr = factory.createRPr();
        for (TextStyleTag styleTag : textStylesTags) {
            boolean covered = i >= styleTag.getOffset() && i < (styleTag.getOffset() + styleTag.getLength());
            if (!covered) {
                continue;
            }
            TextStyleType ts = styleTag.getTextStyle();
            if (ts == null) {
                continue;
            }
            org.docx4j.wml.BooleanDefaultTrue b = new org.docx4j.wml.BooleanDefaultTrue();
            b.setVal(true);
            // letter-spaced, monospace, reverse video and serif are not mapped to docx yet
            if (CoreUtils.val(ts.isBold())) {
                rpr.setB(b);
            }
            if (CoreUtils.val(ts.isItalic())) {
                rpr.setI(b);
            }
            if (CoreUtils.val(ts.isSmallCaps())) {
                rpr.setSmallCaps(b);
            }
            if (CoreUtils.val(ts.isStrikethrough())) {
                rpr.setStrike(b);
            }
            if (CoreUtils.val(ts.isSubscript())) {
                org.docx4j.wml.CTVerticalAlignRun al = factory.createCTVerticalAlignRun();
                al.setVal(STVerticalAlignRun.SUBSCRIPT);
                rpr.setVertAlign(al);
            }
            // superscript is applied after subscript, so it wins if both are set
            if (CoreUtils.val(ts.isSuperscript())) {
                org.docx4j.wml.CTVerticalAlignRun al = factory.createCTVerticalAlignRun();
                al.setVal(STVerticalAlignRun.SUPERSCRIPT);
                rpr.setVertAlign(al);
            }
            if (CoreUtils.val(ts.isUnderlined())) {
                U u = factory.createU();
                u.setVal(UnderlineEnumeration.SINGLE);
                rpr.setU(u);
            }
        }
        // add the run properties (= text styles) to the run
        run.setRPr(rpr);

        // footnote/comment: created behind the last character of the commented text
        if (commentList.containsKey(i)) {
            R fnRun = factory.createR();
            createFootnote(commentList.get(i), fnRun, mdp);
            listOfallRuns.add(fnRun);
        }

        // add space at end of line if line breaks are not preserved
        if (!preserveLineBreaks && shapeEnded) {
            listOfallRuns.add(newTextRun(" ", true));
        }
    }

    // ---- step 3: hand the runs over to the paragraph
    if (rtl) {
        PPr paragraphProperties = factory.createPPr();
        Jc justification = factory.createJc();
        justification.setVal(JcEnumeration.RIGHT);
        paragraphProperties.setJc(justification);
        p.setPPr(paragraphProperties);
        // right-to-left text: runs are added in reverse order
        for (int i = listOfallRuns.size() - 1; i >= 0; i--) {
            p.getContent().add(listOfallRuns.get(i));
        }
    } else {
        p.getContent().addAll(listOfallRuns);
    }
    clearAllLists();
}

/**
 * Creates a run that contains exactly one text node.
 *
 * @param value text of the node
 * @param preserveSpace whether the node is marked with space="preserve"
 * @return the new run; it is not yet attached to any paragraph
 */
private static R newTextRun(String value, boolean preserveSpace) {
    org.docx4j.wml.Text text = factory.createText();
    text.setValue(value);
    if (preserveSpace) {
        text.setSpace("preserve");
    }
    R run = factory.createR();
    run.getContent().add(text);
    return run;
}

Also used : ArrayList(java.util.ArrayList) CustomTag(eu.transkribus.core.model.beans.customtags.CustomTag) RPr(org.docx4j.wml.RPr) R(org.docx4j.wml.R) U(org.docx4j.wml.U) R(org.docx4j.wml.R) Jc(org.docx4j.wml.Jc) Text(org.docx4j.wml.Text) TextStyleType(eu.transkribus.core.model.beans.pagecontent.TextStyleType) SuppliedTag(eu.transkribus.core.model.beans.customtags.SuppliedTag) CustomTagList(eu.transkribus.core.model.beans.customtags.CustomTagList) IOException(java.io.IOException) CommentTag(eu.transkribus.core.model.beans.customtags.CommentTag) TextStyleTag(eu.transkribus.core.model.beans.customtags.TextStyleTag) PPr(org.docx4j.wml.PPr) GapTag(eu.transkribus.core.model.beans.customtags.GapTag) AbbrevTag(eu.transkribus.core.model.beans.customtags.AbbrevTag)

Example 13 with TextStyleTag

use of eu.transkribus.core.model.beans.customtags.TextStyleTag in project TranskribusCore by Transkribus.

From the class TrpRtfBuilder, method getRtfTextForShapeElement:

/**
 * Creates the RTF text for one shape element.
 * <p>
 * Every character becomes its own {@link RtfText}, formatted first with the text style of the
 * element and then with each text style tag whose range covers the character. If blackening is
 * switched on, the text is first passed through {@code ExportUtils.blackenString} for every
 * blackening tag (non-indexed tags first, then indexed ones).
 *
 * @param element shape whose unicode text is exported
 * @return the concatenation of all per-character RTF texts
 * @throws IOException if the element has no text or no custom tag list
 */
private static RtfText getRtfTextForShapeElement(ITrpShapeType element) throws IOException {
    String content = element.getUnicodeText();
    CustomTagList tagList = element.getCustomTagList();
    if (content == null || tagList == null) {
        throw new IOException("Element has no text or custom tag list: " + element + ", class: " + element.getClass().getName());
    }

    if (doBlackening) {
        // replace the text covered by blackening tags
        for (CustomTag candidate : tagList.getNonIndexedTags()) {
            boolean blackening = candidate.getTagName().equals(RegionTypeUtil.BLACKENING_REGION.toLowerCase());
            if (blackening) {
                content = ExportUtils.blackenString(candidate, content);
            }
        }
        for (CustomTag candidate : tagList.getIndexedTags()) {
            boolean blackening = candidate.getTagName().equals(RegionTypeUtil.BLACKENING_REGION.toLowerCase());
            if (blackening) {
                content = ExportUtils.blackenString(candidate, content);
            }
        }
    }

    List<TextStyleTag> styleTags = element.getTextStyleTags();
    int charCount = content.length();
    RtfText[] pieces = new RtfText[charCount];
    for (int pos = 0; pos < charCount; pos++) {
        // start with the "global" text style of the element ...
        RtfText piece = RtfText.text(content.substring(pos, pos + 1));
        piece = formatRtfText(piece, element.getTextStyle());
        // ... then apply every custom style tag that spans this character
        for (TextStyleTag styleTag : styleTags) {
            int from = styleTag.getOffset();
            if (pos < from) {
                continue;
            }
            int until = styleTag.getOffset() + styleTag.getLength();
            if (pos < until) {
                piece = formatRtfText(piece, styleTag.getTextStyle());
            }
        }
        pieces[pos] = piece;
        // TODO: include structure types!! (also possible in custom tags!!)
        // TODO: include reading order!!!
    }
    return RtfText.text(false, (Object[]) pieces);
}

Also used : TextStyleTag(eu.transkribus.core.model.beans.customtags.TextStyleTag) RtfText(com.tutego.jrtf.RtfText) CustomTag(eu.transkribus.core.model.beans.customtags.CustomTag) CustomTagList(eu.transkribus.core.model.beans.customtags.CustomTagList) IOException(java.io.IOException)

Aggregations

TextStyleTag (eu.transkribus.core.model.beans.customtags.TextStyleTag)13 CustomTagList (eu.transkribus.core.model.beans.customtags.CustomTagList)6 Test (org.junit.Test)6 CustomTag (eu.transkribus.core.model.beans.customtags.CustomTag)5 TrpTextLineType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType)5 TrpTextRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType)5 TrpPageType (eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)4 Chunk (com.itextpdf.text.Chunk)2 AbbrevTag (eu.transkribus.core.model.beans.customtags.AbbrevTag)2 CommentTag (eu.transkribus.core.model.beans.customtags.CommentTag)2 GapTag (eu.transkribus.core.model.beans.customtags.GapTag)2 Point (java.awt.Point)2 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 BaseColor (com.itextpdf.text.BaseColor)1 Phrase (com.itextpdf.text.Phrase)1 RtfText (com.tutego.jrtf.RtfText)1 OrganizationTag (eu.transkribus.core.model.beans.customtags.OrganizationTag)1 PersonTag (eu.transkribus.core.model.beans.customtags.PersonTag)1 PlaceTag (eu.transkribus.core.model.beans.customtags.PlaceTag)1