Search in sources :

Example 16 with TextLineType

use of eu.transkribus.core.model.beans.pagecontent.TextLineType in project TranskribusCore by Transkribus.

the class TrpTeiStringBuilder method writeTextForTextRegion.

/**
 * Writes a single text region to the given builder: the region opening, one
 * entry per text line, and finally the region closing.
 * <p>
 * A region without any text line is skipped with a warning and nothing is
 * written for it. Depending on {@code commonPars.isWriteTextOnWordLevel()},
 * a line is either written as a whole, or as a start/end pair that wraps
 * one entry per word of the line.
 *
 * @param sb     builder that receives the output
 * @param r      text region to write
 * @param pageNr page number, appended to {@code FACS_ID_PREFIX} to form the facsimile id
 */
void writeTextForTextRegion(SebisStringBuilder sb, TextRegionType r, int pageNr) {
    if (r.getTextLine().isEmpty()) {
        logger.warn("skipping empty region: " + r.getId());
        return;
    }

    final String facsId = FACS_ID_PREFIX + pageNr;
    writeTextRegion(sb, r, facsId);

    for (TextLineType rawLine : r.getTextLine()) {
        TrpTextLineType line = (TrpTextLineType) rawLine;

        // line level: the whole line is written in one go
        if (!commonPars.isWriteTextOnWordLevel()) {
            writeLineOrWord(sb, line, facsId);
            continue;
        }

        // word level: line start, then every word, then line end - all one indent level deeper
        String lineStart = getLineOrWordStart(line, facsId);
        sb.incIndent();
        sb.addLine(lineStart);
        // TODO: write text for words???
        for (WordType rawWord : line.getWord()) {
            writeLineOrWord(sb, (TrpWordType) rawWord, facsId);
        }
        String lineEnd = getLineOrWordEnd(line, facsId);
        sb.addLine(lineEnd);
        sb.decIndent();
    }

    closeTextRegion(sb);
}
Also used : TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TextLineType(eu.transkribus.core.model.beans.pagecontent.TextLineType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) WordType(eu.transkribus.core.model.beans.pagecontent.WordType) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType)

Example 17 with TextLineType

use of eu.transkribus.core.model.beans.pagecontent.TextLineType in project TranskribusCore by Transkribus.

the class DocxBuilder method exportTextRegion.

/**
 * Adds the text of every line of a text region to a docx paragraph.
 * <p>
 * If no paragraph is passed in, a new one is created and added to the main
 * document part. For each line the text is taken from its words (when
 * {@code wordBased} is set and the line has words) or from the line itself.
 * A line carrying a paragraph tag starts a new paragraph for the lines that
 * follow; otherwise a line break is appended when {@code preserveLineBreaks}
 * is set and the line is not the last one of the region.
 *
 * @param tr        text region whose lines are exported
 * @param wordBased whether to prefer the words of a line over the line text
 * @param p         paragraph to append to, or {@code null} to start a new one
 * @param mdp       main document part that receives newly created paragraphs
 */
private static void exportTextRegion(TrpTextRegionType tr, boolean wordBased, P p, MainDocumentPart mdp) {
    P paragraph = p;
    if (paragraph == null) {
        paragraph = factory.createP();
        mdp.addObject(paragraph);
    }

    final List<TextLineType> lines = tr.getTextLine();
    for (int idx = 0; idx < lines.size(); idx++) {
        final TrpTextLineType line = (TrpTextLineType) lines.get(idx);

        try {
            final boolean useWords = wordBased && !line.getWord().isEmpty();
            if (useWords) {
                getFormattedTextForLineElement(line.getWord(), paragraph, mdp);
            } else {
                getFormattedTextForShapeElement(line, paragraph, mdp);
            }
        } catch (Exception e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }

        if (line.getCustomTagList().containsParagraphTag()) {
            // With the pilcrow sign (U+00B6) the user can mark a new paragraph
            // inside a text region, so continue in a fresh paragraph.
            paragraph = factory.createP();
            mdp.addObject(paragraph);
        } else if (preserveLineBreaks && (idx + 1 != lines.size())) {
            // Add a line break after each text line unless dense lines are
            // explicitly wished; no line break at the end of the paragraph.
            Br lineBreak = factory.createBr();
            paragraph.getContent().add(lineBreak);
        }
    }
}
Also used : TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) Br(org.docx4j.wml.Br) TextLineType(eu.transkribus.core.model.beans.pagecontent.TextLineType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) JAXBException(javax.xml.bind.JAXBException) IOException(java.io.IOException) Docx4JException(org.docx4j.openpackaging.exceptions.Docx4JException)

Example 18 with TextLineType

use of eu.transkribus.core.model.beans.pagecontent.TextLineType in project TranskribusCore by Transkribus.

the class TrpRtfBuilder method getRtfParagraphsForTranscript.

// public static void writeRtfForElement(Rtf rtf, ITrpShapeType element, boolean wordBased, File file, boolean append) throws IOException, JAXBException {
// element.getUnicodeText();
// CustomTagList cl = element.getCustomTagList();
// 
// RtfText text = RtfText.text(element.getUnicodeText());
// text = formatRtfText(text, element.getTextStyle());
// 
// 
// 
// if (element instanceof TextLineType || element instanceof TextRegionType) {// TODO words vs lines and regions
// rtf.p(text);
// } else if (element instanceof TrpWordType) {
// //			rtf.p(texts);
// }
// 
// 
// //		cl.getCustomTagAndContinuations(tag)
// 
// 
// 
// }
/**
 * Builds one RTF paragraph per text region of the given page.
 * <p>
 * Each paragraph consists of the texts of the region's lines, every line
 * text followed by a newline. The text of a line is built from its words
 * when {@code wordBased} is set and the line has words, otherwise from the
 * line itself.
 * <p>
 * Right-to-left output (right-aligned paragraphs) is not implemented; all
 * paragraphs are created with the default alignment.
 *
 * @param trpPage   page whose text regions (fetched with {@code getTextRegions(true)}) are exported
 * @param wordBased whether to prefer the words of a line over the line text
 * @return array with one paragraph per text region, in the order the regions are returned by the page
 * @throws IOException   declared by this method; presumably propagated from the text helpers - TODO confirm
 * @throws JAXBException declared by this method; presumably propagated from the text helpers - TODO confirm
 */
public static RtfPara[] getRtfParagraphsForTranscript(TrpPageType trpPage, boolean wordBased) throws IOException, JAXBException {
    List<TrpTextRegionType> textRegions = trpPage.getTextRegions(true);
    RtfPara[] paras = new RtfPara[textRegions.size()];

    for (int j = 0; j < textRegions.size(); ++j) {
        List<TextLineType> lines = textRegions.get(j).getTextLine();
        RtfText[] linesTexts = new RtfText[lines.size()];

        for (int i = 0; i < lines.size(); ++i) {
            TrpTextLineType trpL = (TrpTextLineType) lines.get(i);
            RtfText lineText = (wordBased && !trpL.getWord().isEmpty())
                    ? getRtfTextForLineFromWords(trpL)
                    : getRtfTextForShapeElement(trpL);
            // terminate every line with a newline inside the paragraph
            linesTexts[i] = RtfText.text(lineText, "\n");
        }

        paras[j] = RtfPara.p(linesTexts);
    }
    return paras;
}
Also used : RtfPara(com.tutego.jrtf.RtfPara) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TextLineType(eu.transkribus.core.model.beans.pagecontent.TextLineType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) RtfText(com.tutego.jrtf.RtfText)

Example 19 with TextLineType

use of eu.transkribus.core.model.beans.pagecontent.TextLineType in project TranskribusCore by Transkribus.

the class PageXmlUtils method getFulltextFromLines.

/**
 * Concatenates the unicode text of all text lines of a page into one string.
 * <p>
 * Regions are visited in the order returned by
 * {@code PageXmlUtils.getTextRegions(pc)}; within a region the lines are
 * ordered with a {@code TrpElementCoordinatesComparator}. Every line text is
 * followed by a single space (so a non-empty result ends with a space).
 * Lines without a text equivalent or without unicode text are skipped.
 * <p>
 * The ordering is applied to a copy of each region's line list, so the line
 * order of the passed page model is left untouched.
 *
 * @param pc page content to read the lines from
 * @return the concatenated line texts, or an empty string if there are none
 */
public static String getFulltextFromLines(PcGtsType pc) {
    List<TextRegionType> regions = PageXmlUtils.getTextRegions(pc);
    TrpElementCoordinatesComparator<TextLineType> comp = new TrpElementCoordinatesComparator<>();
    StringBuilder sb = new StringBuilder();
    for (TextRegionType r : regions) {
        List<TextLineType> lines = r.getTextLine();
        if (lines == null || lines.isEmpty()) {
            continue;
        }
        // Sort a copy: sorting the live list would reorder the lines in the caller's page model.
        // (Fully qualified so the block does not depend on an ArrayList import in this file.)
        List<TextLineType> sortedLines = new java.util.ArrayList<>(lines);
        Collections.sort(sortedLines, comp);
        for (TextLineType l : sortedLines) {
            if (l.getTextEquiv() != null && l.getTextEquiv().getUnicode() != null) {
                sb.append(l.getTextEquiv().getUnicode()).append(' ');
            }
        }
    }
    return sb.toString();
}
Also used : TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) TextRegionType(eu.transkribus.core.model.beans.pagecontent.TextRegionType) TextLineType(eu.transkribus.core.model.beans.pagecontent.TextLineType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TrpElementCoordinatesComparator(eu.transkribus.core.model.beans.pagecontent_trp.TrpElementCoordinatesComparator)

Example 20 with TextLineType

use of eu.transkribus.core.model.beans.pagecontent.TextLineType in project TranskribusCore by Transkribus.

the class PageXmlUtils method extractStats.

/**
 * Collects transcription statistics for a page.
 * <p>
 * Counts the text regions, lines and words of the page, how many of each
 * carry a non-blank unicode text ("transcribed"), and the number of
 * whitespace-separated words contained in the region texts and in the line
 * texts.
 *
 * @param page page content to analyse
 * @return a new statistics object filled with the collected counts
 */
public static TrpTranscriptStatistics extractStats(PcGtsType page) {
    int nrOfTranscribedRegions = 0;
    int nrOfWordsInRegions = 0;
    int nrOfLines = 0;
    int nrOfTranscribedLines = 0;
    int nrOfWordsInLines = 0;
    int nrOfWords = 0;
    int nrOfTranscribedWords = 0;

    List<TextRegionType> regs = PageXmlUtils.getTextRegions(page);
    int nrOfRegions = regs.size();

    for (TextRegionType r : regs) {
        String regionText = r.getTextEquiv() == null ? null : r.getTextEquiv().getUnicode();
        if (hasTranscription(regionText)) {
            nrOfTranscribedRegions += 1;
            nrOfWordsInRegions += countWhitespaceSeparatedWords(regionText);
        }

        List<TextLineType> lines = r.getTextLine();
        nrOfLines += lines.size();
        for (TextLineType l : lines) {
            String lineText = l.getTextEquiv() == null ? null : l.getTextEquiv().getUnicode();
            if (hasTranscription(lineText)) {
                nrOfTranscribedLines += 1;
                nrOfWordsInLines += countWhitespaceSeparatedWords(lineText);
            }

            List<WordType> words = l.getWord();
            nrOfWords += words.size();
            for (WordType w : words) {
                String wordText = w.getTextEquiv() == null ? null : w.getTextEquiv().getUnicode();
                if (hasTranscription(wordText)) {
                    nrOfTranscribedWords += 1;
                }
            }
        }
    }

    TrpTranscriptStatistics s = new TrpTranscriptStatistics();
    s.setNrOfLines(nrOfLines);
    s.setNrOfRegions(nrOfRegions);
    s.setNrOfTranscribedLines(nrOfTranscribedLines);
    s.setNrOfTranscribedWords(nrOfTranscribedWords);
    s.setNrOfTranscribedRegions(nrOfTranscribedRegions);
    s.setNrOfWords(nrOfWords);
    s.setNrOfWordsInLines(nrOfWordsInLines);
    s.setNrOfWordsInRegions(nrOfWordsInRegions);
    return s;
}

/** Returns true if the text is non-null and contains more than whitespace. */
private static boolean hasTranscription(String text) {
    return text != null && !text.trim().isEmpty();
}

/**
 * Counts the words of a text that passed {@link #hasTranscription(String)}.
 * The text is trimmed and split on runs of whitespace, so leading, trailing
 * or repeated blanks (and tabs/newlines) do not produce phantom words.
 */
private static int countWhitespaceSeparatedWords(String text) {
    return text.trim().split("\\s+").length;
}
Also used : TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) TextRegionType(eu.transkribus.core.model.beans.pagecontent.TextRegionType) TextLineType(eu.transkribus.core.model.beans.pagecontent.TextLineType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TrpTranscriptStatistics(eu.transkribus.core.model.beans.TrpTranscriptStatistics) WordType(eu.transkribus.core.model.beans.pagecontent.WordType) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType)

Aggregations

TextLineType (eu.transkribus.core.model.beans.pagecontent.TextLineType)27 TrpTextLineType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType)19 WordType (eu.transkribus.core.model.beans.pagecontent.WordType)13 TrpTextRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType)13 TextRegionType (eu.transkribus.core.model.beans.pagecontent.TextRegionType)9 TrpWordType (eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType)9 RegionType (eu.transkribus.core.model.beans.pagecontent.RegionType)6 Rectangle (java.awt.Rectangle)6 ArrayList (java.util.ArrayList)5 TrpPage (eu.transkribus.core.model.beans.TrpPage)4 TrpElementCoordinatesComparator (eu.transkribus.core.model.beans.pagecontent_trp.TrpElementCoordinatesComparator)4 TrpPageType (eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)4 TrpRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType)4 TrpTranscriptMetadata (eu.transkribus.core.model.beans.TrpTranscriptMetadata)3 TrpBaselineType (eu.transkribus.core.model.beans.pagecontent_trp.TrpBaselineType)3 Point (java.awt.Point)3 JAXBPageTranscript (eu.transkribus.core.model.beans.JAXBPageTranscript)2 TrpTranscriptStatistics (eu.transkribus.core.model.beans.TrpTranscriptStatistics)2 PcGtsType (eu.transkribus.core.model.beans.pagecontent.PcGtsType)2 TextEquivType (eu.transkribus.core.model.beans.pagecontent.TextEquivType)2