Search in sources :

Example 11 with TrpWordType

use of eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType in project TranskribusCore by Transkribus.

From the class TrpRtfBuilder, method getRtfTextForLineFromWords:

// private static void getTagsForShapeElement(ITrpShapeType element) throws IOException{
// 
// String textStr = element.getUnicodeText();
// CustomTagList cl = element.getCustomTagList();
// if (textStr == null || cl == null)
// throw new IOException("Element has no text or custom tag list: "+element+", class: "+element.getClass().getName());
// 
// for (CustomTag nonIndexedTag : cl.getNonIndexedTags()) {
// 
// logger.debug("nonindexed tag found ");
// storeCustomTag(nonIndexedTag, textStr);
// 
// }
// for (CustomTag indexedTag : cl.getIndexedTags()) {
// 
// logger.debug("indexed tag found ");
// storeCustomTag(indexedTag, textStr);
// 
// }
// 
// }
// 
// private static void storeCustomTag(CustomTag currTag, String textStr) {
// if (!currTag.getTagName().equals("textStyle")){
// 
// if (currTag.getOffset() != -1 && currTag.getLength() != -1 && (currTag.getOffset()+currTag.getLength() <= textStr.length())){
// tags.put(currTag, textStr.substring(currTag.getOffset(), currTag.getOffset()+currTag.getLength()));
// }
// else{
// tags.put(currTag, textStr);
// }
// logger.debug("++tag name is " + currTag.getTagName());
// logger.debug("text " + tags.get(currTag));
// }
// 
// if (currTag.getTagName().equals("Person")){
// if (currTag.getOffset() != -1 && currTag.getLength() != -1 && (currTag.getOffset()+currTag.getLength() <= textStr.length())){
// persons.add(textStr.substring(currTag.getOffset(), currTag.getOffset()+currTag.getLength()));
// }
// else{
// logger.debug("with index is something wrong: offset " + currTag.getOffset() + " length " + currTag.getLength()) ;
// //throw new Exception("Something wrong with indexed tag for text: " + textStr);
// }
// }
// else if (currTag.getTagName().equals("Place")){
// if (currTag.getOffset() != -1 && currTag.getLength() != -1 && (currTag.getOffset()+currTag.getLength() <= textStr.length())){
// places.add(textStr.substring(currTag.getOffset(), currTag.getOffset()+currTag.getLength()));
// }
// }
// 
// }
/**
 * Builds the RTF text of a line from its individual words.
 * <p>
 * Each word of the line is converted with {@code getRtfTextForShapeElement}
 * and the resulting pieces are combined, in word order, through
 * {@code RtfText.text(true, ...)}.
 *
 * @param line the text line whose words are rendered; every entry of
 *             {@code line.getWord()} is cast to {@code TrpWordType}
 * @return the combined RTF text of all words of the line
 * @throws IOException propagated from the per-word conversion
 */
private static RtfText getRtfTextForLineFromWords(TrpTextLineType line) throws IOException {
    List<WordType> lineWords = line.getWord();
    RtfText[] rendered = new RtfText[lineWords.size()];
    int slot = 0;
    for (WordType rawWord : lineWords) {
        rendered[slot++] = getRtfTextForShapeElement((TrpWordType) rawWord);
    }
    return RtfText.text(true, rendered);
}
Also used : RtfText(com.tutego.jrtf.RtfText) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType) WordType(eu.transkribus.core.model.beans.pagecontent.WordType) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType)

Example 12 with TrpWordType

use of eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType in project TranskribusCore by Transkribus.

From the class PageXmlUtils, method removeAllIndexedTags:

/**
 * Removes the indexed custom tags from every text region of the page, from
 * every text line of those regions and from every word of those lines.
 * <p>
 * The regions are obtained via {@code getTextRegions(true)}; lines and words
 * are cast to their {@code Trp*} counterparts before their custom tag lists
 * are accessed.
 *
 * @param pc the PAGE document whose page is cleaned in place; its page must
 *           be a {@code TrpPageType}
 */
public static void removeAllIndexedTags(PcGtsType pc) {
    TrpPageType page = (TrpPageType) pc.getPage();
    for (TrpTextRegionType region : page.getTextRegions(true)) {
        // Region level first, then descend into its lines.
        region.getCustomTagList().removeIndexedTags();
        for (TextLineType rawLine : region.getTextLine()) {
            TrpTextLineType line = (TrpTextLineType) rawLine;
            line.getCustomTagList().removeIndexedTags();
            for (WordType rawWord : line.getWord()) {
                ((TrpWordType) rawWord).getCustomTagList().removeIndexedTags();
            }
        }
    }
}
Also used : TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TextLineType(eu.transkribus.core.model.beans.pagecontent.TextLineType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType) TrpPageType(eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType) WordType(eu.transkribus.core.model.beans.pagecontent.WordType) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType)

Example 13 with TrpWordType

use of eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType in project TranskribusCore by Transkribus.

From the class ExportCache, method storeCustomTagMapForDoc:

/**
 * Scans the selected pages of a document and runs
 * {@code getTagsForShapeElement} on every text line and, if requested, on
 * every word of those lines.
 * <p>
 * The {@code tags} collection is cleared before scanning and the
 * {@code blackening} flag is stored in {@code doBlackening}. The method
 * itself returns nothing; the collected tags are presumably accumulated in
 * {@code tags} by {@code getTagsForShapeElement} (verify against that helper).
 *
 * @param doc         the document whose pages are scanned
 * @param wordBased   if {@code true}, the words of each line are scanned in
 *                    addition to the line itself
 * @param pageIndices zero-based indices of the pages to process;
 *                    {@code null} means every page is processed
 * @param monitor     optional progress monitor, may be {@code null}; it is
 *                    polled for cancellation before each processed page and
 *                    updated after it
 * @param blackening  value stored in the {@code doBlackening} field
 * @throws JAXBException        declared for the transcript handling;
 *                              presumably raised while building a page
 *                              transcript
 * @throws IOException          propagated from transcript building or tag
 *                              collection
 * @throws InterruptedException if the monitor reports cancellation
 */
public void storeCustomTagMapForDoc(TrpDoc doc, boolean wordBased, Set<Integer> pageIndices, IProgressMonitor monitor, boolean blackening) throws JAXBException, IOException, InterruptedException {
    doBlackening = blackening;
    tags.clear();
    final List<TrpPage> docPages = doc.getPages();
    final int pageCount = docPages.size();
    int processed = 0;
    for (int pageIdx = 0; pageIdx < pageCount; pageIdx++) {
        // A null selection means "all pages"; otherwise skip unselected ones.
        boolean selected = pageIndices == null || pageIndices.contains(pageIdx);
        if (!selected) {
            continue;
        }
        if (monitor != null && monitor.isCanceled()) {
            throw new InterruptedException("User canceled the export");
        }

        // Transcripts are normally fetched before this method runs, so the
        // cached entry is the usual case; otherwise fall back to the page's
        // current transcript.
        JAXBPageTranscript transcript;
        if (pageTranscripts != null && pageTranscripts.get(pageIdx) != null) {
            transcript = pageTranscripts.get(pageIdx);
            transcript.getPageData();
        } else {
            TrpTranscriptMetadata metadata = docPages.get(pageIdx).getCurrentTranscript();
            transcript = new JAXBPageTranscript(metadata);
        }
        transcript.build();

        TrpPageType pageContent = transcript.getPage();
        logger.debug("get tags for page " + (pageIdx + 1) + "/" + doc.getNPages());
        for (TrpTextRegionType region : pageContent.getTextRegions(true)) {
            for (TextLineType rawLine : region.getTextLine()) {
                TrpTextLineType line = (TrpTextLineType) rawLine;
                List<WordType> lineWords = line.getWord();
                // The line itself is always scanned; words only on request.
                getTagsForShapeElement(line);
                if (!wordBased) {
                    continue;
                }
                for (WordType rawWord : lineWords) {
                    getTagsForShapeElement((TrpWordType) rawWord);
                }
            }
        }

        if (monitor != null) {
            monitor.setTaskName("Loaded tags for page " + (pageIdx + 1));
            // NOTE(review): this passes the running total of processed pages;
            // if worked() expects an increment (as Eclipse's IProgressMonitor
            // does), progress would overshoot - confirm against the monitor used.
            monitor.worked(++processed);
        }
    }
}
Also used : JAXBPageTranscript(eu.transkribus.core.model.beans.JAXBPageTranscript) TrpPage(eu.transkribus.core.model.beans.TrpPage) TrpTranscriptMetadata(eu.transkribus.core.model.beans.TrpTranscriptMetadata) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType) WordType(eu.transkribus.core.model.beans.pagecontent.WordType) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TextLineType(eu.transkribus.core.model.beans.pagecontent.TextLineType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) TrpPageType(eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)

Aggregations

TrpWordType (eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType)13 TrpTextLineType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType)11 WordType (eu.transkribus.core.model.beans.pagecontent.WordType)8 TextLineType (eu.transkribus.core.model.beans.pagecontent.TextLineType)7 TrpTextRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType)7 Rectangle (java.awt.Rectangle)6 TrpPageType (eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)5 TrpBaselineType (eu.transkribus.core.model.beans.pagecontent_trp.TrpBaselineType)3 IOException (java.io.IOException)3 JAXBPageTranscript (eu.transkribus.core.model.beans.JAXBPageTranscript)2 TrpPage (eu.transkribus.core.model.beans.TrpPage)2 TrpTranscriptMetadata (eu.transkribus.core.model.beans.TrpTranscriptMetadata)2 CustomTag (eu.transkribus.core.model.beans.customtags.CustomTag)2 PcGtsType (eu.transkribus.core.model.beans.pagecontent.PcGtsType)2 Point (java.awt.Point)2 HashMap (java.util.HashMap)2 Map (java.util.Map)2 Entry (java.util.Map.Entry)2 Line2D (com.itextpdf.awt.geom.Line2D)1 Chunk (com.itextpdf.text.Chunk)1