use of eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType in project TranskribusCore by Transkribus.
the class TrpRtfBuilder method getRtfTextForLineFromWords.
// private static void getTagsForShapeElement(ITrpShapeType element) throws IOException{
//
// String textStr = element.getUnicodeText();
// CustomTagList cl = element.getCustomTagList();
// if (textStr == null || cl == null)
// throw new IOException("Element has no text or custom tag list: "+element+", class: "+element.getClass().getName());
//
// for (CustomTag nonIndexedTag : cl.getNonIndexedTags()) {
//
// logger.debug("nonindexed tag found ");
// storeCustomTag(nonIndexedTag, textStr);
//
// }
// for (CustomTag indexedTag : cl.getIndexedTags()) {
//
// logger.debug("indexed tag found ");
// storeCustomTag(indexedTag, textStr);
//
// }
//
// }
//
// private static void storeCustomTag(CustomTag currTag, String textStr) {
// if (!currTag.getTagName().equals("textStyle")){
//
// if (currTag.getOffset() != -1 && currTag.getLength() != -1 && (currTag.getOffset()+currTag.getLength() <= textStr.length())){
// tags.put(currTag, textStr.substring(currTag.getOffset(), currTag.getOffset()+currTag.getLength()));
// }
// else{
// tags.put(currTag, textStr);
// }
// logger.debug("++tag name is " + currTag.getTagName());
// logger.debug("text " + tags.get(currTag));
// }
//
// if (currTag.getTagName().equals("Person")){
// if (currTag.getOffset() != -1 && currTag.getLength() != -1 && (currTag.getOffset()+currTag.getLength() <= textStr.length())){
// persons.add(textStr.substring(currTag.getOffset(), currTag.getOffset()+currTag.getLength()));
// }
// else{
// logger.debug("with index is something wrong: offset " + currTag.getOffset() + " length " + currTag.getLength()) ;
// //throw new Exception("Something wrong with indexed tag for text: " + textStr);
// }
// }
// else if (currTag.getTagName().equals("Place")){
// if (currTag.getOffset() != -1 && currTag.getLength() != -1 && (currTag.getOffset()+currTag.getLength() <= textStr.length())){
// places.add(textStr.substring(currTag.getOffset(), currTag.getOffset()+currTag.getLength()));
// }
// }
//
// }
/**
 * Builds the RTF text of a line from its words: every word of the line is
 * converted with {@code getRtfTextForShapeElement} and the per-word results
 * are combined, in word order, via {@code RtfText.text(true, ...)}.
 *
 * @param line the text line whose words are converted
 * @return the combined RTF text of all words of the line
 * @throws IOException propagated from the per-word conversion
 */
private static RtfText getRtfTextForLineFromWords(TrpTextLineType line) throws IOException {
    List<WordType> lineWords = line.getWord();
    RtfText[] parts = new RtfText[lineWords.size()];
    int slot = 0;
    for (WordType rawWord : lineWords) {
        // words of a TrpTextLineType are expected to be TrpWordType instances
        TrpWordType word = (TrpWordType) rawWord;
        parts[slot] = getRtfTextForShapeElement(word);
        slot++;
    }
    return RtfText.text(true, parts);
}
use of eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType in project TranskribusCore by Transkribus.
the class PageXmlUtils method removeAllIndexedTags.
/**
 * Removes the indexed tags from the custom tag list of every text region
 * returned by {@code getTextRegions(true)} for the page, of every line in
 * those regions, and of every word in those lines.
 *
 * @param pc the page content whose page is processed; its page is cast to
 *           {@code TrpPageType}
 */
public static void removeAllIndexedTags(PcGtsType pc) {
    TrpPageType page = (TrpPageType) pc.getPage();
    for (TrpTextRegionType region : page.getTextRegions(true)) {
        // region level
        region.getCustomTagList().removeIndexedTags();
        for (TextLineType rawLine : region.getTextLine()) {
            // line level
            TrpTextLineType line = (TrpTextLineType) rawLine;
            line.getCustomTagList().removeIndexedTags();
            for (WordType rawWord : line.getWord()) {
                // word level
                ((TrpWordType) rawWord).getCustomTagList().removeIndexedTags();
            }
        }
    }
}
use of eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType in project TranskribusCore by Transkribus.
the class ExportCache method storeCustomTagMapForDoc.
/**
 * Clears the stored tags and collects the tags of the selected pages of a
 * document again by calling {@code getTagsForShapeElement} on every line
 * (and, if requested, on every word) of each page's text regions.
 *
 * @param doc the document whose pages are processed
 * @param wordBased if {@code true}, the tags of each word are collected in
 *                  addition to the tags of the line containing it
 * @param pageIndices zero-based indices of the pages to process;
 *                    {@code null} means every page is processed
 * @param monitor progress monitor used for cancellation checks and progress
 *                reporting; may be {@code null}
 * @param blackening value stored in the {@code doBlackening} field
 * @throws JAXBException propagated from loading/building the page transcript
 * @throws IOException propagated from loading the transcript or collecting tags
 * @throws InterruptedException if the monitor reports that the user canceled
 */
public void storeCustomTagMapForDoc(TrpDoc doc, boolean wordBased, Set<Integer> pageIndices, IProgressMonitor monitor, boolean blackening) throws JAXBException, IOException, InterruptedException {
    doBlackening = blackening;
    tags.clear();

    List<TrpPage> docPages = doc.getPages();
    int pageCount = docPages.size();
    int processed = 0;

    for (int pageIdx = 0; pageIdx < pageCount; pageIdx++) {
        boolean selected = pageIndices == null || pageIndices.contains(pageIdx);
        if (!selected) {
            continue;
        }
        if (monitor != null && monitor.isCanceled()) {
            throw new InterruptedException("User canceled the export");
        }

        // The page transcripts are normally fetched before this method runs, so the
        // cached transcript is usually available; otherwise fall back to the page's
        // current transcript.
        JAXBPageTranscript transcript;
        if (pageTranscripts != null && pageTranscripts.get(pageIdx) != null) {
            transcript = pageTranscripts.get(pageIdx);
            transcript.getPageData();
        } else {
            TrpTranscriptMetadata metadata = docPages.get(pageIdx).getCurrentTranscript();
            transcript = new JAXBPageTranscript(metadata);
        }
        transcript.build();

        TrpPageType pageContent = transcript.getPage();
        logger.debug("get tags for page " + (pageIdx + 1) + "/" + doc.getNPages());

        for (TrpTextRegionType region : pageContent.getTextRegions(true)) {
            for (TextLineType rawLine : region.getTextLine()) {
                TrpTextLineType line = (TrpTextLineType) rawLine;
                List<WordType> lineWords = line.getWord();
                // the line's own tags are always collected; word tags only on request
                getTagsForShapeElement(line);
                if (!wordBased) {
                    continue;
                }
                for (WordType rawWord : lineWords) {
                    getTagsForShapeElement((TrpWordType) rawWord);
                }
            }
        }

        if (monitor != null) {
            monitor.setTaskName("Loaded tags for page " + (pageIdx + 1));
            // NOTE(review): the running count of processed pages is reported here; if this
            // monitor follows the Eclipse IProgressMonitor contract, worked(int) expects an
            // increment rather than a cumulative total - confirm against the monitor in use.
            processed++;
            monitor.worked(processed);
        }
    }
}
Aggregations