use of eu.transkribus.core.model.beans.pagecontent.TextLineType in project TranskribusCore by Transkribus.
the class PageXmlUtils method setTextToLine.
/**
 * Sets the Unicode text of the line identified by {@code lineId} inside the given page document.
 * <p>
 * The line is looked up via {@code findLineById}. If no such line is found, the fact is logged at
 * info level and nothing is changed. If the line has no TextEquiv element yet, a new one is created
 * and attached; otherwise the existing element is updated in place.
 *
 * @param text   the text to store as the Unicode content of the line
 * @param pc     the page document that is searched for the line
 * @param lineId the id of the line to update
 */
public static void setTextToLine(String text, PcGtsType pc, String lineId) {
    final TextLineType line = findLineById(pc, lineId);
    if (line == null) {
        logger.info("Line does not exist: " + lineId);
        return;
    }

    logger.debug("Setting text in line=" + lineId + ": " + text);

    final TextEquivType existing = line.getTextEquiv();
    if (existing != null) {
        logger.debug("Setting text in existing TextEquiv element.");
        existing.setUnicode(text);
        return;
    }

    // No TextEquiv present yet: build one, fill it, then attach it to the line.
    logger.debug("Creating new TextEquiv element.");
    final TextEquivType created = new TextEquivType();
    created.setUnicode(text);
    line.setTextEquiv(created);
}
use of eu.transkribus.core.model.beans.pagecontent.TextLineType in project TranskribusCore by Transkribus.
the class PageXmlUtils method removeAllIndexedTags.
/**
 * Removes the indexed tags from the custom tag lists of every text region of the page, and of every
 * line and word contained in those regions.
 * <p>
 * The page, lines and words are cast to their {@code Trp*} subclasses, so the document is expected to
 * have been created with those types; otherwise a {@link ClassCastException} is raised.
 *
 * @param pc the page document whose regions, lines and words are processed
 */
public static void removeAllIndexedTags(PcGtsType pc) {
    final TrpPageType page = (TrpPageType) pc.getPage();
    // NOTE(review): the meaning of the boolean flag of getTextRegions is not visible here - confirm.
    for (TrpTextRegionType region : page.getTextRegions(true)) {
        region.getCustomTagList().removeIndexedTags();
        for (TextLineType rawLine : region.getTextLine()) {
            final TrpTextLineType line = (TrpTextLineType) rawLine;
            line.getCustomTagList().removeIndexedTags();
            for (WordType rawWord : line.getWord()) {
                final TrpWordType word = (TrpWordType) rawWord;
                word.getCustomTagList().removeIndexedTags();
            }
        }
    }
}
use of eu.transkribus.core.model.beans.pagecontent.TextLineType in project TranskribusCore by Transkribus.
the class ExportCache method storeCustomTagMapForDoc.
/**
 * Collects the (custom) tags of the selected pages of a document into this cache.
 * <p>
 * The previously stored tags are cleared first. For every selected page the transcript is taken from
 * {@code pageTranscripts} if one is cached for that page index, otherwise it is created from the
 * page's current transcript metadata. Tags are then gathered via {@code getTagsForShapeElement} for
 * every line and, if {@code wordBased} is set, additionally for every word of each line.
 *
 * @param doc         the document whose pages are processed
 * @param wordBased   if {@code true}, the words of each line are processed in addition to the line
 * @param pageIndices zero-based indices of the pages to process; {@code null} selects all pages
 * @param monitor     optional progress monitor, may be {@code null}; it is polled for cancellation
 *                    and informed after each processed page
 * @param blackening  value stored in the {@code doBlackening} field before processing starts
 * @throws JAXBException        declared by the signature; presumably raised while building a page
 *                              transcript - confirm against {@code JAXBPageTranscript}
 * @throws IOException          declared by the signature; presumably raised while loading a page
 *                              transcript - confirm against {@code JAXBPageTranscript}
 * @throws InterruptedException if the monitor reports that the user canceled the operation
 */
public void storeCustomTagMapForDoc(TrpDoc doc, boolean wordBased, Set<Integer> pageIndices, IProgressMonitor monitor, boolean blackening) throws JAXBException, IOException, InterruptedException {
    doBlackening = blackening;
    tags.clear();

    final List<TrpPage> pages = doc.getPages();
    final int totalPages = pages.size();
    int processedPages = 0;

    for (int i = 0; i < totalPages; ++i) {
        final boolean selected = pageIndices == null || pageIndices.contains(i);
        if (!selected) {
            continue;
        }
        if (monitor != null && monitor.isCanceled()) {
            throw new InterruptedException("User canceled the export");
        }

        // Page transcripts are normally fetched before the tag map is stored, so the cached
        // transcript is usually available; the metadata-based construction is the fallback.
        final JAXBPageTranscript tr;
        if (pageTranscripts != null && pageTranscripts.get(i) != null) {
            tr = pageTranscripts.get(i);
            tr.getPageData();
        } else {
            final TrpTranscriptMetadata md = pages.get(i).getCurrentTranscript();
            tr = new JAXBPageTranscript(md);
        }
        tr.build();

        final TrpPageType trpPage = tr.getPage();
        logger.debug("get tags for page " + (i + 1) + "/" + doc.getNPages());

        for (TrpTextRegionType region : trpPage.getTextRegions(true)) {
            for (TextLineType rawLine : region.getTextLine()) {
                final TrpTextLineType line = (TrpTextLineType) rawLine;
                final List<WordType> words = line.getWord();
                // The line itself is always processed; words only on request.
                getTagsForShapeElement(line);
                if (!wordBased) {
                    continue;
                }
                for (WordType rawWord : words) {
                    getTagsForShapeElement((TrpWordType) rawWord);
                }
            }
        }

        if (monitor != null) {
            monitor.setTaskName("Loaded tags for page " + (i + 1));
            // NOTE(review): this passes the running page count, not a per-page increment - confirm
            // that this matches the contract of the monitor implementation in use.
            monitor.worked(++processedPages);
        }
    }
}
use of eu.transkribus.core.model.beans.pagecontent.TextLineType in project TranskribusCore by Transkribus.
the class FinereaderUtils method addTextStyleToWords.
/**
 * Marks a fixed list of speaker names as letter-spaced when they occur at the start of a line.
 * <p>
 * For every page of the document the PAGE XML file of the current transcript is unmarshalled. In
 * each text line the words are sorted with a {@code TrpElementCoordinatesComparator} and the first
 * word is matched (via {@code isMatch}) against the first token of every known name. For each
 * candidate the following words of the line are matched against the remaining tokens of the name.
 * On the first complete match all involved words get {@code letterSpaced = true} on their text
 * style. Finally the modified document is written back to the same XML file.
 * <p>
 * A line that is shorter than a candidate name, or that contains a word without text, is treated as
 * a non-match for that candidate instead of raising an exception.
 *
 * @param doc the document whose pages are processed; the transcript files are modified in place
 * @throws JAXBException         declared by the signature; presumably raised when (un)marshalling
 *                               the PAGE XML - confirm against {@code JaxbUtils}
 * @throws FileNotFoundException declared by the signature; presumably raised when the transcript
 *                               file is missing - confirm against {@code JaxbUtils}
 */
public static void addTextStyleToWords(TrpDoc doc) throws JAXBException, FileNotFoundException {
    /*
     * Original request (translated from German):
     * In the following folder I have placed the book in which the speaker designations should be
     * marked as "letter-spaced" automatically (provided this can be done with reasonable effort):
     * ftp://ftp.uibk.ac.at/private/x3061015_20140902_78e054475d7532953c204ce6d392d8e9/Andy_Barbara_Bettina/zu_bearbeiten/
     * This concerns the following names, provided they appear at the start of a line:
     * Ernst
     * Albrecht
     * Preising
     * Marschall
     * Pappenheim
     * Pienzenau
     * Bern
     * Törring
     * Nothafft von Wernberg
     * Frauenhoven
     * Hans von Läubelfing
     * Caspar Bernauer
     * Agnes
     * Theobald
     * Knippeldollinger
     * Bürgermeister
     * Barbara
     * Martha
     * Stachus
     * Der Kastellan
     * Herold
     * Legat
     * FIXME Der Herold
     * FIXME Der Legat
     */
    String[] names = { "Ernst", "Albrecht", "Preising", "Marschall", "Pappenheim", "Pienzenau", "Bern", "Törring", "Nothafft von Wernberg", "Frauenhoven", "Hans von Läubelfing", "Caspar Bernauer", "Agnes", "Theobald", "Knippeldollinger", "Bürgermeister", "Barbara", "Martha", "Stachus", "Der Kastellan", "Der Herold", "Der Legat" };

    // nameList holds the tokens of each name; nameStartList holds only the first token,
    // both at the same index.
    List<String[]> nameList = new ArrayList<>(names.length);
    List<String> nameStartList = new ArrayList<>(names.length);
    for (int n = 0; n < names.length; n++) {
        String[] tokens = names[n].split(" ");
        StringBuilder tokenDump = new StringBuilder("{ ");
        for (String token : tokens) {
            tokenDump.append(token).append("|");
        }
        System.out.println(n + "\t- splitting: " + tokenDump + "}");
        nameList.add(tokens);
        nameStartList.add(tokens[0]);
    }

    TrpElementCoordinatesComparator<WordType> wordComp = new TrpElementCoordinatesComparator<WordType>();
    for (TrpPage p : doc.getPages()) {
        System.out.println("Processing page: " + p.getPageNr());
        URL url = p.getCurrentTranscript().getUrl();
        final String xmlPath = FileUtils.toFile(url).getAbsolutePath();
        File xmlFile = new File(xmlPath);
        PcGtsType pc = JaxbUtils.unmarshal(xmlFile, PcGtsType.class);

        List<TextRegionType> regions = PageXmlUtils.getTextRegions(pc);
        for (TextRegionType r : regions) {
            for (TextLineType l : r.getTextLine()) {
                List<WordType> words = l.getWord();
                if (words == null || words.isEmpty()) {
                    continue;
                }
                Collections.sort(words, wordComp);

                // Only the first word of the line can start a speaker name.
                WordType w1 = words.get(0);
                if (w1.getTextEquiv() == null || w1.getTextEquiv().getUnicode() == null) {
                    continue;
                }
                String firstText = w1.getTextEquiv().getUnicode();

                List<Integer> candidatesIndex = new ArrayList<>();
                for (int j = 0; j < nameStartList.size(); j++) {
                    if (isMatch(firstText, nameStartList.get(j))) {
                        candidatesIndex.add(j);
                    }
                }

                for (Integer index : candidatesIndex) {
                    String[] name = nameList.get(index);
                    if (name.length == 1) {
                        // Single-token name: the first word alone is the complete match.
                        // NOTE(review): getTextStyle() is dereferenced unchecked, as before -
                        // confirm that words always carry a text style here.
                        w1.getTextStyle().setLetterSpaced(true);
                        System.out.println("OK: " + name[0]);
                        break;
                    }

                    List<WordType> wordList = new ArrayList<>(name.length);
                    wordList.add(w1);
                    StringBuilder nameStr = new StringBuilder(firstText).append(" ");
                    boolean isName = true;
                    // Check the subsequent words with a loop-local index, so that a failed
                    // candidate cannot influence the handling of the next one.
                    for (int k = 1; k < name.length; k++) {
                        WordType wk = k < words.size() ? words.get(k) : null;
                        String wordText = null;
                        if (wk != null && wk.getTextEquiv() != null) {
                            wordText = wk.getTextEquiv().getUnicode();
                        }
                        if (wordText != null && isMatch(wordText, name[k])) {
                            nameStr.append(wordText).append(" ");
                            wordList.add(wk);
                        } else {
                            // Covers a mismatch, a word without text and a line that is too short.
                            System.out.println("NEGATIVE: " + nameStr + wordText + " != " + name[k]);
                            isName = false;
                            break;
                        }
                    }
                    if (isName) {
                        System.out.println("OK : " + nameStr);
                        for (WordType w : wordList) {
                            w.getTextStyle().setLetterSpaced(true);
                        }
                        break;
                    }
                }
            }
        }
        // Write the modified page back to the file it was read from.
        JaxbUtils.marshalToFile(pc, xmlFile);
    }
}
use of eu.transkribus.core.model.beans.pagecontent.TextLineType in project TranskribusCore by Transkribus.
the class TrpElementCoordinatesComparator method compare.
/**
 * Compares two shapes by the position of the upper-left corner of their bounding boxes.
 * <p>
 * The coordinate strings are taken from the region coords, from the baseline of a text line (falling
 * back to the line coords if either line has no baseline), from a baseline itself, or from the word
 * coords, depending on the runtime type of {@code o1}. If coordinates are missing for either
 * object, both are treated as having no points, which yields an empty bounding box.
 * <p>
 * For non-region shapes that belong to the same text region with an orientation set, both corner
 * points are rotated by that orientation before they are compared.
 * <p>
 * If {@code compareByYX} has not been set yet, it is determined from the type of {@code o1} on the
 * first call (X-then-Y for words, Y-then-X otherwise) and kept for subsequent calls.
 *
 * @param o1 the first shape; its runtime type selects how coordinates are read for both arguments
 * @param o2 the second shape; expected to be of the same kind as {@code o1}
 * @return the result of {@code compareByXY} or {@code compareByYX} for the two corner points
 */
@Override
public int compare(T o1, T o2) {
    logger.trace("compare in TrpElementCoordinatesComparator");
    String coords1 = "", coords2 = "";
    if (o1 instanceof RegionType) {
        RegionType r1 = (RegionType) o1;
        RegionType r2 = (RegionType) o2;
        if (r1.getCoords() != null && r2.getCoords() != null) {
            coords1 = r1.getCoords().getPoints();
            coords2 = r2.getCoords().getPoints();
        }
    } else if (TextLineType.class.isAssignableFrom(o1.getClass())) {
        TextLineType l1 = (TextLineType) o1;
        TextLineType l2 = (TextLineType) o2;
        if (l1.getBaseline() != null && l2.getBaseline() != null) {
            // if existing, take baseline to compare position of lines
            coords1 = l1.getBaseline().getPoints();
            coords2 = l2.getBaseline().getPoints();
        } else if (l1.getCoords() != null && l2.getCoords() != null) {
            // fall back if there are no baselines; guarded like the region and word branches
            coords1 = l1.getCoords().getPoints();
            coords2 = l2.getCoords().getPoints();
        }
    } else if (o1 instanceof TrpBaselineType) {
        coords1 = ((TrpBaselineType) o1).getPoints();
        coords2 = ((TrpBaselineType) o2).getPoints();
    } else if (WordType.class.isAssignableFrom(o1.getClass())) {
        WordType w1 = (WordType) o1;
        WordType w2 = (WordType) o2;
        if (w1.getCoords() != null && w2.getCoords() != null) {
            coords1 = w1.getCoords().getPoints();
            coords2 = w2.getCoords().getPoints();
        }
    }

    // determine orientation of (parent) text regions
    Float orientation = null;
    if (o1 instanceof ITrpShapeType && o2 instanceof ITrpShapeType && !(o1 instanceof RegionType) && !(o2 instanceof RegionType)) {
        TrpTextRegionType tr1 = TrpShapeTypeUtils.getTextRegion((ITrpShapeType) o1);
        TrpTextRegionType tr2 = TrpShapeTypeUtils.getTextRegion((ITrpShapeType) o2);
        if (tr1 != null && tr2 != null && StringUtils.equals(tr1.getId(), tr2.getId()) && tr1.getOrientation() != null) {
            orientation = tr1.getOrientation();
        }
    }

    Rectangle b1 = parsePolygonLenient(coords1).getBounds();
    Rectangle b2 = parsePolygonLenient(coords2).getBounds();
    Point pt1 = new Point(b1.x, b1.y);
    Point pt2 = new Point(b2.x, b2.y);
    if (orientation != null) {
        pt1 = GeomUtils.rotate(pt1, orientation);
        pt2 = GeomUtils.rotate(pt2, orientation);
        logger.trace("orientation set: " + orientation + " rotated points: " + pt1 + ", " + pt2);
    }

    if (compareByYX == null) {
        // if compareByYX was not set by constructor, determine via shape
        compareByYX = !WordType.class.isAssignableFrom(o1.getClass());
    }
    if (!compareByYX) {
        return compareByXY(pt1.x, pt2.x, pt1.y, pt2.y);
    } else {
        return compareByYX(pt1.x, pt2.x, pt1.y, pt2.y);
    }
}

/**
 * Builds a polygon from a points string; a parse failure is logged and leaves the polygon empty
 * instead of aborting the comparison.
 */
private java.awt.Polygon parsePolygonLenient(String coords) {
    java.awt.Polygon polygon = new java.awt.Polygon();
    try {
        for (java.awt.Point p : PointStrUtils.parsePoints(coords)) {
            polygon.addPoint(p.x, p.y);
        }
    } catch (Exception e) {
        logger.error(e.getMessage(), e);
    }
    return polygon;
}
Aggregations