Search in sources :

Example 21 with TextLineType

use of eu.transkribus.core.model.beans.pagecontent.TextLineType in project TranskribusCore by Transkribus.

the class PageXmlUtils method setTextToLine.

/**
 * Sets the Unicode text of the line with the given id.
 * <p>
 * The line is looked up via {@code findLineById}. If no such line exists the
 * call only logs a message and returns. If the line has no {@code TextEquiv}
 * element yet, a new one is created and attached; otherwise the existing one
 * is updated in place.
 *
 * @param text   the text to store as the line's Unicode content
 * @param pc     the page content to search for the line
 * @param lineId the id of the line to update
 */
public static void setTextToLine(String text, PcGtsType pc, String lineId) {
    final TextLineType line = findLineById(pc, lineId);
    if (line == null) {
        logger.info("Line does not exist: " + lineId);
        return;
    }
    logger.debug("Setting text in line=" + lineId + ": " + text);

    final TextEquivType existing = line.getTextEquiv();
    if (existing != null) {
        logger.debug("Setting text in existing TextEquiv element.");
        existing.setUnicode(text);
        return;
    }

    logger.debug("Creating new TextEquiv element.");
    final TextEquivType created = new TextEquivType();
    created.setUnicode(text);
    line.setTextEquiv(created);
}
Also used : TextEquivType(eu.transkribus.core.model.beans.pagecontent.TextEquivType) TextLineType(eu.transkribus.core.model.beans.pagecontent.TextLineType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType)

Example 22 with TextLineType

use of eu.transkribus.core.model.beans.pagecontent.TextLineType in project TranskribusCore by Transkribus.

the class PageXmlUtils method removeAllIndexedTags.

/**
 * Removes the indexed tags from the custom tag lists of the page's text
 * regions, of every line in those regions and of every word in those lines.
 * <p>
 * The page, lines and words are cast to their {@code Trp*} subtypes, so a
 * {@link ClassCastException} is thrown if the page content was not built with
 * those types.
 *
 * @param pc the page content whose tags are cleaned
 */
public static void removeAllIndexedTags(PcGtsType pc) {
    final TrpPageType page = (TrpPageType) pc.getPage();
    // NOTE(review): the meaning of the 'true' flag is defined by TrpPageType.getTextRegions - confirm there
    for (TrpTextRegionType region : page.getTextRegions(true)) {
        region.getCustomTagList().removeIndexedTags();
        for (TextLineType rawLine : region.getTextLine()) {
            final TrpTextLineType line = (TrpTextLineType) rawLine;
            line.getCustomTagList().removeIndexedTags();
            for (WordType rawWord : line.getWord()) {
                ((TrpWordType) rawWord).getCustomTagList().removeIndexedTags();
            }
        }
    }
}
Also used : TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TextLineType(eu.transkribus.core.model.beans.pagecontent.TextLineType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType) TrpPageType(eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType) WordType(eu.transkribus.core.model.beans.pagecontent.WordType) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType)

Example 23 with TextLineType

use of eu.transkribus.core.model.beans.pagecontent.TextLineType in project TranskribusCore by Transkribus.

the class ExportCache method storeCustomTagMapForDoc.

/**
 * Clears the stored tags and re-collects them from the selected pages of the
 * given document by calling {@code getTagsForShapeElement} on every line and,
 * if requested, on every word.
 * <p>
 * For each page the cached transcript from {@code pageTranscripts} is used when
 * available; otherwise a transcript is created from the page's current
 * transcript metadata. The transcript is (re)built before it is read.
 *
 * @param doc         the document whose pages are scanned
 * @param wordBased   if {@code true}, the words of each line are scanned in addition to the line itself
 * @param pageIndices zero-based indices of the pages to process; {@code null} processes all pages
 * @param monitor     optional progress monitor; may be {@code null}
 * @param blackening  value stored in the {@code doBlackening} field before scanning
 * @throws JAXBException        declared by the method; presumably raised while building a transcript
 * @throws IOException          declared by the method; presumably raised while building a transcript
 * @throws InterruptedException if the monitor reports that the user canceled
 */
public void storeCustomTagMapForDoc(TrpDoc doc, boolean wordBased, Set<Integer> pageIndices, IProgressMonitor monitor, boolean blackening) throws JAXBException, IOException, InterruptedException {
    doBlackening = blackening;
    tags.clear();

    final List<TrpPage> pages = doc.getPages();
    final int totalPages = pages.size();
    int processed = 0;

    for (int pageIdx = 0; pageIdx < totalPages; pageIdx++) {
        final boolean selected = pageIndices == null || pageIndices.contains(pageIdx);
        if (!selected) {
            continue;
        }
        if (monitor != null && monitor.isCanceled()) {
            throw new InterruptedException("User canceled the export");
        }

        // Transcripts are normally fetched before this method runs, so the cache lookup usually succeeds.
        JAXBPageTranscript transcript = (pageTranscripts == null) ? null : pageTranscripts.get(pageIdx);
        if (transcript == null) {
            final TrpTranscriptMetadata md = pages.get(pageIdx).getCurrentTranscript();
            transcript = new JAXBPageTranscript(md);
        } else {
            // result is intentionally unused; the call is kept exactly as in the original code
            transcript.getPageData();
        }
        transcript.build();

        final TrpPageType trpPage = transcript.getPage();
        logger.debug("get tags for page " + (pageIdx + 1) + "/" + doc.getNPages());

        for (TrpTextRegionType region : trpPage.getTextRegions(true)) {
            for (TextLineType rawLine : region.getTextLine()) {
                final TrpTextLineType line = (TrpTextLineType) rawLine;
                final List<WordType> words = line.getWord();
                // the line itself is always scanned, regardless of wordBased
                getTagsForShapeElement(line);
                if (!wordBased) {
                    continue;
                }
                for (WordType rawWord : words) {
                    getTagsForShapeElement((TrpWordType) rawWord);
                }
            }
        }

        if (monitor != null) {
            monitor.setTaskName("Loaded tags for page " + (pageIdx + 1));
            // NOTE(review): this passes the cumulative page count; if the monitor expects
            // an increment per call, progress advances too fast - confirm against the monitor API
            processed++;
            monitor.worked(processed);
        }
    }
}
Also used : JAXBPageTranscript(eu.transkribus.core.model.beans.JAXBPageTranscript) TrpPage(eu.transkribus.core.model.beans.TrpPage) TrpTranscriptMetadata(eu.transkribus.core.model.beans.TrpTranscriptMetadata) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType) WordType(eu.transkribus.core.model.beans.pagecontent.WordType) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TextLineType(eu.transkribus.core.model.beans.pagecontent.TextLineType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) TrpPageType(eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)

Example 24 with TextLineType

use of eu.transkribus.core.model.beans.pagecontent.TextLineType in project TranskribusCore by Transkribus.

the class FinereaderUtils method addTextStyleToWords.

/**
 * Marks speaker names at the beginning of text lines as letter-spaced and
 * writes the modified page XML back to the file it was read from.
 * <p>
 * For every page of the document the current transcript's XML file is
 * unmarshalled, the words of each line are sorted with a
 * {@code TrpElementCoordinatesComparator}, and the leading word(s) are compared
 * against a fixed list of names using {@code isMatch}. For the first name whose
 * parts all match, the matching words get {@code letterSpaced = true}.
 * Progress and match results are printed to standard output.
 *
 * @param doc the document whose page XML files are processed and overwritten
 * @throws JAXBException         declared by the method; presumably raised by (un)marshalling
 * @throws FileNotFoundException declared by the method; presumably raised when a page XML file is missing
 */
public static void addTextStyleToWords(TrpDoc doc) throws JAXBException, FileNotFoundException {
    /*
     * Original request (translated from German):
     * "In the following folder I have stored the book in which the speaker
     * designations should be marked as 'letter-spaced' automatically (provided
     * this can be done with reasonable effort):
     *   ftp://ftp.uibk.ac.at/private/x3061015_20140902_78e054475d7532953c204ce6d392d8e9/Andy_Barbara_Bettina/zu_bearbeiten/
     * These are the following names, provided they stand at the beginning of a line:"
     *
     *   Ernst, Albrecht, Preising, Marschall, Pappenheim, Pienzenau, Bern,
     *   Törring, Nothafft von Wernberg, Frauenhoven, Hans von Läubelfing,
     *   Caspar Bernauer, Agnes, Theobald, Knippeldollinger, Bürgermeister,
     *   Barbara, Martha, Stachus, Der Kastellan, Herold, Legat
     *
     * FIXME Der Herold
     * FIXME Der Legat
     * (the array below contains "Der Herold" / "Der Legat" rather than the bare
     * "Herold" / "Legat" from the list above)
     */
    String[] names = { "Ernst", "Albrecht", "Preising", "Marschall", "Pappenheim", "Pienzenau", "Bern", "Törring", "Nothafft von Wernberg", "Frauenhoven", "Hans von Läubelfing", "Caspar Bernauer", "Agnes", "Theobald", "Knippeldollinger", "Bürgermeister", "Barbara", "Martha", "Stachus", "Der Kastellan", "Der Herold", "Der Legat" };
    // nameList.get(i) holds the space-separated parts of names[i]; nameStartList.get(i) is its first part
    List<String[]> nameList = new ArrayList<>(names.length);
    List<String> nameStartList = new ArrayList<>(names.length);
    for (int i = 0; i < names.length; i++) {
        String[] parts = names[i].split(" ");
        StringBuilder partsStr = new StringBuilder("{ ");
        for (String part : parts) {
            partsStr.append(part).append('|');
        }
        System.out.println(i + "\t- splitting: " + partsStr + "}");
        nameList.add(parts);
        nameStartList.add(parts[0]);
    }
    TrpElementCoordinatesComparator<WordType> wordComp = new TrpElementCoordinatesComparator<WordType>();
    for (TrpPage p : doc.getPages()) {
        System.out.println("Processing page: " + p.getPageNr());
        URL url = p.getCurrentTranscript().getUrl();
        final String xmlPath = FileUtils.toFile(url).getAbsolutePath();
        File xmlFile = new File(xmlPath);
        PcGtsType pc = JaxbUtils.unmarshal(xmlFile, PcGtsType.class);
        List<TextRegionType> regions = PageXmlUtils.getTextRegions(pc);
        for (TextRegionType r : regions) {
            for (TextLineType l : r.getTextLine()) {
                List<WordType> words = l.getWord();
                if (words == null || words.isEmpty()) {
                    continue;
                }
                Collections.sort(words, wordComp);

                // Only the first word of the line can start a speaker name.
                WordType firstWord = words.get(0);
                String firstText = firstWord.getTextEquiv() != null ? firstWord.getTextEquiv().getUnicode() : null;
                if (firstText == null) {
                    continue;
                }

                // Collect indices of all names whose first part matches the first word.
                List<Integer> candidatesIndex = new ArrayList<>();
                for (int j = 0; j < nameStartList.size(); j++) {
                    if (isMatch(firstText, nameStartList.get(j))) {
                        candidatesIndex.add(j);
                    }
                }

                for (Integer index : candidatesIndex) {
                    String[] name = nameList.get(index);
                    if (name.length == 1) {
                        // Single-word name: done.
                        // NOTE(review): getTextStyle() is dereferenced without a null check - confirm it is never null here
                        firstWord.getTextStyle().setLetterSpaced(true);
                        // always index 0: the previous shared loop index could be stale after a failed multi-word candidate
                        System.out.println("OK: " + name[0]);
                        break;
                    }

                    // Multi-word name: every subsequent part must match the word at the same position.
                    List<WordType> wordList = new ArrayList<>(name.length);
                    wordList.add(firstWord);
                    StringBuilder nameStr = new StringBuilder(firstText).append(' ');
                    boolean isName = true;
                    for (int k = 1; k < name.length; k++) {
                        // The line may be shorter than the name, and a word may carry no text.
                        WordType wk = k < words.size() ? words.get(k) : null;
                        String wkText = (wk != null && wk.getTextEquiv() != null) ? wk.getTextEquiv().getUnicode() : null;
                        if (wkText != null && isMatch(wkText, name[k])) {
                            nameStr.append(wkText).append(' ');
                            wordList.add(wk);
                        } else {
                            System.out.println("NEGATIVE: " + nameStr + wkText + " != " + name[k]);
                            isName = false;
                            break;
                        }
                    }
                    if (isName) {
                        System.out.println("OK : " + nameStr);
                        for (WordType w : wordList) {
                            w.getTextStyle().setLetterSpaced(true);
                        }
                        break;
                    }
                }
            }
        }
        // Write the modified page XML back to the file it was read from.
        JaxbUtils.marshalToFile(pc, xmlFile);
    }
}
Also used : TrpPage(eu.transkribus.core.model.beans.TrpPage) ArrayList(java.util.ArrayList) TrpElementCoordinatesComparator(eu.transkribus.core.model.beans.pagecontent_trp.TrpElementCoordinatesComparator) PcGtsType(eu.transkribus.core.model.beans.pagecontent.PcGtsType) URL(java.net.URL) WordType(eu.transkribus.core.model.beans.pagecontent.WordType) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) TextRegionType(eu.transkribus.core.model.beans.pagecontent.TextRegionType) TextLineType(eu.transkribus.core.model.beans.pagecontent.TextLineType) File(java.io.File)

Example 25 with TextLineType

use of eu.transkribus.core.model.beans.pagecontent.TextLineType in project TranskribusCore by Transkribus.

the class TrpElementCoordinatesComparator method compare.

/**
 * Compares two shapes by the top-left corner of the bounding box of their
 * point strings.
 * <p>
 * The point string is taken from the coordinates of regions and words, from the
 * baseline of text lines (falling back to the line coordinates when either
 * line has no baseline), or directly from a baseline object. If the required
 * coordinates are missing, or the point string cannot be parsed, the affected
 * shape is treated as an empty polygon. The type of {@code o2} is assumed to
 * match that of {@code o1}; it is cast without a check.
 * <p>
 * When both shapes are non-region {@code ITrpShapeType}s inside the same text
 * region and that region has an orientation, both corner points are rotated by
 * it before comparison.
 * <p>
 * If {@code compareByYX} is still {@code null}, it is set on this call from the
 * type of {@code o1} ({@code false} for words, {@code true} otherwise) and kept
 * in the field for later calls.
 *
 * @param o1 the first shape
 * @param o2 the second shape
 * @return the result of {@code compareByXY} or {@code compareByYX} on the two corner points
 */
@Override
public int compare(T o1, T o2) {
    logger.trace("compare in TrpElementCoordinatesComparator");
    String coords1 = "", coords2 = "";
    if (o1 instanceof RegionType) {
        RegionType r1 = (RegionType) o1;
        RegionType r2 = (RegionType) o2;
        if (r1.getCoords() != null && r2.getCoords() != null) {
            coords1 = r1.getCoords().getPoints();
            coords2 = r2.getCoords().getPoints();
        }
    } else if (TextLineType.class.isAssignableFrom(o1.getClass())) {
        TextLineType l1 = (TextLineType) o1;
        TextLineType l2 = (TextLineType) o2;
        if (l1.getBaseline() != null && l2.getBaseline() != null) {
            // if existing, take baseline to compare position of lines
            coords1 = l1.getBaseline().getPoints();
            coords2 = l2.getBaseline().getPoints();
        } else if (l1.getCoords() != null && l2.getCoords() != null) {
            // fall back if there are no baselines; guarded like the region and word
            // branches so that missing coords do not raise a NullPointerException
            coords1 = l1.getCoords().getPoints();
            coords2 = l2.getCoords().getPoints();
        }
    } else if (o1 instanceof TrpBaselineType) {
        coords1 = ((TrpBaselineType) o1).getPoints();
        coords2 = ((TrpBaselineType) o2).getPoints();
    } else if (WordType.class.isAssignableFrom(o1.getClass())) {
        WordType w1 = (WordType) o1;
        WordType w2 = (WordType) o2;
        if (w1.getCoords() != null && w2.getCoords() != null) {
            coords1 = w1.getCoords().getPoints();
            coords2 = w2.getCoords().getPoints();
        }
    }

    // determine orientation of (parent) text regions
    Float orientation = null;
    if (o1 instanceof ITrpShapeType && o2 instanceof ITrpShapeType && !(o1 instanceof RegionType) && !(o2 instanceof RegionType)) {
        TrpTextRegionType tr1 = TrpShapeTypeUtils.getTextRegion((ITrpShapeType) o1);
        TrpTextRegionType tr2 = TrpShapeTypeUtils.getTextRegion((ITrpShapeType) o2);
        if (tr1 != null && tr2 != null && StringUtils.equals(tr1.getId(), tr2.getId()) && tr1.getOrientation() != null) {
            orientation = tr1.getOrientation();
        }
    }

    // Build polygons from the point strings; a parse failure is logged and leaves the polygon empty.
    java.awt.Polygon p1 = new java.awt.Polygon();
    try {
        for (java.awt.Point p : PointStrUtils.parsePoints(coords1)) {
            p1.addPoint(p.x, p.y);
        }
    } catch (Exception e) {
        logger.error(e.getMessage(), e);
    }
    java.awt.Polygon p2 = new java.awt.Polygon();
    try {
        for (java.awt.Point p : PointStrUtils.parsePoints(coords2)) {
            p2.addPoint(p.x, p.y);
        }
    } catch (Exception e) {
        logger.error(e.getMessage(), e);
    }

    // Compare by the top-left corner of each bounding box.
    Rectangle b1 = p1.getBounds();
    Rectangle b2 = p2.getBounds();
    Point pt1 = new Point(b1.x, b1.y);
    Point pt2 = new Point(b2.x, b2.y);
    if (orientation != null) {
        pt1 = GeomUtils.rotate(pt1, orientation);
        pt2 = GeomUtils.rotate(pt2, orientation);
        logger.trace("orientation set: " + orientation + " rotated points: " + pt1 + ", " + pt2);
    }

    if (compareByYX == null) {
        // if compareByYX was not set by constructor, determine via shape
        compareByYX = !WordType.class.isAssignableFrom(o1.getClass());
    }
    if (!compareByYX) {
        return compareByXY(pt1.x, pt2.x, pt1.y, pt2.y);
    } else {
        return compareByYX(pt1.x, pt2.x, pt1.y, pt2.y);
    }
}
Also used : RegionType(eu.transkribus.core.model.beans.pagecontent.RegionType) Rectangle(java.awt.Rectangle) Point(java.awt.Point) Point(java.awt.Point) WordType(eu.transkribus.core.model.beans.pagecontent.WordType) TextLineType(eu.transkribus.core.model.beans.pagecontent.TextLineType)

Aggregations

TextLineType (eu.transkribus.core.model.beans.pagecontent.TextLineType)27 TrpTextLineType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType)19 WordType (eu.transkribus.core.model.beans.pagecontent.WordType)13 TrpTextRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType)13 TextRegionType (eu.transkribus.core.model.beans.pagecontent.TextRegionType)9 TrpWordType (eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType)9 RegionType (eu.transkribus.core.model.beans.pagecontent.RegionType)6 Rectangle (java.awt.Rectangle)6 ArrayList (java.util.ArrayList)5 TrpPage (eu.transkribus.core.model.beans.TrpPage)4 TrpElementCoordinatesComparator (eu.transkribus.core.model.beans.pagecontent_trp.TrpElementCoordinatesComparator)4 TrpPageType (eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)4 TrpRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType)4 TrpTranscriptMetadata (eu.transkribus.core.model.beans.TrpTranscriptMetadata)3 TrpBaselineType (eu.transkribus.core.model.beans.pagecontent_trp.TrpBaselineType)3 Point (java.awt.Point)3 JAXBPageTranscript (eu.transkribus.core.model.beans.JAXBPageTranscript)2 TrpTranscriptStatistics (eu.transkribus.core.model.beans.TrpTranscriptStatistics)2 PcGtsType (eu.transkribus.core.model.beans.pagecontent.PcGtsType)2 TextEquivType (eu.transkribus.core.model.beans.pagecontent.TextEquivType)2