Search in sources :

Example 11 with TextLineType

use of eu.transkribus.core.model.beans.pagecontent.TextLineType in project TranskribusCore by Transkribus.

the class TrpTextRegionType method getTextFromWords.

/**
 * Builds the text of this region from the word-level text of its lines.
 * <p>
 * Each line contributes the result of its own {@code getTextFromWords(fillEmptyWords)};
 * the per-line results are separated by a single {@code "\n"} with no trailing newline.
 * A region without lines yields the empty string.
 *
 * @param fillEmptyWords passed through unchanged to every line's {@code getTextFromWords}
 * @return the newline-separated text of all lines
 */
public String getTextFromWords(boolean fillEmptyWords) {
    // StringBuilder avoids the quadratic copying of repeated String concatenation
    StringBuilder textAll = new StringBuilder();
    boolean first = true;
    for (TextLineType l : getTextLine()) {
        // separator goes between entries, so no trailing newline has to be stripped afterwards
        if (!first) {
            textAll.append('\n');
        }
        first = false;
        TrpTextLineType trpLine = (TrpTextLineType) l;
        textAll.append(trpLine.getTextFromWords(fillEmptyWords));
    }
    return textAll.toString();
}
Also used : TextLineType(eu.transkribus.core.model.beans.pagecontent.TextLineType)

Example 12 with TextLineType

use of eu.transkribus.core.model.beans.pagecontent.TextLineType in project TranskribusCore by Transkribus.

the class TrpTextRegionType method clearTextForAllLines.

/**
 * Replaces the unicode text of every line of this region with the empty string.
 *
 * @param who passed through unchanged to each line's {@code setUnicodeText} call
 */
public void clearTextForAllLines(Object who) {
    for (TextLineType line : getTextLine()) {
        ((TrpTextLineType) line).setUnicodeText("", who);
    }
}
Also used : TextLineType(eu.transkribus.core.model.beans.pagecontent.TextLineType)

Example 13 with TextLineType

use of eu.transkribus.core.model.beans.pagecontent.TextLineType in project TranskribusCore by Transkribus.

the class TrpXlsxBuilder method writeXlsxForDoc.

/**
 * Writes the tags of a document's pages into an xlsx workbook and stores it at the given file.
 * <p>
 * For every selected page the transcript is taken from the cache if available, otherwise it is
 * built from the page's current transcript. Depending on {@code wordBased}, tags are written
 * per word or per line via {@code writeTagsForShapeElement}. Afterwards the columns of every
 * sheet are auto-sized according to the cell count of the sheet's first row, and the workbook
 * is written to {@code exportFile}.
 *
 * @param doc         the document whose pages are exported
 * @param wordBased   if true, tags are written for each word; otherwise for each line
 * @param exportFile  target file of the workbook
 * @param pageIndices zero-based indices of the pages to export; {@code null} exports all pages
 * @param monitor     optional progress monitor; may be {@code null}
 * @param cache       export cache providing tag map, selected tag names and transcripts; must not be {@code null}
 * @throws IllegalArgumentException if {@code cache} is {@code null}
 * @throws NoTagsException          if the cache's custom tag map is empty
 * @throws InterruptedException     if the monitor reports cancellation
 * @throws IOException              if the workbook contains no sheets or cannot be written
 * @throws Exception                anything raised while building transcripts or writing tags
 */
public static void writeXlsxForDoc(TrpDoc doc, boolean wordBased, File exportFile, Set<Integer> pageIndices, IProgressMonitor monitor, ExportCache cache) throws NoTagsException, Exception {
    if (cache == null) {
        throw new IllegalArgumentException("ExportCache must not be null.");
    }
    if (cache.getCustomTagMapForDoc().isEmpty()) {
        logger.info("No tags to store -> Xlsx export cancelled");
        throw new NoTagsException("No tags available to store into Xlsx");
    }
    List<TrpPage> pages = doc.getPages();
    String exportPath = exportFile.getPath();
    Set<String> selectedTags = cache.getOnlySelectedTagnames(ExportUtils.getOnlyWantedTagnames(CustomTagFactory.getRegisteredTagNames()));
    int totalPages = pageIndices == null ? pages.size() : pageIndices.size();
    if (monitor != null) {
        monitor.beginTask("Exporting to Excel", totalPages);
    }
    wb = new XSSFWorkbook();
    // number of pages processed so far (only counts pages that were not skipped)
    int c = 0;
    for (int i = 0; i < pages.size(); ++i) {
        if (pageIndices != null && !pageIndices.contains(i)) {
            continue;
        }
        if (monitor != null) {
            if (monitor.isCanceled()) {
                throw new InterruptedException("Export was canceled by user");
            }
            monitor.subTask("Processing page " + (c + 1));
        }
        TrpPage page = pages.get(i);
        // try to get previously loaded JAXB transcript; cache is known to be non-null here
        JAXBPageTranscript tr = cache.getPageTranscriptAtIndex(i);
        if (tr == null) {
            TrpTranscriptMetadata md = page.getCurrentTranscript();
            tr = new JAXBPageTranscript(md);
            tr.build();
        }
        TrpPageType trpPage = tr.getPage();
        logger.debug("writing xlsx for page " + (i + 1) + "/" + doc.getNPages());
        List<TrpTextRegionType> textRegions = trpPage.getTextRegions(true);
        for (int j = 0; j < textRegions.size(); ++j) {
            TrpTextRegionType r = textRegions.get(j);
            List<TextLineType> lines = r.getTextLine();
            for (int k = 0; k < lines.size(); ++k) {
                TrpTextLineType trpL = (TrpTextLineType) lines.get(k);
                List<WordType> words = trpL.getWord();
                if (wordBased) {
                    for (int l = 0; l < words.size(); ++l) {
                        TrpWordType w = (TrpWordType) words.get(l);
                        writeTagsForShapeElement(w, trpL.getUnicodeText(), String.valueOf(doc.getId()), String.valueOf(page.getPageNr()), r.getId(), trpL.getId(), w.getId(), selectedTags);
                    }
                } else {
                    writeTagsForShapeElement(trpL, trpL.getUnicodeText(), String.valueOf(doc.getId()), String.valueOf(page.getPageNr()), r.getId(), trpL.getId(), "", selectedTags);
                }
            }
        }
        ++c;
        if (monitor != null) {
            // NOTE(review): this passes the cumulative page count; if IProgressMonitor is the
            // Eclipse interface, worked() expects the increment since the last call - confirm.
            monitor.worked(c);
        }
    }
    // auto size the columns of every sheet, using the first (header) row to determine the column count
    for (int i = 0; i < wb.getNumberOfSheets(); i++) {
        Iterator<Row> rowIterator = wb.getSheetAt(i).rowIterator();
        if (rowIterator.hasNext()) {
            Row headerRow = rowIterator.next();
            int numberOfCells = headerRow.getPhysicalNumberOfCells();
            for (int j = 0; j < numberOfCells; j++) {
                wb.getSheetAt(i).autoSizeColumn(j);
            }
        }
    }
    try {
        // means no tags at all
        if (wb.getNumberOfSheets() == 0) {
            throw new IOException("Sorry - No tags available for export");
        }
        // try-with-resources closes the stream even if writing the workbook fails
        try (FileOutputStream fOut = new FileOutputStream(exportPath)) {
            wb.write(fOut);
        }
    } catch (IOException e) {
        logger.error("Could not write xlsx to: " + exportPath, e);
        throw e;
    }
    logger.info("wrote xlsx to: " + exportPath);
}
Also used : NoTagsException(eu.transkribus.core.model.builder.NoTagsException) JAXBPageTranscript(eu.transkribus.core.model.beans.JAXBPageTranscript) TrpPage(eu.transkribus.core.model.beans.TrpPage) TrpTranscriptMetadata(eu.transkribus.core.model.beans.TrpTranscriptMetadata) IOException(java.io.IOException) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType) WordType(eu.transkribus.core.model.beans.pagecontent.WordType) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TextLineType(eu.transkribus.core.model.beans.pagecontent.TextLineType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) FileOutputStream(java.io.FileOutputStream) Iterator(java.util.Iterator) XSSFWorkbook(org.apache.poi.xssf.usermodel.XSSFWorkbook) Row(org.apache.poi.ss.usermodel.Row) TrpPageType(eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)

Example 14 with TextLineType

use of eu.transkribus.core.model.beans.pagecontent.TextLineType in project TranskribusCore by Transkribus.

the class TrpPdfDocument method hasSmallerColumn.

/**
 * Checks whether at least one other text region containing text lies to the left of the given region.
 * <p>
 * Another region counts as a "smaller column" if it overlaps vertically with
 * {@code regionToCompare} (its vertical center lies strictly inside the compared region's
 * vertical extent, or vice versa) and its right edge lies left of the compared region's
 * horizontal center. Regions without lines, or whose lines all have empty text, are ignored,
 * as are non-text regions and any region with the same id as {@code regionToCompare}.
 * <p>
 * Side effect: {@code smallerRegionMaxX} is reset to 0 and then set to the largest right edge
 * among all matching regions.
 *
 * @param regions         all regions to examine
 * @param regionToCompare the region for which a column to its left is searched
 * @return true if at least one matching region was found
 */
private boolean hasSmallerColumn(List<TrpRegionType> regions, TextRegionType regionToCompare) throws DocumentException, IOException {
    java.awt.Rectangle compareBlock = regionToCompare.getBoundingBox();
    float compareMinX = (float) compareBlock.getMinX();
    float compareMinY = (float) compareBlock.getMinY();
    float compareMaxX = (float) compareBlock.getMaxX();
    float compareMaxY = (float) compareBlock.getMaxY();
    float compareMeanX = compareMinX + (compareMaxX - compareMinX) / 2;
    float compareMeanY = compareMinY + (compareMaxY - compareMinY) / 2;
    boolean foundSmallerColumn = false;
    smallerRegionMaxX = 0;
    if (regions.size() == 1) {
        return false;
    }
    String compareId = regionToCompare.getId();
    for (RegionType r : regions) {
        // TODO add paths for tables etc.
        if (!(r instanceof TextRegionType)) {
            continue;
        }
        // compare ids by value; reference comparison (!=) only works for identical String instances
        String id = r.getId();
        boolean sameId = (id == null) ? (compareId == null) : id.equals(compareId);
        if (sameId) {
            continue;
        }
        TextRegionType tr = (TextRegionType) r;
        // regions without lines, or with only empty lines, can be ignored
        boolean textFound = false;
        for (TextLineType tlt : tr.getTextLine()) {
            TrpTextLineType l = (TrpTextLineType) tlt;
            if (!l.getUnicodeText().isEmpty()) {
                textFound = true;
                break;
            }
        }
        if (!textFound) {
            continue;
        }
        java.awt.Rectangle block = tr.getBoundingBox();
        float maxX = (float) block.getMaxX();
        float minY = (float) block.getMinY();
        float maxY = (float) block.getMaxY();
        float meanY = minY + (maxY - minY) / 2;
        boolean overlapsVertically = (meanY > compareMinY && meanY < compareMaxY) || (compareMeanY > minY && compareMeanY < maxY);
        if (overlapsVertically && maxX < compareMeanX) {
            // to find the biggest maxX if there are several smaller columns
            if (maxX > smallerRegionMaxX) {
                smallerRegionMaxX = maxX;
            }
            foundSmallerColumn = true;
        }
    }
    return foundSmallerColumn;
}
Also used : TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) TextRegionType(eu.transkribus.core.model.beans.pagecontent.TextRegionType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) Rectangle(java.awt.Rectangle) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) UnknownRegionType(eu.transkribus.core.model.beans.pagecontent.UnknownRegionType) TrpRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType) TrpTableRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTableRegionType) RegionType(eu.transkribus.core.model.beans.pagecontent.RegionType) TextRegionType(eu.transkribus.core.model.beans.pagecontent.TextRegionType) TextLineType(eu.transkribus.core.model.beans.pagecontent.TextLineType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType)

Example 15 with TextLineType

use of eu.transkribus.core.model.beans.pagecontent.TextLineType in project TranskribusCore by Transkribus.

the class TrpTeiStringBuilder method writeZonesForTextRegion.

/**
 * Emits the zones of a text region and, depending on the configured parameters, of its lines and words.
 * <p>
 * The facsimile id passed to every zone is {@code FACS_ID_PREFIX + pageNr}. The last argument
 * of {@code writeZoneForShape} is true exactly when no zones are nested inside the one being
 * written (presumably it makes the helper close the zone element itself - confirm against the
 * helper); otherwise the enclosing zone is closed here via {@code closeElement} after its
 * children were written.
 *
 * @param sb     builder the output is appended to
 * @param r      the text region to write zones for
 * @param pageNr page number used to build the facsimile id
 */
void writeZonesForTextRegion(SebisStringBuilder sb, TextRegionType r, int pageNr) {
    final String facsimileId = FACS_ID_PREFIX + pageNr;

    if (pars.regionZones) {
        boolean regionHasNoNestedZones = !pars.lineZones && !pars.wordZones;
        writeZoneForShape(sb, (TrpTextRegionType) r, facsimileId, regionHasNoNestedZones);
    }

    // neither line nor word zones requested: nothing further to write
    if (!(pars.lineZones || pars.wordZones)) {
        return;
    }

    for (TextLineType rawLine : r.getTextLine()) {
        TrpTextLineType line = (TrpTextLineType) rawLine;
        if (pars.lineZones) {
            writeZoneForShape(sb, line, facsimileId, !pars.wordZones);
        }
        if (!pars.wordZones) {
            continue;
        }
        for (WordType rawWord : line.getWord()) {
            writeZoneForShape(sb, (TrpWordType) rawWord, facsimileId, true);
        }
        // the line zone was left open above so the word zones could be nested in it
        if (pars.lineZones) {
            closeElement(sb, "zone");
        }
    }

    // the region zone was left open above so the line/word zones could be nested in it
    if (pars.regionZones) {
        closeElement(sb, "zone");
    }
}
Also used : TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TextLineType(eu.transkribus.core.model.beans.pagecontent.TextLineType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType) WordType(eu.transkribus.core.model.beans.pagecontent.WordType) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType)

Aggregations

TextLineType (eu.transkribus.core.model.beans.pagecontent.TextLineType)27 TrpTextLineType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType)19 WordType (eu.transkribus.core.model.beans.pagecontent.WordType)13 TrpTextRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType)13 TextRegionType (eu.transkribus.core.model.beans.pagecontent.TextRegionType)9 TrpWordType (eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType)9 RegionType (eu.transkribus.core.model.beans.pagecontent.RegionType)6 Rectangle (java.awt.Rectangle)6 ArrayList (java.util.ArrayList)5 TrpPage (eu.transkribus.core.model.beans.TrpPage)4 TrpElementCoordinatesComparator (eu.transkribus.core.model.beans.pagecontent_trp.TrpElementCoordinatesComparator)4 TrpPageType (eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)4 TrpRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpRegionType)4 TrpTranscriptMetadata (eu.transkribus.core.model.beans.TrpTranscriptMetadata)3 TrpBaselineType (eu.transkribus.core.model.beans.pagecontent_trp.TrpBaselineType)3 Point (java.awt.Point)3 JAXBPageTranscript (eu.transkribus.core.model.beans.JAXBPageTranscript)2 TrpTranscriptStatistics (eu.transkribus.core.model.beans.TrpTranscriptStatistics)2 PcGtsType (eu.transkribus.core.model.beans.pagecontent.PcGtsType)2 TextEquivType (eu.transkribus.core.model.beans.pagecontent.TextEquivType)2