Search in sources :

Example 6 with TrpPageType

use of eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType in project TranskribusCore by Transkribus.

the class TrpRtfBuilder method writeRtfForDoc.

/**
 * Writes the current transcripts of the selected pages of a document into one RTF file.
 * <p>
 * Every exported page becomes its own RTF section. If the monitor reports cancellation
 * before a page is processed, the method returns without writing anything to {@code file}.
 *
 * @param doc         the document whose pages are exported
 * @param wordBased   forwarded to {@code getRtfParagraphsForTranscript} for every page
 * @param file        the RTF file to write
 * @param pageIndices zero-based indices of the pages to export; {@code null} exports all pages
 * @param monitor     optional progress monitor; may be {@code null}
 * @throws JAXBException declared for building the page transcripts
 * @throws IOException   if the output file cannot be opened, written or closed
 */
public static void writeRtfForDoc(TrpDoc doc, boolean wordBased, File file, Set<Integer> pageIndices, IProgressMonitor monitor) throws JAXBException, IOException {
    Rtf rtf = Rtf.rtf();
    List<TrpPage> pages = doc.getPages();
    int totalPages = pageIndices == null ? pages.size() : pageIndices.size();
    if (monitor != null) {
        monitor.beginTask("Exporting to RTF", totalPages);
    }
    // number of pages exported so far (only used for the sub task label)
    int c = 0;
    for (int i = 0; i < pages.size(); ++i) {
        if (pageIndices != null && !pageIndices.contains(i)) {
            continue;
        }
        if (monitor != null) {
            if (monitor.isCanceled()) {
                logger.debug("RTF export cancelled!");
                return;
            }
            monitor.subTask("Processing page " + (c + 1));
        }
        TrpPage page = pages.get(i);
        TrpTranscriptMetadata md = page.getCurrentTranscript();
        JAXBPageTranscript tr = new JAXBPageTranscript(md);
        tr.build();
        TrpPageType trpPage = tr.getPage();
        logger.debug("writing rtf for page " + (i + 1) + "/" + doc.getNPages());
        rtf.section(getRtfParagraphsForTranscript(trpPage, wordBased));
        ++c;
        if (monitor != null) {
            // NOTE(review): assumes Eclipse-style IProgressMonitor, where worked() takes the
            // increment since the last call and not the cumulative amount - confirm.
            monitor.worked(1);
        }
    }
    // Make sure the writer is closed even if writing the document fails.
    try (FileWriter writer = new FileWriter(file)) {
        rtf.out(writer);
    }
    logger.info("wrote rtf to: " + file.getAbsolutePath());
}
Also used : JAXBPageTranscript(eu.transkribus.core.model.beans.JAXBPageTranscript) Rtf(com.tutego.jrtf.Rtf) TrpPage(eu.transkribus.core.model.beans.TrpPage) FileWriter(java.io.FileWriter) TrpTranscriptMetadata(eu.transkribus.core.model.beans.TrpTranscriptMetadata) TrpPageType(eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)

Example 7 with TrpPageType

use of eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType in project TranskribusCore by Transkribus.

the class CustomTagListTest method testCommonTag.

/**
 * Adds a bold text style tag on range (0, 10) and an italic one on (3, 3) to a line and
 * verifies that the list then holds three tags and that the common text style of the
 * range (2, 6) is effectively "bold only".
 */
@Test
public void testCommonTag() {
    TrpTextLineType line = new TrpTextLineType(new TrpTextRegionType(new TrpPageType()));
    line.setUnicodeText("Hello world!", null);
    CustomTagList tl = new CustomTagList(line);
    TextStyleTag ts1 = new TextStyleTag(0, 10);
    ts1.setBold(true);
    tl.addOrMergeTag(ts1, null);
    logger.debug("ts1 = " + tl);
    TextStyleTag ts2 = new TextStyleTag(3, 3);
    ts2.setItalic(true);
    tl.addOrMergeTag(ts2, "italic");
    logger.debug("ts2 = " + tl);
    Assert.assertEquals("Nr. of merged elements must be 3", 3, tl.getTags().size());
    TextStyleTag common = tl.getCommonIndexedCustomTag(TextStyleTag.TAG_NAME, 2, 6);
    // expected common style for the queried range: bold only
    TextStyleTag check = new TextStyleTag();
    check.setBold(true);
    check.setOffset(2);
    check.setLength(6);
    logger.debug("common = " + common);
    // log the expected tag (previously 'common' was logged a second time here)
    logger.debug("check = " + check);
    Assert.assertTrue("", common.equalsEffectiveValues(check, true));
}
Also used : TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TextStyleTag(eu.transkribus.core.model.beans.customtags.TextStyleTag) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) CustomTagList(eu.transkribus.core.model.beans.customtags.CustomTagList) TrpPageType(eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType) Test(org.junit.Test)

Example 8 with TrpPageType

use of eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType in project TranskribusCore by Transkribus.

the class CustomTagListTest method testMultipleRandomIndexedAddOrMergeTag.

/**
 * Randomized test: adds 1000 randomly generated tags to the tag list of a single line.
 * <p>
 * Each tag is either an indexed custom tag, an indexed text style tag with random
 * bold/italic/monospace flags, or a tag created from a name only. Offsets and lengths may
 * reach beyond the text, so an {@link IndexOutOfBoundsException} is expected exactly for
 * tags that are not inside the whole text range. After every insertion
 * {@code checkIntegrity} is run on the list.
 */
@Test
public void testMultipleRandomIndexedAddOrMergeTag() {
    TrpPageType page = new TrpPageType();
    TrpTextRegionType region = new TrpTextRegionType(page);
    TrpTextLineType line = new TrpTextLineType(region);
    line.setUnicodeText("Hello world!", null);
    CustomTagList tagList = new CustomTagList(line);

    final int textLength = tagList.getTextLength();
    // reference tag spanning the complete text of the line
    final CustomTag fullRange = new CustomTag("test", 0, textLength);
    final String[] plainTagNames = { "a_non_indexed", "b_ni", "c_balbla_non_indexed" };
    // exceeds the text length on purpose so that out-of-bounds tags are generated, too
    final int offsetBound = textLength + 5;
    final int iterations = (int) 1e3;

    for (int run = 0; run < iterations; run++) {
        final int offset = rand.nextInt(offsetBound);
        final int length = rand.nextInt(offsetBound - offset) + 1;
        final CustomTag candidate;
        switch (rand.nextInt(3)) {
            case 0: {
                // structure tag
                candidate = new CustomTag("a_test_indexed", offset, length);
                break;
            }
            case 1: {
                // text style tag with random flags
                TextStyleTag style = new TextStyleTag(offset, length);
                style.setBold(rand.nextBoolean());
                style.setItalic(rand.nextBoolean());
                style.setMonospace(rand.nextBoolean());
                candidate = style;
                break;
            }
            default: {
                // tag created from a name only
                candidate = new CustomTag(plainTagNames[rand.nextInt(3)]);
                break;
            }
        }
        logger.trace("i=" + run + "/" + iterations);
        logger.trace("adding custom tag: " + candidate);
        logger.trace("list before = " + tagList);
        try {
            tagList.addOrMergeTag(candidate, null);
            // no exception: the tag must be non-indexed or lie inside the text range
            String message = "Indexed CustomTag was not inside but no exception thrown: " + candidate;
            boolean accepted = !candidate.isIndexed() || fullRange.getOverlapType(candidate) == OverlapType.INSIDE;
            Assert.assertTrue(message, accepted);
        } catch (IndexOutOfBoundsException outOfBounds) {
            // exception: the tag must not lie inside the text range
            String message = "CustomTag was inside but exception thrown: " + candidate;
            boolean notInside = fullRange.getOverlapType(candidate) != OverlapType.INSIDE;
            Assert.assertTrue(message, notInside);
            logger.trace("Exception for tag not inside: " + candidate);
        }
        logger.trace("list after = " + tagList);
        checkIntegrity(tagList);
    }
    logger.info("list = " + tagList);
}
Also used : TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TextStyleTag(eu.transkribus.core.model.beans.customtags.TextStyleTag) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) CustomTag(eu.transkribus.core.model.beans.customtags.CustomTag) CustomTagList(eu.transkribus.core.model.beans.customtags.CustomTagList) TrpPageType(eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType) Test(org.junit.Test)

Example 9 with TrpPageType

use of eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType in project TranskribusCore by Transkribus.

the class CustomTagListTest method testAddBothOverlap.

/**
 * Adds three text style tags to a line - bold+italic on (2, 3), bold+superscript on
 * (6, 2) and one without any style on (9, 11) - and expects exactly two tags to remain
 * in the list afterwards.
 */
@Test
public void testAddBothOverlap() {
    TrpPageType page = new TrpPageType();
    TrpTextRegionType region = new TrpTextRegionType(page);
    TrpTextLineType line = new TrpTextLineType(region);
    line.setUnicodeText("Hello world and all parallel worlds!", null);
    CustomTagList tagList = new CustomTagList(line);

    // bold + italic
    TextStyleTag boldItalic = new TextStyleTag(2, 3);
    boldItalic.setBold(true);
    boldItalic.setItalic(true);
    tagList.addOrMergeTag(boldItalic, null);
    logger.debug("tl1 = " + tagList);

    // bold + superscript
    TextStyleTag boldSuperscript = new TextStyleTag(6, 2);
    boldSuperscript.setBold(true);
    boldSuperscript.setSuperscript(true);
    tagList.addOrMergeTag(boldSuperscript, null);
    logger.debug("tl2 = " + tagList);

    // no style set at all
    TextStyleTag unstyled = new TextStyleTag(9, 11);
    tagList.addOrMergeTag(unstyled, null);
    logger.debug("tl3 = " + tagList);

    int remainingTags = tagList.getTags().size();
    Assert.assertEquals("Nr. of merged elements must be 2 --> empty tags shall be removed!", 2, remainingTags);
}
Also used : TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TextStyleTag(eu.transkribus.core.model.beans.customtags.TextStyleTag) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) CustomTagList(eu.transkribus.core.model.beans.customtags.CustomTagList) TrpPageType(eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType) Test(org.junit.Test)

Example 10 with TrpPageType

use of eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType in project TranskribusCore by Transkribus.

the class TrpXlsxBuilder method writeXlsxForDoc.

/**
 * Exports the custom tags of the selected pages of a document into an Excel (xlsx) file.
 * <p>
 * For each exported page the transcript is taken from the cache if available, otherwise
 * it is built from the page's current transcript. Tags are written via
 * {@code writeTagsForShapeElement}, either per word ({@code wordBased}) or per line.
 * Afterwards the columns of every sheet are auto-sized and the workbook is written to
 * {@code exportFile}.
 *
 * @param doc         the document whose pages are exported
 * @param wordBased   if {@code true} tags are written per word, otherwise per line
 * @param exportFile  the xlsx file to write
 * @param pageIndices zero-based indices of the pages to export; {@code null} exports all pages
 * @param monitor     optional progress monitor; may be {@code null}
 * @param cache       export cache providing the tag map and cached transcripts; must not be {@code null}
 * @throws IllegalArgumentException if {@code cache} is {@code null}
 * @throws NoTagsException          if the cache holds no custom tags for the document
 * @throws InterruptedException     if the export is canceled via the monitor
 * @throws IOException              if the workbook contains no sheets or cannot be written
 * @throws Exception                declared for errors of the called helpers
 */
public static void writeXlsxForDoc(TrpDoc doc, boolean wordBased, File exportFile, Set<Integer> pageIndices, IProgressMonitor monitor, ExportCache cache) throws NoTagsException, Exception {
    if (cache == null) {
        throw new IllegalArgumentException("ExportCache must not be null.");
    }
    if (cache.getCustomTagMapForDoc().isEmpty()) {
        logger.info("No tags to store -> Xlsx export cancelled");
        throw new NoTagsException("No tags available to store into Xlsx");
    }
    List<TrpPage> pages = doc.getPages();
    String exportPath = exportFile.getPath();
    Set<String> selectedTags = cache.getOnlySelectedTagnames(ExportUtils.getOnlyWantedTagnames(CustomTagFactory.getRegisteredTagNames()));
    int totalPages = pageIndices == null ? pages.size() : pageIndices.size();
    if (monitor != null) {
        monitor.beginTask("Exporting to Excel", totalPages);
    }
    wb = new XSSFWorkbook();
    String docId = String.valueOf(doc.getId());
    // number of pages exported so far (only used for the sub task label)
    int c = 0;
    for (int i = 0; i < pages.size(); ++i) {
        if (pageIndices != null && !pageIndices.contains(i)) {
            continue;
        }
        if (monitor != null) {
            if (monitor.isCanceled()) {
                throw new InterruptedException("Export was canceled by user");
            }
            monitor.subTask("Processing page " + (c + 1));
        }
        TrpPage page = pages.get(i);
        // try to get previously loaded JAXB transcript (cache is non-null, see guard above)
        JAXBPageTranscript tr = cache.getPageTranscriptAtIndex(i);
        if (tr == null) {
            TrpTranscriptMetadata md = page.getCurrentTranscript();
            tr = new JAXBPageTranscript(md);
            tr.build();
        }
        TrpPageType trpPage = tr.getPage();
        logger.debug("writing xlsx for page " + (i + 1) + "/" + doc.getNPages());
        String pageNr = String.valueOf(page.getPageNr());
        for (TrpTextRegionType r : trpPage.getTextRegions(true)) {
            for (TextLineType line : r.getTextLine()) {
                TrpTextLineType trpL = (TrpTextLineType) line;
                List<WordType> words = trpL.getWord();
                if (wordBased) {
                    for (WordType word : words) {
                        TrpWordType w = (TrpWordType) word;
                        writeTagsForShapeElement(w, trpL.getUnicodeText(), docId, pageNr, r.getId(), trpL.getId(), w.getId(), selectedTags);
                    }
                } else {
                    writeTagsForShapeElement(trpL, trpL.getUnicodeText(), docId, pageNr, r.getId(), trpL.getId(), "", selectedTags);
                }
            }
        }
        ++c;
        if (monitor != null) {
            // NOTE(review): assumes Eclipse-style IProgressMonitor, where worked() takes the
            // increment since the last call and not the cumulative amount - confirm.
            monitor.worked(1);
        }
    }
    // auto size the columns of every sheet
    for (int i = 0; i < wb.getNumberOfSheets(); i++) {
        Iterator<Row> rowIterator = wb.getSheetAt(i).rowIterator();
        if (rowIterator.hasNext()) {
            // the first row is the header row; its cell count gives the number of columns
            Row headerRow = rowIterator.next();
            int numberOfCells = headerRow.getPhysicalNumberOfCells();
            for (int j = 0; j < numberOfCells; j++) {
                wb.getSheetAt(i).autoSizeColumn(j);
            }
        }
    }
    try {
        // means no tags at all
        if (wb.getNumberOfSheets() == 0) {
            throw new IOException("Sorry - No tags available for export");
        }
        // try-with-resources closes the stream even if writing the workbook fails
        try (FileOutputStream fOut = new FileOutputStream(exportPath)) {
            wb.write(fOut);
        }
    } catch (IOException e) {
        logger.error("Could not write xlsx to: " + exportPath, e);
        throw e;
    }
    logger.info("wrote xlsx to: " + exportPath);
}
Also used : NoTagsException(eu.transkribus.core.model.builder.NoTagsException) JAXBPageTranscript(eu.transkribus.core.model.beans.JAXBPageTranscript) TrpPage(eu.transkribus.core.model.beans.TrpPage) TrpTranscriptMetadata(eu.transkribus.core.model.beans.TrpTranscriptMetadata) IOException(java.io.IOException) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType) WordType(eu.transkribus.core.model.beans.pagecontent.WordType) TrpWordType(eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TextLineType(eu.transkribus.core.model.beans.pagecontent.TextLineType) TrpTextLineType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType) TrpTextRegionType(eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType) FileOutputStream(java.io.FileOutputStream) Iterator(java.util.Iterator) XSSFWorkbook(org.apache.poi.xssf.usermodel.XSSFWorkbook) Row(org.apache.poi.ss.usermodel.Row) TrpPageType(eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)

Aggregations

TrpPageType (eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType)15 TrpTextLineType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextLineType)9 TrpTextRegionType (eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType)9 JAXBPageTranscript (eu.transkribus.core.model.beans.JAXBPageTranscript)7 TrpPage (eu.transkribus.core.model.beans.TrpPage)7 TrpTranscriptMetadata (eu.transkribus.core.model.beans.TrpTranscriptMetadata)7 TrpWordType (eu.transkribus.core.model.beans.pagecontent_trp.TrpWordType)5 CustomTag (eu.transkribus.core.model.beans.customtags.CustomTag)4 CustomTagList (eu.transkribus.core.model.beans.customtags.CustomTagList)4 TextStyleTag (eu.transkribus.core.model.beans.customtags.TextStyleTag)4 Test (org.junit.Test)4 PcGtsType (eu.transkribus.core.model.beans.pagecontent.PcGtsType)3 TextLineType (eu.transkribus.core.model.beans.pagecontent.TextLineType)3 WordType (eu.transkribus.core.model.beans.pagecontent.WordType)3 IOException (java.io.IOException)3 Rtf (com.tutego.jrtf.Rtf)2 Rectangle (java.awt.Rectangle)2 FileOutputStream (java.io.FileOutputStream)2 FileWriter (java.io.FileWriter)2 ArrayList (java.util.ArrayList)2