Use of eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType in project TranskribusCore by Transkribus.
Class TrpRtfBuilder, method writeRtfForDoc.
/**
 * Exports the current transcripts of a document's pages into a single RTF file.
 * <p>
 * One RTF section is appended per exported page. If the monitor reports
 * cancellation, the method returns immediately and nothing is written to
 * {@code file}.
 *
 * @param doc         the document whose pages are exported
 * @param wordBased   forwarded unchanged to {@code getRtfParagraphsForTranscript}
 * @param file        the file the RTF output is written to
 * @param pageIndices zero-based indices of the pages to export, or {@code null}
 *                    to export every page
 * @param monitor     optional progress monitor; may be {@code null}
 * @throws JAXBException declared by the signature; presumably raised while
 *                       building a page transcript
 * @throws IOException   if the output file cannot be opened, written or closed
 */
public static void writeRtfForDoc(TrpDoc doc, boolean wordBased, File file, Set<Integer> pageIndices, IProgressMonitor monitor) throws JAXBException, IOException {
    Rtf rtf = Rtf.rtf();
    List<TrpPage> pages = doc.getPages();
    int totalPages = pageIndices == null ? pages.size() : pageIndices.size();
    if (monitor != null) {
        monitor.beginTask("Exporting to RTF", totalPages);
    }
    // c counts the pages actually exported; i is the index into the full page list
    int c = 0;
    for (int i = 0; i < pages.size(); ++i) {
        if (pageIndices != null && !pageIndices.contains(i)) {
            continue;
        }
        if (monitor != null) {
            if (monitor.isCanceled()) {
                logger.debug("RTF export cancelled!");
                return;
            }
            monitor.subTask("Processing page " + (c + 1));
        }
        TrpPage page = pages.get(i);
        TrpTranscriptMetadata md = page.getCurrentTranscript();
        JAXBPageTranscript tr = new JAXBPageTranscript(md);
        tr.build();
        TrpPageType trpPage = tr.getPage();
        logger.debug("writing rtf for page " + (i + 1) + "/" + doc.getNPages());
        rtf.section(getRtfParagraphsForTranscript(trpPage, wordBased));
        ++c;
        if (monitor != null) {
            // NOTE(review): this reports the cumulative count; if IProgressMonitor.worked
            // expects an increment (as the Eclipse monitor does), this should be worked(1) - confirm.
            monitor.worked(c);
        }
    }
    // Close the writer deterministically, even if rendering the document fails.
    try (FileWriter writer = new FileWriter(file)) {
        rtf.out(writer);
    }
    logger.info("wrote rtf to: " + file.getAbsolutePath());
}
Use of eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType in project TranskribusCore by Transkribus.
Class CustomTagListTest, method testCommonTag.
/**
 * Adds a bold tag and a nested italic tag to a line, then checks that the
 * common text style computed for the range (2, 6) has the same effective
 * values as a bold-only tag on that range.
 */
@Test
public void testCommonTag() {
    TrpTextLineType line = new TrpTextLineType(new TrpTextRegionType(new TrpPageType()));
    line.setUnicodeText("Hello world!", null);
    CustomTagList tl = new CustomTagList(line);

    TextStyleTag ts1 = new TextStyleTag(0, 10);
    ts1.setBold(true);
    tl.addOrMergeTag(ts1, null);
    logger.debug("ts1 = " + tl);

    TextStyleTag ts2 = new TextStyleTag(3, 3);
    ts2.setItalic(true);
    tl.addOrMergeTag(ts2, "italic");
    logger.debug("ts2 = " + tl);

    Assert.assertEquals("Nr. of merged elements must be 3", 3, tl.getTags().size());

    TextStyleTag common = tl.getCommonIndexedCustomTag(TextStyleTag.TAG_NAME, 2, 6);
    TextStyleTag check = new TextStyleTag();
    check.setBold(true);
    check.setOffset(2);
    check.setLength(6);
    logger.debug("common = " + common);
    // Log the expected tag itself; previously this line printed 'common' a second time.
    logger.debug("check = " + check);

    // Fail with a readable message instead of a NullPointerException below.
    Assert.assertNotNull("No common tag found for range (2, 6)", common);
    Assert.assertTrue("Common tag does not match expected effective values: common = " + common + ", check = " + check,
            common.equalsEffectiveValues(check, true));
}
Use of eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType in project TranskribusCore by Transkribus.
Class CustomTagListTest, method testMultipleRandomIndexedAddOrMergeTag.
// @Ignore
/**
 * Randomised test of {@code CustomTagList.addOrMergeTag}.
 * <p>
 * Repeatedly adds a randomly chosen tag (indexed custom tag, text style tag or
 * non-indexed tag) whose range may exceed the line's text. An
 * {@link IndexOutOfBoundsException} is expected exactly when an indexed tag is
 * not inside the whole text range; list integrity is checked after every step.
 */
@Test
public void testMultipleRandomIndexedAddOrMergeTag() {
    TrpTextLineType textLine = new TrpTextLineType(new TrpTextRegionType(new TrpPageType()));
    textLine.setUnicodeText("Hello world!", null);
    CustomTagList tagList = new CustomTagList(textLine);

    final int textLength = tagList.getTextLength();
    final CustomTag fullRange = new CustomTag("test", 0, textLength);
    final String[] nonIndexedNames = { "a_non_indexed", "b_ni", "c_balbla_non_indexed" };
    // Let ranges run past the end of the text so the out-of-bounds path is exercised too.
    final int offsetRange = textLength + 5;
    final int iterations = (int) 1e3;

    for (int run = 0; run < iterations; ++run) {
        final int offset = rand.nextInt(offsetRange);
        final int length = rand.nextInt(offsetRange - offset) + 1;

        final CustomTag candidate;
        switch (rand.nextInt(3)) {
        case 0: {
            // structure tag
            candidate = new CustomTag("a_test_indexed", offset, length);
            break;
        }
        case 1: {
            // text style tag with random attributes
            TextStyleTag style = new TextStyleTag(offset, length);
            style.setBold(rand.nextBoolean());
            style.setItalic(rand.nextBoolean());
            style.setMonospace(rand.nextBoolean());
            candidate = style;
            break;
        }
        default: {
            // non-indexed tag
            candidate = new CustomTag(nonIndexedNames[rand.nextInt(3)]);
            break;
        }
        }

        logger.trace("i=" + run + "/" + iterations);
        logger.trace("adding custom tag: " + candidate);
        logger.trace("list before = " + tagList);
        try {
            tagList.addOrMergeTag(candidate, null);
            // Reaching this point means no exception: the tag must be non-indexed or fully inside.
            Assert.assertTrue("Indexed CustomTag was not inside but no exception thrown: " + candidate,
                    !candidate.isIndexed() || fullRange.getOverlapType(candidate) == OverlapType.INSIDE);
        } catch (IndexOutOfBoundsException outOfBounds) {
            // An exception is only legitimate for tags that are not inside the text range.
            Assert.assertTrue("CustomTag was inside but exception thrown: " + candidate,
                    fullRange.getOverlapType(candidate) != OverlapType.INSIDE);
            logger.trace("Exception for tag not inside: " + candidate);
        }
        logger.trace("list after = " + tagList);
        checkIntegrity(tagList);
    }
    logger.info("list = " + tagList);
}
Use of eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType in project TranskribusCore by Transkribus.
Class CustomTagListTest, method testAddBothOverlap.
// @Ignore
/**
 * Adds two styled tags and then a third, attribute-less text style tag, and
 * expects exactly two tags to remain in the list afterwards.
 */
@Test
public void testAddBothOverlap() {
    TrpPageType page = new TrpPageType();
    TrpTextRegionType region = new TrpTextRegionType(page);
    TrpTextLineType textLine = new TrpTextLineType(region);
    textLine.setUnicodeText("Hello world and all parallel worlds!", null);
    CustomTagList tagList = new CustomTagList(textLine);

    // first tag: bold + italic
    TextStyleTag boldItalic = new TextStyleTag(2, 3);
    boldItalic.setBold(true);
    boldItalic.setItalic(true);
    tagList.addOrMergeTag(boldItalic, null);
    logger.debug("tl1 = " + tagList);

    // second tag: bold + superscript
    TextStyleTag boldSuperscript = new TextStyleTag(6, 2);
    boldSuperscript.setBold(true);
    boldSuperscript.setSuperscript(true);
    tagList.addOrMergeTag(boldSuperscript, null);
    logger.debug("tl2 = " + tagList);

    // third tag: no style attribute set at all
    TextStyleTag unstyled = new TextStyleTag(9, 11);
    tagList.addOrMergeTag(unstyled, null);
    logger.debug("tl3 = " + tagList);

    int remainingTags = tagList.getTags().size();
    Assert.assertEquals("Nr. of merged elements must be 2 --> empty tags shall be removed!", 2, remainingTags);
}
Use of eu.transkribus.core.model.beans.pagecontent_trp.TrpPageType in project TranskribusCore by Transkribus.
Class TrpXlsxBuilder, method writeXlsxForDoc.
/**
 * Exports the tags of a document's pages into an Excel (xlsx) workbook file.
 * <p>
 * For every exported page the transcript is taken from {@code cache} when
 * available and otherwise built from the page's current transcript. Tags are
 * written per word when {@code wordBased} is set, per line otherwise. After
 * all pages are processed, the columns of every sheet are auto-sized and the
 * workbook is written to {@code exportFile}.
 * <p>
 * Note: the workbook is stored in the {@code wb} field, which is declared
 * outside this method and reassigned on every call.
 *
 * @param doc         the document whose pages are exported
 * @param wordBased   {@code true} to write tags per word, {@code false} per line
 * @param exportFile  the file the workbook is written to
 * @param pageIndices zero-based indices of the pages to export, or {@code null}
 *                    to export every page
 * @param monitor     optional progress monitor; may be {@code null}
 * @param cache       export cache providing the tag map and preloaded
 *                    transcripts; must not be {@code null}
 * @throws IllegalArgumentException if {@code cache} is {@code null}
 * @throws NoTagsException          if the cache's custom tag map is empty
 * @throws InterruptedException     if the monitor reports cancellation
 * @throws IOException              if the workbook contains no sheets or
 *                                  cannot be written
 * @throws Exception                declared by the signature for any other failure
 */
public static void writeXlsxForDoc(TrpDoc doc, boolean wordBased, File exportFile, Set<Integer> pageIndices, IProgressMonitor monitor, ExportCache cache) throws NoTagsException, Exception {
    if (cache == null) {
        throw new IllegalArgumentException("ExportCache must not be null.");
    }
    if (cache.getCustomTagMapForDoc().isEmpty()) {
        logger.info("No tags to store -> Xlsx export cancelled");
        throw new NoTagsException("No tags available to store into Xlsx");
    }
    List<TrpPage> pages = doc.getPages();
    String exportPath = exportFile.getPath();
    Set<String> selectedTags = cache.getOnlySelectedTagnames(ExportUtils.getOnlyWantedTagnames(CustomTagFactory.getRegisteredTagNames()));
    int totalPages = pageIndices == null ? pages.size() : pageIndices.size();
    if (monitor != null) {
        monitor.beginTask("Exporting to Excel", totalPages);
    }
    wb = new XSSFWorkbook();
    // c counts the pages actually exported; i is the index into the full page list
    int c = 0;
    for (int i = 0; i < pages.size(); ++i) {
        if (pageIndices != null && !pageIndices.contains(i)) {
            continue;
        }
        if (monitor != null) {
            if (monitor.isCanceled()) {
                throw new InterruptedException("Export was canceled by user");
            }
            monitor.subTask("Processing page " + (c + 1));
        }
        TrpPage page = pages.get(i);
        // Prefer a previously loaded JAXB transcript (cache is non-null, see guard above).
        JAXBPageTranscript tr = cache.getPageTranscriptAtIndex(i);
        if (tr == null) {
            TrpTranscriptMetadata md = page.getCurrentTranscript();
            tr = new JAXBPageTranscript(md);
            tr.build();
        }
        TrpPageType trpPage = tr.getPage();
        logger.debug("writing xlsx for page " + (i + 1) + "/" + doc.getNPages());
        List<TrpTextRegionType> textRegions = trpPage.getTextRegions(true);
        for (TrpTextRegionType r : textRegions) {
            List<TextLineType> lines = r.getTextLine();
            for (TextLineType textLine : lines) {
                TrpTextLineType trpL = (TrpTextLineType) textLine;
                if (wordBased) {
                    List<WordType> words = trpL.getWord();
                    for (WordType word : words) {
                        TrpWordType w = (TrpWordType) word;
                        writeTagsForShapeElement(w, trpL.getUnicodeText(), String.valueOf(doc.getId()), String.valueOf(page.getPageNr()), r.getId(), trpL.getId(), w.getId(), selectedTags);
                    }
                } else {
                    writeTagsForShapeElement(trpL, trpL.getUnicodeText(), String.valueOf(doc.getId()), String.valueOf(page.getPageNr()), r.getId(), trpL.getId(), "", selectedTags);
                }
            }
        }
        ++c;
        if (monitor != null) {
            // NOTE(review): this reports the cumulative count; if IProgressMonitor.worked
            // expects an increment (as the Eclipse monitor does), this should be worked(1) - confirm.
            monitor.worked(c);
        }
    }
    /*
     * auto size the columns: the first row of each sheet determines how many columns exist
     */
    for (int i = 0; i < wb.getNumberOfSheets(); i++) {
        Iterator<Row> rowIterator = wb.getSheetAt(i).rowIterator();
        if (rowIterator.hasNext()) {
            Row headerRow = rowIterator.next();
            int numberOfCells = headerRow.getPhysicalNumberOfCells();
            for (int j = 0; j < numberOfCells; j++) {
                wb.getSheetAt(i).autoSizeColumn(j);
            }
        }
    }
    // means no tags at all
    if (wb.getNumberOfSheets() == 0) {
        throw new IOException("Sorry - No tags available for export");
    }
    // try-with-resources closes the stream even when writing the workbook fails
    try (FileOutputStream fOut = new FileOutputStream(exportPath)) {
        wb.write(fOut);
    } catch (IOException e) {
        logger.error("Could not write xlsx to: " + exportPath, e);
        throw e;
    }
    logger.info("wrote xlsx to: " + exportPath);
}
Aggregations