Use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTableRegionType in project TranskribusCore by Transkribus.
From the class TrpTxtBuilder, method writeTxtForSinglePage.
/**
 * Appends the plain-text content of a single page to the given file.
 *
 * <p>The page's regions are sorted in place with a
 * {@code TrpElementReadingOrderComparator} and then visited in that order:
 * <ul>
 * <li>Table regions are skipped (plain-text export of tables is not implemented yet).</li>
 * <li>For each line of a text region: if {@code wordBased} is set and the line has
 * words, every word's text becomes its own output entry; otherwise the line's text
 * is emitted when it is non-empty.</li>
 * <li>Regions of any other type are ignored.</li>
 * </ul>
 * After every text region that contains at least one line, an extra line-separator
 * entry is added so that regions are visually separated in the output.
 *
 * <p>The file is created if missing and otherwise appended to. An I/O failure is
 * logged and not propagated.
 *
 * @param file target text file
 * @param trpPage page whose regions are exported
 * @param wordBased whether to emit word texts instead of line texts where words exist
 * @param preserveLineBreaks currently not evaluated by this method
 */
private static void writeTxtForSinglePage(File file, TrpPageType trpPage, boolean wordBased, boolean preserveLineBreaks) {
    // TrpTableRegionType is contained in the regions too
    List<TrpRegionType> regions = trpPage.getRegions();
    Collections.sort(regions, new TrpElementReadingOrderComparator<RegionType>(true));

    List<String> content = new ArrayList<String>();
    for (TrpRegionType region : regions) {
        if (region instanceof TrpTableRegionType) {
            // TODO: for simple txt export: how to handle tables
            continue;
        }
        if (!(region instanceof TrpTextRegionType)) {
            continue;
        }

        List<TextLineType> lines = ((TrpTextRegionType) region).getTextLine();
        for (TextLineType line : lines) {
            TrpTextLineType trpLine = (TrpTextLineType) line;
            String textOfCurrLine = trpLine.getUnicodeText();
            if (wordBased && trpLine.getWord().size() > 0) {
                for (WordType word : trpLine.getWord()) {
                    content.add(((ITrpShapeType) word).getUnicodeText());
                }
            } else if (textOfCurrLine != null && !textOfCurrLine.isEmpty()) {
                // Compare by value: an identity check against "" lets non-interned
                // empty strings (and null) slip through as spurious output lines.
                content.add(textOfCurrLine);
            }
        }

        if (!lines.isEmpty()) {
            // Files.write terminates every entry with a line separator, so this
            // entry results in a blank separator between regions.
            content.add(System.lineSeparator());
        }
    }

    try {
        logger.debug("path " + Paths.get(file.getAbsolutePath()));
        Files.write(Paths.get(file.getAbsolutePath()), content, utf8, StandardOpenOption.CREATE, StandardOpenOption.APPEND);
    } catch (IOException e) {
        // Keep the original best-effort contract (no rethrow), but report through
        // the logger with the cause instead of printing to stderr.
        logger.error("Could not write text export to " + file.getAbsolutePath(), e);
    }
}
Aggregations