use of eu.transkribus.core.model.beans.TrpTranscriptMetadata in project TranskribusCore by Transkribus.
the class TrpRtfBuilder method writeRtfForDoc.
/**
 * Exports the selected pages of a document into one RTF file and, if requested,
 * appends an overview of the selected tags after the last page.
 * <p>
 * As a side effect the static fields {@code exportTags}, {@code tagnames} and
 * {@code doBlackening} are overwritten with the values of this call.
 * If the monitor reports cancellation, the method returns without writing the file.
 *
 * @param doc the document whose pages are exported
 * @param wordBased passed through to {@code getRtfParagraphsForTranscript} for every page
 * @param writeTags if {@code true}, a tag overview section is appended after the pages
 * @param doBlackening stored in the static {@code doBlackening} field before exporting
 * @param file the RTF output file
 * @param pageIndices zero-based indices of the pages to export, or {@code null} for all pages
 * @param monitor optional progress monitor, may be {@code null}
 * @param cache provides the selected tag names and the tags found for each tag name
 * @throws JAXBException declared for callers; kept for interface compatibility
 * @throws IOException if a transcript cannot be built or the output file cannot be written
 */
public static void writeRtfForDoc(TrpDoc doc, boolean wordBased, boolean writeTags, boolean doBlackening, File file, Set<Integer> pageIndices, IProgressMonitor monitor, ExportCache cache) throws JAXBException, IOException {
    exportTags = writeTags;
    // tagnames = all tags the user has chosen via the export dialog
    tagnames = cache.getSelectedTags();
    TrpRtfBuilder.doBlackening = doBlackening;

    Rtf rtf = Rtf.rtf();
    List<TrpPage> pages = doc.getPages();
    int totalPages = pageIndices == null ? pages.size() : pageIndices.size();
    if (monitor != null) {
        monitor.beginTask("Exporting to RTF", totalPages);
    }

    // c counts the pages actually exported (used for the sub task label only)
    int c = 0;
    for (int i = 0; i < pages.size(); ++i) {
        if (pageIndices != null && !pageIndices.contains(i)) {
            continue;
        }
        if (monitor != null) {
            if (monitor.isCanceled()) {
                logger.debug("RTF export cancelled!");
                return;
            }
            monitor.subTask("Processing page " + (c + 1));
        }

        TrpPage page = pages.get(i);
        TrpTranscriptMetadata md = page.getCurrentTranscript();
        JAXBPageTranscript tr = new JAXBPageTranscript(md);
        tr.build();
        TrpPageType trpPage = tr.getPage();
        logger.debug("writing rtf for page " + (i + 1) + "/" + doc.getNPages());

        rtf.header(color(204, 0, 0).at(0), color(0, 0xff, 0).at(1)).section(getRtfParagraphsForTranscript(trpPage, wordBased));
        ++c;
        if (monitor != null) {
            // worked() expects the increment since the last call, not the running total;
            // passing the cumulative count would over-report the progress
            monitor.worked(1);
        }
    }

    // write tags at end of last page
    if (exportTags) {
        ArrayList<RtfPara> tagParas = new ArrayList<RtfPara>();
        for (String currTagname : tagnames) {
            // all custom tags with currTagname, mapped to their text
            HashMap<CustomTag, String> allTagsOfThisTagname = cache.getTags(currTagname);
            if (allTagsOfThisTagname.size() > 0) {
                // underlined headline followed by one line per tag value
                tagParas.add(RtfPara.p(RtfText.text(RtfText.underline(currTagname + " tags in this document: " + allTagsOfThisTagname.size()))));
                Collection<String> valueSet = allTagsOfThisTagname.values();
                RtfText[] tagTexts = new RtfText[valueSet.size()];
                int l = 0;
                for (String currEntry : valueSet) {
                    tagTexts[l++] = RtfText.text(currEntry.concat("\n"));
                }
                tagParas.add(RtfPara.p(tagTexts));
            }
        }
        rtf.header(color(204, 0, 0).at(0)).section(tagParas);
    }

    // make sure the writer is flushed and closed even if writing fails
    try (FileWriter writer = new FileWriter(file)) {
        rtf.out(writer);
    }
    logger.info("wrote rtf to: " + file.getAbsolutePath());
}
use of eu.transkribus.core.model.beans.TrpTranscriptMetadata in project TranskribusCore by Transkribus.
the class TrpRtfBuilder method writeRtfForDoc.
/**
 * Exports the selected pages of a document into one RTF file, one section per page.
 * <p>
 * If the monitor reports cancellation, the method returns without writing the file.
 *
 * @param doc the document whose pages are exported
 * @param wordBased passed through to {@code getRtfParagraphsForTranscript} for every page
 * @param file the RTF output file
 * @param pageIndices zero-based indices of the pages to export, or {@code null} for all pages
 * @param monitor optional progress monitor, may be {@code null}
 * @throws JAXBException declared for callers; kept for interface compatibility
 * @throws IOException if a transcript cannot be built or the output file cannot be written
 */
public static void writeRtfForDoc(TrpDoc doc, boolean wordBased, File file, Set<Integer> pageIndices, IProgressMonitor monitor) throws JAXBException, IOException {
    Rtf rtf = Rtf.rtf();
    List<TrpPage> pages = doc.getPages();
    int totalPages = pageIndices == null ? pages.size() : pageIndices.size();
    if (monitor != null) {
        monitor.beginTask("Exporting to RTF", totalPages);
    }

    // c counts the pages actually exported (used for the sub task label only)
    int c = 0;
    for (int i = 0; i < pages.size(); ++i) {
        if (pageIndices != null && !pageIndices.contains(i)) {
            continue;
        }
        if (monitor != null) {
            if (monitor.isCanceled()) {
                logger.debug("RTF export cancelled!");
                return;
            }
            monitor.subTask("Processing page " + (c + 1));
        }

        TrpPage page = pages.get(i);
        TrpTranscriptMetadata md = page.getCurrentTranscript();
        JAXBPageTranscript tr = new JAXBPageTranscript(md);
        tr.build();
        TrpPageType trpPage = tr.getPage();
        logger.debug("writing rtf for page " + (i + 1) + "/" + doc.getNPages());

        rtf.section(getRtfParagraphsForTranscript(trpPage, wordBased));
        ++c;
        if (monitor != null) {
            // worked() expects the increment since the last call, not the running total;
            // passing the cumulative count would over-report the progress
            monitor.worked(1);
        }
    }

    // make sure the writer is flushed and closed even if writing fails
    try (FileWriter writer = new FileWriter(file)) {
        rtf.out(writer);
    }
    logger.info("wrote rtf to: " + file.getAbsolutePath());
}
use of eu.transkribus.core.model.beans.TrpTranscriptMetadata in project TranskribusCore by Transkribus.
the class ExportCache method storePageTranscripts4Export.
/**
 * Builds the page transcripts needed for an export and stores them in {@code pageTranscripts}.
 * <p>
 * The resulting list has exactly one entry per page of the document, so that the list index
 * always equals the page index. Pages that are not exported (not contained in
 * {@code pageIndices}, or without a transcript of the requested status) are represented by
 * {@code null}.
 * <p>
 * Note: {@code pageIndices} is modified - indices of pages for which no transcript with the
 * requested status exists are removed from the set.
 *
 * @param doc the document whose page transcripts are loaded
 * @param pageIndices zero-based indices of the pages to export, or {@code null} for all pages
 * @param monitor optional progress monitor, may be {@code null}
 * @param versionStatus selects the version: a value containing "Latest" takes the current
 *        transcript, a value containing "Loaded" takes {@code loadedTranscript} for page
 *        {@code pageIdx} and the current transcript otherwise; any other value is looked up
 *        via {@code getTranscriptWithStatusOrNull}
 * @param pageIdx zero-based index of the page {@code loadedTranscript} belongs to
 * @param loadedTranscript the currently loaded transcript, may be {@code null}
 * @throws Exception if the user cancelled the export via the monitor, or if building a
 *         transcript fails
 */
public void storePageTranscripts4Export(TrpDoc doc, Set<Integer> pageIndices, IProgressMonitor monitor, String versionStatus, int pageIdx, TrpTranscriptMetadata loadedTranscript) throws Exception {
    pageTranscripts = new ArrayList<JAXBPageTranscript>();
    List<TrpPage> pages = doc.getPages();
    int totalPages = pages.size();

    for (int i = 0; i < totalPages; ++i) {
        if (pageIndices != null && !pageIndices.contains(i)) {
            // fill up with null to have the proper index of each page later on
            pageTranscripts.add(null);
            continue;
        }
        if (monitor != null && monitor.isCanceled()) {
            throw new Exception("User canceled the export");
        }

        TrpPage page = pages.get(i);
        TrpTranscriptMetadata md = null;
        if (versionStatus.contains("Latest")) {
            // current transcript
            md = page.getCurrentTranscript();
        } else if (versionStatus.contains("Loaded")) {
            // the loaded version is exported for the loaded page, the latest one for all others
            if (i == pageIdx && loadedTranscript != null) {
                md = loadedTranscript;
            } else {
                md = page.getCurrentTranscript();
            }
        } else {
            md = page.getTranscriptWithStatusOrNull(versionStatus);
        }

        if (md == null) {
            // No version with the requested status: exclude the page from the export ...
            if (pageIndices != null) {
                pageIndices.remove(Integer.valueOf(i));
            }
            // ... but always keep a placeholder. Previously no placeholder was added when
            // pageIndices was null, which shifted the index of all following transcripts.
            pageTranscripts.add(null);
            continue;
        }

        JAXBPageTranscript tr = new JAXBPageTranscript(md);
        tr.build();
        pageTranscripts.add(tr);
        logger.debug("Loaded Transcript from page " + (i + 1));
        if (monitor != null) {
            monitor.setTaskName("Loaded Transcript from page " + (i + 1));
            // worked() expects the increment since the last call, not the running total
            monitor.worked(1);
        }
    }
}
use of eu.transkribus.core.model.beans.TrpTranscriptMetadata in project TranskribusCore by Transkribus.
the class PageXmlDaoTest method main.
/**
 * Manual smoke test: builds the first transcript of the first page of a fake document,
 * checks that the unmarshalled PAGE structure is present, renumbers the ids of all text
 * regions and prints the ids of the text regions found in the region list.
 * <p>
 * The program exits with status 0 as soon as one of the expected elements is missing.
 *
 * @param args not used
 */
public static void main(String[] args) {
    TrpDoc fakeDoc = FakeDocProvider.create(false);
    TrpTranscriptMetadata firstMd = fakeDoc.getPages().get(0).getTranscripts().get(0);
    try {
        JAXBPageTranscript pageTranscript = new JAXBPageTranscript(firstMd);
        pageTranscript.build();

        // check the JAXB root element
        PcGtsType pcGts = pageTranscript.getPageData();
        if (pcGts == null) {
            System.out.println("page XML is null");
            System.exit(0);
        }

        PageType pageElement = pcGts.getPage();
        System.out.println(pcGts.getMetadata());
        if (pageElement == null) {
            System.out.println("pagetype element is null");
            System.exit(0);
        }
        if (pageElement.getTextRegionOrImageRegionOrLineDrawingRegion() == null) {
            System.out.println("Region list is null");
            System.exit(0);
        }

        // give every text region a running number as id
        int nextId = 0;
        for (TextRegionType textRegion : pageTranscript.getPage().getTextRegions(true)) {
            textRegion.setId("" + nextId);
            nextId++;
        }

        // print the ids of all text regions contained in the region list
        List<TrpRegionType> regionList = pageElement.getTextRegionOrImageRegionOrLineDrawingRegion();
        for (RegionType region : regionList) {
            if (!(region instanceof TextRegionType)) {
                continue;
            }
            System.out.println(((TextRegionType) region).getId());
        }
    } catch (IllegalArgumentException | IOException e) {
        e.printStackTrace();
    }
}
use of eu.transkribus.core.model.beans.TrpTranscriptMetadata in project TranskribusCore by Transkribus.
the class LocalDocReader method buildPage.
// private static void startThumbCreationThread(final TrpDoc doc) {
// Runnable thumbCreator = new Runnable(){
// @Override
// public void run() {
// try{
// LocalDocWriter.createThumbsForDoc(doc, false);
// }catch (Exception e){
// logger.error(e);
// return;
// }
// }
// };
// new Thread(thumbCreator).start();
// }
/**
 * Builds a TrpPage object for a local document with the file URLs set.
 *
 * @param inputDir
 *            the directory of the local document; stored as local folder in the
 *            transcript metadata
 * @param pageNr
 *            the number of the page to be built
 * @param img
 *            the image file to include; if null, the image file name is set to
 *            {@code LocalDocConst.NO_IMAGE_FILENAME} and no image URL is derived from it
 * @param pageXml
 *            the corresponding PAGE XML; may be null
 * @param thumb
 *            the thumbnail file for this image; may be null
 * @param dim
 *            the image dimension used as page width and height; may be null
 * @param missingImageRemark
 *            if not empty, the page URL is replaced by the dummy image URL and the
 *            remark is stored as image file problem
 * @return a TrpPage object. A transcript metadata entry is added to the page only if
 *         pageXml is not null.
 * @throws IOException
 *             declared by the signature, e.g. a MalformedURLException if a URL
 *             can't be constructed from one of the files
 */
protected static TrpPage buildPage(File inputDir, int pageNr, File img, File pageXml, File thumb, Dimension dim, final String missingImageRemark) throws IOException {
    final String xmlName = pageXml != null ? pageXml.getName() : "null";
    final String imgName = img != null ? img.getName() : "null";
    logger.debug(pageNr + ": XML = " + xmlName + " - IMG = " + imgName);

    // FIXME handle broken images
    TrpPage result = new TrpPage();
    result.setPageNr(pageNr);
    result.setKey(null);
    result.setDocId(-1);

    if (img == null) {
        result.setImgFileName(LocalDocConst.NO_IMAGE_FILENAME);
    } else {
        result.setImgFileName(img.getName());
        result.setUrl(img.toURI().toURL());
    }

    // a remark on a missing/broken image overrides any image URL set above
    final boolean hasImageProblem = !StringUtils.isEmpty(missingImageRemark);
    if (hasImageProblem) {
        result.setUrl(LocalDocConst.getDummyImageUrl());
        result.setImgFileProblem(missingImageRemark);
    }

    if (thumb != null) {
        result.setThumbUrl(thumb.toURI().toURL());
    }

    if (dim != null) {
        result.setWidth(dim.width);
        result.setHeight(dim.height);
    }

    // without a PAGE XML there is no transcript to attach
    if (pageXml == null) {
        return result;
    }

    final URL transcriptUrl = pageXml.toURI().toURL();
    TrpTranscriptMetadata transcriptMd = new TrpTranscriptMetadata();
    transcriptMd.setPageReferenceForLocalDocs(result);
    transcriptMd.setPageNr(pageNr);
    transcriptMd.setKey(null);
    transcriptMd.setUrl(transcriptUrl);
    transcriptMd.setStatus(EditStatus.NEW);
    transcriptMd.setLocalFolder(inputDir);
    transcriptMd.setTimestamp(System.currentTimeMillis());
    transcriptMd.setUserName("LocalDocReader");
    // TODO real status, time and user parsed from PageXML?
    result.getTranscripts().add(transcriptMd);
    return result;
}
Aggregations