Use of eu.transkribus.core.model.beans.TrpPage in project TranskribusCore by Transkribus.
The class AltoExporter, method export:
/**
 * Exports one ALTO XML file per page of the given document into an
 * ALTO output directory created under {@code path}.
 *
 * @param doc the document whose pages are exported, in page order
 * @param path target directory under which the ALTO output directory is created
 * @throws IOException if the output directory or an ALTO file cannot be written
 */
public void export(final TrpDoc doc, final String path) throws DocumentException, MalformedURLException, IOException, JAXBException, TransformerException {
	File altoOutputDir = createAltoOuputDir(doc, path);
	// java.util.Observable requires setChanged() BEFORE notifyObservers();
	// the previous notify-then-setChanged order silently dropped the first
	// notification and left a stale changed flag after the last one.
	setChanged();
	notifyObservers("Exporting Altos...");
	for (int i = 0; i < doc.getPages().size(); i++) {
		logger.info("Processing page " + (i + 1));
		setChanged();
		notifyObservers(Integer.valueOf(i + 1));
		TrpPage p = doc.getPages().get(i);
		// 3rd parameter says 'splitLineIntoWords'
		exportAltoFile(p, altoOutputDir, false);
	}
	setChanged();
	notifyObservers("Alto written at: " + path);
	logger.info("ALTO files written at: " + path);
}
Use of eu.transkribus.core.model.beans.TrpPage in project TranskribusCore by Transkribus.
The class MetsUtil, method getTrpPages:
/**
 * Builds the list of TrpPage objects with local file references from the
 * METS master file group and structmap.
 * The method is strict regarding PAGE XML existence: every image file must
 * have a corresponding PAGE XML.
 *
 * @param mets the METS document to read
 * @param parentDir local directory against which file references are resolved
 * @return one TrpPage per page div found in the structmap
 * @throws IOException if the image list, the xml list, or a valid structmap is missing
 */
public static List<TrpPage> getTrpPages(Mets mets, File parentDir) throws IOException {
	// Inspect the file section: both an image group and an xml group are
	// required so the two can be told apart without resorting to mime types.
	List<FileType> imageFiles = null;
	List<FileType> xmlFiles = null;
	for (FileGrpType grp : getMasterFileGrp(mets)) {
		String groupId = grp.getID();
		if (groupId.equals(TrpMetsBuilder.IMG_GROUP_ID)) {
			imageFiles = grp.getFile();
		} else if (groupId.equals(TrpMetsBuilder.PAGE_GROUP_ID)) {
			xmlFiles = grp.getFile();
		}
	}
	if (imageFiles == null) {
		throw new IOException("METS file has no image file list!");
	}
	if (xmlFiles == null) {
		throw new IOException("METS file has no xml file list!");
	}
	List<DivType> pageDivs = getPageDivsFromStructMap(mets);
	if (pageDivs == null) {
		throw new IOException("No valid StructMap was found!");
	}
	// Build one page per structmap div, preserving div order.
	List<TrpPage> result = new ArrayList<TrpPage>(pageDivs.size());
	for (DivType div : pageDivs) {
		result.add(buildPage(div, imageFiles, xmlFiles, parentDir));
	}
	return result;
}
Use of eu.transkribus.core.model.beans.TrpPage in project TranskribusCore by Transkribus.
The class MetsUtil, method buildPage:
/**
 * Builds a single TrpPage from a structmap page div, resolving its master
 * image and PAGE XML file from the given METS file groups.
 *
 * @param div structmap div describing one page; its ORDER attribute is the page number
 * @param imgGrp file list of the master image group
 * @param xmlGrp file list of the PAGE XML group
 * @param parentDir local directory used to resolve relative file references
 * @return the populated page with one NEW-status transcript attached
 * @throws IOException if no master image or no PAGE XML is mapped for this page
 */
private static TrpPage buildPage(DivType div, List<FileType> imgGrp, List<FileType> xmlGrp, File parentDir) throws IOException {
	TrpPage page = new TrpPage();
	int nr = div.getORDER().intValue();
	page.setPageNr(nr);
	File imgFile = null;
	File xmlFile = null;
	// FIXME this will only work for local files
	for (Fptr ptr : div.getFptr()) {
		FileType type = (FileType) ptr.getArea().getFILEID();
		if (imgGrp.contains(type)) {
			imgFile = MetsUtil.getFile(type, parentDir);
		} else if (xmlGrp.contains(type)) {
			xmlFile = MetsUtil.getFile(type, parentDir);
		}
	}
	// Fail fast instead of hitting the NullPointerException the old FIXMEs
	// warned about: the contract (see getTrpPages) requires both files.
	if (imgFile == null) {
		logger.error("No master image mapped for page " + nr + " in the structmap!");
		throw new IOException("No master image mapped for page " + nr + " in the structmap!");
	}
	logger.info("Page " + page.getPageNr() + " image: " + imgFile.getAbsolutePath());
	page.setUrl(imgFile.toURI().toURL());
	page.setKey(null);
	page.setDocId(-1);
	page.setImgFileName(imgFile.getName());
	if (xmlFile == null) {
		logger.error("No master xml mapped for page " + nr + " in the structmap!");
		throw new IOException("No master xml mapped for page " + nr + " in the structmap!");
	}
	logger.info("Page " + page.getPageNr() + " xml: " + xmlFile.getAbsolutePath());
	TrpTranscriptMetadata tmd = new TrpTranscriptMetadata();
	tmd.setPageReferenceForLocalDocs(page);
	tmd.setPageId(page.getPageId());
	tmd.setUrl(xmlFile.toURI().toURL());
	tmd.setKey(null);
	tmd.setStatus(EditStatus.NEW);
	tmd.setTimestamp(new Date().getTime());
	tmd.setUserName("LocalDocReader");
	page.getTranscripts().add(tmd);
	return page;
}
Use of eu.transkribus.core.model.beans.TrpPage in project TranskribusCore by Transkribus.
The class PdfExporter, method export:
/**
 * Exports the document (or a subset of its pages) to a PDF file.
 *
 * @param doc source document; must not be null
 * @param path target PDF file path; must not be null
 * @param pageIndices zero-based page indices to include; null means all pages
 * @param useWordLevel use word-level instead of line-level text
 * @param addTextPages add a searchable text layer/pages
 * @param imagesOnly export page images without text
 * @param highlightTags highlight tagged text and append a tag index
 * @param doBlackening apply blackening (redaction) regions
 * @param createTitle prepend a title page with document metadata
 * @param cache transcript cache; a fresh one is created when null is passed
 * @return the written PDF file
 * @throws InterruptedException if the user cancelled; the partial PDF is deleted first
 * @throws IOException if the incomplete PDF cannot be deleted on cancel
 */
public File export(final TrpDoc doc, final String path, Set<Integer> pageIndices, final boolean useWordLevel, final boolean addTextPages, final boolean imagesOnly, final boolean highlightTags, final boolean doBlackening, boolean createTitle, ExportCache cache) throws DocumentException, MalformedURLException, IOException, JAXBException, URISyntaxException, InterruptedException {
	if (doc == null) {
		throw new IllegalArgumentException("TrpDoc is null!");
	}
	if (path == null) {
		throw new IllegalArgumentException("path is null!");
	}
	if (cache == null) {
		cache = new ExportCache();
	}
	File pdfFile = new File(path);
	TrpPdfDocument pdf = new TrpPdfDocument(pdfFile, useWordLevel, highlightTags, doBlackening, createTitle);
	setChanged();
	notifyObservers("Creating PDF document...");
	boolean onePagePrinted = false;
	for (int i = 0; i < doc.getPages().size(); ++i) {
		if (pageIndices != null && !pageIndices.contains(i)) {
			continue;
		}
		logger.info("Processing page " + (i + 1));
		TrpPage p = doc.getPages().get(i);
		URL imgUrl = p.getUrl();
		/*
		 * md is only needed for the image resolution, which may be missing in
		 * the image itself. For local docs md stays null and the resolution
		 * has to be taken from the image instead.
		 */
		FimgStoreImgMd md = null;
		if (doc.isRemoteDoc()) {
			FimgStoreGetClient getter = new FimgStoreGetClient(p.getUrl());
			md = (FimgStoreImgMd) getter.getFileMd(p.getKey());
		}
		URL xmlUrl = p.getCurrentTranscript().getUrl();
		logger.debug("output with tags " + highlightTags);
		// cache is guaranteed non-null here (replaced above), so check it for
		// a parsed transcript first and only unmarshal on a cache miss.
		JAXBPageTranscript pt = cache.getPageTranscriptAtIndex(i);
		PcGtsType pc = pt != null ? pt.getPageData() : PageXmlUtils.unmarshal(xmlUrl);
		if (!onePagePrinted) {
			// first page: also emits the title page with doc metadata and
			// editorial declarations when createTitle is set
			pdf.addPage(imgUrl, doc, pc, addTextPages, imagesOnly, md, doBlackening, cache);
			onePagePrinted = true;
		} else {
			pdf.addPage(imgUrl, null, pc, addTextPages, imagesOnly, md, doBlackening, cache);
		}
		setChanged();
		notifyObservers(Integer.valueOf(i + 1));
		if (cancel) {
			// close and remove the partial output before reporting cancellation
			pdf.close();
			File file = new File(path);
			if (!file.delete()) {
				throw new IOException("Could not delete the incomplete PDF file during export cancel");
			}
			throw new InterruptedException("Export canceled by the user");
		}
	}
	if (highlightTags) {
		pdf.addTags(doc, pageIndices, useWordLevel, cache);
	}
	pdf.close();
	setChanged();
	notifyObservers("PDF written at: " + path);
	logger.info("PDF written at: " + path);
	return pdfFile;
}
Use of eu.transkribus.core.model.beans.TrpPage in project TranskribusCore by Transkribus.
The class TrpRtfBuilder, method writeRtfForDoc:
/**
 * Writes the document (or a subset of its pages) as an RTF file.
 *
 * @param doc the document to export
 * @param wordBased export word-level instead of line-level text
 * @param writeTags if true, append a listing of all selected tags at the end
 * @param doBlackening apply blackening to redacted text
 * @param file target RTF file
 * @param pageIndices zero-based page indices to include; null means all pages
 * @param monitor optional progress monitor; may be null
 * @param cache export cache providing the selected tag names and tag values
 * @throws IOException if the RTF file cannot be written
 */
public static void writeRtfForDoc(TrpDoc doc, boolean wordBased, boolean writeTags, boolean doBlackening, File file, Set<Integer> pageIndices, IProgressMonitor monitor, ExportCache cache) throws JAXBException, IOException {
	exportTags = writeTags;
	// tagnames = all tags the user chose in the export dialog
	tagnames = cache.getSelectedTags();
	TrpRtfBuilder.doBlackening = doBlackening;
	Rtf rtf = Rtf.rtf();
	List<TrpPage> pages = doc.getPages();
	int totalPages = pageIndices == null ? pages.size() : pageIndices.size();
	if (monitor != null) {
		monitor.beginTask("Exporting to RTF", totalPages);
	}
	int c = 0;
	for (int i = 0; i < pages.size(); ++i) {
		if (pageIndices != null && !pageIndices.contains(i)) {
			continue;
		}
		if (monitor != null) {
			if (monitor.isCanceled()) {
				logger.debug("RTF export cancelled!");
				return;
			}
			monitor.subTask("Processing page " + (c + 1));
		}
		TrpPage page = pages.get(i);
		TrpTranscriptMetadata md = page.getCurrentTranscript();
		JAXBPageTranscript tr = new JAXBPageTranscript(md);
		tr.build();
		TrpPageType trpPage = tr.getPage();
		logger.debug("writing rtf for page " + (i + 1) + "/" + doc.getNPages());
		rtf.header(color(204, 0, 0).at(0), color(0, 0xff, 0).at(1)).section(getRtfParagraphsForTranscript(trpPage, wordBased));
		++c;
		if (monitor != null) {
			// IProgressMonitor.worked() takes the INCREMENT of completed work,
			// not the running total; the old worked(c) over-reported progress.
			monitor.worked(1);
		}
	}
	// write tags at end of last page
	if (exportTags) {
		// one underlined headline plus one values paragraph per tag name that
		// actually occurs in the document
		ArrayList<RtfPara> tagParas = new ArrayList<RtfPara>();
		for (String currTagname : tagnames) {
			// get all custom tags with currTagname and text
			HashMap<CustomTag, String> allTagsOfThisTagname = cache.getTags(currTagname);
			if (allTagsOfThisTagname.size() > 0) {
				tagParas.add(RtfPara.p(RtfText.text(RtfText.underline(currTagname + " tags in this document: " + allTagsOfThisTagname.size()))));
				Collection<String> valueSet = allTagsOfThisTagname.values();
				RtfText[] tagTexts = new RtfText[valueSet.size()];
				int l = 0;
				for (String currEntry : valueSet) {
					tagTexts[l++] = RtfText.text(currEntry.concat("\n"));
				}
				tagParas.add(RtfPara.p(tagTexts));
			}
		}
		rtf.header(color(204, 0, 0).at(0)).section(tagParas);
	}
	// try-with-resources: the FileWriter was previously never closed (leak,
	// and buffered output could be lost if rtf.out() threw)
	try (FileWriter writer = new FileWriter(file)) {
		rtf.out(writer);
	}
	logger.info("wrote rtf to: " + file.getAbsolutePath());
}
Aggregations