use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType in project TranskribusCore by Transkribus.
the class PageXmlUtils method createPcGtsTypeFromText.
/**
 * Creates a page for the given image that contains one text region covering the whole image
 * and fills that region with the given text at the requested transcription level.
 * <ul>
 * <li>REGION_BASED: the text is set on the region as-is.</li>
 * <li>LINE_BASED: the text is split at line breaks; one line element is added per segment.</li>
 * <li>WORD_BASED: as LINE_BASED, and each line is additionally split at single spaces into word elements.</li>
 * </ul>
 * All created lines and words get the coordinates of the full-image rectangle.
 *
 * @param imgFileName image file name, passed on to {@code createEmptyPcGtsType}
 * @param dim image dimension, passed on to {@code createEmptyPcGtsType}
 * @param text the text to insert; {@code null} is treated like an empty string for line/word levels
 * @param level the transcription level; {@code null} defaults to LINE_BASED
 * @param skipEmptyLines if true, no line element is created for empty segments
 * @return the created page document
 * @throws IOException if the level is none of REGION_BASED, LINE_BASED, WORD_BASED
 */
public static PcGtsType createPcGtsTypeFromText(final String imgFileName, Dimension dim, String text, TranscriptionLevel level, boolean skipEmptyLines) throws IOException {
    // create empty page
    PcGtsType pcGtsType = createEmptyPcGtsType(imgFileName, dim);
    TrpPageType page = (TrpPageType) pcGtsType.getPage();

    // create and add text region with size of image
    Rectangle r = new Rectangle(0, 0, page.getImageWidth(), page.getImageHeight());
    String defaultCoords = PointStrUtils.pointsToString(r);
    TrpTextRegionType region = new TrpTextRegionType(page);
    region.setId("region_1");
    region.setCoordinates(defaultCoords, null);
    page.getTextRegionOrImageRegionOrLineDrawingRegion().add(region);

    if (level == null) {
        level = TranscriptionLevel.LINE_BASED;
    }
    if (level != TranscriptionLevel.REGION_BASED && level != TranscriptionLevel.LINE_BASED && level != TranscriptionLevel.WORD_BASED) {
        throw new IOException("Invalide TranscriptionLevel: " + level);
    }

    if (level == TranscriptionLevel.REGION_BASED) {
        region.setUnicodeText(text, null);
        return pcGtsType;
    }

    // Guard against null so that split() below cannot throw a NullPointerException.
    String content = (text == null) ? "" : text;
    String splitRegex = skipEmptyLines ? "[\\r\\n]+" : "\\r?\\n";
    String[] lines = content.split(splitRegex);

    int lc = 1;
    for (String lineText : lines) {
        // The regex alone does not remove every empty line: String.split keeps a leading empty
        // segment when the text starts with a line break and returns [""] for empty text.
        if (skipEmptyLines && lineText.isEmpty()) {
            continue;
        }
        TrpTextLineType line = new TrpTextLineType(region);
        line.setId("line_" + (lc++));
        line.setCoordinates(defaultCoords, null);
        region.getTextLine().add(line);

        if (level == TranscriptionLevel.LINE_BASED) {
            line.setUnicodeText(lineText, null);
        } else if (level == TranscriptionLevel.WORD_BASED) {
            int wc = 1;
            for (String wordText : lineText.split(" ")) {
                // TODO: better word splitting??
                TrpWordType word = new TrpWordType(line);
                word.setId("word_" + (wc++));
                word.setCoordinates(defaultCoords, null);
                word.setUnicodeText(wordText, null);
                line.getWord().add(word);
            }
        }
    }
    // report the number of line elements actually created (skipped segments excluded)
    logger.debug("nr of lines = " + (lc - 1));
    return pcGtsType;
}
use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType in project TranskribusCore by Transkribus.
the class TextStyleTypeUtils method addTextStyleTag.
/**
 * Adds the given text style tag to the custom tag list of the shape and then synchronizes the
 * shape's global text style: if the tag list reports a single indexed text style tag over the
 * whole shape range, the global text style is set to the style of {@code s}, otherwise it is
 * cleared (set to null). Finally a {@link TrpTextStyleChangedEvent} is sent to the observers.
 * <p>
 * Shapes that are not a text region, line or word (including {@code null}) are ignored.
 *
 * @param shape the shape to tag
 * @param s the text style tag to add or merge
 * @param addOnlyThisProperty passed through to {@code addOrMergeTag}
 * @param who the source object put into the change event
 */
public static void addTextStyleTag(ITrpShapeType shape, TextStyleTag s, String addOnlyThisProperty, /*boolean recursive,*/
        Object who) {
    if (!(shape instanceof TrpTextRegionType || shape instanceof TrpTextLineType || shape instanceof TrpWordType)) {
        return;
    }

    // add text style tag to custom list:
    shape.getCustomTagList().addOrMergeTag(s, addOnlyThisProperty);
    logger.debug("customtaglist=" + shape.getCustomTagList());

    // apply text style tag to global text style if the text style tag is a single index tag over the whole range of the shape:
    boolean isS = shape.getCustomTagList().isSingleIndexedTagOverShapeRange(TextStyleTag.TAG_NAME);
    logger.debug("isSingleIndexedTagOverShapeRange: " + isS);

    // deactivate observer to avoid excessive events...
    boolean isActive = shape.getObservable().isActive();
    shape.getObservable().setActive(false);
    try {
        if (isS) {
            shape.setTextStyle(s.getTextStyle(), false, shape);
        } else {
            // erase global text style
            shape.setTextStyle(null, false, shape);
        }
        logger.debug("CUSTOM AFTER: " + shape.getCustom());
    } finally {
        // always restore the previous observer state, even if updating the style failed
        shape.getObservable().setActive(isActive);
    }

    // send text style changed event:
    shape.getObservable().setChangedAndNotifyObservers(new TrpTextStyleChangedEvent(who));
}
use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType in project TranskribusCore by Transkribus.
the class CustomTagListTest method testSimpleAddOrMergeTagWithTextStyles.
// @Ignore
@Test
public void testSimpleAddOrMergeTagWithTextStyles() {
    // a single line with known text, nested in a fresh region and page
    TrpPageType page = new TrpPageType();
    TrpTextRegionType region = new TrpTextRegionType(page);
    TrpTextLineType textLine = new TrpTextLineType(region);
    textLine.setUnicodeText("Hello world!", null);

    CustomTagList tagList = new CustomTagList(textLine);

    // base style tag constructed with (0, 10)
    TextStyleTag fontTag = new TextStyleTag(0, 10);
    fontTag.setFontFamily("testFont");
    tagList.addOrMergeTag(fontTag, null);

    // bold tag constructed with (2, 5), overlapping the base tag
    TextStyleTag boldTag = new TextStyleTag(2, 5);
    boldTag.setBold(true);
    tagList.addOrMergeTag(boldTag, null);
    logger.trace(tagList.toString());
    Assert.assertEquals("Nr of text styles must be 3!", 3, tagList.getTags().size());

    // italic tag constructed with (3, 4), overlapping both previous tags
    TextStyleTag italicTag = new TextStyleTag(3, 4);
    italicTag.setItalic(true);
    tagList.addOrMergeTag(italicTag, null);
    Assert.assertEquals("Nr of text styles must be 4!", 4, tagList.getTags().size());
    logger.trace(tagList.toString());

    // the merged tags must still start at 0 and end at 10
    int firstOffset = tagList.getTags().get(0).getOffset();
    Assert.assertTrue("offset = 0", firstOffset == 0);

    int lastIndex = tagList.getTags().size() - 1;
    CustomTag lastTag = tagList.getTags().get(lastIndex);
    int lastEnd = lastTag.getOffset() + lastTag.getLength();
    Assert.assertTrue("offset+length = 10", lastEnd == 10);
}
use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType in project TranskribusCore by Transkribus.
the class CustomTagUtil method setStructure.
/**
 * Sets the structure type on the given shape and notifies its observers with a
 * {@link TrpStructureChangedEvent}.
 * <p>
 * Nothing happens for a {@code null} shape or for a shape rejected by
 * {@code isTextregionOrLineOrWord}. For text regions the parsed text type is additionally
 * stored in the region's type field. A null or empty structure type removes the structure
 * tags from the shape's custom tag list; any other value is added or merged as a structure tag.
 *
 * @param shape the shape to modify
 * @param structureType the structure type; null or empty removes the structure tag
 * @param recursive if true, the structure is also set on the shapes returned by {@code getChildren(true)}
 * @param who the source object put into the change event
 */
public static void setStructure(ITrpShapeType shape, String structureType, boolean recursive, Object who) {
    if (shape == null) {
        return;
    }
    logger.trace("setting structure: " + structureType + " id: " + shape.getId() + " type: " + shape.getClass().getSimpleName() + " recursive: " + recursive);
    if (!isTextregionOrLineOrWord(shape)) {
        return;
    }

    // text regions also carry the structure in their own type field
    if (shape instanceof TrpTextRegionType) {
        TextTypeSimpleType parsedType = StructureTag.parseTextType(structureType);
        TrpTextRegionType region = (TrpTextRegionType) shape;
        region.setType(parsedType);
    }

    // keep the custom tag list in sync
    boolean hasStructure = structureType != null && !structureType.isEmpty();
    if (hasStructure) {
        StructureTag tag = new StructureTag(structureType);
        shape.getCustomTagList().addOrMergeTag(tag, null);
    } else {
        shape.getCustomTagList().removeTags(StructureTag.TAG_NAME);
    }

    // propagate to the children on request
    if (recursive) {
        for (ITrpShapeType child : shape.getChildren(true)) {
            child.setStructure(structureType, true, who);
        }
    }

    TrpStructureChangedEvent event = new TrpStructureChangedEvent(who);
    shape.getObservable().setChangedAndNotifyObservers(event);
}
use of eu.transkribus.core.model.beans.pagecontent_trp.TrpTextRegionType in project TranskribusCore by Transkribus.
the class Pdf2TrpDoc method main.
/**
 * Development entry point: extracts the text layout of a PDF and writes one PAGE XML file per
 * page image into the "page" sub-directory of the image directory.
 * <p>
 * The images are read from a hard-coded directory; the i-th image (in key order of the map
 * returned by {@code LocalDocReader.findImgFiles}) is paired with the i-th extracted PDF page.
 * Regions, lines and words are only written for pages that have a content rectangle.
 *
 * @param args exactly one argument: the path of the PDF file
 */
public static void main(String[] args) {
    if (args.length != 1) {
        logger.error("Usage: Pdf2TrpDoc <pdf-file>");
        return;
    }
    File in = new File(args[0]);
    File outDir = new File("/tmp/");
    outDir.mkdirs();
    try {
        // PageImageWriter imgWriter = new PageImageWriter();
        // String imgDirPath = imgWriter.extractImages(in.getAbsolutePath(), outDir.getAbsolutePath());
        // NOTE(review): hard-coded path; the non-ASCII character looks mis-encoded - confirm the real directory name
        String imgDirPath = "/tmp/KurzgefaĆte_Geschichte_Statistik_und_Topographie_von_Tirol";
        File pageDir = new File(imgDirPath + File.separator + "page");
        pageDir.mkdirs();

        TreeMap<String, File> imgs = LocalDocReader.findImgFiles(new File(imgDirPath));
        ArrayList<PDFPage> pages = PDFTextExtractor.processPDF(in.getAbsolutePath());
        if (imgs.size() != pages.size()) {
            logger.error("Nr. of image files does not match nr. of text pages!");
            return;
        }

        int i = 0;
        for (Entry<String, File> img : imgs.entrySet()) {
            PDFPage pdfPage = pages.get(i++);
            Dimension dim = ImgUtils.readImageDimensions(img.getValue());
            PcGtsType pc = PageXmlUtils.createEmptyPcGtsType(img.getValue(), dim);
            final File xmlOut = new File(pageDir.getAbsolutePath() + File.separator + img.getKey() + ".xml");

            Rectangle printspace = pdfPage.getContentRect();
            if (printspace != null) {
                TrpPrintSpaceType psType = new TrpPrintSpaceType();
                psType.setCoords(rect2Coords(printspace));
                TrpPageType pageType = (TrpPageType) pc.getPage();
                pageType.setPrintSpace(psType);

                // mirror the PDF hierarchy region -> line -> string as region -> line -> word
                for (PDFRegion r : pdfPage.regions) {
                    TrpTextRegionType rType = new TrpTextRegionType(pageType);
                    rType.setCoords(rect2Coords(r.getRect()));
                    rType.setUnicodeText(r.getText(), null);
                    for (PDFLine l : r.lines) {
                        TrpTextLineType lType = new TrpTextLineType(rType);
                        lType.setCoords(rect2Coords(l.getRect()));
                        lType.setUnicodeText(l.getText(), null);
                        for (PDFString s : l.strings) {
                            TrpWordType wType = new TrpWordType(lType);
                            wType.setCoords(rect2Coords(s.getRect()));
                            wType.setUnicodeText(s.value, null);
                            lType.getWord().add(wType);
                        }
                        rType.getTextLine().add(lType);
                    }
                    pageType.getRegions().add(rType);
                }
            }
            // the page file is written even if no content rectangle was found
            PageXmlUtils.marshalToFile(pc, xmlOut);
        }
    } catch (Exception e) {
        // top-level boundary of this tool: report the failure with its cause via the logger
        logger.error("Could not convert PDF " + in.getAbsolutePath() + ": " + e.getMessage(), e);
    }
}
Aggregations