Use of eu.transkribus.core.model.beans.TrpTranscriptStatistics in project TranskribusCore by Transkribus.
From the class PageXmlUtils, method extractStats.
/**
 * Collects transcription statistics for one page.
 * <p>
 * Walks the text regions returned by {@link PageXmlUtils#getTextRegions}, their
 * text lines and the words of those lines. For every level it counts the total
 * number of elements and the number of elements whose text equivalent contains
 * non-blank text. For regions and lines it additionally counts the words found
 * in that text.
 * <p>
 * Words are counted as whitespace-separated tokens, so consecutive spaces,
 * tabs and line breaks inside a text never produce empty or merged "words".
 *
 * @param page the page to analyse
 * @return a new statistics object filled with the counts for this page
 */
public static TrpTranscriptStatistics extractStats(PcGtsType page) {
    int nrOfTranscribedRegions = 0;
    int nrOfWordsInRegions = 0;
    int nrOfLines = 0;
    int nrOfTranscribedLines = 0;
    int nrOfWordsInLines = 0;
    int nrOfWords = 0;
    int nrOfTranscribedWords = 0;

    List<TextRegionType> regs = PageXmlUtils.getTextRegions(page);
    int nrOfRegions = regs.size();

    for (TextRegionType r : regs) {
        String regionText = r.getTextEquiv() == null ? null : r.getTextEquiv().getUnicode();
        if (isNonBlankText(regionText)) {
            nrOfTranscribedRegions++;
            nrOfWordsInRegions += countWhitespaceSeparatedTokens(regionText);
        }

        List<TextLineType> lines = r.getTextLine();
        nrOfLines += lines.size();
        for (TextLineType l : lines) {
            String lineText = l.getTextEquiv() == null ? null : l.getTextEquiv().getUnicode();
            if (isNonBlankText(lineText)) {
                nrOfTranscribedLines++;
                nrOfWordsInLines += countWhitespaceSeparatedTokens(lineText);
            }

            List<WordType> words = l.getWord();
            nrOfWords += words.size();
            for (WordType w : words) {
                String wordText = w.getTextEquiv() == null ? null : w.getTextEquiv().getUnicode();
                if (isNonBlankText(wordText)) {
                    nrOfTranscribedWords++;
                }
            }
        }
    }

    TrpTranscriptStatistics s = new TrpTranscriptStatistics();
    s.setNrOfLines(nrOfLines);
    s.setNrOfRegions(nrOfRegions);
    s.setNrOfTranscribedLines(nrOfTranscribedLines);
    s.setNrOfTranscribedWords(nrOfTranscribedWords);
    s.setNrOfTranscribedRegions(nrOfTranscribedRegions);
    s.setNrOfWords(nrOfWords);
    s.setNrOfWordsInLines(nrOfWordsInLines);
    s.setNrOfWordsInRegions(nrOfWordsInRegions);
    return s;
}

/** Returns true if the given text is neither null nor empty after trimming. */
private static boolean isNonBlankText(String text) {
    return text != null && !text.trim().isEmpty();
}

/**
 * Counts the whitespace-separated tokens of a non-blank text.
 * The text is trimmed first so that leading whitespace cannot yield an empty
 * first token; splitting on whitespace runs keeps repeated blanks and line
 * breaks from inflating or deflating the count.
 */
private static int countWhitespaceSeparatedTokens(String text) {
    return text.trim().split("\\s+").length;
}
Use of eu.transkribus.core.model.beans.TrpTranscriptStatistics in project TranskribusCore by Transkribus.
From the class DocStatisticsBuilder, method compute.
/**
 * Computes the accumulated transcript statistics over all pages of a document.
 * <p>
 * For each page the current transcript's XML is unmarshalled, its statistics
 * are extracted with {@link PageXmlUtils#extractStats} and added to the
 * running total. A progress message is logged and sent to the observers
 * before each page is processed.
 *
 * @param doc the document to analyse; must not be null
 * @return the summed statistics of all pages
 * @throws IllegalArgumentException if {@code doc} is null
 * @throws JAXBException presumably raised when a page XML cannot be unmarshalled
 */
public TrpTranscriptStatistics compute(TrpDoc doc) throws JAXBException {
    if (doc == null) {
        throw new IllegalArgumentException("TrpDoc is null!");
    }
    TrpTranscriptStatistics stats = new TrpTranscriptStatistics();
    List<TrpPage> pages = doc.getPages();
    for (TrpPage p : pages) {
        final String msg = "Computing stats: page " + p.getPageNr() + "/" + pages.size();
        logger.debug(msg);
        // NOTE(review): assumes these are the java.util.Observable methods.
        // notifyObservers() only dispatches while the changed flag is set, so the
        // flag has to be raised first; otherwise the first page's message is lost
        // and the flag stays set after the last page.
        setChanged();
        notifyObservers(msg);
        URL xmlUrl = p.getCurrentTranscript().getUrl();
        PcGtsType pc = PageXmlUtils.unmarshal(xmlUrl);
        TrpTranscriptStatistics pageStats = PageXmlUtils.extractStats(pc);
        stats.add(pageStats);
    }
    return stats;
}
Aggregations