use of com.joliciel.jochre.doc.ImageDocumentExtractor in project jochre by urieli.
the class TextController method onClick$btnAnalyse.
/**
 * Handles a click on the "Analyse" button by starting processing of
 * {@code currentFile} on a background thread.
 *
 * <p>Does nothing when no file is currently selected. Otherwise the optional
 * start/end page text boxes are read, a {@link JochreDocumentGenerator} is set
 * up (saving against {@code currentDoc} when one exists), analysis is requested
 * if the session has a letter model path, and a processor matching the file
 * extension (PDF or PNG/JPG/JPEG/GIF image) is launched in its own thread.
 *
 * @param event the click event (not inspected)
 * @throws RuntimeException wrapping any failure, including a non-numeric page
 *         number, a start page greater than the end page, or an unrecognised
 *         file extension
 */
@Listen("onClick = #btnAnalyse")
public void onClick$btnAnalyse(Event event) {
  try {
    LOG.debug("onClick$btnAnalyse");
    if (currentFile == null) {
      return;
    }

    // Read and validate the page range before touching the UI or the document,
    // so that bad input does not leave the progress box showing.
    String startText = txtStartPage.getValue().trim();
    String endText = txtEndPage.getValue().trim();
    int startPage = startText.isEmpty() ? -1 : Integer.parseInt(startText);
    int endPage = endText.isEmpty() ? -1 : Integer.parseInt(endText);

    // An empty set means "all pages" to the PDF processor.
    final Set<Integer> pages;
    if (startPage < 0 && endPage < 0) {
      pages = Collections.emptySet();
    } else {
      if (startPage < 0) {
        startPage = 0;
      }
      if (endPage < 0) {
        // Upper bound applied when only a start page was given.
        endPage = 2000;
      }
      if (startPage > endPage) {
        // rangeClosed would silently produce an empty set here, which would
        // then be interpreted as "process every page".
        throw new IllegalArgumentException("Invalid page range: " + startPage + " to " + endPage);
      }
      pages = IntStream.rangeClosed(startPage, endPage).boxed().collect(Collectors.toSet());
    }

    progressBox.setVisible(true);
    lblAwaitingFile.setVisible(false);

    if (this.currentDoc != null) {
      this.currentDoc.setFileName(currentFile.getName());
      this.currentDoc.save();
      this.documentGenerator = new JochreDocumentGenerator(this.currentDoc, jochreSession);
      this.documentGenerator.requestSave(currentUser);
    } else {
      this.documentGenerator = new JochreDocumentGenerator(currentFile.getName(), "", jochreSession);
    }

    String letterModelPath = jochreSession.getLetterModelPath();
    if (letterModelPath != null) {
      MostLikelyWordChooser wordChooser = new MostLikelyWordChooser(jochreSession);
      documentGenerator.requestAnalysis(wordChooser);
    }

    this.documentHtmlGenerator = new DocumentHtmlGenerator();
    documentGenerator.addDocumentObserver(this.documentHtmlGenerator);

    // Locale.ROOT keeps the extension match independent of the server's default
    // locale (e.g. "I" lower-cases to a dotless i under a Turkish locale).
    String lowerCaseFileName = currentFile.getName().toLowerCase(java.util.Locale.ROOT);

    final Runnable processor;
    if (lowerCaseFileName.endsWith(".pdf")) {
      PdfDocumentProcessor pdfDocumentProcessor = new PdfDocumentProcessor(currentFile, pages, documentGenerator);
      this.progressMonitor = pdfDocumentProcessor.monitorTask();
      processor = pdfDocumentProcessor;
    } else if (lowerCaseFileName.endsWith(".png") || lowerCaseFileName.endsWith(".jpg")
        || lowerCaseFileName.endsWith(".jpeg") || lowerCaseFileName.endsWith(".gif")) {
      ImageDocumentExtractor extractor = new ImageDocumentExtractor(currentFile, documentGenerator);
      if (startPage >= 0) {
        extractor.setPageNumber(startPage);
      }
      this.progressMonitor = extractor.monitorTask();
      processor = extractor;
    } else {
      throw new RuntimeException("Unrecognised file extension");
    }
    this.currentHtmlIndex = 0;

    Thread thread = new Thread(processor);
    thread.setName(currentFile.getName() + " Processor");
    thread.start();
    currentThread = thread;

    // Started once, for both file types, after the worker is running.
    progressTimer.setRunning(true);
    btnAnalyse.setDisabled(true);
    btnInterrupt.setVisible(true);
    btnInterrupt.setDisabled(false);
  } catch (Exception e) {
    LOG.error("Failure in onClick$btnAnalyse", e);
    throw new RuntimeException(e);
  }
}
use of com.joliciel.jochre.doc.ImageDocumentExtractor in project jochre by urieli.
the class Jochre method doCommandAnalyse.
/**
 * Full analysis, including merge, split and letter guessing.
 *
 * <p>Builds a letter guesser from the session's letter model, then picks a
 * boundary detector: when the session has both a split model and a merge
 * model, a {@code DeterministicBoundaryDetector} is used together with an
 * {@code OriginalShapeLetterAssigner}; otherwise an
 * {@code OriginalBoundaryDetector} with a {@code LetterAssigner}. The resulting
 * analyser and the supplied observers are attached to a new document
 * generator, which is then fed from the source file.
 *
 * @param sourceFile
 *            the file to analyse: a PDF, a PNG/JPG/JPEG/GIF image, or a
 *            directory
 * @param wordChooser
 *            the word chooser handed to the image analyser
 * @param pages
 *            the pages to process, empty means all (only passed on for PDF
 *            input)
 * @param observers
 *            additional document observers to attach to the document generator
 * @param imageObservers
 *            image observers to attach to the PDF processor (only used for PDF
 *            input)
 * @throws IOException
 *             declared by the signature; the originating call is not visible
 *             in this method
 * @throws JochreException
 *             if {@code sourceFile} does not exist
 * @throws RuntimeException
 *             if {@code sourceFile} is neither a recognised file type nor a
 *             directory
 */
public void doCommandAnalyse(File sourceFile, MostLikelyWordChooser wordChooser, Set<Integer> pages, List<DocumentObserver> observers, List<PdfImageObserver> imageObservers) throws IOException {
  ClassificationModel letterModel = jochreSession.getLetterModel();
  List<String> letterFeatureDescriptors = letterModel.getFeatureDescriptors();
  LetterFeatureParser letterFeatureParser = new LetterFeatureParser();
  Set<LetterFeature<?>> letterFeatures = letterFeatureParser.getLetterFeatureSet(letterFeatureDescriptors);
  LetterGuesser letterGuesser = new LetterGuesser(letterFeatures, letterModel.getDecisionMaker());

  final BoundaryDetector boundaryDetector;
  final LetterGuessObserver letterGuessObserver;
  if (jochreSession.getSplitModel() != null && jochreSession.getMergeModel() != null) {
    boundaryDetector = new DeterministicBoundaryDetector(jochreSession.getSplitModel(), jochreSession.getMergeModel(), jochreSession);
    OriginalShapeLetterAssigner shapeLetterAssigner = new OriginalShapeLetterAssigner();
    shapeLetterAssigner.setEvaluate(false);
    shapeLetterAssigner.setSingleLetterMethod(false);
    letterGuessObserver = shapeLetterAssigner;
  } else {
    boundaryDetector = new OriginalBoundaryDetector();
    letterGuessObserver = new LetterAssigner();
  }

  ImageAnalyser analyser = new BeamSearchImageAnalyser(boundaryDetector, letterGuesser, wordChooser, jochreSession);
  analyser.addObserver(letterGuessObserver);

  JochreDocumentGenerator documentGenerator = new JochreDocumentGenerator(sourceFile.getName(), "", jochreSession);
  documentGenerator.addDocumentObserver(analyser);
  for (DocumentObserver observer : observers) {
    documentGenerator.addDocumentObserver(observer);
  }

  if (!sourceFile.exists()) {
    throw new JochreException("The file " + sourceFile.getPath() + " does not exist");
  }

  // Lower-case once, with Locale.ROOT so the extension match does not depend on
  // the JVM's default locale (e.g. "I" lower-cases to a dotless i in Turkish).
  String lowerCaseName = sourceFile.getName().toLowerCase(java.util.Locale.ROOT);
  boolean isImage = lowerCaseName.endsWith(".png") || lowerCaseName.endsWith(".jpg")
      || lowerCaseName.endsWith(".jpeg") || lowerCaseName.endsWith(".gif");

  if (lowerCaseName.endsWith(".pdf")) {
    PdfDocumentProcessor pdfDocumentProcessor = new PdfDocumentProcessor(sourceFile, pages, documentGenerator);
    for (PdfImageObserver imageObserver : imageObservers) {
      pdfDocumentProcessor.addImageObserver(imageObserver);
    }
    pdfDocumentProcessor.process();
  } else if (isImage || sourceFile.isDirectory()) {
    // Single images and directories go through the same extractor.
    ImageDocumentExtractor extractor = new ImageDocumentExtractor(sourceFile, documentGenerator);
    extractor.extractDocument();
  } else {
    throw new RuntimeException("Unrecognised file extension");
  }
}
use of com.joliciel.jochre.doc.ImageDocumentExtractor in project jochre by urieli.
the class Jochre method doCommandSegment.
/**
 * Segment a file, without analysing it.
 *
 * @param filename
 *            the path of the file to load; must end in .pdf, .png, .jpg, .jpeg
 *            or .gif (case-insensitive)
 * @param userFriendlyName
 *            a name to store against this file in the database
 * @param outputDir
 *            directory passed to the document generator's segmentation request;
 *            only used when the generator reports that segmented images should
 *            be drawn, and may be null to skip that request
 * @param save
 *            should we save this file to the database? Requires a non-negative
 *            {@code userId} on this object
 * @param pages
 *            the pages to process, empty means all (only passed on for PDF
 *            input)
 * @throws RuntimeException
 *             if {@code filename} is empty, if {@code save} is true but no
 *             user id has been set, or if the file extension is not
 *             recognised
 */
public void doCommandSegment(String filename, String userFriendlyName, File outputDir, boolean save, Set<Integer> pages) {
  if (filename.length() == 0) {
    throw new RuntimeException("Missing argument: file");
  }
  if (userId < 0 && save) {
    throw new RuntimeException("Missing argument (for save=true): userId");
  }

  User user = null;
  if (userId >= 0) {
    SecurityDao securityDao = SecurityDao.getInstance(jochreSession);
    user = securityDao.loadUser(userId);
  }

  File file = new File(filename);
  JochreDocumentGenerator jochreDocumentGenerator = new JochreDocumentGenerator(file.getName(), userFriendlyName, jochreSession);
  if (save) {
    jochreDocumentGenerator.requestSave(user);
  }
  if (jochreDocumentGenerator.isDrawSegmentedImage() && outputDir != null) {
    jochreDocumentGenerator.requestSegmentation(outputDir);
  }

  // Lower-case once, with Locale.ROOT so the extension match does not depend on
  // the JVM's default locale (e.g. "I" lower-cases to a dotless i in Turkish).
  String lowerCaseName = filename.toLowerCase(java.util.Locale.ROOT);
  if (lowerCaseName.endsWith(".pdf")) {
    PdfDocumentProcessor pdfDocumentProcessor = new PdfDocumentProcessor(file, pages, jochreDocumentGenerator);
    pdfDocumentProcessor.process();
  } else if (lowerCaseName.endsWith(".png") || lowerCaseName.endsWith(".jpg")
      || lowerCaseName.endsWith(".jpeg") || lowerCaseName.endsWith(".gif")) {
    ImageDocumentExtractor extractor = new ImageDocumentExtractor(file, jochreDocumentGenerator);
    extractor.extractDocument();
  } else {
    throw new RuntimeException("Unrecognised file extension");
  }
}
Aggregations