Use of de.catma.document.source.LanguageItem in project catma by forTEXT.
Class InspectContentStep, method initComponents.
/**
 * Builds the static component tree of this step.
 *
 * <p>Layout, top to bottom: a hidden progress bar, an HTML hint label and a
 * horizontal content panel that takes the remaining space. The content panel
 * holds a left column (file grid with inline editors for charset and language,
 * plus the apostrophe check box) and a read-only preview text area, split
 * 0.6 / 0.4.
 *
 * <p>The grid editor is unbuffered, so each editor change is written to the
 * {@code UploadFile} immediately and followed by {@code updatePreview}.
 */
private void initComponents() {
    setSizeFull();

    // Progress bar: created hidden and determinate.
    progressBar = new ProgressBar();
    progressBar.setIndeterminate(false);
    progressBar.setVisible(false);
    progressBar.setCaption("Inspecting files...");
    addComponent(progressBar);

    // Hint text; rendered as HTML because it contains a line break tag.
    Label hint = new Label(
        "Please check if the language has been detected correctly and if the content preview looks fine. "
            + "For plain text files you might need to adjust the encoding.<br />"
            + "Double click on a row to change the settings.");
    hint.setContentMode(ContentMode.HTML);
    addComponent(hint);

    // Main area: gets all remaining vertical space.
    contentPanel = new HorizontalLayout();
    contentPanel.setMargin(false);
    contentPanel.setSizeFull();
    addComponent(contentPanel);
    setExpandRatio(contentPanel, 1f);

    fileGrid = new Grid<>(fileDataProvider);
    fileGrid.setSizeFull();

    // Inline editors: every charset known to the JVM, every available locale.
    ComboBox<Charset> charsetCombo = new ComboBox<>(null, Charset.availableCharsets().values());

    languageItems = new ArrayList<>();
    for (Locale availableLocale : Locale.getAvailableLocales()) {
        languageItems.add(new LanguageItem(availableLocale));
    }
    ComboBox<LanguageItem> languageCombo = new ComboBox<>(null, languageItems);

    fileGrid.addColumn(UploadFile::getOriginalFilename)
        .setCaption("File")
        .setWidth(150)
        .setDescriptionGenerator(UploadFile::getOriginalFilename);
    fileGrid.addColumn(UploadFile::getMimetype)
        .setCaption("Type")
        .setWidth(150)
        .setDescriptionGenerator(UploadFile::getMimetype);

    Binding<UploadFile, Charset> encBinding = fileGrid.getEditor().getBinder().bind(
        charsetCombo,
        UploadFile::getCharset,
        (file, selectedCharset) -> {
            file.setCharset(selectedCharset);
            updatePreview(file);
        });

    // The charset editor is writable only when the row's base MIME type equals
    // the MIME type of FileType.TEXT.
    fileGrid.getEditor().addOpenListener(openEvent -> {
        MediaType rowType = MediaType.parse(openEvent.getBean().getMimetype());
        boolean isPlainText = rowType.getBaseType().toString().equals(FileType.TEXT.getMimeType());
        encBinding.setReadOnly(!isPlainText);
    });

    fileGrid.addColumn(UploadFile::getCharset)
        .setCaption("Characterset/Encoding")
        .setExpandRatio(2)
        .setEditorBinding(encBinding);
    fileGrid.addColumn(UploadFile::getLanguage)
        .setCaption("Language")
        .setExpandRatio(2)
        .setEditorComponent(languageCombo, (file, selectedLanguage) -> {
            file.setLanguage(selectedLanguage);
            updatePreview(file);
        });

    fileGrid.getEditor().setEnabled(true).setBuffered(false);

    fileActionGridComponent =
        new ActionGridComponent<Grid<UploadFile>>(new Label("Language and encoding"), fileGrid);
    fileActionGridComponent.setMargin(false);
    fileActionGridComponent.getActionGridBar().setAddBtnVisible(false);

    // Left column: grid on top (expanding), apostrophe option below.
    VerticalLayout gridColumn = new VerticalLayout();
    gridColumn.setSizeFull();
    gridColumn.setMargin(false);
    gridColumn.addComponent(fileActionGridComponent);
    gridColumn.setExpandRatio(fileActionGridComponent, 1f);

    cbUseApostrophe = new CheckBox("always use the apostrophe as a word separator");
    cbUseApostrophe.setDescription(
        "This has influence on the segmentation of the text, i. e. on how the wordlist is created.");
    gridColumn.addComponent(cbUseApostrophe);

    contentPanel.addComponent(gridColumn);
    contentPanel.setExpandRatio(gridColumn, 0.6f);

    // Right side: read-only preview of the selected file.
    taPreview = new TextArea("Preview");
    taPreview.setSizeFull();
    taPreview.setReadOnly(true);
    contentPanel.addComponent(taPreview);
    contentPanel.setExpandRatio(taPreview, 0.4f);
}
Use of de.catma.document.source.LanguageItem in project catma by forTEXT.
Class InspectContentStep, method enter.
/**
 * Starts the background inspection of the uploaded files when this step is
 * entered in forward direction.
 *
 * <p>For every file in the wizard context's upload list the content is
 * extracted (via {@code XML2ContentHandler} for the XML2 MIME type, via Tika
 * otherwise), the charset reported by Tika is applied where present, and the
 * language is set when the detector is reasonably certain. While the job runs
 * the content panel is disabled and the progress bar is shown; both are reset
 * when the job finishes, whether it succeeds or fails.
 *
 * <p>A failure on a single file is logged and reported to the user, and the
 * remaining files are still inspected.
 *
 * @param back {@code true} when the step is re-entered by navigating
 *             backwards; nothing is done in that case
 */
@Override
public void enter(boolean back) {
    if (back) {
        return;
    }

    @SuppressWarnings("unchecked")
    Collection<UploadFile> fileList =
        (Collection<UploadFile>) wizardContext.get(DocumentWizard.WizardContextKey.UPLOAD_FILE_LIST);

    contentPanel.setEnabled(false);
    progressBar.setVisible(true);
    progressBar.setIndeterminate(true);

    // Work on a copy so the background job never touches the context's collection.
    final ArrayList<UploadFile> files = new ArrayList<UploadFile>(fileList);

    // Capture the UI on the request thread: UI.getCurrent() is thread-local and
    // is not set automatically on background threads, so the callable below must
    // not call it (nor anything relying on it, such as Notification.show)
    // directly.
    final UI ui = UI.getCurrent();
    BackgroundServiceProvider backgroundServiceProvider = (BackgroundServiceProvider) ui;

    backgroundServiceProvider.submit("inspecting-files", new DefaultProgressCallable<List<UploadFile>>() {
        @Override
        public List<UploadFile> call() throws Exception {
            Tika tika = new Tika();
            LanguageDetector languageDetector = LanguageDetector.getDefaultLanguageDetector();
            try {
                languageDetector.loadModels();
            } catch (IOException e) {
                // Report under the session lock with the captured UI as current UI.
                ui.access(() -> ((ErrorHandler) ui).showAndLogError(
                    "Error loading language detection models!", e));
            }

            for (UploadFile uploadFile : files) {
                // Both extraction paths share one per-file handler so that a single
                // broken file cannot abort the inspection of the others.
                try {
                    String content;
                    if (FileType.XML2.getMimeType().equals(uploadFile.getMimetype())) {
                        XML2ContentHandler contentHandler = new XML2ContentHandler();
                        SourceDocumentInfo sourceDocumentInfo = new SourceDocumentInfo();
                        TechInfoSet techInfoSet = new TechInfoSet(
                            uploadFile.getOriginalFilename(),
                            uploadFile.getMimetype(),
                            uploadFile.getTempFilename());
                        sourceDocumentInfo.setTechInfoSet(techInfoSet);
                        contentHandler.setSourceDocumentInfo(sourceDocumentInfo);
                        contentHandler.load();
                        content = contentHandler.getContent();
                    } else {
                        Metadata metadata = new Metadata();
                        try (FileInputStream fis = new FileInputStream(new File(uploadFile.getTempFilename()))) {
                            content = tika.parseToString(fis, metadata);
                        }
                        // MediaType.parse yields null for a missing or malformed content
                        // type; in that case the charset is simply left as it is.
                        MediaType mediaType = MediaType.parse(metadata.get(Metadata.CONTENT_TYPE));
                        if (mediaType != null) {
                            String charset = mediaType.getParameters().get("charset");
                            if (charset != null) {
                                uploadFile.setCharset(Charset.forName(charset));
                            }
                        }
                    }

                    LanguageResult languageResult = languageDetector.detect(content);
                    if (languageResult.isReasonablyCertain() && languageResult.getLanguage() != null) {
                        uploadFile.setLanguage(new LanguageItem(new Locale(languageResult.getLanguage())));
                    }
                } catch (Exception e) {
                    Logger.getLogger(InspectContentStep.class.getName()).log(
                        Level.SEVERE,
                        String.format("Error inspecting %1$s", uploadFile.getOriginalFilename()),
                        e);
                    String errorMsg = e.getMessage();
                    if ((errorMsg == null) || (errorMsg.trim().isEmpty())) {
                        errorMsg = "";
                    }
                    final String description = String.format(
                        "Error inspecting content of %1$s! "
                            + "Adding this file to your Project might fail!\n The underlying error message was:\n%2$s",
                        uploadFile.getOriginalFilename(),
                        errorMsg);
                    // Notification.show needs a current UI/Page; ui.access provides it.
                    ui.access(() -> Notification.show("Error", description, Type.ERROR_MESSAGE));
                }
            }
            return files;
        }
    }, new ExecutionListener<List<UploadFile>>() {
        @Override
        public void done(List<UploadFile> result) {
            resetProgressState();

            fileList.clear();
            fileList.addAll(result);
            fileDataProvider.refreshAll();

            // Preselect the first file, if any, so the preview is not empty.
            fileList.stream().findFirst().ifPresent(uploadFile -> {
                fileGrid.select(uploadFile);
                updatePreview(uploadFile);
            });

            if (stepChangeListener != null) {
                stepChangeListener.stepChanged(InspectContentStep.this);
            }
        }

        @Override
        public void error(Throwable t) {
            // Without this reset a failed job would leave the step disabled with
            // a spinning progress bar.
            resetProgressState();

            Logger.getLogger(InspectContentStep.class.getName()).log(
                Level.SEVERE, "Error inspecting files", t);
            String errorMsg = t.getMessage();
            if ((errorMsg == null) || (errorMsg.trim().isEmpty())) {
                errorMsg = "";
            }
            Notification.show(
                "Error",
                String.format(
                    "Error inspecting the contents! " + "\n The underlying error message was:\n%1$s",
                    errorMsg),
                Type.ERROR_MESSAGE);
        }

        /** Re-enables the content panel and hides the progress bar. */
        private void resetProgressState() {
            contentPanel.setEnabled(true);
            progressBar.setVisible(false);
            progressBar.setIndeterminate(false);
        }
    });
}
Aggregations