Search in sources :

Example 1 with LanguageItem

Use of de.catma.document.source.LanguageItem in the project catma by forTEXT.

Class InspectContentStep, method initComponents.

/**
 * Builds the component tree of this step.
 *
 * <p>From top to bottom the step contains a (initially hidden) progress bar, an
 * HTML info label and a horizontal content panel. The content panel holds, on the
 * left, the editable file grid wrapped in an action grid component plus the
 * apostrophe check box and, on the right, a read-only preview text area.
 *
 * <p>The grid's charset and language columns are editable in place (unbuffered);
 * both setters trigger {@code updatePreview} for the edited file.
 */
private void initComponents() {
    setSizeFull();

    // progress indicator, shown only while files are being inspected
    progressBar = new ProgressBar();
    progressBar.setCaption("Inspecting files...");
    progressBar.setIndeterminate(false);
    progressBar.setVisible(false);
    addComponent(progressBar);

    // explanatory text above the content area
    Label hint = new Label(
        "Please check if the language has been detected correctly and if the "
            + "content preview looks fine. "
            + "For plain text files you might need to adjust the encoding.<br />"
            + "Double click on a row to change the settings.");
    hint.setContentMode(ContentMode.HTML);
    addComponent(hint);

    // content area takes all remaining vertical space
    contentPanel = new HorizontalLayout();
    contentPanel.setMargin(false);
    contentPanel.setSizeFull();
    addComponent(contentPanel);
    setExpandRatio(contentPanel, 1.0f);

    fileGrid = new Grid<>(fileDataProvider);
    fileGrid.setSizeFull();

    // in-place editors for the two editable columns
    ComboBox<Charset> charsetBox = new ComboBox<>(null, Charset.availableCharsets().values());

    languageItems = new ArrayList<>();
    Locale[] locales = Locale.getAvailableLocales();
    for (int i = 0; i < locales.length; i++) {
        languageItems.add(new LanguageItem(locales[i]));
    }
    ComboBox<LanguageItem> languageBox = new ComboBox<>(null, languageItems);

    // read-only columns
    fileGrid.addColumn(UploadFile::getOriginalFilename)
        .setCaption("File")
        .setWidth(150)
        .setDescriptionGenerator(UploadFile::getOriginalFilename);
    fileGrid.addColumn(UploadFile::getMimetype)
        .setCaption("Type")
        .setWidth(150)
        .setDescriptionGenerator(UploadFile::getMimetype);

    // charset column: changing the charset refreshes the preview
    Binding<UploadFile, Charset> charsetBinding =
        fileGrid.getEditor().getBinder().bind(
            charsetBox,
            UploadFile::getCharset,
            (file, newCharset) -> {
                file.setCharset(newCharset);
                updatePreview(file);
            });

    // the charset editor is read-only unless the row's base media type equals the TEXT file type
    fileGrid.getEditor().addOpenListener(openEvent -> {
        MediaType rowType = MediaType.parse(openEvent.getBean().getMimetype());
        boolean isText = rowType.getBaseType().toString().equals(FileType.TEXT.getMimeType());
        encBindingReadOnly(charsetBinding, !isText);
    });

    fileGrid.addColumn(UploadFile::getCharset)
        .setCaption("Characterset/Encoding")
        .setExpandRatio(2)
        .setEditorBinding(charsetBinding);

    // language column: changing the language refreshes the preview
    fileGrid.addColumn(UploadFile::getLanguage)
        .setCaption("Language")
        .setExpandRatio(2)
        .setEditorComponent(languageBox, (file, newLanguage) -> {
            file.setLanguage(newLanguage);
            updatePreview(file);
        });

    fileGrid.getEditor().setEnabled(true);
    fileGrid.getEditor().setBuffered(false);

    fileActionGridComponent =
        new ActionGridComponent<Grid<UploadFile>>(new Label("Language and encoding"), fileGrid);
    fileActionGridComponent.setMargin(false);
    fileActionGridComponent.getActionGridBar().setAddBtnVisible(false);

    // left side: grid on top (expanding), apostrophe option below
    VerticalLayout gridColumn = new VerticalLayout();
    gridColumn.setSizeFull();
    gridColumn.setMargin(false);
    gridColumn.addComponent(fileActionGridComponent);
    gridColumn.setExpandRatio(fileActionGridComponent, 1.0f);

    cbUseApostrophe = new CheckBox("always use the apostrophe as a word separator");
    cbUseApostrophe.setDescription("This has influence on the segmentation of the text, i. e. on how the wordlist is created.");
    gridColumn.addComponent(cbUseApostrophe);

    contentPanel.addComponent(gridColumn);
    contentPanel.setExpandRatio(gridColumn, 0.6f);

    // right side: read-only content preview
    taPreview = new TextArea("Preview");
    taPreview.setReadOnly(true);
    taPreview.setSizeFull();
    contentPanel.addComponent(taPreview);
    contentPanel.setExpandRatio(taPreview, 0.4f);
}

/** Applies the given read-only state to the charset editor binding. */
private static void encBindingReadOnly(Binding<UploadFile, Charset> binding, boolean readOnly) {
    binding.setReadOnly(readOnly);
}
Also used : Locale(java.util.Locale) TextArea(com.vaadin.ui.TextArea) ComboBox(com.vaadin.ui.ComboBox) Grid(com.vaadin.ui.Grid) Label(com.vaadin.ui.Label) Charset(java.nio.charset.Charset) HorizontalLayout(com.vaadin.ui.HorizontalLayout) CheckBox(com.vaadin.ui.CheckBox) MediaType(org.apache.tika.mime.MediaType) VerticalLayout(com.vaadin.ui.VerticalLayout) LanguageItem(de.catma.document.source.LanguageItem) ProgressBar(com.vaadin.ui.ProgressBar)

Example 2 with LanguageItem

Use of de.catma.document.source.LanguageItem in the project catma by forTEXT.

Class InspectContentStep, method enter.

/**
 * Called when the wizard enters this step. Unless the step is entered by going
 * back, the uploaded files found in the wizard context are inspected by a
 * submitted background task: for each file the charset (non-XML2 files only) and
 * the language are detected and stored on the {@code UploadFile}.
 *
 * <p>While the task runs the content panel is disabled and the progress bar is
 * shown. Both are reset when the task finishes, whether it succeeds or fails.
 * A problem with a single file is logged and reported to the user, and inspection
 * continues with the remaining files.
 *
 * @param back {@code true} if the step is entered by navigating backwards; in that
 *             case nothing is done
 */
@Override
public void enter(boolean back) {
    if (back) {
        return;
    }
    @SuppressWarnings("unchecked")
    Collection<UploadFile> fileList = (Collection<UploadFile>) wizardContext.get(DocumentWizard.WizardContextKey.UPLOAD_FILE_LIST);
    contentPanel.setEnabled(false);
    progressBar.setVisible(true);
    progressBar.setIndeterminate(true);
    final ArrayList<UploadFile> files = new ArrayList<UploadFile>(fileList);
    // Resolve the UI on the calling thread. Vaadin defines the current UI only while
    // a request is being processed, so UI.getCurrent() cannot be relied on inside the
    // submitted callable (presumably run on a worker thread - confirm against the
    // BackgroundServiceProvider implementation).
    final UI ui = UI.getCurrent();
    BackgroundServiceProvider backgroundServiceProvider = (BackgroundServiceProvider) ui;
    backgroundServiceProvider.submit("inspecting-files", new DefaultProgressCallable<List<UploadFile>>() {

        @Override
        public List<UploadFile> call() throws Exception {
            Tika tika = new Tika();
            LanguageDetector languageDetector = LanguageDetector.getDefaultLanguageDetector();
            try {
                languageDetector.loadModels();
            } catch (IOException e) {
                // UI feedback is dispatched through ui.access so it runs with the UI locked
                ui.access(() -> ((ErrorHandler) ui).showAndLogError("Error loading language detection models!", e));
            }
            for (UploadFile uploadFile : files) {
                // A failure for one file (XML2 or not) must not abort the inspection of the others.
                try {
                    String content;
                    if (uploadFile.getMimetype().equals(FileType.XML2.getMimeType())) {
                        XML2ContentHandler contentHandler = new XML2ContentHandler();
                        SourceDocumentInfo sourceDocumentInfo = new SourceDocumentInfo();
                        TechInfoSet techInfoSet = new TechInfoSet(uploadFile.getOriginalFilename(), uploadFile.getMimetype(), uploadFile.getTempFilename());
                        sourceDocumentInfo.setTechInfoSet(techInfoSet);
                        contentHandler.setSourceDocumentInfo(sourceDocumentInfo);
                        contentHandler.load();
                        content = contentHandler.getContent();
                    } else {
                        Metadata metadata = new Metadata();
                        try (FileInputStream fis = new FileInputStream(new File(uploadFile.getTempFilename()))) {
                            content = tika.parseToString(fis, metadata);
                        }
                        // The content type may be absent or unparsable (MediaType.parse then
                        // yields null); in that case the charset is simply left untouched.
                        String contentType = metadata.get(Metadata.CONTENT_TYPE);
                        MediaType mediaType = (contentType == null) ? null : MediaType.parse(contentType);
                        if (mediaType != null) {
                            String charset = mediaType.getParameters().get("charset");
                            if (charset != null) {
                                uploadFile.setCharset(Charset.forName(charset));
                            }
                        }
                    }
                    LanguageResult languageResult = languageDetector.detect(content);
                    if (languageResult.isReasonablyCertain() && languageResult.getLanguage() != null) {
                        uploadFile.setLanguage(new LanguageItem(new Locale(languageResult.getLanguage())));
                    }
                } catch (Exception e) {
                    Logger.getLogger(InspectContentStep.class.getName()).log(Level.SEVERE, String.format("Error inspecting %1$s", uploadFile.getOriginalFilename()), e);
                    String errorMsg = e.getMessage();
                    if ((errorMsg == null) || (errorMsg.trim().isEmpty())) {
                        errorMsg = "";
                    }
                    final String description = String.format("Error inspecting content of %1$s! " + "Adding this file to your Project might fail!\n The underlying error message was:\n%2$s", uploadFile.getOriginalFilename(), errorMsg);
                    ui.access(() -> Notification.show("Error", description, Type.ERROR_MESSAGE));
                }
            }
            return files;
        }
    }, new ExecutionListener<List<UploadFile>>() {

        @Override
        public void done(List<UploadFile> result) {
            contentPanel.setEnabled(true);
            progressBar.setVisible(false);
            progressBar.setIndeterminate(false);
            fileList.clear();
            fileList.addAll(result);
            fileDataProvider.refreshAll();
            // preselect the first file, if any, and show its preview
            fileList.stream().findFirst().ifPresent(uploadFile -> {
                fileGrid.select(uploadFile);
                updatePreview(uploadFile);
            });
            if (stepChangeListener != null) {
                stepChangeListener.stepChanged(InspectContentStep.this);
            }
        }

        @Override
        public void error(Throwable t) {
            // Release the UI again; otherwise the step stays disabled with a
            // permanently spinning progress bar after a failed inspection.
            contentPanel.setEnabled(true);
            progressBar.setVisible(false);
            progressBar.setIndeterminate(false);
            Logger.getLogger(InspectContentStep.class.getName()).log(Level.SEVERE, "Error inspecting files", t);
            String errorMsg = t.getMessage();
            if ((errorMsg == null) || (errorMsg.trim().isEmpty())) {
                errorMsg = "";
            }
            Notification.show("Error", String.format("Error inspecting the contents! " + "\n The underlying error message was:\n%1$s", errorMsg), Type.ERROR_MESSAGE);
        }
    });
}
Also used : Locale(java.util.Locale) BackgroundServiceProvider(de.catma.backgroundservice.BackgroundServiceProvider) DefaultProgressCallable(de.catma.backgroundservice.DefaultProgressCallable) StepChangeListener(de.catma.ui.dialog.wizard.StepChangeListener) VerticalLayout(com.vaadin.ui.VerticalLayout) LanguageItem(de.catma.document.source.LanguageItem) ComboBox(com.vaadin.ui.ComboBox) UI(com.vaadin.ui.UI) LanguageDetector(org.apache.tika.language.detect.LanguageDetector) WizardContext(de.catma.ui.dialog.wizard.WizardContext) MediaType(org.apache.tika.mime.MediaType) ActionGridComponent(de.catma.ui.component.actiongrid.ActionGridComponent) ProgressStep(de.catma.ui.dialog.wizard.ProgressStep) TechInfoSet(de.catma.document.source.TechInfoSet) ArrayList(java.util.ArrayList) Level(java.util.logging.Level) Binding(com.vaadin.data.Binder.Binding) Metadata(org.apache.tika.metadata.Metadata) Charset(java.nio.charset.Charset) CheckBox(com.vaadin.ui.CheckBox) Notification(com.vaadin.ui.Notification) ErrorHandler(de.catma.ui.module.main.ErrorHandler) Locale(java.util.Locale) Label(com.vaadin.ui.Label) ProgressBar(com.vaadin.ui.ProgressBar) TextArea(com.vaadin.ui.TextArea) ListDataProvider(com.vaadin.data.provider.ListDataProvider) ContentMode(com.vaadin.shared.ui.ContentMode) XML2ContentHandler(de.catma.document.source.contenthandler.XML2ContentHandler) ExecutionListener(de.catma.backgroundservice.ExecutionListener) Collection(java.util.Collection) IndexInfoSet(de.catma.document.source.IndexInfoSet) ProgressStepFactory(de.catma.ui.dialog.wizard.ProgressStepFactory) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) Logger(java.util.logging.Logger) File(java.io.File) SourceDocumentInfo(de.catma.document.source.SourceDocumentInfo) List(java.util.List) Type(com.vaadin.ui.Notification.Type) HorizontalLayout(com.vaadin.ui.HorizontalLayout) FileType(de.catma.document.source.FileType) Tika(org.apache.tika.Tika) WizardStep(de.catma.ui.dialog.wizard.WizardStep) 
Collections(java.util.Collections) LanguageResult(org.apache.tika.language.detect.LanguageResult) Grid(com.vaadin.ui.Grid) SingleOptionInputDialog(de.catma.ui.dialog.SingleOptionInputDialog) LanguageResult(org.apache.tika.language.detect.LanguageResult) SourceDocumentInfo(de.catma.document.source.SourceDocumentInfo) ArrayList(java.util.ArrayList) BackgroundServiceProvider(de.catma.backgroundservice.BackgroundServiceProvider) Metadata(org.apache.tika.metadata.Metadata) Tika(org.apache.tika.Tika) MediaType(org.apache.tika.mime.MediaType) ArrayList(java.util.ArrayList) List(java.util.List) TechInfoSet(de.catma.document.source.TechInfoSet) ErrorHandler(de.catma.ui.module.main.ErrorHandler) XML2ContentHandler(de.catma.document.source.contenthandler.XML2ContentHandler) IOException(java.io.IOException) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) LanguageDetector(org.apache.tika.language.detect.LanguageDetector) Collection(java.util.Collection) LanguageItem(de.catma.document.source.LanguageItem) File(java.io.File)

Aggregations

CheckBox (com.vaadin.ui.CheckBox)2 ComboBox (com.vaadin.ui.ComboBox)2 Grid (com.vaadin.ui.Grid)2 HorizontalLayout (com.vaadin.ui.HorizontalLayout)2 Label (com.vaadin.ui.Label)2 ProgressBar (com.vaadin.ui.ProgressBar)2 TextArea (com.vaadin.ui.TextArea)2 VerticalLayout (com.vaadin.ui.VerticalLayout)2 LanguageItem (de.catma.document.source.LanguageItem)2 Charset (java.nio.charset.Charset)2 Locale (java.util.Locale)2 Binding (com.vaadin.data.Binder.Binding)1 ListDataProvider (com.vaadin.data.provider.ListDataProvider)1 ContentMode (com.vaadin.shared.ui.ContentMode)1 Notification (com.vaadin.ui.Notification)1 Type (com.vaadin.ui.Notification.Type)1 UI (com.vaadin.ui.UI)1 BackgroundServiceProvider (de.catma.backgroundservice.BackgroundServiceProvider)1 DefaultProgressCallable (de.catma.backgroundservice.DefaultProgressCallable)1 ExecutionListener (de.catma.backgroundservice.ExecutionListener)1