Search in sources :

Example 36 with Language

use of org.languagetool.Language in project languagetool by languagetool-org.

the class IndexerSearcherTest method testAllRules.

/**
 * Builds an index for a single language and then, for every active pattern rule
 * that is not off by default, searches that index with the rule and checks that at
 * least one matching sentence carries a match with the rule's own full id.
 * <p>
 * Nothing is asserted: rules without an expected match, unsupported rules and
 * exceptions are only counted and reported on standard output.
 */
@Ignore("ignored as long as it doesn't work 100%")
public void testAllRules() throws Exception {
    long startTime = System.currentTimeMillis();
    // comment in to test with external index:
    //directory = new SimpleFSDirectory(new File("/media/external-disk/corpus/languagetool/fast-rule-evaluation-de/"));
    //errorSearcher = new Searcher(directory);
    // TODO: make this work for all languages
    Language language = new English();
    //Language language = new French();
    //Language language = new Spanish();
    //Language language = new Polish();
    //Language language = new German();
    JLanguageTool lt = new JLanguageTool(language);
    System.out.println("Creating index for " + language + "...");
    int indexedRules = createIndex(lt);
    System.out.println("Index created with " + indexedRules + " rules");
    int checkedRules = 0;
    int problemRules = 0;
    int failedRules = 0;
    for (Rule rule : lt.getAllActiveRules()) {
        // only pattern rules that are enabled by default are evaluated
        if (!(rule instanceof PatternRule) || rule.isDefaultOff()) {
            continue;
        }
        PatternRule patternRule = (PatternRule) rule;
        try {
            checkedRules++;
            SearcherResult result = errorSearcher.findRuleMatchesOnIndex(patternRule, language);
            List<MatchingSentence> sentences = result.getMatchingSentences();
            boolean foundExpectedMatch = false;
            for (MatchingSentence sentence : sentences) {
                // TODO: there can be more than one expected match, can't it?
                if (getRuleMatchIds(sentence.getRuleMatches()).contains(patternRule.getFullId())) {
                    foundExpectedMatch = true;
                    break;
                }
            }
            if (foundExpectedMatch) {
                //long time = System.currentTimeMillis() - startTime;
                //System.out.println("Tested " + sentences.size() + " sentences in " + time + "ms for rule " + patternRule);
                continue;
            }
            System.out.println("Error: No match found for " + patternRule);
            System.out.println("Query      : " + result.getRelaxedQuery().toString(FIELD_NAME_LOWERCASE));
            System.out.println("Default field: " + FIELD_NAME_LOWERCASE);
            System.out.println("Lucene Hits: " + result.getLuceneMatchCount());
            System.out.println("Matches    : " + sentences);
            System.out.println("Examples   : " + rule.getIncorrectExamples());
            System.out.println();
            problemRules++;
        } catch (UnsupportedPatternRuleException e) {
            // rules the searcher cannot handle count as problems, not as exceptions
            System.out.println("UnsupportedPatternRuleException searching for rule " + patternRule.getFullId() + ": " + e.getMessage());
            problemRules++;
        } catch (Exception e) {
            System.out.println("Exception searching for rule " + patternRule.getFullId() + ": " + e.getMessage());
            e.printStackTrace(System.out);
            failedRules++;
        }
    }
    System.out.println(language + ": problems: " + problemRules + ", total rules: " + checkedRules);
    System.out.println(language + ": exceptions: " + failedRules + " (including timeouts)");
    System.out.println("Total time: " + (System.currentTimeMillis() - startTime) + "ms");
}
Also used : PatternRule(org.languagetool.rules.patterns.PatternRule) JLanguageTool(org.languagetool.JLanguageTool) IOException(java.io.IOException) English(org.languagetool.language.English) RuleMatch(org.languagetool.rules.RuleMatch) Language(org.languagetool.Language) PatternRule(org.languagetool.rules.patterns.PatternRule) Rule(org.languagetool.rules.Rule) Ignore(org.junit.Ignore)

Example 37 with Language

use of org.languagetool.Language in project languagetool by languagetool-org.

the class SentenceSourceIndexer method main.

/**
 * Command-line entry point that indexes sentences from the given data files into a
 * Lucene index.
 * <p>
 * Expects exactly five arguments, in this order:
 * <ol>
 *   <li>comma-separated list of data files (Wikipedia XML dumps and/or Tatoeba files)</li>
 *   <li>index directory; existing index content will be removed</li>
 *   <li>short language code, e.g. {@code en} or {@code de}</li>
 *   <li>maximum number of sentences to index, {@code 0} for no limit</li>
 *   <li>{@code 1} to also index POS tags, {@code 0} to index only the plain text</li>
 * </ol>
 * With any other argument count the usage text is printed and the JVM exits with status 1.
 *
 * @param args the five command-line arguments described above
 * @throws NumberFormatException if the sentence limit is not an integer
 * @throws IllegalArgumentException if the POS tag switch is neither {@code 0} nor {@code 1}
 * @throws Exception if indexing fails
 */
public static void main(String... args) throws Exception {
    if (args.length != 5) {
        System.out.println("Usage: " + SentenceSourceIndexer.class.getSimpleName() + " <dataFile...> <indexDir> <languageCode> <maxSentences> <indexPosTags>");
        System.out.println("\t<dataFiles> comma-separated list of a Wikipedia XML dump (*.xml) and/or Tatoeba files (tatoeba-*)");
        System.out.println("\t<indexDir> directory where Lucene index will be written to, existing index content will be removed");
        System.out.println("\t<languageCode> short code like en for English, de for German etc");
        System.out.println("\t<maxSentences> maximum number of sentences to be indexed, use 0 for no limit");
        System.out.println("\t<indexPosTags> 1 to also index POS tags (i.e. analyze text by LT), 0 to index only the plain text");
        System.exit(1);
    }
    List<String> dumpFilesNames = Arrays.asList(args[0].split(","));
    File indexDir = new File(args[1]);
    String languageCode = args[2];
    int maxSentences = Integer.parseInt(args[3]);
    Language language = Languages.getLanguageForShortCode(languageCode);
    if (maxSentences == 0) {
        System.out.println("Going to index contents from " + dumpFilesNames);
    } else {
        System.out.println("Going to index up to " + maxSentences + " sentences from " + dumpFilesNames);
    }
    System.out.println("Output index dir: " + indexDir);
    long start = System.currentTimeMillis();
    Analyzer analyzer;
    String indexPos = args[4];
    if (indexPos.equals("1")) {
        // this will use LanguageToolAnalyzer
        analyzer = null;
    } else if (indexPos.equals("0")) {
        // empty stop word set: no token is filtered out as a stop word
        analyzer = new StandardAnalyzer(new CharArraySet(Collections.emptyList(), false));
    } else {
        throw new IllegalArgumentException("Unknown value '" + indexPos + "' for indexPosTags parameter, use 0 or 1");
    }
    try (FSDirectory fsDirectory = FSDirectory.open(indexDir.toPath());
        SentenceSourceIndexer indexer = new SentenceSourceIndexer(fsDirectory, language, maxSentences, analyzer)) {
        try {
            indexer.run(dumpFilesNames, language);
        } catch (DocumentLimitReachedException e) {
            // reaching the configured limit is the expected way to stop early
            System.out.println("Sentence limit (" + e.getLimit() + ") reached, stopping indexing");
        } finally {
            // meta documents are written even when indexing stopped early or failed
            indexer.writeMetaDocuments();
        }
    } finally {
        // Close the analyzer on every path (also when indexing or opening the index
        // throws), and only after the indexer and directory have been closed.
        if (analyzer != null) {
            analyzer.close();
        }
    }
    long end = System.currentTimeMillis();
    float minutes = (end - start) / (float) (1000 * 60);
    System.out.printf("Indexing took %.2f minutes\n", minutes);
}
Also used : CharArraySet(org.apache.lucene.analysis.util.CharArraySet) FSDirectory(org.apache.lucene.store.FSDirectory) Analyzer(org.apache.lucene.analysis.Analyzer) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Language(org.languagetool.Language) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) File(java.io.File)

Example 38 with Language

use of org.languagetool.Language in project languagetool by languagetool-org.

the class Example method main.

/**
 * Smoke-test style demo: checks one short sample text with every language returned
 * by {@code Languages.get()} and prints the resulting rule matches per language.
 *
 * @param args ignored
 * @throws IOException if checking the text fails
 */
public static void main(String[] args) throws IOException {
    // the same sample text is used for every language
    final String sampleText = "And the the";
    List<Language> allLanguages = Languages.get();
    System.out.println("This example will test a short string with all languages known to LanguageTool.");
    System.out.println("It's just a test to make sure there's at least no crash.");
    System.out.println("Using LanguageTool " + JLanguageTool.VERSION + " (" + JLanguageTool.BUILD_DATE + ")");
    System.out.println("Supported languages: " + allLanguages.size());
    for (Language current : allLanguages) {
        // a fresh JLanguageTool instance is created for each language
        List<RuleMatch> matches = new JLanguageTool(current).check(sampleText);
        System.out.println("Checking '" + sampleText + "' with " + current + ":");
        for (RuleMatch match : matches) {
            System.out.println("    " + match);
        }
    }
}
Also used : RuleMatch(org.languagetool.rules.RuleMatch) Language(org.languagetool.Language) JLanguageTool(org.languagetool.JLanguageTool)

Example 39 with Language

use of org.languagetool.Language in project languagetool by languagetool-org.

the class Main method createGUI.

/**
 * Builds and wires the main window: creates the frame, actions, text and result
 * areas, menu bar, toolbar and the language selection row, registers the listeners
 * that keep the UI in sync with {@code ltSupport}, applies the configured font and
 * the stored window state, and finally calls {@code maybeStartServer()}.
 * <p>
 * The statements are order-dependent: the two {@code GridBagConstraints} objects
 * are mutated and re-used between {@code add} calls, and {@code ltSupport} must
 * exist before the listeners that reference it are triggered.
 */
private void createGUI() {
    loadRecentFiles();
    frame = new JFrame("LanguageTool " + JLanguageTool.VERSION);
    setLookAndFeel();
    // actions shared by the toolbar buttons created further below
    openAction = new OpenAction();
    saveAction = new SaveAction();
    saveAsAction = new SaveAsAction();
    checkAction = new CheckAction();
    autoCheckAction = new AutoCheckAction(true);
    showResultAction = new ShowResultAction(true);
    // the frame does nothing by itself on close; a CloseListener is registered instead
    frame.setDefaultCloseOperation(WindowConstants.DO_NOTHING_ON_CLOSE);
    frame.addWindowListener(new CloseListener());
    URL iconUrl = JLanguageTool.getDataBroker().getFromResourceDirAsUrl(TRAY_ICON);
    frame.setIconImage(new ImageIcon(iconUrl).getImage());
    // input text area: wraps lines at word boundaries
    textArea = new JTextArea();
    textArea.setLineWrap(true);
    textArea.setWrapStyleWord(true);
    textArea.addKeyListener(new ControlReturnTextCheckingListener());
    resultArea = new JTextPane();
    undoRedo = new UndoRedoSupport(this.textArea, messages);
    frame.setJMenuBar(createMenuBar());
    // --- bottom panel: language label, language box, auto-detect box, status label ---
    // buttonCons is re-used and mutated for each component added to insidePanel
    GridBagConstraints buttonCons = new GridBagConstraints();
    JPanel insidePanel = new JPanel();
    insidePanel.setOpaque(false);
    insidePanel.setLayout(new GridBagLayout());
    buttonCons.gridx = 0;
    buttonCons.gridy = 0;
    buttonCons.anchor = GridBagConstraints.LINE_START;
    insidePanel.add(new JLabel(messages.getString("textLanguage") + " "), buttonCons);
    //create a ComboBox with flags, do not include hidden languages
    languageBox = LanguageComboBox.create(messages, EXTERNAL_LANGUAGE_SUFFIX, true, false);
    buttonCons.gridx = 1;
    buttonCons.gridy = 0;
    buttonCons.anchor = GridBagConstraints.LINE_START;
    insidePanel.add(languageBox, buttonCons);
    JCheckBox autoDetectBox = new JCheckBox(messages.getString("atd"));
    buttonCons.gridx = 2;
    buttonCons.gridy = 0;
    buttonCons.gridwidth = GridBagConstraints.REMAINDER;
    buttonCons.anchor = GridBagConstraints.LINE_START;
    insidePanel.add(autoDetectBox, buttonCons);
    // second row: status label stretched horizontally
    buttonCons.gridx = 0;
    buttonCons.gridy = 1;
    // NOTE(review): gridwidth is already REMAINDER from the auto-detect box above, so this assignment is redundant
    buttonCons.gridwidth = GridBagConstraints.REMAINDER;
    buttonCons.fill = GridBagConstraints.HORIZONTAL;
    buttonCons.anchor = GridBagConstraints.LINE_END;
    buttonCons.weightx = 1.0;
    insidePanel.add(statusLabel, buttonCons);
    // --- content pane: toolbar (gridy 1), main panel with split pane (gridy 2), bottom panel (gridy 3) ---
    // cons is re-used and mutated for each component added to the content pane
    Container contentPane = frame.getContentPane();
    GridBagLayout gridLayout = new GridBagLayout();
    contentPane.setLayout(gridLayout);
    GridBagConstraints cons = new GridBagConstraints();
    cons.gridx = 0;
    cons.gridy = 1;
    cons.fill = GridBagConstraints.HORIZONTAL;
    cons.anchor = GridBagConstraints.FIRST_LINE_START;
    JToolBar toolbar = new JToolBar("Toolbar", JToolBar.HORIZONTAL);
    toolbar.setFloatable(false);
    contentPane.add(toolbar, cons);
    // toolbar buttons: action text is hidden and the buttons are not focusable
    JButton openButton = new JButton(openAction);
    openButton.setHideActionText(true);
    openButton.setFocusable(false);
    toolbar.add(openButton);
    JButton saveButton = new JButton(saveAction);
    saveButton.setHideActionText(true);
    saveButton.setFocusable(false);
    toolbar.add(saveButton);
    JButton saveAsButton = new JButton(saveAsAction);
    saveAsButton.setHideActionText(true);
    saveAsButton.setFocusable(false);
    toolbar.add(saveAsButton);
    JButton spellButton = new JButton(this.checkAction);
    spellButton.setHideActionText(true);
    spellButton.setFocusable(false);
    toolbar.add(spellButton);
    JToggleButton autoSpellButton = new JToggleButton(autoCheckAction);
    autoSpellButton.setHideActionText(true);
    autoSpellButton.setFocusable(false);
    toolbar.add(autoSpellButton);
    JButton clearTextButton = new JButton(new ClearTextAction());
    clearTextButton.setHideActionText(true);
    clearTextButton.setFocusable(false);
    toolbar.add(clearTextButton);
    // constraints for the main panel that holds the text/result split pane
    cons.insets = new Insets(5, 5, 5, 5);
    cons.fill = GridBagConstraints.BOTH;
    cons.weightx = 10.0f;
    // NOTE(review): this weighty value is overwritten with 5.0f below before cons is used again
    cons.weighty = 10.0f;
    cons.gridx = 0;
    cons.gridy = 2;
    cons.weighty = 5.0f;
    // text area on top, result area below
    splitPane = new JSplitPane(JSplitPane.VERTICAL_SPLIT, new JScrollPane(textArea), new JScrollPane(resultArea));
    mainPanel.setLayout(new GridLayout(0, 1));
    contentPane.add(mainPanel, cons);
    mainPanel.add(splitPane);
    // constraints for the bottom panel built above
    cons.fill = GridBagConstraints.HORIZONTAL;
    cons.gridx = 0;
    cons.gridy = 3;
    cons.weightx = 1.0f;
    cons.weighty = 0.0f;
    cons.insets = new Insets(4, 12, 4, 12);
    contentPane.add(insidePanel, cons);
    // --- LanguageTool support and initial UI state taken from its configuration ---
    ltSupport = new LanguageToolSupport(this.frame, this.textArea, this.undoRedo);
    ResultAreaHelper.install(messages, ltSupport, resultArea);
    languageBox.selectLanguage(ltSupport.getLanguage());
    // manual language selection is only enabled while auto-detection is off
    languageBox.setEnabled(!ltSupport.getConfig().getAutoDetect());
    autoDetectBox.setSelected(ltSupport.getConfig().getAutoDetect());
    taggerShowsDisambigLog = ltSupport.getConfig().getTaggerShowsDisambigLog();
    // language selected in the combo box: update component orientation and ltSupport
    languageBox.addItemListener(new ItemListener() {

        @Override
        public void itemStateChanged(ItemEvent e) {
            if (e.getStateChange() == ItemEvent.SELECTED) {
                // we cannot re-use the existing LT object anymore
                // frame follows the default locale, text and result areas follow the selected language
                frame.applyComponentOrientation(ComponentOrientation.getOrientation(Locale.getDefault()));
                Language lang = languageBox.getSelectedLanguage();
                ComponentOrientation componentOrientation = ComponentOrientation.getOrientation(lang.getLocale());
                textArea.applyComponentOrientation(componentOrientation);
                resultArea.applyComponentOrientation(componentOrientation);
                ltSupport.setLanguage(lang);
            }
        }
    });
    // auto-detect toggled: store the setting and, when switched on, detect the language of the current text
    autoDetectBox.addItemListener(new ItemListener() {

        @Override
        public void itemStateChanged(ItemEvent e) {
            boolean selected = e.getStateChange() == ItemEvent.SELECTED;
            languageBox.setEnabled(!selected);
            ltSupport.getConfig().setAutoDetect(selected);
            if (selected) {
                Language detected = ltSupport.autoDetectLanguage(textArea.getText());
                languageBox.selectLanguage(detected);
            }
        }
    });
    // keep the status label, wait cursor, check action and language box in sync with ltSupport events
    ltSupport.addLanguageToolListener(new LanguageToolListener() {

        @Override
        public void languageToolEventOccurred(LanguageToolEvent event) {
            if (event.getType() == LanguageToolEvent.Type.CHECKING_STARTED) {
                String msg = org.languagetool.tools.Tools.i18n(messages, "checkStart");
                statusLabel.setText(msg);
                // cursor and check action are only touched for checks whose caller is this window's frame
                if (event.getCaller() == getFrame()) {
                    setWaitCursor();
                    checkAction.setEnabled(false);
                }
            } else if (event.getType() == LanguageToolEvent.Type.CHECKING_FINISHED) {
                if (event.getCaller() == getFrame()) {
                    checkAction.setEnabled(true);
                    unsetWaitCursor();
                }
                String msg = org.languagetool.tools.Tools.i18n(messages, "checkDone", event.getSource().getMatches().size(), event.getElapsedTime());
                statusLabel.setText(msg);
            } else if (event.getType() == LanguageToolEvent.Type.LANGUAGE_CHANGED) {
                languageBox.selectLanguage(ltSupport.getLanguage());
            } else if (event.getType() == LanguageToolEvent.Type.RULE_ENABLED) {
            //this will trigger a check and the result will be updated by
            //the CHECKING_FINISHED event
            } else if (event.getType() == LanguageToolEvent.Type.RULE_DISABLED) {
                String msg = org.languagetool.tools.Tools.i18n(messages, "checkDoneNoTime", event.getSource().getMatches().size());
                statusLabel.setText(msg);
            }
        }
    });
    // initial orientation: same steps as in the language box listener above, using the current language
    frame.applyComponentOrientation(ComponentOrientation.getOrientation(Locale.getDefault()));
    Language lang = ltSupport.getLanguage();
    ComponentOrientation componentOrientation = ComponentOrientation.getOrientation(lang.getLocale());
    textArea.applyComponentOrientation(componentOrientation);
    resultArea.applyComponentOrientation(componentOrientation);
    // pre-fill the text area with the demo text from the current language's message bundle
    ResourceBundle textLanguageMessageBundle = JLanguageTool.getMessageBundle(ltSupport.getLanguage());
    textArea.setText(textLanguageMessageBundle.getString("guiDemoText"));
    // apply the configured font if any of name, style or size is set; missing name/size fall back to the current font
    Configuration config = ltSupport.getConfig();
    if (config.getFontName() != null || config.getFontStyle() != Configuration.FONT_STYLE_INVALID || config.getFontSize() != Configuration.FONT_SIZE_INVALID) {
        String fontName = config.getFontName();
        if (fontName == null) {
            fontName = textArea.getFont().getFamily();
        }
        int fontSize = config.getFontSize();
        if (fontSize == Configuration.FONT_SIZE_INVALID) {
            fontSize = textArea.getFont().getSize();
        }
        // NOTE(review): the style is passed through without a fallback, even if it equals FONT_STYLE_INVALID - confirm this is intended
        Font font = new Font(fontName, config.getFontStyle(), fontSize);
        textArea.setFont(font);
    }
    // default size, position and divider location; overridden below by stored window state if present
    frame.pack();
    frame.setSize(WINDOW_WIDTH, WINDOW_HEIGHT);
    frame.setLocationByPlatform(true);
    splitPane.setDividerLocation(200);
    MainWindowStateBean state = localStorage.loadProperty("gui.state", MainWindowStateBean.class);
    if (state != null) {
        if (state.getBounds() != null) {
            frame.setBounds(state.getBounds());
            ResizeComponentListener.setBoundsProperty(frame, state.getBounds());
        }
        if (state.getDividerLocation() != null) {
            splitPane.setDividerLocation(state.getDividerLocation());
        }
        if (state.getState() != null) {
            frame.setExtendedState(state.getState());
        }
    }
    ResizeComponentListener.attachToWindow(frame);
    maybeStartServer();
}
Also used : URL(java.net.URL) Language(org.languagetool.Language) ResourceBundle(java.util.ResourceBundle)

Example 40 with Language

use of org.languagetool.Language in project languagetool by languagetool-org.

the class ResultAreaHelper method languageToolEventOccurred.

/**
 * Updates the result display in response to LanguageTool events.
 * <ul>
 *   <li>{@code CHECKING_STARTED}: shows a "start checking" header with the translated
 *       language name (plus the external-language suffix for external languages)
 *       and clears the main area;</li>
 *   <li>{@code CHECKING_FINISHED}: stores the elapsed time and displays the matches;</li>
 *   <li>{@code RULE_DISABLED} / {@code RULE_ENABLED}: re-displays the matches.</li>
 * </ul>
 * The cursor of {@code statusPane} is only changed when this object is the event's caller.
 * Other event types are ignored.
 */
@Override
public void languageToolEventOccurred(LanguageToolEvent event) {
    LanguageToolEvent.Type type = event.getType();

    if (type == LanguageToolEvent.Type.CHECKING_STARTED) {
        Language lang = ltSupport.getLanguage();
        boolean external = lang.isExternal();
        String langName = lang.getTranslatedName(messages);
        if (external) {
            langName += Main.EXTERNAL_LANGUAGE_SUFFIX;
        }
        setHeader(org.languagetool.tools.Tools.i18n(messages, "startChecking", langName) + "...");
        setMain(EMPTY_PARA);
        if (event.getCaller() == this) {
            statusPane.setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR));
        }
        return;
    }

    if (type == LanguageToolEvent.Type.CHECKING_FINISHED) {
        setRunTime(event.getElapsedTime());
        String checkedText = event.getSource().getTextComponent().getText();
        displayResult(checkedText, event.getSource().getMatches());
        if (event.getCaller() == this) {
            statusPane.setCursor(Cursor.getDefaultCursor());
        }
        return;
    }

    if (type == LanguageToolEvent.Type.RULE_DISABLED || type == LanguageToolEvent.Type.RULE_ENABLED) {
        // toggling a rule only refreshes the displayed matches
        String checkedText = event.getSource().getTextComponent().getText();
        displayResult(checkedText, event.getSource().getMatches());
    }
}
Also used : Language(org.languagetool.Language)

Aggregations

Language (org.languagetool.Language)84 Test (org.junit.Test)23 File (java.io.File)15 ArrayList (java.util.ArrayList)12 JLanguageTool (org.languagetool.JLanguageTool)11 Rule (org.languagetool.rules.Rule)11 RuleMatch (org.languagetool.rules.RuleMatch)10 IOException (java.io.IOException)7 Ignore (org.junit.Ignore)6 StringTools.readerToString (org.languagetool.tools.StringTools.readerToString)5 InputStream (java.io.InputStream)4 English (org.languagetool.language.English)4 BitextRule (org.languagetool.rules.bitext.BitextRule)4 URL (java.net.URL)3 HashSet (java.util.HashSet)3 MultiThreadedJLanguageTool (org.languagetool.MultiThreadedJLanguageTool)3 AmericanEnglish (org.languagetool.language.AmericanEnglish)3 LanguageModel (org.languagetool.languagemodel.LanguageModel)3 LuceneLanguageModel (org.languagetool.languagemodel.LuceneLanguageModel)3 BufferedReader (java.io.BufferedReader)2