Search in sources :

Example 21 with AnalyzedSentence

use of org.languagetool.AnalyzedSentence in project languagetool by languagetool-org.

the class AbstractWordCoherencyRule method match.

/**
 * Walks over all tokens of the given sentences and reports words whose counterpart spelling
 * (as listed in {@code getWordMap()}) has already been seen earlier in the text.
 *
 * <p>Each token is compared by the lemma of its first reading when that lemma is available,
 * otherwise by its surface form. Match positions are offset by the summed text length of the
 * preceding sentences.
 *
 * @param sentences the analyzed sentences of the text, in order
 * @return the matches found, converted via {@code toRuleMatchArray}
 */
@Override
public RuleMatch[] match(List<AnalyzedSentence> sentences) {
    List<RuleMatch> foundMatches = new ArrayList<>();
    // Maps a spelling that must not occur anymore to the match recorded for its counterpart,
    // e.g. aufwändig -> RuleMatch of aufwendig
    Map<String, RuleMatch> forbiddenSpellings = new HashMap<>();
    int sentenceOffset = 0;
    for (AnalyzedSentence sentence : sentences) {
        for (AnalyzedTokenReadings tokenReadings : sentence.getTokensWithoutWhitespace()) {
            String word = tokenReadings.getToken();
            List<AnalyzedToken> readings = tokenReadings.getReadings();
            // TODO: in theory we need to care about the other readings, too (affects e.g. German "Schenke" as a noun):
            if (!readings.isEmpty()) {
                String lemma = readings.get(0).getLemma();
                if (lemma != null) {
                    // prefer the base form so that inflected variants are recognized as well
                    word = lemma;
                }
            }
            // Values stored in forbiddenSpellings are never null, so a single lookup is enough.
            RuleMatch earlierMatch = forbiddenSpellings.get(word);
            if (earlierMatch != null) {
                // The earlier match was created with the counterpart word as its message text
                // (presumably what getMessage() returns here; verify against RuleMatch).
                String otherSpelling = earlierMatch.getMessage();
                String msg = getMessage(word, otherSpelling);
                int from = sentenceOffset + tokenReadings.getStartPos();
                int to = sentenceOffset + tokenReadings.getEndPos();
                RuleMatch ruleMatch = new RuleMatch(this, from, to, msg);
                ruleMatch.setSuggestedReplacement(otherSpelling);
                foundMatches.add(ruleMatch);
                continue;
            }
            if (getWordMap().containsKey(word)) {
                // First sighting of this spelling: remember that its counterpart is now unwanted.
                String unwantedSpelling = getWordMap().get(word);
                int from = sentenceOffset + tokenReadings.getStartPos();
                int to = sentenceOffset + tokenReadings.getEndPos();
                forbiddenSpellings.put(unwantedSpelling, new RuleMatch(this, from, to, word));
            }
        }
        sentenceOffset += sentence.getText().length();
    }
    return toRuleMatchArray(foundMatches);
}
Also used : AnalyzedSentence(org.languagetool.AnalyzedSentence) AnalyzedToken(org.languagetool.AnalyzedToken) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings)

Example 22 with AnalyzedSentence

use of org.languagetool.AnalyzedSentence in project languagetool by languagetool-org.

the class GermanChunkerTest method assertBasicChunks.

/**
 * Analyzes the plain form of {@code input}, computes the chunker's basic chunks for its
 * non-whitespace tokens and hands them, together with the expected chunks derived from
 * {@code input}, to {@code assertChunks}.
 *
 * @param input the test input from which both the plain text and the expected chunks are derived
 * @throws Exception if analysis or chunking fails
 */
private void assertBasicChunks(String input) throws Exception {
    String plainInput = getPlainInput(input);
    AnalyzedTokenReadings[] tokens = lt.getAnalyzedSentence(plainInput).getTokensWithoutWhitespace();
    List<ChunkTaggedToken> actualChunks = chunker.getBasicChunks(Arrays.asList(tokens));
    assertChunks(input, plainInput, actualChunks, getExpectedChunks(input));
}
Also used : AnalyzedSentence(org.languagetool.AnalyzedSentence) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings)

Example 23 with AnalyzedSentence

use of org.languagetool.AnalyzedSentence in project languagetool by languagetool-org.

the class GermanChunkerTest method assertFullChunks.

/**
 * Analyzes the plain form of {@code input}, lets the chunker add its chunk tags to the
 * non-whitespace tokens and compares the resulting tags with the expected chunks derived
 * from {@code input}.
 *
 * @param input the test input from which both the plain text and the expected chunks are derived
 * @throws Exception if analysis or chunking fails
 */
private void assertFullChunks(String input) throws Exception {
    String plainInput = getPlainInput(input);
    AnalyzedTokenReadings[] tokens = lt.getAnalyzedSentence(plainInput).getTokensWithoutWhitespace();
    chunker.addChunkTags(Arrays.asList(tokens));
    List<String> expectedChunks = getExpectedChunks(input);
    List<ChunkTaggedToken> taggedTokens = new ArrayList<>();
    // The token at index 0 is left out of the comparison
    // (presumably the sentence-start marker; confirm against AnalyzedSentence).
    for (int idx = 1; idx < tokens.length; idx++) {
        AnalyzedTokenReadings readings = tokens[idx];
        taggedTokens.add(new ChunkTaggedToken(readings.getToken(), readings.getChunkTags(), readings));
    }
    assertChunks(input, plainInput, taggedTokens, expectedChunks);
}
Also used : AnalyzedSentence(org.languagetool.AnalyzedSentence) ArrayList(java.util.ArrayList) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings)

Example 24 with AnalyzedSentence

use of org.languagetool.AnalyzedSentence in project languagetool by languagetool-org.

the class CaseRuleTest method testCompareLists.

/**
 * Exercises {@code rule.compareLists} with index ranges over the non-whitespace tokens of two
 * German sentences, covering matching ranges as well as ranges that reach past the tokens.
 * In the expected arrays, the empty string lines up with the token at index 0.
 */
@Test
public void testCompareLists() throws IOException {
    AnalyzedSentence shortSentence = lt.getAnalyzedSentence("Hier ein Test");
    String[] leadingThree = { "", "Hier", "ein" };
    String[] middleTwo = { "Hier", "ein" };
    String[] wholeShort = { "", "Hier", "ein", "Test" };
    assertTrue(rule.compareLists(shortSentence.getTokensWithoutWhitespace(), 0, 2, leadingThree));
    assertTrue(rule.compareLists(shortSentence.getTokensWithoutWhitespace(), 1, 2, middleTwo));
    assertTrue(rule.compareLists(shortSentence.getTokensWithoutWhitespace(), 0, 3, wholeShort));
    // end index 4 lies beyond the last token of the three-word sentence
    assertFalse(rule.compareLists(shortSentence.getTokensWithoutWhitespace(), 0, 4, wholeShort));

    AnalyzedSentence longSentence = lt.getAnalyzedSentence("das Heilige Römische Reich");
    String[] wholeLong = { "", "das", "Heilige", "Römische", "Reich" };
    assertTrue(rule.compareLists(longSentence.getTokensWithoutWhitespace(), 0, 4, wholeLong));
    // both indices are outside the token range
    assertFalse(rule.compareLists(longSentence.getTokensWithoutWhitespace(), 8, 11, wholeLong));
}
Also used : AnalyzedSentence(org.languagetool.AnalyzedSentence) Test(org.junit.Test)

Example 25 with AnalyzedSentence

use of org.languagetool.AnalyzedSentence in project languagetool by languagetool-org.

the class Main method tagTextAndDisplayResults.

/**
 * Splits the content of {@code textArea} into sentences, analyzes each one and renders the
 * result as HTML: a two-column table with the disambiguator log when
 * {@code taggerShowsDisambigLog} is set, otherwise the colorized analysis string of each
 * sentence. Exceptions raised during analysis are rendered into the output instead of being
 * propagated. The HTML is then shown in the tagger dialog, which is created on first use; the
 * dialog work is scheduled via {@code SwingUtilities.invokeLater}.
 */
private void tagTextAndDisplayResults() {
    JLanguageTool langTool = ltSupport.getLanguageTool();
    // tag text
    List<String> sentences = langTool.sentenceTokenize(textArea.getText());
    StringBuilder html = new StringBuilder();
    if (taggerShowsDisambigLog) {
        // table header: token column and disambiguator log column
        html.append("<table>").append("<tr>");
        html.append("<td><b>").append(messages.getString("token")).append("</b></td>");
        html.append("<td><b>").append(messages.getString("disambiguatorLog")).append("</b></td>");
        html.append("</tr>");
        boolean odd = true;
        try {
            for (String sent : sentences) {
                odd = appendTagsWithDisambigLog(html, langTool.getAnalyzedSentence(sent), odd);
            }
        } catch (Exception e) {
            // show the failure in the dialog rather than aborting
            html.append(getStackTraceAsHtml(e));
        }
        html.append("</table>");
    } else {
        try {
            for (String sent : sentences) {
                AnalyzedSentence analyzed = langTool.getAnalyzedSentence(sent);
                // line break after the sentence-start marker
                String withBreaks = StringTools.escapeHTML(analyzed.toString(",")).replace("&lt;S&gt;", "&lt;S&gt;<br>");
                // wrap every [...] tag list in a colored font element, one per line
                String colorized = withBreaks.replace("[", "<font color='" + TAG_COLOR + "'>[").replace("]", "]</font><br>");
                html.append(colorized).append('\n');
            }
        } catch (Exception e) {
            // show the failure in the dialog rather than aborting
            html.append(getStackTraceAsHtml(e));
        }
    }
    SwingUtilities.invokeLater(() -> {
        if (taggerDialog == null) {
            // first use: build the dialog and its components
            taggerDialog = new JDialog(frame);
            taggerDialog.setTitle(messages.getString("taggerWindowTitle"));
            taggerDialog.setDefaultCloseOperation(JDialog.HIDE_ON_CLOSE);
            taggerDialog.setResizable(true);
            taggerDialog.setSize(640, 480);
            taggerDialog.setLocationRelativeTo(frame);

            // Escape and the close button both just hide the dialog
            KeyStroke escapeKey = KeyStroke.getKeyStroke(KeyEvent.VK_ESCAPE, 0);
            ActionListener hideDialog = actionEvent -> taggerDialog.setVisible(false);
            taggerDialog.getRootPane().registerKeyboardAction(hideDialog, escapeKey, JComponent.WHEN_IN_FOCUSED_WINDOW);

            JPanel panel = new JPanel(new GridBagLayout());
            taggerDialog.add(panel);
            taggerArea = new JTextPane();
            taggerArea.setContentType("text/html");
            taggerArea.setEditable(false);

            // row 0: the scrollable result area, spanning both columns and taking all extra space
            GridBagConstraints gbc = new GridBagConstraints();
            gbc.gridx = 0;
            gbc.gridwidth = 2;
            gbc.gridy = 0;
            gbc.weightx = 1.0;
            gbc.weighty = 1.0;
            gbc.insets = new Insets(8, 8, 4, 8);
            gbc.fill = GridBagConstraints.BOTH;
            panel.add(new JScrollPane(taggerArea), gbc);

            // row 1: checkbox and close button
            gbc.gridwidth = 1;
            gbc.gridx = 0;
            gbc.gridy = 1;
            gbc.weightx = 0.0;
            gbc.weighty = 0.0;
            gbc.insets = new Insets(4, 8, 8, 8);
            gbc.fill = GridBagConstraints.NONE;
            gbc.anchor = GridBagConstraints.EAST;
            JCheckBox disambigLogBox = new JCheckBox(messages.getString("ShowDisambiguatorLog"));
            disambigLogBox.setSelected(taggerShowsDisambigLog);
            disambigLogBox.addItemListener((ItemEvent event) -> {
                // keep the field and the stored configuration in sync with the checkbox
                taggerShowsDisambigLog = event.getStateChange() == ItemEvent.SELECTED;
                ltSupport.getConfig().setTaggerShowsDisambigLog(taggerShowsDisambigLog);
            });
            panel.add(disambigLogBox, gbc);
            gbc.gridx = 1;
            JButton closeButton = new JButton(messages.getString("guiCloseButton"));
            closeButton.addActionListener(hideDialog);
            panel.add(closeButton, gbc);
        }
        // the orientation must be set on every call because the selected language may have changed
        taggerDialog.applyComponentOrientation(ComponentOrientation.getOrientation(languageBox.getSelectedLanguage().getLocale()));
        taggerDialog.setVisible(true);
        taggerArea.setText(HTML_FONT_START + html + HTML_FONT_END);
    });
}
Also used : JLanguageTool(org.languagetool.JLanguageTool) PortBindingException(org.languagetool.server.PortBindingException) AnalyzedSentence(org.languagetool.AnalyzedSentence)

Aggregations

AnalyzedSentence (org.languagetool.AnalyzedSentence): 40, AnalyzedTokenReadings (org.languagetool.AnalyzedTokenReadings): 21, ArrayList (java.util.ArrayList): 8, Test (org.junit.Test): 8, JLanguageTool (org.languagetool.JLanguageTool): 8, RuleMatch (org.languagetool.rules.RuleMatch): 8, Rule (org.languagetool.rules.Rule): 5, IOException (java.io.IOException): 4, DisambiguationPatternRule (org.languagetool.tagging.disambiguation.rules.DisambiguationPatternRule): 4, English (org.languagetool.language.English): 3, SpellingCheckRule (org.languagetool.rules.spelling.SpellingCheckRule): 3, AnalyzedToken (org.languagetool.AnalyzedToken): 2, Ukrainian (org.languagetool.language.Ukrainian): 2, InputStream (java.io.InputStream): 1, Document (org.apache.lucene.document.Document): 1, ConfusionSet (org.languagetool.rules.ConfusionSet): 1, CorrectExample (org.languagetool.rules.CorrectExample): 1, IncorrectExample (org.languagetool.rules.IncorrectExample): 1, BitextRule (org.languagetool.rules.bitext.BitextRule): 1, ConfusionProbabilityRule (org.languagetool.rules.ngrams.ConfusionProbabilityRule): 1