Search in sources :

Example 61 with RuleMatch

use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.

the class Example method main.

/**
 * Runs one fixed sample sentence through LanguageTool once for every language
 * returned by {@code Languages.get()} and prints each reported match to
 * standard output.
 *
 * NOTE(review): this excerpt stops inside the inner loop; the closing braces of
 * both loops and of the method are not part of the visible text.
 *
 * @param args command-line arguments; not read anywhere in the visible code
 * @throws IOException declared by the signature; presumably raised by the check call (confirm)
 */
public static void main(String[] args) throws IOException {
    List<Language> realLanguages = Languages.get();
    // Banner: purpose of the run, tool version/build date, and number of languages.
    System.out.println("This example will test a short string with all languages known to LanguageTool.");
    System.out.println("It's just a test to make sure there's at least no crash.");
    System.out.println("Using LanguageTool " + JLanguageTool.VERSION + " (" + JLanguageTool.BUILD_DATE + ")");
    System.out.println("Supported languages: " + realLanguages.size());
    for (Language language : realLanguages) {
        // A new JLanguageTool instance is created for each language.
        JLanguageTool langTool = new JLanguageTool(language);
        // The same English sample (with a repeated word) is used for every language.
        String input = "And the the";
        List<RuleMatch> result = langTool.check(input);
        System.out.println("Checking '" + input + "' with " + language + ":");
        // One indented line per match, using RuleMatch's string conversion.
        for (RuleMatch ruleMatch : result) {
            System.out.println("    " + ruleMatch);
Also used : RuleMatch(org.languagetool.rules.RuleMatch) Language(org.languagetool.Language) JLanguageTool(org.languagetool.JLanguageTool)

Example 62 with RuleMatch

use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.

the class CommandLineTools method checkText.

  /**
   * Check the given text and print results to System.out.
   * @param contents a text to check (may be more than one sentence)
   * @param lt Initialized LanguageTool
   * @param isXmlFormat whether to print the result in XML format
   * @param isJsonFormat whether to print the result in JSON format
   * @param contextSize error text context size: -1 for default
   * @param lineOffset line number offset to be added to line numbers in matches
   * @param prevMatches number of previously matched rules
   * @param apiMode mode of xml/json printout for simple xml/json output
   * @return Number of rule matches to the input text.
   */
// Checks `contents` with `lt`, shifts the line numbers of all matches by `lineOffset`,
// emits the matches as XML, JSON or plain text, and returns the number of matches.
//
// Parameters not covered by the documentation lines above this method:
//   listUnknownWords - when true (and apiMode is NORMAL_API, XML output only), the
//                      unknown-word list is fetched from `lt` and handed to the XML serializer
//   unknownWords     - list passed on to the XML serializer; it is replaced locally when
//                      listUnknownWords applies (see below)
//
// NOTE(review): this excerpt has lost its closing braces, and in the XML/JSON branches
// the serialized string and the PrintStream are never used in the visible text -
// presumably the statements that print them were dropped; confirm against the full source.
public static int checkText(String contents, JLanguageTool lt, boolean isXmlFormat, boolean isJsonFormat, int contextSize, int lineOffset, int prevMatches, StringTools.ApiPrintMode apiMode, boolean listUnknownWords, List<String> unknownWords) throws IOException {
    // -1 is the sentinel for "use the default context size".
    if (contextSize == -1) {
        contextSize = DEFAULT_CONTEXT_SIZE;
    // Captured before the check so that the statistics at the end include the check itself.
    long startTime = System.currentTimeMillis();
    List<RuleMatch> ruleMatches = lt.check(contents);
    // adjust line numbers
    for (RuleMatch r : ruleMatches) {
        r.setLine(r.getLine() + lineOffset);
        r.setEndLine(r.getEndLine() + lineOffset);
    if (isXmlFormat) {
        if (listUnknownWords && apiMode == StringTools.ApiPrintMode.NORMAL_API) {
            // Reassigns the parameter only; the list object passed in by the caller is not modified.
            unknownWords = lt.getUnknownWords();
        RuleMatchAsXmlSerializer serializer = new RuleMatchAsXmlSerializer();
        String xml = serializer.ruleMatchesToXml(ruleMatches, contents, contextSize, apiMode, lt.getLanguage(), unknownWords);
        // Wraps System.out with auto-flush enabled and an explicit UTF-8 encoding.
        PrintStream out = new PrintStream(System.out, true, "UTF-8");
    } else if (isJsonFormat) {
        // XML takes precedence: this branch is reached only when isXmlFormat is false.
        RuleMatchesAsJsonSerializer serializer = new RuleMatchesAsJsonSerializer();
        String json = serializer.ruleMatchesToJson(ruleMatches, contents, contextSize, lt.getLanguage());
        PrintStream out = new PrintStream(System.out, true, "UTF-8");
    } else {
        // Neither XML nor JSON requested: delegate to the plain-text printer.
        printMatches(ruleMatches, prevMatches, contents, contextSize);
    //display stats if it's not in a buffered mode
    // Statistics are skipped for JSON output and for any apiMode other than NORMAL_API.
    if (apiMode == StringTools.ApiPrintMode.NORMAL_API && !isJsonFormat) {
        SentenceTokenizer sentenceTokenizer = lt.getLanguage().getSentenceTokenizer();
        // The text is tokenized again here only to obtain the sentence count.
        int sentenceCount = sentenceTokenizer.tokenize(contents).size();
        displayTimeStats(startTime, sentenceCount, isXmlFormat);
    return ruleMatches.size();
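Also used : RuleMatchesAsJsonSerializer(org.languagetool.tools.RuleMatchesAsJsonSerializer) PrintStream(java.io.PrintStream) RuleMatch(org.languagetool.rules.RuleMatch) SentenceTokenizer(org.languagetool.tokenizers.SentenceTokenizer) RuleMatchAsXmlSerializer(org.languagetool.tools.RuleMatchAsXmlSerializer)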

Example 63 with RuleMatch

use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.

the class CommandLineTools method correctBitext.

  /**
   * Automatically applies suggestions to the bilingual text.
   * Note: if there is more than one suggestion, always the first
   * one is applied, and others ignored silently.
   * Prints results to System.out.
   * @param reader a bitext file reader
   * @param sourceLt Initialized source JLanguageTool object
   * @param targetLt Initialized target JLanguageTool object
   * @param bRules  List of all BitextRules to use
   */
// For every source/target pair supplied by `reader`: runs the bitext check, re-positions
// each resulting match via targetLt.adjustRuleMatchPos, and - when at least one match
// exists - prints the target text rebuilt by correctTextFromMatches.
//
// NOTE(review): this excerpt ends at a dangling "} else {"; what happens when there are
// no matches, and all closing braces, are not visible here.
public static void correctBitext(BitextReader reader, JLanguageTool sourceLt, JLanguageTool targetLt, List<BitextRule> bRules) throws IOException {
    for (StringPair srcAndTrg : reader) {
        // Matches produced for this one source/target pair.
        List<RuleMatch> curMatches = Tools.checkBitext(srcAndTrg.getSource(), srcAndTrg.getTarget(), sourceLt, targetLt, bRules);
        // Holds the matches after their positions have been adjusted.
        List<RuleMatch> fixedMatches = new ArrayList<>();
        for (RuleMatch thisMatch : curMatches) {
            // Second argument is a fixed 0; the remaining position arguments come from the reader's current state.
            fixedMatches.add(targetLt.adjustRuleMatchPos(thisMatch, //don't need to adjust at all, we have zero offset related to trg sentence 
            0, reader.getTargetColumnCount(), reader.getLineCount(), reader.getCurrentLine(), null));
        if (fixedMatches.size() > 0) {
            // Only the target side of the pair is rewritten and printed.
            System.out.println(correctTextFromMatches(srcAndTrg.getTarget(), fixedMatches));
        } else {
Also used : RuleMatch(org.languagetool.rules.RuleMatch) StringPair(org.languagetool.bitext.StringPair) ArrayList(java.util.ArrayList)

Example 64 with RuleMatch

use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.

the class JLanguageToolTest method testCleanOverlappingWithGerman.

// Checks one German sample sentence with a GermanyGerman-configured JLanguageTool and
// asserts that exactly three matches are reported.
// NOTE(review): the method's closing brace is not part of this excerpt.
public void testCleanOverlappingWithGerman() throws IOException {
    JLanguageTool tool = new JLanguageTool(new GermanyGerman());
    // Juxtaposed errors in "TRGS - Technische" should not be removed.
    List<RuleMatch> matches = tool.check("TRGS - Technische Regeln für Gefahrstoffe");
    // The expected count of 3 presumably includes the juxtaposed errors mentioned above - confirm.
    assertEquals(3, matches.size());
Also used : RuleMatch(org.languagetool.rules.RuleMatch) GermanyGerman(org.languagetool.language.GermanyGerman) Test(org.junit.Test)

Example 65 with RuleMatch

use of org.languagetool.rules.RuleMatch in project languagetool by languagetool-org.

the class AccentuationCheckRule method match.

/**
 * Walks over the sentence's non-whitespace tokens and, for every token that is a key
 * of {@code relevantWords} or {@code relevantWords2}, looks for a context condition
 * under which the map's entry for that token is chosen as the replacement; a
 * {@code RuleMatch} is then built for the token.
 *
 * The short Catalan phrases in the branch comments appear to be sample inputs that
 * the following condition is meant to catch.
 *
 * NOTE(review): this excerpt has lost its closing braces and the bodies of several
 * {@code if} statements (the empty-token check, the "not relevant" check, the
 * weak-pronoun check). The statements that attach the replacement to the match and
 * add it to {@code ruleMatches} are also not visible; confirm against the full source.
 *
 * @param sentence the analyzed sentence to inspect
 * @return the collected matches, converted with {@code toRuleMatchArray}
 */
public RuleMatch[] match(final AnalyzedSentence sentence) {
    final List<RuleMatch> ruleMatches = new ArrayList<>();
    final AnalyzedTokenReadings[] tokens = sentence.getTokensWithoutWhitespace();
    for (int i = 1; i < tokens.length; i++) {
        // ignoring token 0, i.e. SENT_START
        final String token;
        // Only the first real token is lower-cased before lookup; all other tokens are used as written.
        if (i == 1) {
            token = tokens[i].getToken().toLowerCase();
        } else {
            token = tokens[i].getToken();
        // Neighbouring token texts; each stays "" when the neighbour does not exist.
        final String prevToken = tokens[i - 1].getToken();
        String prevPrevToken = "";
        // i > 2 (not i > 1): index 0 is never used as the token two positions back.
        if (i > 2) {
            prevPrevToken = tokens[i - 2].getToken();
        String nextToken = "";
        if (i < tokens.length - 1) {
            nextToken = tokens[i + 1].getToken();
        String nextNextToken = "";
        if (i < tokens.length - 2) {
            nextNextToken = tokens[i + 2].getToken();
        // Set when the token is a key of relevantWords / relevantWords2 respectively.
        boolean isRelevantWord = false;
        boolean isRelevantWord2 = false;
        if (StringTools.isEmpty(token)) {
        if (relevantWords.containsKey(token)) {
            isRelevantWord = true;
        if (relevantWords2.containsKey(token)) {
            isRelevantWord2 = true;
        if (!isRelevantWord && !isRelevantWord2) {
        // verb preceded by a weak pronoun (Catalan: "pronom feble")
        if (matchPostagRegexp(tokens[i - 1], PRONOM_FEBLE) && !prevToken.startsWith("'") && !prevToken.startsWith("-")) {
        // Stays null unless one of the conditions below selects a replacement.
        String replacement = null;
        // Matchers over the neighbouring token texts, created once per token and reused in the conditions below.
        final Matcher mPreposicioDE = PREPOSICIO_DE.matcher(nextToken);
        final Matcher mExcepcionsDE = EXCEPCIONS_DARRERE_DE.matcher(nextNextToken);
        final Matcher mArticleELMS = ARTICLE_EL_MS.matcher(prevToken);
        final Matcher mArticleELFS = ARTICLE_EL_FS.matcher(prevToken);
        final Matcher mArticleELMP = ARTICLE_EL_MP.matcher(prevToken);
        final Matcher mArticleELFP = ARTICLE_EL_FP.matcher(prevToken);
        // Conditions for tokens found in relevantWords; the replacement is always relevantWords.get(token).getToken().
        if (isRelevantWord && !matchPostagRegexp(tokens[i], GN) && !matchPostagRegexp(tokens[i], LOCUCIONS)) {
            // e.g. "amb renuncies"
            if (tokens[i - 1].hasPosTag("SPS00") && !tokens[i - 1].hasPosTag("RG") && !matchPostagRegexp(tokens[i - 1], DETERMINANT) && !matchPostagRegexp(tokens[i], INFINITIU)) {
                replacement = relevantWords.get(token).getToken();
            } else if (i > 2 && tokens[i - 2].hasPosTag("SPS00") && !tokens[i - 2].hasPosTag("RG") && !matchPostagRegexp(tokens[i - 2], DETERMINANT) && (matchPostagRegexp(tokens[i - 1], DETERMINANT) || mArticleELMS.matches() || mArticleELFS.matches() || mArticleELMP.matches() || mArticleELFP.matches()) && !matchPostagRegexp(tokens[i], INFINITIU)) {
                replacement = relevantWords.get(token).getToken();
            } else // e.g. "aquestes renuncies"
            if (((matchPostagRegexp(tokens[i - 1], DETERMINANT_MS) && matchPostagRegexp(relevantWords.get(token), NOM_MS) && !token.equals("cantar")) || (matchPostagRegexp(tokens[i - 1], DETERMINANT_MP) && matchPostagRegexp(relevantWords.get(token), NOM_MP)) || (matchPostagRegexp(tokens[i - 1], DETERMINANT_FS) && matchPostagRegexp(relevantWords.get(token), NOM_FS) && !token.equals("venia") && !token.equals("tenia") && !token.equals("continua") && !token.equals("genera") && !token.equals("faria")) || (matchPostagRegexp(tokens[i - 1], DETERMINANT_FP) && matchPostagRegexp(relevantWords.get(token), NOM_FP)))) {
                replacement = relevantWords.get(token).getToken();
            } else // e.g. "fumaré una faria" (correct: "fària")
            if (i > 2 && matchPostagRegexp(tokens[i - 2], VERB_CONJUGAT) && ((matchPostagRegexp(tokens[i - 1], DETERMINANT_MS) && matchPostagRegexp(relevantWords.get(token), NOM_MS)) || (matchPostagRegexp(tokens[i - 1], DETERMINANT_MP) && matchPostagRegexp(relevantWords.get(token), NOM_MP)) || (matchPostagRegexp(tokens[i - 1], DETERMINANT_FS) && matchPostagRegexp(relevantWords.get(token), NOM_FS)) || (matchPostagRegexp(tokens[i - 1], DETERMINANT_FP) && matchPostagRegexp(relevantWords.get(token), NOM_FP)))) {
                replacement = relevantWords.get(token).getToken();
            } else // e.g. "fem la copia" (correct: "còpia")
            if (i > 2 && matchPostagRegexp(tokens[i - 2], VERB_CONJUGAT) && ((mArticleELMS.matches() && matchPostagRegexp(relevantWords.get(token), NOM_MS)) || (mArticleELMP.matches() && matchPostagRegexp(relevantWords.get(token), NOM_MP)) || (mArticleELFS.matches() && matchPostagRegexp(relevantWords.get(token), NOM_FS)) || (mArticleELFP.matches() && matchPostagRegexp(relevantWords.get(token), NOM_FP)))) {
                replacement = relevantWords.get(token).getToken();
            } else // e.g. "circumstancies d'una altra classe"
            // NOTE(review): tokens[i + 1] is read before the (i < tokens.length - 2) bound check; this is only
            // safe if mPreposicioDE cannot match the empty nextToken used when there is no next token - confirm.
            if (!matchPostagRegexp(tokens[i], PARTICIPI_MS) && !token.equals("venia") && !token.equals("venies") && !token.equals("tenia") && !token.equals("tenies") && !token.equals("faria") && !token.equals("faries") && !token.equals("espero") && !token.equals("continua") && !token.equals("continues") && !token.equals("cantar") && !prevToken.equals("que") && !prevToken.equals("qui") && !prevToken.equals("què") && mPreposicioDE.matches() && !matchPostagRegexp(tokens[i - 1], NOT_IN_PREV_TOKEN) && !matchPostagRegexp(tokens[i + 1], LOCUCIONS) && (i < tokens.length - 2) && !matchPostagRegexp(tokens[i + 2], INFINITIU) && !mExcepcionsDE.matches() && !tokens[i - 1].hasPosTag("RG")) {
                replacement = relevantWords.get(token).getToken();
            } else // e.g. "la renuncia del president."
            if (!token.equals("venia") && !token.equals("venies") && !token.equals("tenia") && !token.equals("tenies") && !token.equals("faria") && !token.equals("faries") && !token.equals("continua") && !token.equals("continues") && !token.equals("cantar") && !token.equals("diferencia") && !token.equals("diferencies") && !token.equals("distancia") && !token.equals("distancies") && ((mArticleELMS.matches() && matchPostagRegexp(relevantWords.get(token), NOM_MS)) || (mArticleELFS.matches() && matchPostagRegexp(relevantWords.get(token), NOM_FS)) || (mArticleELMP.matches() && matchPostagRegexp(relevantWords.get(token), NOM_MP)) || (mArticleELFP.matches() && matchPostagRegexp(relevantWords.get(token), NOM_FP))) && mPreposicioDE.matches()) {
                replacement = relevantWords.get(token).getToken();
            } else // e.g. "circumstancies extraordinàries"
            if (!token.equals("pronuncia") && !token.equals("espero") && !token.equals("pronuncies") && !token.equals("venia") && !token.equals("venies") && !token.equals("tenia") && !token.equals("tenies") && !token.equals("continua") && !token.equals("continues") && !token.equals("faria") && !token.equals("faries") && !token.equals("genera") && !token.equals("figuri") && (i < tokens.length - 1) && ((matchPostagRegexp(relevantWords.get(token), NOM_MS) && matchPostagRegexp(tokens[i + 1], ADJECTIU_MS)) || (matchPostagRegexp(relevantWords.get(token), NOM_FS) && matchPostagRegexp(tokens[i + 1], ADJECTIU_FS)) || (matchPostagRegexp(relevantWords.get(token), NOM_MP) && matchPostagRegexp(tokens[i + 1], ADJECTIU_MP)) || (matchPostagRegexp(relevantWords.get(token), NOM_FP) && matchPostagRegexp(tokens[i + 1], ADJECTIU_FP)))) {
                replacement = relevantWords.get(token).getToken();
            } else // e.g. "les seves contraries"
            if ((matchPostagRegexp(relevantWords.get(token), NOM_MS) && matchPostagRegexp(tokens[i - 1], ADJECTIU_MS) && !matchPostagRegexp(tokens[i], VERB_3S) && !matchPostagRegexp(tokens[i], GRUP_VERBAL)) || (matchPostagRegexp(relevantWords.get(token), NOM_FS) && matchPostagRegexp(tokens[i - 1], ADJECTIU_FS) && !matchPostagRegexp(tokens[i], VERB_3S)) || (matchPostagRegexp(relevantWords.get(token), NOM_MP) && matchPostagRegexp(tokens[i - 1], ADJECTIU_MP)) || (matchPostagRegexp(relevantWords.get(token), NOM_FP) && matchPostagRegexp(tokens[i - 1], ADJECTIU_FP))) {
                replacement = relevantWords.get(token).getToken();
            } else // e.g. "una nova formula que" (correct: "fórmula")
            if (nextToken.equals("que") && i > 2 && ((matchPostagRegexp(relevantWords.get(token), NOM_MS) && matchPostagRegexp(tokens[i - 1], ADJECTIU_MS) && matchPostagRegexp(tokens[i - 2], DETERMINANT_MS)) || (matchPostagRegexp(relevantWords.get(token), NOM_FS) && matchPostagRegexp(tokens[i - 1], ADJECTIU_FS) && matchPostagRegexp(tokens[i - 2], DETERMINANT_FS)) || (matchPostagRegexp(relevantWords.get(token), NOM_MP) && matchPostagRegexp(tokens[i - 1], ADJECTIU_MP) && matchPostagRegexp(tokens[i - 2], DETERMINANT_MP)) || (matchPostagRegexp(relevantWords.get(token), NOM_FP) && matchPostagRegexp(tokens[i - 1], ADJECTIU_FP) && matchPostagRegexp(tokens[i - 2], DETERMINANT_FP)))) {
                replacement = relevantWords.get(token).getToken();
            } else // e.g. "les circumstancies que ens envolten"
            if (nextToken.equals("que") && ((mArticleELMS.matches() && matchPostagRegexp(relevantWords.get(token), NOM_MS)) || (mArticleELFS.matches() && matchPostagRegexp(relevantWords.get(token), NOM_FS)) || (mArticleELMP.matches() && matchPostagRegexp(relevantWords.get(token), NOM_MP)) || (mArticleELFP.matches() && matchPostagRegexp(relevantWords.get(token), NOM_FP)))) {
                replacement = relevantWords.get(token).getToken();
            // e.g. "de positiva influencia"
            if (!token.equals("pronuncia") && !token.equals("espero") && !token.equals("pronuncies") && !token.equals("venia") && !token.equals("venies") && !token.equals("tenia") && !token.equals("tenies") && !token.equals("continua") && !token.equals("continues") && !token.equals("faria") && !token.equals("faries") && !token.equals("genera") && !token.equals("figuri") && i > 2 && tokens[i - 2].hasPosTag("SPS00") && !tokens[i - 2].hasPosTag("RG") && ((matchPostagRegexp(relevantWords.get(token), NOM_MS) && matchPostagRegexp(tokens[i - 1], ADJECTIU_MS)) || (matchPostagRegexp(relevantWords.get(token), NOM_FS) && matchPostagRegexp(tokens[i - 1], ADJECTIU_FS)) || (matchPostagRegexp(relevantWords.get(token), NOM_MP) && matchPostagRegexp(tokens[i - 1], ADJECTIU_MP)) || (matchPostagRegexp(relevantWords.get(token), NOM_FP) && matchPostagRegexp(tokens[i - 1], ADJECTIU_FP)))) {
                replacement = relevantWords.get(token).getToken();
        // Conditions for tokens found in relevantWords2; the replacement is always relevantWords2.get(token).getToken().
        if (isRelevantWord2 && !matchPostagRegexp(tokens[i], GN) && !matchPostagRegexp(tokens[i], LOCUCIONS)) {
            // e.g. "de manera obvia", "circumstàncies extraordinaries"
            if ((matchPostagRegexp(relevantWords2.get(token), ADJECTIU_MS) && matchPostagRegexp(tokens[i - 1], NOM_MS) && !tokens[i - 1].hasPosTag("_GN_FS") && matchPostagRegexp(tokens[i], VERB_CONJUGAT) && !matchPostagRegexp(tokens[i], VERB_3S)) || (matchPostagRegexp(relevantWords2.get(token), ADJECTIU_FS) && prevPrevToken.equalsIgnoreCase("de") && (prevToken.equals("manera") || prevToken.equals("forma"))) || (matchPostagRegexp(relevantWords2.get(token), ADJECTIU_MP) && matchPostagRegexp(tokens[i - 1], NOM_MP)) || (matchPostagRegexp(relevantWords2.get(token), ADJECTIU_FP) && matchPostagRegexp(tokens[i - 1], NOM_FP))) {
                replacement = relevantWords2.get(token).getToken();
            } else // e.g. "de continua disputa"
            if ((i < tokens.length - 1) && !prevToken.equals("que") && !matchPostagRegexp(tokens[i - 1], NOT_IN_PREV_TOKEN) && ((matchPostagRegexp(relevantWords2.get(token), ADJECTIU_MS) && matchPostagRegexp(tokens[i + 1], NOM_MS) && matchPostagRegexp(tokens[i - 1], BEFORE_ADJECTIVE_MS)) || (matchPostagRegexp(relevantWords2.get(token), ADJECTIU_FS) && matchPostagRegexp(tokens[i + 1], NOM_FS) && matchPostagRegexp(tokens[i - 1], BEFORE_ADJECTIVE_FS)) || (matchPostagRegexp(relevantWords2.get(token), ADJECTIU_MP) && matchPostagRegexp(tokens[i + 1], NOM_MP) && matchPostagRegexp(tokens[i - 1], BEFORE_ADJECTIVE_MP)) || (matchPostagRegexp(relevantWords2.get(token), ADJECTIU_FP) && matchPostagRegexp(tokens[i + 1], NOM_FP) && matchPostagRegexp(tokens[i - 1], BEFORE_ADJECTIVE_FP)))) {
                replacement = relevantWords2.get(token).getToken();
            } else // e.g. "la magnifica conservació"
            if ((i < tokens.length - 1) && ((matchPostagRegexp(relevantWords2.get(token), ADJECTIU_MS) && matchPostagRegexp(tokens[i + 1], NOM_MS) && mArticleELMS.matches()) || (matchPostagRegexp(relevantWords2.get(token), ADJECTIU_FS) && matchPostagRegexp(tokens[i + 1], NOM_FS) && mArticleELFS.matches()) || (matchPostagRegexp(relevantWords2.get(token), ADJECTIU_MP) && matchPostagRegexp(tokens[i + 1], NOM_MP) && mArticleELMP.matches()) || (matchPostagRegexp(relevantWords2.get(token), ADJECTIU_FP) && matchPostagRegexp(tokens[i + 1], NOM_FP) && mArticleELFP.matches()))) {
                replacement = relevantWords2.get(token).getToken();
        if (replacement != null) {
            // User-facing Catalan texts: "If it is a noun or an adjective, it must carry an accent." / "An accent is missing".
            final String msg = "Si és un nom o un adjectiu, ha de portar accent.";
            // The match spans exactly the current token (its start to end position).
            final RuleMatch ruleMatch = new RuleMatch(this, tokens[i].getStartPos(), tokens[i].getEndPos(), msg, "Falta un accent");
    return toRuleMatchArray(ruleMatches);
Also used : RuleMatch(org.languagetool.rules.RuleMatch) Matcher(java.util.regex.Matcher) ArrayList(java.util.ArrayList) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings)


RuleMatch (org.languagetool.rules.RuleMatch)144 Test (org.junit.Test)64 JLanguageTool (org.languagetool.JLanguageTool)54 ArrayList (java.util.ArrayList)30 AnalyzedTokenReadings (org.languagetool.AnalyzedTokenReadings)14 Rule (org.languagetool.rules.Rule)14 Language (org.languagetool.Language)10 PatternRule (org.languagetool.rules.patterns.PatternRule)10 AnalyzedSentence (org.languagetool.AnalyzedSentence)8 Ukrainian (org.languagetool.language.Ukrainian)8 AbstractPatternRule (org.languagetool.rules.patterns.AbstractPatternRule)8 Matcher (java.util.regex.Matcher)7 English (org.languagetool.language.English)7 IOException ( Catalan (org.languagetool.language.Catalan)6 Polish (org.languagetool.language.Polish)6 GermanyGerman (org.languagetool.language.GermanyGerman)5 AnnotatedText (org.languagetool.markup.AnnotatedText)5 PatternToken (org.languagetool.rules.patterns.PatternToken)5 AnalyzedToken (org.languagetool.AnalyzedToken)4