Search in sources:

Example 1 with StringPair

use of org.languagetool.bitext.StringPair in project languagetool by languagetool-org.

the class BitextPatternRuleTest method testBitextRule.

/**
   * Runs the examples attached to a bitext pattern rule through the rule.
   * Correct example pairs must be non-empty after cleaning and must not
   * produce a match; incorrect example pairs have their marker positions
   * validated and are then passed to the bad-sentence check, once for the
   * source side and once for the target side.
   *
   * @param rule the bitext rule whose examples are verified
   * @param lang language used in failure messages and passed on to the helper checks
   * @param languageTool tool used for the target side of each example
   */
private void testBitextRule(BitextPatternRule rule, Language lang, JLanguageTool languageTool) throws IOException {
    final String openingMarker = "<marker>";
    final String closingMarker = "</marker>";
    final String indentationPattern = "[\\n\\t]+";
    JLanguageTool srcTool = new JLanguageTool(rule.getSourceLanguage());

    for (StringPair pair : rule.getCorrectBitextExamples()) {
        String source = pair.getSource();
        String target = pair.getTarget();
        assertTrue("Got good sentence: '" + source + "'", !cleanSentence(source).trim().isEmpty());
        assertTrue("Got good sentence: '" + target + "'", !cleanSentence(target).trim().isEmpty());
        String unexpectedMatchMessage = lang + ": Did not expect error in: " + pair + " (Rule: " + rule + ")";
        assertFalse(unexpectedMatchMessage, match(rule, source, target, srcTool, languageTool));
    }

    for (IncorrectBitextExample badExample : rule.getIncorrectBitextExamples()) {
        StringPair example = badExample.getExample();
        // Drop newlines and tabs so that examples may be indented in the rule file.
        String badSource = example.getSource().replaceAll(indentationPattern, "");
        String badTarget = example.getTarget().replaceAll(indentationPattern, "");
        List<String> corrections = badExample.getCorrections();

        // The end offset is shifted left by the length of the opening marker.
        int srcStart = badSource.indexOf(openingMarker);
        int srcEnd = badSource.indexOf(closingMarker) - openingMarker.length();
        testMarker(srcStart, srcEnd, rule, lang);

        int trgStart = badTarget.indexOf(openingMarker);
        int trgEnd = badTarget.indexOf(closingMarker) - openingMarker.length();
        testMarker(trgStart, trgEnd, rule, lang);

        testBadSentence(badSource, corrections, srcStart, srcEnd, rule.getSrcRule(), lang, srcTool);
        testBadSentence(badTarget, corrections, trgStart, trgEnd, rule.getTrgRule(), lang, languageTool);
    }
}
Also used : StringPair(org.languagetool.bitext.StringPair)

Example 2 with StringPair

use of org.languagetool.bitext.StringPair in project languagetool by languagetool-org.

the class CommandLineTools method correctBitext.

/**
   * Applies rule suggestions to the target side of a bilingual text and
   * prints one line per sentence pair to System.out: the corrected target
   * sentence when there are matches, the unchanged target sentence otherwise.
   * Note: if there is more than one suggestion, always the first
   * one is applied, and others ignored silently.
   *
   * @param reader a bitext file reader
   * @param sourceLt Initialized source JLanguageTool object
   * @param targetLt Initialized target JLanguageTool object
   * @param bRules  List of all BitextRules to use
   */
public static void correctBitext(BitextReader reader, JLanguageTool sourceLt, JLanguageTool targetLt, List<BitextRule> bRules) throws IOException {
    for (StringPair pair : reader) {
        String source = pair.getSource();
        String target = pair.getTarget();
        List<RuleMatch> rawMatches = Tools.checkBitext(source, target, sourceLt, targetLt, bRules);

        List<RuleMatch> adjustedMatches = new ArrayList<>();
        for (RuleMatch rawMatch : rawMatches) {
            // Offset 0: no adjustment is needed, positions are already relative to the target sentence.
            RuleMatch adjusted = targetLt.adjustRuleMatchPos(rawMatch, 0, reader.getTargetColumnCount(),
                    reader.getLineCount(), reader.getCurrentLine(), null);
            adjustedMatches.add(adjusted);
        }

        if (adjustedMatches.isEmpty()) {
            System.out.println(target);
        } else {
            System.out.println(correctTextFromMatches(target, adjustedMatches));
        }
    }
}
Also used : RuleMatch(org.languagetool.rules.RuleMatch) StringPair(org.languagetool.bitext.StringPair) ArrayList(java.util.ArrayList)

Example 3 with StringPair

use of org.languagetool.bitext.StringPair in project languagetool by languagetool-org.

the class BitextPatternRuleHandler method endElement.

/**
   * Reacts to the closing tag of an element in a bitext rule file.
   * A closing rule element assembles a BitextPatternRule from the collected
   * source/target rules and examples; closing example elements record the
   * example pairs. Elements not handled here are passed to the superclass.
   */
@Override
public void endElement(String namespaceURI, String sName, String qName) throws SAXException {
    switch (qName) {
        case RULE:
            trgRule.setMessage(message.toString());
            for (Match suggestion : suggestionMatches) {
                trgRule.addSuggestionMatch(suggestion);
            }
            if (phrasePatternTokens.size() < 2) {
                suggestionMatches.clear();
            }
            BitextPatternRule bitextRule = new BitextPatternRule(srcRule, trgRule);
            bitextRule.setCorrectBitextExamples(correctExamples);
            bitextRule.setIncorrectBitextExamples(incorrectExamples);
            bitextRule.setSourceLanguage(srcLang);
            rules.add(bitextRule);
            break;
        case SRC_EXAMPLE:
            srcExample = setExample();
            break;
        case TRG_EXAMPLE:
            trgExample = setExample();
            break;
        case SOURCE:
            srcRule = finalizeRule();
            break;
        case TARGET:
            trgRule = finalizeRule();
            break;
        case EXAMPLE:
            if (inCorrectExample || inIncorrectExample) {
                StringPair pair = new StringPair(srcExample.getExample(), trgExample.getExample());
                if (inCorrectExample) {
                    correctExamples.add(pair);
                } else {
                    // Corrections come from the target example only; without any,
                    // the single-argument constructor is used.
                    List<String> corrections = trgExample.getCorrections();
                    incorrectExamples.add(corrections.isEmpty()
                            ? new IncorrectBitextExample(pair)
                            : new IncorrectBitextExample(pair, corrections));
                }
            }
            // Every closing example element resets all example-state flags.
            inErrorTriggerExample = false;
            inIncorrectExample = false;
            inCorrectExample = false;
            break;
        default:
            super.endElement(namespaceURI, sName, qName);
            break;
    }
}
Also used : StringPair(org.languagetool.bitext.StringPair) ArrayList(java.util.ArrayList) List(java.util.List) IncorrectBitextExample(org.languagetool.rules.bitext.IncorrectBitextExample) Match(org.languagetool.rules.patterns.Match)

Example 4 with StringPair

use of org.languagetool.bitext.StringPair in project languagetool by languagetool-org.

the class CommandLineTools method checkBitext.

/**
   * Checks the bilingual input (bitext) and displays the output (considering the target
   * language) in API format or in the simple text format.
   *
   * NOTE: the positions returned by the rule matches are adjusted
   * according to the data returned by the reader.
   *
   * @param reader   Reader of bitext strings.
   * @param srcLt Source JLanguageTool (used to analyze the text).
   * @param trgLt Target JLanguageTool (used to analyze the text).
   * @param bRules  Bilingual rules used in addition to target standard rules.
   * @param isXmlFormat if true, matches are serialized with RuleMatchAsXmlSerializer
   *                    (wrapped in its XML start and end) to a UTF-8 stream on System.out;
   *                    otherwise they are printed with printMatches.
   * @return The total number of rule matches found on the bitext.
   * @throws IOException if the UTF-8 output stream cannot be created, or propagated
   *                     from the bitext check
   * @since 1.0.1
   */
public static int checkBitext(BitextReader reader, JLanguageTool srcLt, JLanguageTool trgLt, List<BitextRule> bRules, boolean isXmlFormat) throws IOException {
    long startTime = System.currentTimeMillis();
    int contextSize = DEFAULT_CONTEXT_SIZE;
    // Running total of all matches. Only the count is returned, so the matches
    // themselves are not retained beyond the sentence pair they belong to.
    int totalMatches = 0;
    // Number of matches already printed in text mode; handed to printMatches.
    int matchCount = 0;
    int sentCount = 0;
    RuleMatchAsXmlSerializer serializer = new RuleMatchAsXmlSerializer();
    // Wraps System.out and is intentionally not closed, so System.out stays usable.
    PrintStream out = new PrintStream(System.out, true, "UTF-8");
    if (isXmlFormat) {
        out.print(serializer.getXmlStart(null, null));
    }
    for (StringPair srcAndTrg : reader) {
        List<RuleMatch> curMatches = Tools.checkBitext(srcAndTrg.getSource(), srcAndTrg.getTarget(), srcLt, trgLt, bRules);
        List<RuleMatch> fixedMatches = new ArrayList<>();
        for (RuleMatch thisMatch : curMatches) {
            fixedMatches.add(trgLt.adjustRuleMatchPos(thisMatch, reader.getSentencePosition(), reader.getColumnCount(), reader.getLineCount(), reader.getCurrentLine(), null));
        }
        totalMatches += fixedMatches.size();
        if (!fixedMatches.isEmpty()) {
            if (isXmlFormat) {
                String xml = serializer.ruleMatchesToXmlSnippet(fixedMatches, reader.getCurrentLine(), contextSize);
                out.print(xml);
            } else {
                printMatches(fixedMatches, matchCount, reader.getCurrentLine(), contextSize);
                matchCount += fixedMatches.size();
            }
        }
        sentCount++;
    }
    // Time statistics are emitted before the XML end so they fall inside the XML output.
    displayTimeStats(startTime, sentCount, isXmlFormat);
    if (isXmlFormat) {
        out.print(serializer.getXmlEnd());
    }
    return totalMatches;
}
Also used : PrintStream(java.io.PrintStream) RuleMatch(org.languagetool.rules.RuleMatch) StringPair(org.languagetool.bitext.StringPair) ArrayList(java.util.ArrayList) RuleMatchAsXmlSerializer(org.languagetool.tools.RuleMatchAsXmlSerializer)

Aggregations

StringPair (org.languagetool.bitext.StringPair): 4, ArrayList (java.util.ArrayList): 3, RuleMatch (org.languagetool.rules.RuleMatch): 2, PrintStream (java.io.PrintStream): 1, List (java.util.List): 1, IncorrectBitextExample (org.languagetool.rules.bitext.IncorrectBitextExample): 1, Match (org.languagetool.rules.patterns.Match): 1, RuleMatchAsXmlSerializer (org.languagetool.tools.RuleMatchAsXmlSerializer): 1