Search in sources :

Example 1 with RuleMatchAsXmlSerializer

use of org.languagetool.tools.RuleMatchAsXmlSerializer in project languagetool by languagetool-org.

In the class CommandLineTools, the method checkText.

/**
   * Runs the checker over a text and prints the resulting matches.
   *
   * <p>XML and JSON output is written to {@code System.out} through a UTF-8
   * {@code PrintStream}; any other output is delegated to {@code printMatches}.
   * Line numbers of all matches are shifted by {@code lineOffset} before
   * anything is printed.
   *
   * @param contents the text to check (may be more than one sentence)
   * @param lt initialized LanguageTool instance used for checking
   * @param isXmlFormat whether to print the result in XML format (takes
   *        precedence over {@code isJsonFormat})
   * @param isJsonFormat whether to print the result in JSON format
   * @param contextSize error text context size; {@code -1} selects
   *        {@code DEFAULT_CONTEXT_SIZE}
   * @param lineOffset value added to the line and end line of every match
   * @param prevMatches number of previously matched rules, forwarded to
   *        {@code printMatches} for plain output
   * @param apiMode mode of the XML printout; time statistics are only
   *        displayed for {@code NORMAL_API} and non-JSON output
   * @param listUnknownWords if {@code true} (and {@code apiMode} is
   *        {@code NORMAL_API}), XML output uses {@code lt.getUnknownWords()}
   *        instead of the {@code unknownWords} argument
   * @param unknownWords unknown words handed to the XML serializer; not used
   *        for JSON or plain output
   * @return number of rule matches found in the input text
   * @throws IOException declared by the original signature; within this
   *         method the UTF-8 {@code PrintStream} constructor is the visible source
   */
public static int checkText(String contents, JLanguageTool lt, boolean isXmlFormat, boolean isJsonFormat, int contextSize, int lineOffset, int prevMatches, StringTools.ApiPrintMode apiMode, boolean listUnknownWords, List<String> unknownWords) throws IOException {
    final int effectiveContextSize = (contextSize == -1) ? DEFAULT_CONTEXT_SIZE : contextSize;
    final long checkStart = System.currentTimeMillis();
    final List<RuleMatch> matches = lt.check(contents);
    // shift every match by the caller-supplied line offset
    for (RuleMatch match : matches) {
        match.setLine(match.getLine() + lineOffset);
        match.setEndLine(match.getEndLine() + lineOffset);
    }
    final boolean normalApi = apiMode == StringTools.ApiPrintMode.NORMAL_API;
    if (isXmlFormat) {
        List<String> wordsToReport = unknownWords;
        if (listUnknownWords && normalApi) {
            wordsToReport = lt.getUnknownWords();
        }
        String xml = new RuleMatchAsXmlSerializer().ruleMatchesToXml(matches, contents, effectiveContextSize, apiMode, lt.getLanguage(), wordsToReport);
        // the stream wraps System.out and is deliberately left open
        new PrintStream(System.out, true, "UTF-8").print(xml);
    } else if (isJsonFormat) {
        String json = new RuleMatchesAsJsonSerializer().ruleMatchesToJson(matches, contents, effectiveContextSize, lt.getLanguage());
        new PrintStream(System.out, true, "UTF-8").print(json);
    } else {
        printMatches(matches, prevMatches, contents, effectiveContextSize);
    }
    // stats are skipped for JSON output and for any non-normal API mode
    if (isJsonFormat || !normalApi) {
        return matches.size();
    }
    int sentenceCount = lt.getLanguage().getSentenceTokenizer().tokenize(contents).size();
    displayTimeStats(checkStart, sentenceCount, isXmlFormat);
    return matches.size();
}
Also used : RuleMatchesAsJsonSerializer(org.languagetool.tools.RuleMatchesAsJsonSerializer) PrintStream(java.io.PrintStream) RuleMatch(org.languagetool.rules.RuleMatch) SentenceTokenizer(org.languagetool.tokenizers.SentenceTokenizer) RuleMatchAsXmlSerializer(org.languagetool.tools.RuleMatchAsXmlSerializer)

Example 2 with RuleMatchAsXmlSerializer

use of org.languagetool.tools.RuleMatchAsXmlSerializer in project languagetool by languagetool-org.

In the class CommandLineTools, the method checkBitext.

/**
   * Checks bilingual input (bitext) pair by pair and prints the matches
   * either as XML or via {@code printMatches}.
   *
   * <p>Each match is passed through {@code trgLt.adjustRuleMatchPos} using
   * the position data reported by the reader before it is printed or counted.
   * In XML mode the output is framed by the serializer's XML start and end
   * fragments, with the time statistics emitted just before the end fragment.
   *
   * @param reader reader of bitext string pairs
   * @param srcLt source JLanguageTool (used to analyze the text)
   * @param trgLt target JLanguageTool (used to analyze the text and to
   *        adjust match positions)
   * @param bRules bilingual rules passed on to {@code Tools.checkBitext}
   * @param isXmlFormat whether to print the result in XML format
   * @return the number of rule matches found on the bitext
   * @throws IOException declared by the original signature; within this
   *         method the UTF-8 {@code PrintStream} constructor is the visible source
   * @since 1.0.1
   */
public static int checkBitext(BitextReader reader, JLanguageTool srcLt, JLanguageTool trgLt, List<BitextRule> bRules, boolean isXmlFormat) throws IOException {
    final long begin = System.currentTimeMillis();
    final int context = DEFAULT_CONTEXT_SIZE;
    final List<RuleMatch> allMatches = new ArrayList<>();
    int printedSoFar = 0;
    int sentences = 0;
    final RuleMatchAsXmlSerializer xmlSerializer = new RuleMatchAsXmlSerializer();
    // wraps System.out; intentionally not closed
    final PrintStream utf8Out = new PrintStream(System.out, true, "UTF-8");
    if (isXmlFormat) {
        utf8Out.print(xmlSerializer.getXmlStart(null, null));
    }
    for (StringPair pair : reader) {
        List<RuleMatch> adjusted = new ArrayList<>();
        for (RuleMatch raw : Tools.checkBitext(pair.getSource(), pair.getTarget(), srcLt, trgLt, bRules)) {
            adjusted.add(trgLt.adjustRuleMatchPos(raw, reader.getSentencePosition(), reader.getColumnCount(), reader.getLineCount(), reader.getCurrentLine(), null));
        }
        allMatches.addAll(adjusted);
        // every pair counts as one sentence, whether or not it produced matches
        sentences++;
        if (adjusted.isEmpty()) {
            continue;
        }
        if (isXmlFormat) {
            utf8Out.print(xmlSerializer.ruleMatchesToXmlSnippet(adjusted, reader.getCurrentLine(), context));
        } else {
            printMatches(adjusted, printedSoFar, reader.getCurrentLine(), context);
            printedSoFar += adjusted.size();
        }
    }
    displayTimeStats(begin, sentences, isXmlFormat);
    if (isXmlFormat) {
        utf8Out.print(xmlSerializer.getXmlEnd());
    }
    return allMatches.size();
}
Also used : PrintStream(java.io.PrintStream) RuleMatch(org.languagetool.rules.RuleMatch) StringPair(org.languagetool.bitext.StringPair) ArrayList(java.util.ArrayList) RuleMatchAsXmlSerializer(org.languagetool.tools.RuleMatchAsXmlSerializer)

Aggregations

PrintStream (java.io.PrintStream)2 RuleMatch (org.languagetool.rules.RuleMatch)2 RuleMatchAsXmlSerializer (org.languagetool.tools.RuleMatchAsXmlSerializer)2 ArrayList (java.util.ArrayList)1 StringPair (org.languagetool.bitext.StringPair)1 SentenceTokenizer (org.languagetool.tokenizers.SentenceTokenizer)1 RuleMatchesAsJsonSerializer (org.languagetool.tools.RuleMatchesAsJsonSerializer)1