Use of org.languagetool.tools.RuleMatchAsXmlSerializer in the project languagetool by languagetool-org.
Class CommandLineTools, method checkText.
/**
 * Runs the given LanguageTool instance over a text and writes the outcome to System.out.
 *
 * <p>The output format is XML when {@code isXmlFormat} is set, otherwise JSON when
 * {@code isJsonFormat} is set, otherwise the listing produced by {@code printMatches}.
 * Before printing, the line and end-line of every match are shifted by {@code lineOffset}.
 * Time statistics are displayed only in {@code NORMAL_API} mode and never for JSON output.
 *
 * @param contents the text to check (may contain more than one sentence)
 * @param lt an initialized LanguageTool instance
 * @param isXmlFormat whether to print the result as XML (takes precedence over JSON)
 * @param isJsonFormat whether to print the result as JSON
 * @param contextSize size of the error context; {@code -1} selects {@code DEFAULT_CONTEXT_SIZE}
 * @param lineOffset value added to the line numbers of all matches
 * @param prevMatches number of matches reported earlier, forwarded to the plain-text printer
 * @param apiMode XML/JSON printout mode
 * @param listUnknownWords if set, and the output is XML in {@code NORMAL_API} mode, the unknown
 *        words reported by {@code lt} are serialized instead of {@code unknownWords}
 * @param unknownWords unknown words handed to the XML serializer when they are not taken
 *        from {@code lt}
 * @return the number of rule matches found in the text
 * @throws IOException if one of the underlying calls reports an I/O problem
 */
public static int checkText(String contents, JLanguageTool lt, boolean isXmlFormat, boolean isJsonFormat, int contextSize, int lineOffset, int prevMatches, StringTools.ApiPrintMode apiMode, boolean listUnknownWords, List<String> unknownWords) throws IOException {
  final int effectiveContextSize = (contextSize == -1) ? DEFAULT_CONTEXT_SIZE : contextSize;
  final long checkStarted = System.currentTimeMillis();
  final List<RuleMatch> matches = lt.check(contents);
  final boolean normalApi = apiMode == StringTools.ApiPrintMode.NORMAL_API;

  // Shift the reported positions so they are relative to the caller's line numbering.
  for (RuleMatch match : matches) {
    match.setLine(lineOffset + match.getLine());
    match.setEndLine(lineOffset + match.getEndLine());
  }

  if (isXmlFormat) {
    final List<String> wordsForXml = (listUnknownWords && normalApi) ? lt.getUnknownWords() : unknownWords;
    final String xml = new RuleMatchAsXmlSerializer()
        .ruleMatchesToXml(matches, contents, effectiveContextSize, apiMode, lt.getLanguage(), wordsForXml);
    new PrintStream(System.out, true, "UTF-8").print(xml);
  } else if (isJsonFormat) {
    final String json = new RuleMatchesAsJsonSerializer()
        .ruleMatchesToJson(matches, contents, effectiveContextSize, lt.getLanguage());
    new PrintStream(System.out, true, "UTF-8").print(json);
  } else {
    printMatches(matches, prevMatches, contents, effectiveContextSize);
  }

  // Statistics are shown only when not in a buffered mode, and never alongside JSON output.
  if (normalApi && !isJsonFormat) {
    final int sentenceCount = lt.getLanguage().getSentenceTokenizer().tokenize(contents).size();
    displayTimeStats(checkStarted, sentenceCount, isXmlFormat);
  }
  return matches.size();
}
Use of org.languagetool.tools.RuleMatchAsXmlSerializer in the project languagetool by languagetool-org.
Class CommandLineTools, method checkBitext.
/**
 * Checks the bilingual input (bitext) and displays the output (considering the target
 * language) in API format or in the simple text format.
 *
 * NOTE: the positions returned by the rule matches are adjusted
 * according to the data returned by the reader.
 *
 * @param reader Reader of bitext strings.
 * @param srcLt Source JLanguageTool (used to analyze the text).
 * @param trgLt Target JLanguageTool (used to analyze the text).
 * @param bRules Bilingual rules used in addition to target standard rules.
 * @param isXmlFormat whether to print the matches as XML snippets (wrapped in the
 *        serializer's XML start/end) instead of the simple text format
 * @return The number of rules matched on the bitext.
 * @throws IOException if one of the underlying calls reports an I/O problem
 * @since 1.0.1
 */
public static int checkBitext(BitextReader reader, JLanguageTool srcLt, JLanguageTool trgLt, List<BitextRule> bRules, boolean isXmlFormat) throws IOException {
  long startTime = System.currentTimeMillis();
  int contextSize = DEFAULT_CONTEXT_SIZE;
  // Running total of matches. Only the count is needed, so matches are not retained
  // across sentence pairs (keeps memory bounded on large inputs).
  int matchCount = 0;
  int sentCount = 0;
  RuleMatchAsXmlSerializer serializer = new RuleMatchAsXmlSerializer();
  PrintStream out = new PrintStream(System.out, true, "UTF-8");
  if (isXmlFormat) {
    out.print(serializer.getXmlStart(null, null));
  }
  for (StringPair srcAndTrg : reader) {
    List<RuleMatch> curMatches = Tools.checkBitext(srcAndTrg.getSource(), srcAndTrg.getTarget(), srcLt, trgLt, bRules);
    // Re-anchor each match using the position data supplied by the reader.
    List<RuleMatch> fixedMatches = new ArrayList<>();
    for (RuleMatch thisMatch : curMatches) {
      fixedMatches.add(trgLt.adjustRuleMatchPos(thisMatch, reader.getSentencePosition(), reader.getColumnCount(), reader.getLineCount(), reader.getCurrentLine(), null));
    }
    if (!fixedMatches.isEmpty()) {
      if (isXmlFormat) {
        String xml = serializer.ruleMatchesToXmlSnippet(fixedMatches, reader.getCurrentLine(), contextSize);
        out.print(xml);
      } else {
        // matchCount still holds the number of matches seen before this pair.
        printMatches(fixedMatches, matchCount, reader.getCurrentLine(), contextSize);
      }
      matchCount += fixedMatches.size();
    }
    sentCount++;
  }
  // The statistics are emitted before the closing XML, as in the original ordering.
  displayTimeStats(startTime, sentCount, isXmlFormat);
  if (isXmlFormat) {
    out.print(serializer.getXmlEnd());
  }
  return matchCount;
}
Aggregations