use of org.languagetool.rules.patterns.Match.IncludeRange in project languagetool by languagetool-org.
the class MatchState method toFinalString.
/**
 * Produces every string that results from applying the match element to the
 * currently set token.
 *
 * <p>Steps, in order: optional regex replacement on the token text, optional
 * synthesis of word forms for the requested POS tag, case conversion, optional
 * appending of the skipped tokens, and an optional tagger-based spelling filter
 * that blanks out forms the tagger does not recognize.
 *
 * @param lang language passed to case conversion; when it is {@code null} the
 *     spelling filter is skipped
 * @return the formatted strings; the array can be empty when synthesis for a
 *     non-regexp POS tag yields no forms
 * @throws IOException declared by the method; presumably raised by the
 *     synthesizer or the tagger
 */
public final String[] toFinalString(Language lang) throws IOException {
  String[] results = new String[1];

  if (formattedToken != null) {
    final int readings = formattedToken.getReadingsLength();
    results[0] = formattedToken.getToken();

    // Optional regex rewrite of the surface form.
    final Pattern regex = match.getRegexMatch();
    final String replacement = match.getRegexReplace();
    if (regex != null) {
      results[0] = regex.matcher(results[0]).replaceAll(replacement);
    }

    final String requestedPos = match.getPosTag();
    if (requestedPos != null) {
      if (synthesizer == null) {
        // Without a synthesizer the plain token is used (this drops any regex result).
        results[0] = formattedToken.getToken();
      } else if (match.isPostagRegexp()) {
        final TreeSet<String> forms = new TreeSet<>();
        // True when the last lemma-less reading was untagged or a sentence/paragraph marker.
        boolean singleForm = false;
        for (int r = 0; r < readings; r++) {
          final AnalyzedToken reading = formattedToken.getAnalyzedToken(r);
          if (reading.getLemma() != null) {
            continue;
          }
          final String tag = reading.getPOSTag();
          if (tag == null) {
            forms.add(formattedToken.getToken());
            singleForm = true;
          } else if (SENTENCE_START_TAGNAME.equals(tag)
              || SENTENCE_END_TAGNAME.equals(tag)
              || PARAGRAPH_END_TAGNAME.equals(tag)) {
            if (!singleForm) {
              forms.add(formattedToken.getToken());
            }
            singleForm = true;
          } else {
            singleForm = false;
          }
        }

        final String targetPosTag = getTargetPosTag();
        if (!singleForm) {
          for (int r = 0; r < readings; r++) {
            final String[] synthesized =
                synthesizer.synthesize(formattedToken.getAnalyzedToken(r), targetPosTag, true);
            if (synthesized != null) {
              for (String form : synthesized) {
                forms.add(form);
              }
            }
          }
        }

        if (!forms.isEmpty()) {
          results = forms.toArray(new String[0]);
        } else if (match.checksSpelling()) {
          results[0] = "";
        } else {
          // Parenthesized token marks that no form could be produced.
          results[0] = "(" + formattedToken.getToken() + ")";
        }
      } else {
        final TreeSet<String> forms = new TreeSet<>();
        for (int r = 0; r < readings; r++) {
          final String[] synthesized =
              synthesizer.synthesize(formattedToken.getAnalyzedToken(r), requestedPos);
          if (synthesized != null) {
            for (String form : synthesized) {
              forms.add(form);
            }
          }
        }
        results = forms.toArray(new String[0]);
      }
    }
  }

  // Pick the token whose casing serves as the template for the output.
  final String caseTemplate;
  if (!match.isStaticLemma()) {
    caseTemplate = formattedToken == null ? "" : formattedToken.getToken();
  } else {
    caseTemplate = matchedToken == null ? "" : matchedToken.getToken();
  }
  for (int i = 0; i < results.length; i++) {
    results[i] = convertCase(results[i], caseTemplate, lang);
  }

  // TODO should case conversion happen before or after including skipped tokens?
  final IncludeRange includeSkipped = match.getIncludeSkipped();
  final boolean appendSkipped = includeSkipped != IncludeRange.NONE
      && skippedTokens != null
      && !skippedTokens.isEmpty();
  if (appendSkipped) {
    for (int i = 0; i < results.length; i++) {
      final String base = results[i] == null ? "" : results[i];
      results[i] = base + skippedTokens;
    }
  }

  if (match.checksSpelling() && lang != null) {
    // tagger-based speller: a form with neither lemma nor tag is treated as misspelled
    final List<AnalyzedTokenReadings> tagged = lang.getTagger().tag(Arrays.asList(results));
    for (int i = 0; i < results.length; i++) {
      final AnalyzedToken firstReading = tagged.get(i).getAnalyzedToken(0);
      if (firstReading.getLemma() == null && firstReading.hasNoTag()) {
        results[i] = "";
      }
    }
  }
  return results;
}
use of org.languagetool.rules.patterns.Match.IncludeRange in project languagetool by languagetool-org.
the class MatchState method setToken.
/**
 * Sets the token to be formatted etc. and includes the support for
 * including the skipped tokens.
 *
 * <p>When skipped tokens are to be included ({@code next > 1} and the match's
 * include range is not {@link IncludeRange#NONE}), the texts of the tokens
 * after {@code tokens[index]} are concatenated into {@code skippedTokens},
 * with a single space inserted before each token that has preceding
 * whitespace. For {@link IncludeRange#FOLLOWING} the formatted token itself is
 * cleared and no leading space is added before the first skipped token.
 * Otherwise {@code skippedTokens} is set to the empty string.
 *
 * <p>An {@code index} beyond the end of {@code tokens} is clamped to the last
 * token, and the range of skipped tokens is truncated at the end of the array
 * instead of running past it.
 *
 * @param tokens Array of tokens
 * @param index Index of the token to be formatted
 * @param next Position of the next token (the skipped tokens are the ones
 *     between the tokens[index] and tokens[next])
 */
public final void setToken(AnalyzedTokenReadings[] tokens, int index, int next) {
  int idx = index;
  if (index >= tokens.length) {
    // TODO: hacky workaround, find a proper solution. See EnglishPatternRuleTest.testBug()
    idx = tokens.length - 1;
  }
  setToken(tokens[idx]);
  IncludeRange includeSkipped = match.getIncludeSkipped();
  if (next > 1 && includeSkipped != IncludeRange.NONE) {
    StringBuilder sb = new StringBuilder();
    if (includeSkipped == IncludeRange.FOLLOWING) {
      // Only the skipped tokens are wanted, so drop the formatted token itself.
      formattedToken = null;
    }
    // Never read past the array: index may be out of range (see workaround above)
    // and index + next may point beyond the last token.
    int end = Math.min(index + next, tokens.length);
    for (int k = index + 1; k < end; k++) {
      // No leading space before the first skipped token when the formatted token is omitted.
      boolean firstOfFollowing = k == index + 1 && includeSkipped == IncludeRange.FOLLOWING;
      if (tokens[k].isWhitespaceBefore() && !firstOfFollowing) {
        sb.append(' ');
      }
      sb.append(tokens[k].getToken());
    }
    skippedTokens = sb.toString();
  } else {
    skippedTokens = "";
  }
}
Aggregations