use of org.omegat.tokenizer.ITokenizer in project omegat by omegat-org.
the class DiffDriver method render.
/**
* Given two strings, perform a token-level diff comparison and return a
* Render object holding the combined text plus the runs that mark which
* parts were deleted, inserted or (optionally) left unchanged.
* <p>
* Both strings are tokenized with the current project's source tokenizer.
* If the project has no source tokenizer, or either tokenization yields
* {@code null}, a newly constructed Render with no runs added is returned.
*
* @param original Original string
* @param revised Revised string for comparison
* @param optimize When {@code true}, unchanged tokens are additionally
*        recorded as NOCHANGE runs and the result is passed through
*        {@code optimizeRender}; the optimized render is returned only if it
*        has fewer formatting entries than the unoptimized one
* @return Render object
*/
public static Render render(String original, String revised, boolean optimize) {
Render result = new Render();
ITokenizer tokenizer = Core.getProject().getSourceTokenizer();
if (tokenizer == null) {
// Project has probably been closed.
return result;
}
String[] originalStrings = tokenizer.tokenizeVerbatimToStrings(original);
String[] revisedStrings = tokenizer.tokenizeVerbatimToStrings(revised);
if (originalStrings == null || revisedStrings == null) {
return result;
}
// Get "change script", a linked list of Diff.changes.
Diff diff = new Diff(originalStrings, revisedStrings);
Diff.change script = diff.diff_2(false);
assert (validate(script, originalStrings, revisedStrings));
StringBuilder rawText = new StringBuilder();
// Iterate one index past the last original token in
// case there was an insertion at the end.
for (int n = 0; n <= originalStrings.length; n++) {
// Look up the change, if any, that applies at original token n.
// NOTE(review): search() is defined elsewhere; presumably it matches
// on the change's position in the original - confirm.
Diff.change c = search(n, script);
if (c == null) {
// No change for this token.
if (n < originalStrings.length) {
if (optimize) {
result.addRun(rawText.length(), originalStrings[n].length(), Type.NOCHANGE);
}
rawText.append(originalStrings[n]);
}
continue;
} else {
// Next time, start search from the next change.
script = c.link;
}
// Handle deletions
if (c.deleted > 0) {
int start = rawText.length();
// rawText.append("-[");
for (int m = 0; m < c.deleted; m++) {
rawText.append(originalStrings[n + m]);
}
// rawText.append("]");
// Skip the tokens just emitted; the loop's n++ then moves to the
// first original token after the deleted range.
n += c.deleted - 1;
result.addRun(start, rawText.length() - start, Type.DELETE);
}
// Handle insertions
if (c.inserted > 0) {
int start = rawText.length();
// rawText.append("+[");
// c.line1 is used as the index of the first inserted token in the
// revised token array.
for (int m = 0; m < c.inserted; m++) {
rawText.append(revisedStrings[c.line1 + m]);
}
// rawText.append("]");
result.addRun(start, rawText.length() - start, Type.INSERT);
// add the original token in as well.
// (Only for a pure insertion: nothing was deleted, so original token n
// has not been emitted yet. The bounds check covers an insertion at
// the very end, where n == originalStrings.length.)
if (c.deleted == 0 && n < originalStrings.length) {
if (optimize) {
result.addRun(rawText.length(), originalStrings[n].length(), Type.NOCHANGE);
}
rawText.append(originalStrings[n]);
}
}
}
result.text = rawText.toString();
if (optimize) {
// Keep the optimized render only when it actually reduces the number
// of formatting entries.
Render optimized = optimizeRender(result, 0);
return (optimized.formatting.size() < result.formatting.size()) ? optimized : result;
} else {
return result;
}
}
use of org.omegat.tokenizer.ITokenizer in project omegat by omegat-org.
the class PluginUtils method searchForTokenizer.
/**
 * Searches {@code TOKENIZER_CLASSES} for a tokenizer class that declares
 * support for the given language.
 * <p>
 * Classes without a {@code Tokenizer} annotation are ignored. When the
 * annotation's language list consists solely of
 * {@code Tokenizer.DISCOVER_AT_RUNTIME}, the class is instantiated through
 * its no-argument constructor and asked for its supported languages; if
 * that instantiation fails, the failure is logged and the class is skipped.
 *
 * @param lang
 *            language code to look for; it is lower-cased with
 *            {@code Locale.ENGLISH} before being compared
 * @return the first matching class whose annotation is flagged as default;
 *         otherwise the first matching class encountered; {@code null} if
 *         {@code lang} is empty or nothing matches
 */
private static Class<?> searchForTokenizer(String lang) {
    if (lang.isEmpty()) {
        return null;
    }
    lang = lang.toLowerCase(Locale.ENGLISH);
    // Choose first relevant tokenizer as fallback if no
    // "default" tokenizer is found.
    Class<?> fallback = null;
    for (Class<?> c : TOKENIZER_CLASSES) {
        Tokenizer ann = c.getAnnotation(Tokenizer.class);
        if (ann == null) {
            continue;
        }
        String[] languages = ann.languages();
        if (languages.length == 1 && languages[0].equals(Tokenizer.DISCOVER_AT_RUNTIME)) {
            try {
                // Class.newInstance() is deprecated and rethrows whatever the
                // constructor throws without declaring it; going through the
                // Constructor wraps such failures in InvocationTargetException,
                // which is a ReflectiveOperationException and is handled below.
                languages = ((ITokenizer) c.getDeclaredConstructor().newInstance())
                        .getSupportedLanguages();
            } catch (ReflectiveOperationException ex) {
                Log.log(ex);
                // The supported languages are unknown, so this class cannot
                // be matched; do not compare against the sentinel value.
                continue;
            }
        }
        for (String s : languages) {
            if (lang.equals(s)) {
                if (ann.isDefault()) {
                    // Return best possible match.
                    return c;
                } else if (fallback == null) {
                    fallback = c;
                }
            }
        }
    }
    return fallback;
}
use of org.omegat.tokenizer.ITokenizer in project omegat by omegat-org.
the class MatchesTextArea method substituteNumbers.
/**
 * Tries to replace the numbers in a match's target text with the numbers
 * found in the source segment, using the current project's source and
 * target tokenizers.
 * <p>
 * Substitution only takes place when the source segment and the match
 * contain the same count of numbers, and the match's source and target
 * contain the same numbers (possibly in a different order). Each number is
 * substituted at its proper position.
 *
 * @param source
 *            The source segment
 * @param sourceMatch
 *            The source of the match
 * @param targetMatch
 *            The target of the match
 * @return The target match with numbers possibly substituted
 */
@Override
public String substituteNumbers(String source, String sourceMatch, String targetMatch) {
    // Arguments are evaluated left to right, so the source tokenizer is
    // still fetched before the target tokenizer.
    return substituteNumbers(source, sourceMatch, targetMatch,
            Core.getProject().getSourceTokenizer(),
            Core.getProject().getTargetTokenizer());
}
use of org.omegat.tokenizer.ITokenizer in project omegat by omegat-org.
the class MatchesTextArea method setActiveMatch.
/**
 * Makes the fuzzy match at the given index (counted from 0) the active one
 * and restyles the document so that it is highlighted. Does nothing when
 * the index is out of range or already active.
 */
@Override
public void setActiveMatch(int activeMatch) {
    UIThreadsUtil.mustBeSwingThread();
    boolean outOfRange = activeMatch < 0 || activeMatch >= matches.size();
    if (outOfRange || this.activeMatch == activeMatch) {
        return;
    }
    this.activeMatch = activeMatch;

    // Wipe all existing styling before re-applying it.
    StyledDocument styledDoc = (StyledDocument) getDocument();
    styledDoc.setCharacterAttributes(0, styledDoc.getLength(), ATTRIBUTES_EMPTY, true);

    final int matchStart = delimiters.get(activeMatch);
    int matchEnd = delimiters.get(activeMatch + 1);
    NearString activeNearString = matches.get(activeMatch);

    ITokenizer sourceTokenizer = Core.getProject().getSourceTokenizer();
    if (sourceTokenizer == null) {
        return;
    }

    // Style the source text of the active match, token by token.
    int sourceOffset = sourcePos.get(activeMatch);
    if (sourceOffset != -1) {
        Token[] sourceTokens = sourceTokenizer.tokenizeVerbatim(activeNearString.source);
        // fix for bug 1586397
        byte[] tokenAttrs = activeNearString.attr;
        for (int t = 0; t < sourceTokens.length; t++) {
            Token current = sourceTokens[t];
            int tokenStart = matchStart + sourceOffset + current.getOffset();
            int tokenLength = current.getLength();
            byte flags = tokenAttrs[t];
            if ((flags & StringData.UNIQ) != 0) {
                styledDoc.setCharacterAttributes(tokenStart, tokenLength, ATTRIBUTES_CHANGED, false);
            } else if ((flags & StringData.PAIR) != 0) {
                styledDoc.setCharacterAttributes(tokenStart, tokenLength, ATTRIBUTES_UNCHANGED, false);
            }
        }
    }

    // Style the diff runs of every displayed match; the active match gets
    // the "active" variants of the deleted/inserted attributes.
    for (int matchIdx = 0; matchIdx < diffInfos.size(); matchIdx++) {
        Map<Integer, List<TextRun>> variants = diffInfos.get(matchIdx);
        // One entry per diff variant (${diff}, ${diffReversed}, ...)
        for (Entry<Integer, List<TextRun>> variant : variants.entrySet()) {
            int variantPos = variant.getKey();
            if (variantPos == -1) {
                continue;
            }
            // One run per styled chunk (added or deleted)
            for (TextRun run : variant.getValue()) {
                int runStart = delimiters.get(matchIdx) + variantPos + run.start;
                switch (run.type) {
                case DELETE:
                    if (matchIdx == activeMatch) {
                        styledDoc.setCharacterAttributes(runStart, run.length, ATTRIBUTES_DELETED_ACTIVE, false);
                    } else {
                        styledDoc.setCharacterAttributes(runStart, run.length, ATTRIBUTES_DELETED_INACTIVE, false);
                    }
                    break;
                case INSERT:
                    if (matchIdx == activeMatch) {
                        styledDoc.setCharacterAttributes(runStart, run.length, ATTRIBUTES_INSERTED_ACTIVE, false);
                    } else {
                        styledDoc.setCharacterAttributes(runStart, run.length, ATTRIBUTES_INSERTED_INACTIVE, false);
                    }
                    break;
                case NOCHANGE:
                }
            }
        }
    }

    styledDoc.setCharacterAttributes(matchStart, matchEnd - matchStart, ATTRIBUTES_SELECTED, false);
    // Move the caret to just before the two trailing newlines now, then
    // back to the start of the match on a later event-dispatch pass.
    setCaretPosition(matchEnd - 2);
    SwingUtilities.invokeLater(new Runnable() {
        @Override
        public void run() {
            setCaretPosition(matchStart);
        }
    });
}
use of org.omegat.tokenizer.ITokenizer in project omegat by omegat-org.
the class EditorUtils method replaceGlossaryEntries.
/**
 * Shorthand for {@link #replaceGlossaryEntries(String, List, Locale, ITokenizer)} that fills in the remaining
 * arguments from the current project: the source language's locale, the source tokenizer, and the glossary
 * entries that the glossary manager returns for {@code text}.
 *
 * @param text
 *            Text in which to replace glossary hits. Assumed to be in the project's source language.
 * @return Text with source glossary terms replaced with target terms
 */
public static String replaceGlossaryEntries(String text) {
    final Locale sourceLocale = Core.getProject().getProjectProperties().getSourceLanguage().getLocale();
    final ITokenizer sourceTokenizer = Core.getProject().getSourceTokenizer();
    // The glossary lookup stays inline so it still runs after the locale
    // and tokenizer have been obtained.
    return replaceGlossaryEntries(
            text,
            Core.getGlossaryManager().getGlossaryEntries(text),
            sourceLocale,
            sourceTokenizer);
}
Aggregations