use of org.omegat.util.Token in project omegat by omegat-org.
the class FuzzyMatcher method buildSimilarityData.
/**
 * Builds the per-token similarity marks used for color highlighting in the
 * match window.
 * <p>
 * A match token counts as "found" when
 * {@code DefaultTokenizer.isContains(sourceTokens, token)} returns true. Each
 * entry of the result describes the match token at the same index:
 * <ul>
 * <li>{@code StringData.UNIQ} - the token itself is not found;</li>
 * <li>{@code StringData.PAIR} - the token is found, but its left or right
 * neighbour is not;</li>
 * <li>{@code 0} - the token and both of its neighbours are found.</li>
 * </ul>
 * A neighbour that does not exist (before the first or after the last token)
 * is treated as found.
 *
 * @param sourceTokens
 *            tokens the match tokens are looked up in
 * @param matchTokens
 *            tokens to classify
 * @return array with one mark per element of {@code matchTokens}
 */
public static byte[] buildSimilarityData(Token[] sourceTokens, Token[] matchTokens) {
    final int count = matchTokens.length;
    final byte[] marks = new byte[count];
    // There is nothing to the left of the first token: treat it as found
    boolean previousFound = true;
    for (int idx = 0; idx < count; idx++) {
        final int nextIdx = idx + 1;
        // Likewise, nothing to the right of the last token counts as found
        final boolean nextFound = nextIdx >= count
                || DefaultTokenizer.isContains(sourceTokens, matchTokens[nextIdx]);
        final boolean currentFound = DefaultTokenizer.isContains(sourceTokens, matchTokens[idx]);
        if (!currentFound) {
            marks[idx] = StringData.UNIQ;
        } else if (!(previousFound && nextFound)) {
            marks[idx] = StringData.PAIR;
        }
        // otherwise the entry keeps its default value 0
        previousFound = currentFound;
    }
    return marks;
}
use of org.omegat.util.Token in project omegat by omegat-org.
the class LineLengthLimitWriter method breakAt.
/**
 * Writes the buffered line up to the given position and rebases the tokens
 * onto what is left in the buffer.
 * <p>
 * The written part has its trailing whitespace stripped and is removed from
 * the buffer. A break EOL is written when the buffer still holds text
 * afterwards, a source EOL otherwise. Tokens starting before {@code pos} are
 * replaced with {@code null}; every other token is replaced with a new token
 * whose offset is reduced by {@code pos}.
 *
 * @param pos
 *            offset in the buffer at which the line is cut
 * @param tokens
 *            tokens of the buffered line; updated in place
 * @throws IOException
 *             propagated from the underlying write calls
 */
void breakAt(int pos, Token[] tokens) throws IOException {
    // Trailing whitespace of the emitted part is discarded
    String head = str.substring(0, pos);
    out.write(StringUtil.rstrip(head));
    str.delete(0, pos);

    if (str.length() == 0) {
        writeSourceEol();
    } else {
        writeBreakEol();
    }

    for (int idx = 0; idx < tokens.length; idx++) {
        Token current = tokens[idx];
        boolean survives = current != null && current.getOffset() >= pos;
        tokens[idx] = survives ? new Token(null, current.getOffset() - pos, current.getLength()) : null;
    }
}
use of org.omegat.util.Token in project omegat by omegat-org.
the class LineLengthLimitWriter method getBreakPos.
/**
 * Chooses the offset in the buffered line at which it should be broken.
 * <p>
 * All limits are compared in code points, while the returned value is an
 * offset into the buffer. The candidates are tried in this order:
 * <ol>
 * <li>the whole line, when it (or at least everything up to the end of its
 * last non-space token) fits into {@code maxLineLength};</li>
 * <li>the first token at or past {@code lineLength} that follows a run of
 * spaces starting before {@code maxLineLength};</li>
 * <li>the start or end of a spaces token lying between {@code lineLength}
 * (inclusive) and {@code maxLineLength} (exclusive);</li>
 * <li>any token boundary in that same range accepted by
 * {@code isPossibleBreakBefore};</li>
 * <li>around the first token at or past {@code lineLength}: the nearest
 * earlier token start accepted by {@code isPossibleBreakBefore}, else the
 * start of that token (or its end, when it is the very first array
 * element);</li>
 * <li>the whole line.</li>
 * </ol>
 *
 * @param tokens
 *            tokens of the buffered line; {@code null} entries are skipped
 * @return offset in the buffer to break at
 */
int getBreakPos(Token[] tokens) {
    // Line is not longer than the maximum - use it completely
    if (str.codePointCount(0, str.length()) <= maxLineLength) {
        return str.length();
    }

    // Maybe only trailing spaces stick out beyond the maximum:
    // find where the last non-space token ends
    int lastContentEnd = 0;
    for (int idx = tokens.length - 1; idx >= 0; idx--) {
        Token candidate = tokens[idx];
        if (candidate != null && !isSpaces(candidate)) {
            lastContentEnd = candidate.getOffset() + candidate.getLength();
            break;
        }
    }
    if (str.codePointCount(0, lastContentEnd) <= maxLineLength) {
        return str.length();
    }

    // Try to break where a run of spaces ends
    int spaceRunStart = -1;
    for (Token tok : tokens) {
        if (tok == null) {
            continue;
        }
        // The spaces themselves may reach beyond the maximum, as long as
        // the run started before it
        if (str.codePointCount(0, tok.getOffset()) >= lineLength
                && spaceRunStart >= 0
                && str.codePointCount(0, spaceRunStart) < maxLineLength) {
            return tok.getOffset();
        }
        if (!isSpaces(tok)) {
            spaceRunStart = -1;
        } else if (spaceRunStart < 0) {
            spaceRunStart = tok.getOffset();
        }
    }

    // Try to break on either boundary of a spaces token
    for (Token tok : tokens) {
        if (tok == null) {
            continue;
        }
        int startCps = str.codePointCount(0, tok.getOffset());
        if (startCps >= lineLength && startCps < maxLineLength && isSpaces(tok)) {
            return tok.getOffset();
        }
        int endCps = str.codePointCount(0, tok.getOffset() + tok.getLength());
        if (endCps >= lineLength && endCps < maxLineLength && isSpaces(tok)) {
            return tok.getOffset() + tok.getLength();
        }
    }

    // No usable space boundary - accept any token boundary where a break
    // is allowed
    for (Token tok : tokens) {
        if (tok == null) {
            continue;
        }
        int startCps = str.codePointCount(0, tok.getOffset());
        if (startCps >= lineLength && startCps < maxLineLength
                && isPossibleBreakBefore(tok.getOffset())) {
            return tok.getOffset();
        }
        int endCps = str.codePointCount(0, tok.getOffset() + tok.getLength());
        if (endCps >= lineLength && endCps < maxLineLength
                && isPossibleBreakBefore(tok.getOffset() + tok.getLength())) {
            return tok.getOffset() + tok.getLength();
        }
    }

    // Fall back to the latest token before the line length
    for (int idx = 0; idx < tokens.length; idx++) {
        Token tok = tokens[idx];
        if (tok == null || str.codePointCount(0, tok.getOffset()) < lineLength) {
            continue;
        }
        if (idx == 0) {
            // Nothing precedes this token in the array: break after it
            return tok.getOffset() + tok.getLength();
        }
        // Walk backwards to the closest earlier token start that allows a break
        for (int back = idx - 1; back >= 0; back--) {
            Token earlier = tokens[back];
            if (earlier != null && earlier.getOffset() > 0
                    && isPossibleBreakBefore(earlier.getOffset())) {
                return earlier.getOffset();
            }
        }
        return tok.getOffset();
    }

    // Nothing found - use the full line
    return str.length();
}
use of org.omegat.util.Token in project omegat by omegat-org.
the class MatchesTextArea method setActiveMatch.
/**
 * Makes the fuzzy match with the given index the active one and re-applies
 * the highlighting of the displayed matches (indices start from 0).
 * <p>
 * The call is ignored when the index is out of range or already active.
 * {@code UIThreadsUtil.mustBeSwingThread()} is invoked first.
 *
 * @param activeMatch
 *            zero-based index of the match to activate
 */
@Override
public void setActiveMatch(int activeMatch) {
    UIThreadsUtil.mustBeSwingThread();

    boolean outOfRange = activeMatch < 0 || activeMatch >= matches.size();
    if (outOfRange || this.activeMatch == activeMatch) {
        return;
    }
    this.activeMatch = activeMatch;

    StyledDocument doc = (StyledDocument) getDocument();
    // Wipe the previous styling of the whole document
    doc.setCharacterAttributes(0, doc.getLength(), ATTRIBUTES_EMPTY, true);

    final int matchStart = delimiters.get(activeMatch);
    final int matchEnd = delimiters.get(activeMatch + 1);
    NearString activeEntry = matches.get(activeMatch);

    ITokenizer sourceTokenizer = Core.getProject().getSourceTokenizer();
    if (sourceTokenizer == null) {
        return;
    }

    // Apply sourceText styling
    final int sourceOffset = sourcePos.get(activeMatch);
    if (sourceOffset != -1) {
        Token[] sourceTokens = sourceTokenizer.tokenizeVerbatim(activeEntry.source);
        // fix for bug 1586397
        byte[] marks = activeEntry.attr;
        for (int idx = 0; idx < sourceTokens.length; idx++) {
            Token tok = sourceTokens[idx];
            int tokStart = matchStart + sourceOffset + tok.getOffset();
            int tokLength = tok.getLength();
            byte mark = marks[idx];
            if ((mark & StringData.UNIQ) != 0) {
                doc.setCharacterAttributes(tokStart, tokLength, ATTRIBUTES_CHANGED, false);
            } else if ((mark & StringData.PAIR) != 0) {
                doc.setCharacterAttributes(tokStart, tokLength, ATTRIBUTES_UNCHANGED, false);
            }
        }
    }

    // Style the diff chunks of every displayed match, not only the active one
    for (int matchIdx = 0; matchIdx < diffInfos.size(); matchIdx++) {
        final boolean isActive = matchIdx == activeMatch;
        Map<Integer, List<TextRun>> variants = diffInfos.get(matchIdx);
        // One entry per diff variant (${diff}, ${diffReversed}, ...)
        for (Entry<Integer, List<TextRun>> variant : variants.entrySet()) {
            int diffPos = variant.getKey();
            if (diffPos == -1) {
                continue;
            }
            // One run per style chunk (added or deleted)
            for (TextRun run : variant.getValue()) {
                int runStart = delimiters.get(matchIdx) + diffPos + run.start;
                switch (run.type) {
                case DELETE:
                    doc.setCharacterAttributes(runStart, run.length,
                            isActive ? ATTRIBUTES_DELETED_ACTIVE : ATTRIBUTES_DELETED_INACTIVE, false);
                    break;
                case INSERT:
                    doc.setCharacterAttributes(runStart, run.length,
                            isActive ? ATTRIBUTES_INSERTED_ACTIVE : ATTRIBUTES_INSERTED_INACTIVE, false);
                    break;
                case NOCHANGE:
                default:
                    // unchanged text keeps its current styling
                    break;
                }
            }
        }
    }

    doc.setCharacterAttributes(matchStart, matchEnd - matchStart, ATTRIBUTES_SELECTED, false);

    // Put the caret near the end of the match first (two newlines), then
    // move it back to the start once pending Swing events were processed
    setCaretPosition(matchEnd - 2);
    SwingUtilities.invokeLater(() -> setCaretPosition(matchStart));
}
use of org.omegat.util.Token in project omegat by omegat-org.
the class CalcMatchStatistics method calcMaxSimilarity.
/**
 * Computes the highest similarity between the given entry and the near
 * strings found for it.
 * <p>
 * The search is run on the entry text returned by {@code removeXmlTags},
 * while the similarity itself is calculated against the tokens of the
 * entry's full source text. {@code FindMatches.PENALTY_FOR_FUZZY} is
 * subtracted for candidates flagged as fuzzy. The scan stops as soon as a
 * new best score of 95 or more is reached.
 *
 * @param ste
 *            entry to evaluate
 * @return best similarity found, or 0 when no candidate scores above 0
 */
int calcMaxSimilarity(SourceTextEntry ste) {
    final String taglessSource = removeXmlTags(ste);
    final FindMatches matcher = finder.get();
    final List<NearString> candidates = matcher.search(taglessSource, true, false, this::isInterrupted);
    final Token[] entryTokens = matcher.tokenizeAll(ste.getSrcText());

    int best = 0;
    for (NearString candidate : candidates) {
        final Token[] candidateTokens = matcher.tokenizeAll(candidate.source);
        int score = FuzzyMatcher.calcSimilarity(distanceCalculator.get(), entryTokens, candidateTokens);
        if (candidate.fuzzyMark) {
            score -= FindMatches.PENALTY_FOR_FUZZY;
        }
        if (score <= best) {
            continue;
        }
        best = score;
        if (best >= 95) {
            // enough to say that we are in row 2
            break;
        }
    }
    return best;
}
Aggregations