use of org.omegat.core.data.ProtectedPart in project omegat by omegat-org.
the class TagUtil method applyCustomProtectedParts.
/**
 * Collects the custom protected parts of a source segment, i.e. every match of the given regular
 * expression (e.g. printf variables, java MessageFormat patterns, user defined custom tags).
 * <p>
 * Per the original author's note, these protected parts shouldn't affect statistics; they are just
 * displayed in gray in the editor and take part in tag validation.
 * <p>
 * The text of every already known protected part is first replaced in the working copy of the source
 * with {@code StaticUtils.TAG_REPLACEMENT}, so that new matches cannot intersect them. Each match is
 * then stored verbatim as the part's source text, details, uniqueness replacement and match
 * replacement. The word-count replacement is the match itself only when
 * {@code StatisticsSettings.isCountingCustomTags()} returns true; otherwise it is
 * {@code StaticUtils.TAG_REPLACEMENT}.
 *
 * @param source
 *            text of the source segment to scan
 * @param protectedPartsPatterns
 *            pattern whose matches become protected parts
 * @param protectedParts
 *            already defined protected parts, or {@code null}; a non-null list is extended in place
 * @return {@code protectedParts} itself with the new parts appended, or a newly created list when
 *         {@code protectedParts} is {@code null}
 */
public static List<ProtectedPart> applyCustomProtectedParts(String source, Pattern protectedPartsPatterns, List<ProtectedPart> protectedParts) {
    List<ProtectedPart> collected = protectedParts;
    if (collected == null) {
        collected = new ArrayList<ProtectedPart>();
    } else {
        // Mask the parts we already know about so the pattern cannot match inside them.
        for (ProtectedPart known : collected) {
            source = source.replace(known.getTextInSourceSegment(), StaticUtils.TAG_REPLACEMENT);
        }
    }

    for (Matcher matcher = protectedPartsPatterns.matcher(source); matcher.find();) {
        String found = matcher.group();
        ProtectedPart part = new ProtectedPart();
        part.setTextInSourceSegment(found);
        part.setDetailsFromSourceFile(found);
        part.setReplacementWordsCountCalculation(
                StatisticsSettings.isCountingCustomTags() ? found : StaticUtils.TAG_REPLACEMENT);
        part.setReplacementUniquenessCalculation(found);
        part.setReplacementMatchCalculation(found);
        collected.add(part);
    }
    return collected;
}
use of org.omegat.core.data.ProtectedPart in project omegat by omegat-org.
the class Aligner method parseFile.
/**
 * Loads the given file through the filter master and gathers its text units.
 * <p>
 * The result is a pair of lists:
 * <ul>
 * <li>Key: the IDs of the parsed text units
 * <li>Value: the parsed text units themselves
 * </ul>
 * A text unit is kept only if it is non-blank after stripping and Unicode normalization. An ID is
 * recorded only when one is available (the entry's id, or its path for the callbacks that supply one),
 * so the ID list can be shorter than the text list.
 *
 * @param file
 *            Path to input file
 * @return Pair of lists (IDs, text units)
 * @throws Exception
 *             If parsing fails
 */
private Entry<List<String>, List<String>> parseFile(String file) throws Exception {
    final List<String> unitIds = new ArrayList<>();
    final List<String> unitTexts = new ArrayList<>();

    IParseCallback collector = new IParseCallback() {
        @Override
        public void linkPrevNextSegments() {
            // Nothing to link: only the raw text is of interest here.
        }

        @Override
        public void addEntry(String id, String source, String translation, boolean isFuzzy, String comment, IFilter filter) {
            collect(source, id);
        }

        @Override
        public void addEntry(String id, String source, String translation, boolean isFuzzy, String comment, String path, IFilter filter, List<ProtectedPart> protectedParts) {
            // Fall back to the path when the entry carries no id.
            collect(source, id != null ? id : path);
        }

        @Override
        public void addEntryWithProperties(String id, String source, String translation, boolean isFuzzy, String[] props, String path, IFilter filter, List<ProtectedPart> protectedParts) {
            // Fall back to the path when the entry carries no id.
            collect(source, id != null ? id : path);
        }

        /** Cleans up one text unit and stores it (with its key, if any) unless it is blank. */
        private void collect(String text, String key) {
            boolean removeSpaces = Core.getFilterMaster().getConfig().isRemoveSpacesNonseg();
            text = StringUtil.normalizeUnicode(ParseEntry.stripSomeChars(text, new ParseEntryResult(), removeTags, removeSpaces));
            if (text.trim().isEmpty()) {
                return;
            }
            if (key != null) {
                unitIds.add(key);
            }
            unitTexts.add(text);
        }
    };

    Core.getFilterMaster().loadFile(file, new FilterContext(srcLang, trgLang, true).setRemoveAllTags(removeTags), collector);
    return new AbstractMap.SimpleImmutableEntry<>(unitIds, unitTexts);
}
use of org.omegat.core.data.ProtectedPart in project omegat by omegat-org.
the class XLIFFDialect method constructShortcuts.
/**
 * Builds the shortcut form of a run of elements and describes every tag in it as a protected part.
 * <p>
 * {@code protectedParts} is cleared first and then receives one {@link ProtectedPart} per tag element,
 * in element order. Content-based tags ({@code bpt}, {@code ept}, {@code it}, {@code ph}, {@code mrk})
 * get a shortcut built from a shortcut letter and an index supplied by a fresh
 * {@code InlineTagHandler}; {@code mrk} additionally embeds its original content between
 * {@code <mN>} and {@code </mN>} and is treated as protected text. Other tags use their own
 * {@code toShortcut()}; non-tag elements are appended without creating a protected part.
 * <p>
 * NOTE(review): for a content-based tag with any other name the shortcut stays {@code null}, so the
 * text {@code "null"} is appended and the protected part gets a null source text. It is not visible
 * from here whether that branch is reachable - confirm before relying on it.
 *
 * @param elements
 *            elements to render
 * @param protectedParts
 *            output list; emptied and refilled by this call
 * @return the concatenated shortcut text of all elements
 */
@Override
public String constructShortcuts(List<Element> elements, List<ProtectedPart> protectedParts) {
    protectedParts.clear();

    InlineTagHandler tagHandler = new InlineTagHandler();
    StringBuilder segment = new StringBuilder();

    for (Element element : elements) {
        if (element instanceof XMLContentBasedTag) {
            XMLContentBasedTag inline = (XMLContentBasedTag) element;
            String shortcut = null;
            int letter;
            int index;
            // Only <mrk> wraps text that must be kept together with its tags.
            boolean wrapsProtectedText = false;

            if ("bpt".equals(inline.getTag())) {
                // XLIFF specification requires 'rid' and 'id' attributes,
                // but some tools use the 'i' attribute like in TMX
                tagHandler.startBPT(inline.getAttribute("rid"), inline.getAttribute("id"), inline.getAttribute("i"));
                letter = calcTagShortcutLetter(inline, ignoreTypeForBptTags);
                tagHandler.setTagShortcutLetter(letter);
                index = tagHandler.endBPT();
                shortcut = "<" + shortcutLetterText(letter) + index + '>';
            } else if ("ept".equals(inline.getTag())) {
                tagHandler.startEPT(inline.getAttribute("rid"), inline.getAttribute("id"), inline.getAttribute("i"));
                index = tagHandler.endEPT();
                // The closing tag reuses the letter stored by the tag handler.
                letter = tagHandler.getTagShortcutLetter();
                shortcut = "</" + shortcutLetterText(letter) + index + '>';
            } else if ("it".equals(inline.getTag())) {
                tagHandler.startOTHER();
                tagHandler.setCurrentPos(inline.getAttribute("pos"));
                index = tagHandler.endOTHER();
                letter = calcTagShortcutLetter(inline);
                // XLIFF specification requires 'open/close' values,
                // but some tools may use 'begin/end' values like in TMX
                boolean closing = "close".equals(tagHandler.getCurrentPos()) || "end".equals(tagHandler.getCurrentPos());
                if (closing) {
                    // for better compatibility with corresponding TMX files
                    if (forceShortCutToF) {
                        letter = 'f';
                    }
                    shortcut = "</" + shortcutLetterText(letter) + index + '>';
                } else {
                    shortcut = "<" + shortcutLetterText(letter) + index + '>';
                }
            } else if ("ph".equals(inline.getTag())) {
                tagHandler.startOTHER();
                index = tagHandler.endOTHER();
                letter = calcTagShortcutLetter(inline, ignoreTypeForPhTags);
                shortcut = "<" + shortcutLetterText(letter) + index + "/>";
            } else if ("mrk".equals(inline.getTag())) {
                tagHandler.startOTHER();
                index = tagHandler.endOTHER();
                letter = 'm';
                shortcut = "<m" + index + ">" + inline.getIntactContents().sourceToOriginal() + "</m" + index + ">";
                wrapsProtectedText = true;
            } else {
                // Unrecognized tag name: shortcut is left null (see the review note in the Javadoc).
                letter = 'f';
                index = -1;
            }

            inline.setShortcutLetter(letter);
            inline.setShortcutIndex(index);
            inline.setShortcut(shortcut);
            segment.append(shortcut);

            ProtectedPart part = new ProtectedPart();
            part.setTextInSourceSegment(shortcut);
            part.setDetailsFromSourceFile(inline.toOriginal());
            part.setReplacementUniquenessCalculation(StaticUtils.TAG_REPLACEMENT);
            if (!wrapsProtectedText) {
                // simple tag, like <i0>
                if (StatisticsSettings.isCountingStandardTags()) {
                    part.setReplacementWordsCountCalculation(inline.toSafeCalcShortcut());
                } else {
                    part.setReplacementWordsCountCalculation(StaticUtils.TAG_REPLACEMENT);
                }
                part.setReplacementMatchCalculation(StaticUtils.TAG_REPLACEMENT);
            } else {
                // protected text with related tags, like <m0>Acme</m0>
                if (StatisticsSettings.isCountingProtectedText()) {
                    // The protected text is counted, the tags around it are not.
                    part.setReplacementWordsCountCalculation(StaticUtils.TAG_REPLACEMENT + inline.getIntactContents().sourceToOriginal() + StaticUtils.TAG_REPLACEMENT);
                } else {
                    // Nothing of the protected part is counted (default).
                    part.setReplacementWordsCountCalculation(StaticUtils.TAG_REPLACEMENT);
                }
                part.setReplacementMatchCalculation(inline.getIntactContents().sourceToOriginal());
            }
            protectedParts.add(part);
        } else if (element instanceof Tag) {
            Tag plain = (Tag) element;
            plain.setIndex(tagHandler.paired(plain.getTag(), plain.getType()));
            String shortcut = plain.toShortcut();
            segment.append(shortcut);

            ProtectedPart part = new ProtectedPart();
            part.setTextInSourceSegment(shortcut);
            part.setDetailsFromSourceFile(plain.toOriginal());
            if (StatisticsSettings.isCountingStandardTags()) {
                part.setReplacementWordsCountCalculation(plain.toSafeCalcShortcut());
            } else {
                part.setReplacementWordsCountCalculation(StaticUtils.TAG_REPLACEMENT);
            }
            part.setReplacementUniquenessCalculation(StaticUtils.TAG_REPLACEMENT);
            part.setReplacementMatchCalculation(StaticUtils.TAG_REPLACEMENT);
            protectedParts.add(part);
        } else {
            segment.append(element.toShortcut());
        }
    }
    return segment.toString();
}

/** Renders a shortcut letter code point as text; a zero code point yields the default letter "f". */
private static String shortcutLetterText(int codePoint) {
    return codePoint != 0 ? String.valueOf(Character.toChars(codePoint)) : "f";
}
use of org.omegat.core.data.ProtectedPart in project omegat by omegat-org.
the class CalcStandardStatistics method buildProjectStats.
/**
 * Builds a text report with statistic info about the project: the total word and character count of
 * the project, the total number of unique segments, plus the details for each file.
 * <p>
 * Two segments count as the same unique segment when their source texts are equal after every
 * protected part has been replaced by its uniqueness replacement.
 *
 * @param project
 *            project to analyse
 * @param hotStat
 *            if not {@code null}, receives the segment totals and the per-file unique counts
 * @param callback
 *            if not {@code null}, receives the project table and the files table data
 * @return the report text (project table followed by the per-file table)
 */
public static String buildProjectStats(final IProject project, final StatisticsInfo hotStat, final StatisticsPanel callback) {
    StatCount total = new StatCount();
    StatCount remaining = new StatCount();
    StatCount unique = new StatCount();
    StatCount remainingUnique = new StatCount();

    // Pass 1: remember the first entry seen for each uniqueness key and which keys are translated.
    Map<String, SourceTextEntry> firstEntryByKey = new HashMap<String, SourceTextEntry>();
    Set<String> translated = new HashSet<String>();
    for (SourceTextEntry entry : project.getAllEntries()) {
        String key = entry.getSrcText();
        for (ProtectedPart part : entry.getProtectedParts()) {
            key = key.replace(part.getTextInSourceSegment(), part.getReplacementUniquenessCalculation());
        }
        if (!firstEntryByKey.containsKey(key)) {
            firstEntryByKey.put(key, entry);
        }
        if (project.getTranslationInfo(entry).isTranslated()) {
            translated.add(key);
        }
    }

    // Pass 2: project-wide unique / remaining-unique counts, and the files contributing to them.
    Set<String> uniqueFiles = new HashSet<String>();
    Set<String> remainingUniqueFiles = new HashSet<String>();
    for (Map.Entry<String, SourceTextEntry> pair : firstEntryByKey.entrySet()) {
        SourceTextEntry entry = pair.getValue();
        /* Number of words and chars calculated without all tags and protected parts. */
        StatCount count = new StatCount(entry);
        unique.add(count);
        uniqueFiles.add(entry.getKey().file);
        if (!translated.contains(pair.getKey())) {
            remainingUnique.add(count);
            remainingUniqueFiles.add(entry.getKey().file);
        }
    }
    unique.addFiles(uniqueFiles.size());
    remainingUnique.addFiles(remainingUniqueFiles.size());

    // Pass 3: per-file numbers plus the project totals.
    List<FileData> counts = new ArrayList<FileData>();
    // Keys already attributed to a file; a unique segment is credited to the first file containing it.
    Set<String> seenKeys = new HashSet<String>();
    for (FileInfo file : project.getProjectFiles()) {
        FileData numbers = new FileData();
        numbers.filename = file.filePath;
        counts.add(numbers);

        boolean hasEntries = false;
        boolean hasUntranslated = false;
        for (SourceTextEntry entry : file.entries) {
            String key = entry.getSrcText();
            for (ProtectedPart part : entry.getProtectedParts()) {
                key = key.replace(part.getTextInSourceSegment(), part.getReplacementUniquenessCalculation());
            }
            /* Number of words and chars calculated without all tags and protected parts. */
            StatCount count = new StatCount(entry);
            boolean untranslated = !project.getTranslationInfo(entry).isTranslated();

            total.add(count);
            numbers.total.add(count);
            hasEntries = true;

            if (seenKeys.add(key)) {
                numbers.unique.add(count);
                if (untranslated) {
                    numbers.remainingUnique.add(count);
                }
            }
            if (untranslated) {
                remaining.add(count);
                numbers.remaining.add(count);
                hasUntranslated = true;
            }
        }
        // A file counts once towards a total if it contributed at least one segment to it.
        total.addFiles(hasEntries ? 1 : 0);
        remaining.addFiles(hasUntranslated ? 1 : 0);
    }

    StringBuilder report = new StringBuilder();
    report.append(OStrings.getString("CT_STATS_Project_Statistics")).append("\n\n");
    String[][] headerTable = calcHeaderTable(new StatCount[] { total, remaining, unique, remainingUnique });
    if (callback != null) {
        callback.setProjectTableData(HT_HEADERS, headerTable);
    }
    report.append(TextUtil.showTextTable(HT_HEADERS, headerTable, HT_ALIGN)).append("\n\n");

    // STATISTICS BY FILE
    report.append(OStrings.getString("CT_STATS_FILE_Statistics")).append("\n\n");
    String[][] filesTable = calcFilesTable(project.getProjectProperties(), counts);
    if (callback != null) {
        callback.setFilesTableData(FT_HEADERS, filesTable);
    }
    report.append(TextUtil.showTextTable(FT_HEADERS, filesTable, FT_ALIGN));

    if (hotStat != null) {
        hotStat.numberOfSegmentsTotal = total.segments;
        // Size of the translated-key set, i.e. translated segments counted per uniqueness key.
        hotStat.numberofTranslatedSegments = translated.size();
        hotStat.numberOfUniqueSegments = unique.segments;
        hotStat.uniqueCountsByFile.clear();
        for (FileData fileData : counts) {
            hotStat.uniqueCountsByFile.put(fileData.filename, fileData.unique.segments);
        }
    }
    return report.toString();
}
use of org.omegat.core.data.ProtectedPart in project omegat by omegat-org.
the class Entry method checkAndRecoverTags.
/**
 * Rebuilds {@code translatedEntry} from a translation that uses shortcut tags: before the translation
 * is set, the shortcut tags found in it are mapped back to this entry's full tags in weakly correct
 * order. See {@link #setTranslation(String, XMLDialect, List)} for details.
 * <p>
 * Text between tags becomes text elements. Each shortcut tag is looked up among this entry's elements
 * in the {@code getFirstGood()}..{@code getLastGood()} range and the first tag with an equal shortcut
 * is reused. A shortcut with no counterpart is not consumed, so it ends up inside the following text
 * element.
 *
 * @param translation
 *            translated text containing shortcut tags
 * @param protectedParts
 *            protected parts used to locate the tags in the translation
 */
private void checkAndRecoverTags(String translation, List<ProtectedPart> protectedParts) throws TranslationException {
    translatedEntry = new Entry(xmlDialect, handler);

    // ---------------------------------------------------------------------
    // recovering tags
    ProtectedPart[] parts = protectedParts.toArray(new ProtectedPart[protectedParts.size()]);
    int cursor = 0;
    for (TagUtil.Tag shortTag : TagUtil.buildTagList(translation, parts)) {
        // Plain text preceding this tag.
        if (shortTag.pos > cursor) {
            translatedEntry.add(createTextInstance(translation.substring(cursor, shortTag.pos)));
            cursor = shortTag.pos;
        }

        // Find the first full tag whose shortcut matches.
        boolean recovered = false;
        int candidateIndex = getFirstGood();
        while (!recovered && candidateIndex <= getLastGood()) {
            Element candidate = get(candidateIndex);
            candidateIndex++;
            if (!(candidate instanceof Tag)) {
                continue;
            }
            Tag longTag = (Tag) candidate;
            if (longTag.toShortcut().equals(shortTag.tag)) {
                translatedEntry.add(longTag);
                cursor += shortTag.tag.length();
                recovered = true;
            }
        }
        // P.S. If the shortcut tag isn't found, we should probably issue a
        // warning.
    }

    // Trailing text after the last tag.
    if (cursor < translation.length()) {
        translatedEntry.add(createTextInstance(translation.substring(cursor)));
    }

    // ---------------------------------------------------------------------
    // checking tags
    // TODO: implement checking
}
Aggregations