Search in sources :

Example 6 with ProtectedPart

use of org.omegat.core.data.ProtectedPart in project omegat by omegat-org.

the class TagUtil method applyCustomProtectedParts.

/**
 * Finds additional protected parts in a source segment using the given regular expression, e.g. printf
 * variables, java MessageFormat patterns, user defined custom tags.
 *
 * Text of already known protected parts is masked with {@link StaticUtils#TAG_REPLACEMENT} before
 * matching, so that new matches cannot intersect with them. For every match a new protected part is
 * created: its word count replacement is the matched text when custom tags are counted in statistics
 * and the tag placeholder otherwise; its uniqueness and match replacements are the matched text itself.
 *
 * @param source
 *            source segment text to search
 * @param protectedPartsPatterns
 *            pattern describing the custom protected parts; when {@code null} nothing is searched
 * @param protectedParts
 *            already defined protected parts, may be {@code null}; when not {@code null}, new parts are
 *            appended to this very list
 * @return {@code protectedParts} itself (with the new parts appended) when it was not {@code null},
 *         otherwise a new list holding only the parts found here
 */
public static List<ProtectedPart> applyCustomProtectedParts(String source, Pattern protectedPartsPatterns, List<ProtectedPart> protectedParts) {
    List<ProtectedPart> result = protectedParts != null ? protectedParts : new ArrayList<ProtectedPart>();
    if (protectedPartsPatterns == null) {
        // Nothing to match against: previously this ended in a NullPointerException.
        return result;
    }
    String masked = source;
    if (protectedParts != null) {
        // Remove already defined protected parts first to prevent intersection
        for (ProtectedPart known : protectedParts) {
            masked = masked.replace(known.getTextInSourceSegment(), StaticUtils.TAG_REPLACEMENT);
        }
    }
    Matcher placeholderMatcher = protectedPartsPatterns.matcher(masked);
    while (placeholderMatcher.find()) {
        String matched = placeholderMatcher.group();
        ProtectedPart pp = new ProtectedPart();
        pp.setTextInSourceSegment(matched);
        pp.setDetailsFromSourceFile(matched);
        if (StatisticsSettings.isCountingCustomTags()) {
            pp.setReplacementWordsCountCalculation(matched);
        } else {
            pp.setReplacementWordsCountCalculation(StaticUtils.TAG_REPLACEMENT);
        }
        pp.setReplacementUniquenessCalculation(matched);
        pp.setReplacementMatchCalculation(matched);
        result.add(pp);
    }
    return result;
}
Also used : ProtectedPart(org.omegat.core.data.ProtectedPart) Matcher(java.util.regex.Matcher)

Example 7 with ProtectedPart

use of org.omegat.core.data.ProtectedPart in project omegat by omegat-org.

the class Aligner method parseFile.

/**
 * Loads the given file through the filter master and collects what the filter reports, returning a
 * pair of lists:
 * <ul>
 * <li>Key: the IDs of the collected text units
 * <li>Value: the collected text units themselves
 * </ul>
 * A text unit is skipped when its processed text is blank. An ID is only recorded when one is
 * available (the entry ID, or else the entry path), so the ID list can be shorter than the list of
 * text units.
 *
 * @param file
 *            Path to input file
 * @return Pair of lists (IDs, text units)
 * @throws Exception
 *             If parsing fails
 */
private Entry<List<String>, List<String>> parseFile(String file) throws Exception {
    final List<String> ids = new ArrayList<>();
    final List<String> rawSegs = new ArrayList<>();
    IParseCallback collector = new IParseCallback() {

        @Override
        public void linkPrevNextSegments() {
            // nothing to do here
        }

        @Override
        public void addEntry(String id, String source, String translation, boolean isFuzzy, String comment, IFilter filter) {
            collect(id, source);
        }

        @Override
        public void addEntry(String id, String source, String translation, boolean isFuzzy, String comment, String path, IFilter filter, List<ProtectedPart> protectedParts) {
            // fall back to the path when the entry has no ID of its own
            collect(id != null ? id : path, source);
        }

        @Override
        public void addEntryWithProperties(String id, String source, String translation, boolean isFuzzy, String[] props, String path, IFilter filter, List<ProtectedPart> protectedParts) {
            // fall back to the path when the entry has no ID of its own
            collect(id != null ? id : path, source);
        }

        private void collect(String key, String source) {
            boolean stripSpaces = Core.getFilterMaster().getConfig().isRemoveSpacesNonseg();
            String stripped = ParseEntry.stripSomeChars(source, new ParseEntryResult(), removeTags, stripSpaces);
            String normalized = StringUtil.normalizeUnicode(stripped);
            if (normalized.trim().isEmpty()) {
                return;
            }
            if (key != null) {
                ids.add(key);
            }
            rawSegs.add(normalized);
        }
    };
    Core.getFilterMaster().loadFile(file, new FilterContext(srcLang, trgLang, true).setRemoveAllTags(removeTags), collector);
    return new AbstractMap.SimpleImmutableEntry<>(ids, rawSegs);
}
Also used : IParseCallback(org.omegat.filters2.IParseCallback) ProtectedPart(org.omegat.core.data.ProtectedPart) ArrayList(java.util.ArrayList) ParseEntryResult(org.omegat.core.data.ParseEntry.ParseEntryResult) IFilter(org.omegat.filters2.IFilter) FilterContext(org.omegat.filters2.FilterContext)

Example 8 with ProtectedPart

use of org.omegat.core.data.ProtectedPart in project omegat by omegat-org.

the class XLIFFDialect method constructShortcuts.

/**
 * Builds the shortcut text for the given elements and rebuilds the list of protected parts for the
 * tags among them.
 *
 * Content based tags ({@code bpt}, {@code ept}, {@code it}, {@code ph}, {@code mrk}) get their shortcut
 * letter, index and shortcut text assigned here; plain tags get a paired index and use their own
 * shortcut; any other element contributes only its shortcut text and no protected part.
 *
 * @param elements
 *            elements to convert to shortcut text
 * @param protectedParts
 *            output list; it is cleared first and then receives one protected part per tag
 * @return concatenated shortcut text of all elements
 */
@Override
public String constructShortcuts(List<Element> elements, List<ProtectedPart> protectedParts) {
    protectedParts.clear();
    // create shortcuts
    InlineTagHandler tagHandler = new InlineTagHandler();
    StringBuilder r = new StringBuilder();
    for (Element el : elements) {
        if (el instanceof XMLContentBasedTag) {
            XMLContentBasedTag tag = (XMLContentBasedTag) el;
            String shortcut = null;
            int shortcutLetter;
            int tagIndex;
            // only 'mrk' carries protected text together with its tags
            boolean tagProtected = false;
            if ("bpt".equals(tag.getTag())) {
                // XLIFF specification requires 'rid' and 'id' attributes,
                // but some tools uses 'i' attribute like for TMX
                tagHandler.startBPT(tag.getAttribute("rid"), tag.getAttribute("id"), tag.getAttribute("i"));
                shortcutLetter = calcTagShortcutLetter(tag, ignoreTypeForBptTags);
                tagHandler.setTagShortcutLetter(shortcutLetter);
                tagIndex = tagHandler.endBPT();
                shortcut = "<" + shortcutLetterText(shortcutLetter) + tagIndex + '>';
            } else if ("ept".equals(tag.getTag())) {
                tagHandler.startEPT(tag.getAttribute("rid"), tag.getAttribute("id"), tag.getAttribute("i"));
                tagIndex = tagHandler.endEPT();
                shortcutLetter = tagHandler.getTagShortcutLetter();
                shortcut = "</" + shortcutLetterText(shortcutLetter) + tagIndex + '>';
            } else if ("it".equals(tag.getTag())) {
                tagHandler.startOTHER();
                tagHandler.setCurrentPos(tag.getAttribute("pos"));
                tagIndex = tagHandler.endOTHER();
                // XLIFF specification requires 'open/close' values,
                // but some tools may use 'begin/end' values like for TMX
                shortcutLetter = calcTagShortcutLetter(tag);
                if ("close".equals(tagHandler.getCurrentPos()) || "end".equals(tagHandler.getCurrentPos())) {
                    // for better compatibility with corresponding TMX files
                    if (forceShortCutToF) {
                        shortcutLetter = 'f';
                    }
                    shortcut = "</" + shortcutLetterText(shortcutLetter) + tagIndex + '>';
                } else {
                    shortcut = "<" + shortcutLetterText(shortcutLetter) + tagIndex + '>';
                }
            } else if ("ph".equals(tag.getTag())) {
                tagHandler.startOTHER();
                tagIndex = tagHandler.endOTHER();
                shortcutLetter = calcTagShortcutLetter(tag, ignoreTypeForPhTags);
                shortcut = "<" + shortcutLetterText(shortcutLetter) + tagIndex + "/>";
            } else if ("mrk".equals(tag.getTag())) {
                tagHandler.startOTHER();
                tagIndex = tagHandler.endOTHER();
                shortcutLetter = 'm';
                shortcut = "<m" + tagIndex + ">" + tag.getIntactContents().sourceToOriginal() + "</m" + tagIndex + ">";
                tagProtected = true;
            } else {
                // NOTE(review): shortcut stays null for any other content based tag, so the
                // append below adds the text "null" and the protected part gets a null source
                // text. Confirm whether this branch is reachable and what it should produce.
                shortcutLetter = 'f';
                tagIndex = -1;
            }
            tag.setShortcutLetter(shortcutLetter);
            tag.setShortcutIndex(tagIndex);
            tag.setShortcut(shortcut);
            r.append(shortcut);
            String original = tag.toOriginal();
            String wordsCountReplacement;
            String matchReplacement;
            if (tagProtected) {
                // protected text with related tags, like <m0>Acme</m0>
                if (StatisticsSettings.isCountingProtectedText()) {
                    // Protected texts are counted, but related tags are not counted in the word count
                    wordsCountReplacement = StaticUtils.TAG_REPLACEMENT + tag.getIntactContents().sourceToOriginal() + StaticUtils.TAG_REPLACEMENT;
                } else {
                    // All protected parts are not counted in the word count(default)
                    wordsCountReplacement = StaticUtils.TAG_REPLACEMENT;
                }
                matchReplacement = tag.getIntactContents().sourceToOriginal();
            } else {
                // simple tag, like <i0>
                wordsCountReplacement = StatisticsSettings.isCountingStandardTags() ? tag.toSafeCalcShortcut() : StaticUtils.TAG_REPLACEMENT;
                matchReplacement = StaticUtils.TAG_REPLACEMENT;
            }
            protectedParts.add(newTagProtectedPart(shortcut, original, wordsCountReplacement, matchReplacement));
        } else if (el instanceof Tag) {
            Tag tag = (Tag) el;
            int tagIndex = tagHandler.paired(tag.getTag(), tag.getType());
            tag.setIndex(tagIndex);
            String shortcut = tag.toShortcut();
            r.append(shortcut);
            String original = tag.toOriginal();
            String wordsCountReplacement = StatisticsSettings.isCountingStandardTags() ? tag.toSafeCalcShortcut() : StaticUtils.TAG_REPLACEMENT;
            protectedParts.add(newTagProtectedPart(shortcut, original, wordsCountReplacement, StaticUtils.TAG_REPLACEMENT));
        } else {
            r.append(el.toShortcut());
        }
    }
    return r.toString();
}

/** Returns the shortcut letter code point as text, or "f" when no letter (0) has been assigned. */
private static String shortcutLetterText(int shortcutLetter) {
    return shortcutLetter != 0 ? String.valueOf(Character.toChars(shortcutLetter)) : "f";
}

/** Creates the protected part of a tag; its uniqueness replacement is always the tag placeholder. */
private static ProtectedPart newTagProtectedPart(String shortcut, String original, String wordsCountReplacement, String matchReplacement) {
    ProtectedPart pp = new ProtectedPart();
    pp.setTextInSourceSegment(shortcut);
    pp.setDetailsFromSourceFile(original);
    pp.setReplacementWordsCountCalculation(wordsCountReplacement);
    pp.setReplacementUniquenessCalculation(StaticUtils.TAG_REPLACEMENT);
    pp.setReplacementMatchCalculation(matchReplacement);
    return pp;
}
Also used : ProtectedPart(org.omegat.core.data.ProtectedPart) XMLContentBasedTag(org.omegat.filters3.xml.XMLContentBasedTag) Element(org.omegat.filters3.Element) InlineTagHandler(org.omegat.util.InlineTagHandler) XMLTag(org.omegat.filters3.xml.XMLTag) Tag(org.omegat.filters3.Tag) XMLContentBasedTag(org.omegat.filters3.xml.XMLContentBasedTag)

Example 9 with ProtectedPart

use of org.omegat.core.data.ProtectedPart in project omegat by omegat-org.

the class CalcStandardStatistics method buildProjectStats.

/**
 * Builds the statistics text of the project: a summary table with the total, remaining, unique and
 * remaining unique counts of the whole project, followed by a table with the counts of each file.
 *
 * Two entries count as the same unique segment when their source texts are equal after every
 * protected part has been replaced by its uniqueness replacement.
 *
 * @param project
 *            project to analyse
 * @param hotStat
 *            when not {@code null}, receives the segment totals and the unique counts by file
 * @param callback
 *            when not {@code null}, receives the data of both tables
 * @return the statistics as text
 */
public static String buildProjectStats(final IProject project, final StatisticsInfo hotStat, final StatisticsPanel callback) {
    StatCount total = new StatCount();
    StatCount remaining = new StatCount();
    StatCount unique = new StatCount();
    StatCount remainingUnique = new StatCount();

    // find unique segments: remember the first entry seen for every uniqueness key
    Map<String, SourceTextEntry> uniqueSegment = new HashMap<String, SourceTextEntry>();
    Set<String> translated = new HashSet<String>();
    for (SourceTextEntry entry : project.getAllEntries()) {
        String key = uniquenessKey(entry);
        if (!uniqueSegment.containsKey(key)) {
            uniqueSegment.put(key, entry);
        }
        if (project.getTranslationInfo(entry).isTranslated()) {
            translated.add(key);
        }
    }

    // project-wide unique and remaining unique counts
    Set<String> filesUnique = new HashSet<String>();
    Set<String> filesRemainingUnique = new HashSet<String>();
    for (Map.Entry<String, SourceTextEntry> en : uniqueSegment.entrySet()) {
        SourceTextEntry entry = en.getValue();
        /* Number of words and chars calculated without all tags and protected parts. */
        StatCount count = new StatCount(entry);
        unique.add(count);
        filesUnique.add(entry.getKey().file);
        if (!translated.contains(en.getKey())) {
            remainingUnique.add(count);
            filesRemainingUnique.add(entry.getKey().file);
        }
    }
    unique.addFiles(filesUnique.size());
    remainingUnique.addFiles(filesRemainingUnique.size());

    // per-file counts; a unique segment is attributed to the file where it is seen first
    List<FileData> counts = new ArrayList<FileData>();
    Set<String> seenKeys = new HashSet<String>();
    for (FileInfo file : project.getProjectFiles()) {
        FileData numbers = new FileData();
        numbers.filename = file.filePath;
        counts.add(numbers);
        boolean hasEntries = false;
        boolean hasRemaining = false;
        for (SourceTextEntry entry : file.entries) {
            String key = uniquenessKey(entry);
            /* Number of words and chars calculated without all tags and protected parts. */
            StatCount count = new StatCount(entry);
            total.add(count);
            hasEntries = true;
            boolean untranslated = !project.getTranslationInfo(entry).isTranslated();
            if (untranslated) {
                remaining.add(count);
                hasRemaining = true;
            }
            numbers.total.add(count);
            if (seenKeys.add(key)) {
                numbers.unique.add(count);
                if (untranslated) {
                    numbers.remainingUnique.add(count);
                }
            }
            if (untranslated) {
                numbers.remaining.add(count);
            }
        }
        total.addFiles(hasEntries ? 1 : 0);
        remaining.addFiles(hasRemaining ? 1 : 0);
    }

    StringBuilder result = new StringBuilder();
    result.append(OStrings.getString("CT_STATS_Project_Statistics")).append("\n\n");
    String[][] headerTable = calcHeaderTable(new StatCount[] { total, remaining, unique, remainingUnique });
    if (callback != null) {
        callback.setProjectTableData(HT_HEADERS, headerTable);
    }
    result.append(TextUtil.showTextTable(HT_HEADERS, headerTable, HT_ALIGN)).append("\n\n");

    // STATISTICS BY FILE
    result.append(OStrings.getString("CT_STATS_FILE_Statistics")).append("\n\n");
    String[][] filesTable = calcFilesTable(project.getProjectProperties(), counts);
    if (callback != null) {
        callback.setFilesTableData(FT_HEADERS, filesTable);
    }
    result.append(TextUtil.showTextTable(FT_HEADERS, filesTable, FT_ALIGN));

    if (hotStat != null) {
        hotStat.numberOfSegmentsTotal = total.segments;
        // size of the set of uniqueness keys that have a translated entry
        hotStat.numberofTranslatedSegments = translated.size();
        hotStat.numberOfUniqueSegments = unique.segments;
        hotStat.uniqueCountsByFile.clear();
        for (FileData fd : counts) {
            hotStat.uniqueCountsByFile.put(fd.filename, fd.unique.segments);
        }
    }
    return result.toString();
}

/** Returns the source text of the entry with every protected part replaced by its uniqueness replacement. */
private static String uniquenessKey(SourceTextEntry entry) {
    String key = entry.getSrcText();
    for (ProtectedPart pp : entry.getProtectedParts()) {
        key = key.replace(pp.getTextInSourceSegment(), pp.getReplacementUniquenessCalculation());
    }
    return key;
}
Also used : ProtectedPart(org.omegat.core.data.ProtectedPart) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) FileInfo(org.omegat.core.data.IProject.FileInfo) SourceTextEntry(org.omegat.core.data.SourceTextEntry) HashMap(java.util.HashMap) Map(java.util.Map) TMXEntry(org.omegat.core.data.TMXEntry) HashSet(java.util.HashSet)

Example 10 with ProtectedPart

use of org.omegat.core.data.ProtectedPart in project omegat by omegat-org.

the class Entry method checkAndRecoverTags.

/**
 * Rebuilds {@code translatedEntry} from the translation text: the text between tag shortcuts becomes
 * text instances, and every shortcut that matches a tag of this entry is replaced by that tag. A
 * shortcut without a matching tag is not consumed and therefore ends up in the following text
 * instance. See {@link #setTranslation(String, XMLDialect, List)} for details.
 *
 * @param translation
 *            translated text containing tag shortcuts
 * @param protectedParts
 *            protected parts used to locate the shortcuts in the translation
 * @throws TranslationException
 *             declared for callers; the tag checking that could raise it is not implemented yet
 */
private void checkAndRecoverTags(String translation, List<ProtectedPart> protectedParts) throws TranslationException {
    translatedEntry = new Entry(xmlDialect, handler);
    // /////////////////////////////////////////////////////////////////////
    // recovering tags
    ProtectedPart[] parts = protectedParts.toArray(new ProtectedPart[protectedParts.size()]);
    int consumed = 0;
    for (TagUtil.Tag shortTag : TagUtil.buildTagList(translation, parts)) {
        if (consumed < shortTag.pos) {
            // plain text in front of the shortcut
            translatedEntry.add(createTextInstance(translation.substring(consumed, shortTag.pos)));
            consumed = shortTag.pos;
        }
        Tag longTag = findTagForShortcut(shortTag);
        if (longTag != null) {
            translatedEntry.add(longTag);
            consumed += shortTag.tag.length();
        }
        // P.S. If shortcut tag isn't found, probably we should issue a
        // warning.
    }
    if (consumed < translation.length()) {
        // trailing text after the last recovered tag
        translatedEntry.add(createTextInstance(translation.substring(consumed)));
    }
    // /////////////////////////////////////////////////////////////////////
    // checking tags
    // TODO: implement checking
}

/** Returns the first tag between the first and last good element whose shortcut equals the given one, or null. */
private Tag findTagForShortcut(TagUtil.Tag shortTag) {
    for (int i = getFirstGood(); i <= getLastGood(); i++) {
        Element candidate = get(i);
        if (candidate instanceof Tag && ((Tag) candidate).toShortcut().equals(shortTag.tag)) {
            return (Tag) candidate;
        }
    }
    return null;
}
Also used : ProtectedPart(org.omegat.core.data.ProtectedPart) TagUtil(org.omegat.util.TagUtil) XMLContentBasedTag(org.omegat.filters3.xml.XMLContentBasedTag)

Aggregations

ProtectedPart (org.omegat.core.data.ProtectedPart)18 ArrayList (java.util.ArrayList)10 SourceTextEntry (org.omegat.core.data.SourceTextEntry)6 IFilter (org.omegat.filters2.IFilter)4 IParseCallback (org.omegat.filters2.IParseCallback)4 Point (java.awt.Point)3 File (java.io.File)3 Element (org.omegat.filters3.Element)3 HashMap (java.util.HashMap)2 HashSet (java.util.HashSet)2 Map (java.util.Map)2 FileInfo (org.omegat.core.data.IProject.FileInfo)2 TMXEntry (org.omegat.core.data.TMXEntry)2 Tag (org.omegat.filters3.Tag)2 XMLContentBasedTag (org.omegat.filters3.xml.XMLContentBasedTag)2 Matcher (java.util.regex.Matcher)1 AttributeSet (javax.swing.text.AttributeSet)1 BadLocationException (javax.swing.text.BadLocationException)1 HighlightPainter (javax.swing.text.Highlighter.HighlightPainter)1 Test (org.junit.Test)1