Search in sources :

Example 16 with SourceTextEntry

use of org.omegat.core.data.SourceTextEntry in project omegat by omegat-org.

the class CalcMatchStatistics method calcTotal.

/**
 * Walks every entry of the current project and accumulates match statistics.
 * <p>
 * Translated entries are counted as exact matches, untranslated entries whose
 * source text was already seen are counted as repetitions, and the remaining
 * (first-occurrence, untranslated) entries are handed to
 * {@code calcSimilarity} afterwards.
 *
 * @param outData
 *            when {@code true}, an intermediate table (without percentages)
 *            and the final table are rendered and shown, and the final text
 *            is written to the match-statistics file
 * @return the accumulated counts
 */
MatchStatCounts calcTotal(boolean outData) {
    MatchStatCounts totals = new MatchStatCounts();
    alreadyProcessedInProject.clear();
    final List<SourceTextEntry> pending = new ArrayList<>();
    // Every segment of every project file has to be visited.
    for (SourceTextEntry entry : Core.getProject().getAllEntries()) {
        checkInterrupted();
        StatCount entryCount = new StatCount(entry);
        // The source text is always recorded, even when the entry is translated.
        boolean seenBefore = !alreadyProcessedInProject.add(entry.getSrcText());
        boolean hasTranslation = Core.getProject().getTranslationInfo(entry).isTranslated();
        if (!hasTranslation && !seenBefore) {
            // First occurrence without translation: similarity is computed later.
            pending.add(entry);
            continue;
        }
        if (hasTranslation) {
            // A translated segment is reported as "Exact matched".
            totals.addExact(entryCount);
        } else {
            // Untranslated, but the same source text was met earlier: repetition.
            totals.addRepetition(entryCount);
        }
        entryProcessed();
    }
    if (outData) {
        // Preliminary output, produced before the similarity pass has run.
        String[][] rows = totals.calcTableWithoutPercentage(rowsTotal);
        String rendered = TextUtil.showTextTable(header, rows, align);
        showText(rendered);
        showTable(rows);
    }
    calcSimilarity(pending).ifPresent(totals::addCounts);
    if (outData) {
        String[][] rows = totals.calcTable(rowsTotal, i -> i != 1);
        String rendered = TextUtil.showTextTable(header, rows, align);
        showText(rendered);
        showTable(rows);
        String statsFile = Core.getProject().getProjectProperties().getProjectInternal()
                + OConsts.STATS_MATCH_FILENAME;
        Statistics.writeStat(statsFile, rendered);
        callback.setDataFile(statsFile);
    }
    return totals;
}
Also used : SourceTextEntry(org.omegat.core.data.SourceTextEntry) ArrayList(java.util.ArrayList) NearString(org.omegat.core.matching.NearString)

Example 17 with SourceTextEntry

use of org.omegat.core.data.SourceTextEntry in project omegat by omegat-org.

the class CalcStandardStatistics method buildProjectStats.

/**
 * Builds the textual statistics report for a project: the project-wide
 * totals (all segments, remaining segments, unique segments and remaining
 * unique segments) followed by the same details for each file.
 * <p>
 * Two segments are treated as the same unique segment when their source
 * texts are equal after every protected part has been substituted by its
 * uniqueness-calculation replacement.
 *
 * @param project
 *            project whose entries and files are counted
 * @param hotStat
 *            optional holder that receives the segment counters; may be
 *            {@code null}. Note that {@code numberofTranslatedSegments} is
 *            set to the number of distinct translated uniqueness keys.
 * @param callback
 *            optional panel that receives the header and per-file tables;
 *            may be {@code null}
 * @return the report text
 */
public static String buildProjectStats(final IProject project, final StatisticsInfo hotStat, final StatisticsPanel callback) {
    StatCount total = new StatCount();
    StatCount remaining = new StatCount();
    StatCount unique = new StatCount();
    StatCount remainingUnique = new StatCount();

    // find unique segments: the first entry seen for a key represents that key
    Map<String, SourceTextEntry> uniqueSegment = new HashMap<>();
    Set<String> translated = new HashSet<>();
    for (SourceTextEntry ste : project.getAllEntries()) {
        String src = uniquenessKey(ste);
        uniqueSegment.putIfAbsent(src, ste);
        if (project.getTranslationInfo(ste).isTranslated()) {
            translated.add(src);
        }
    }

    Set<String> filesUnique = new HashSet<>();
    Set<String> filesRemainingUnique = new HashSet<>();
    for (Map.Entry<String, SourceTextEntry> en : uniqueSegment.entrySet()) {
        /* Number of words and chars calculated without all tags and protected parts. */
        StatCount count = new StatCount(en.getValue());
        // add to unique
        unique.add(count);
        filesUnique.add(en.getValue().getKey().file);
        // add to unique remaining: a key is remaining when none of its entries is translated
        if (!translated.contains(en.getKey())) {
            remainingUnique.add(count);
            filesRemainingUnique.add(en.getValue().getKey().file);
        }
    }
    unique.addFiles(filesUnique.size());
    remainingUnique.addFiles(filesRemainingUnique.size());

    List<FileData> counts = new ArrayList<>();
    // Uniqueness keys already attributed to some file; a key is counted as
    // unique only for the first file (in project order) that contains it.
    Set<String> seenSegments = new HashSet<>();
    for (FileInfo file : project.getProjectFiles()) {
        FileData numbers = new FileData();
        numbers.filename = file.filePath;
        counts.add(numbers);
        // 0 or 1: whether this file contributes to the file counters
        int fileTotal = 0;
        int fileRemaining = 0;
        for (SourceTextEntry ste : file.entries) {
            String src = uniquenessKey(ste);
            /* Number of words and chars calculated without all tags and protected parts. */
            StatCount count = new StatCount(ste);
            boolean untranslated = !project.getTranslationInfo(ste).isTranslated();

            // add to total
            total.add(count);
            fileTotal = 1;
            numbers.total.add(count);

            // add to remaining
            if (untranslated) {
                remaining.add(count);
                fileRemaining = 1;
                numbers.remaining.add(count);
            }

            // add to file's unique info on the first occurrence only
            if (seenSegments.add(src)) {
                numbers.unique.add(count);
                if (untranslated) {
                    numbers.remainingUnique.add(count);
                }
            }
        }
        total.addFiles(fileTotal);
        remaining.addFiles(fileRemaining);
    }

    StringBuilder result = new StringBuilder();
    result.append(OStrings.getString("CT_STATS_Project_Statistics"));
    result.append("\n\n");
    String[][] headerTable = calcHeaderTable(new StatCount[] { total, remaining, unique, remainingUnique });
    if (callback != null) {
        callback.setProjectTableData(HT_HEADERS, headerTable);
    }
    result.append(TextUtil.showTextTable(HT_HEADERS, headerTable, HT_ALIGN));
    result.append("\n\n");

    // STATISTICS BY FILE
    result.append(OStrings.getString("CT_STATS_FILE_Statistics"));
    result.append("\n\n");
    String[][] filesTable = calcFilesTable(project.getProjectProperties(), counts);
    if (callback != null) {
        callback.setFilesTableData(FT_HEADERS, filesTable);
    }
    result.append(TextUtil.showTextTable(FT_HEADERS, filesTable, FT_ALIGN));

    if (hotStat != null) {
        hotStat.numberOfSegmentsTotal = total.segments;
        hotStat.numberofTranslatedSegments = translated.size();
        hotStat.numberOfUniqueSegments = unique.segments;
        hotStat.uniqueCountsByFile.clear();
        for (FileData fd : counts) {
            hotStat.uniqueCountsByFile.put(fd.filename, fd.unique.segments);
        }
    }
    return result.toString();
}

/**
 * Returns the key used to decide whether two entries are the same unique
 * segment: the source text with each protected part replaced by its
 * uniqueness-calculation replacement.
 */
private static String uniquenessKey(SourceTextEntry ste) {
    String src = ste.getSrcText();
    for (ProtectedPart pp : ste.getProtectedParts()) {
        src = src.replace(pp.getTextInSourceSegment(), pp.getReplacementUniquenessCalculation());
    }
    return src;
}
Also used : ProtectedPart(org.omegat.core.data.ProtectedPart) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) FileInfo(org.omegat.core.data.IProject.FileInfo) SourceTextEntry(org.omegat.core.data.SourceTextEntry) HashMap(java.util.HashMap) Map(java.util.Map) TMXEntry(org.omegat.core.data.TMXEntry) HashSet(java.util.HashSet)

Example 18 with SourceTextEntry

use of org.omegat.core.data.SourceTextEntry in project omegat by omegat-org.

the class FindMatches method search.

/**
 * Searches for matches of {@code searchText} and returns them as a list of
 * {@link NearString}.
 * <p>
 * Candidates are fed to {@code processEntry} from these sources, in order:
 * default translations of the project (only when the project supports
 * them), multiple translations of the project, the external translation
 * memories, the source-file translations of all project entries and, when a
 * separate segment matcher is configured, a translation glued together from
 * per-segment matches.
 * <p>
 * The method keeps its working data ({@code result}, {@code srcText},
 * {@code removedText} and the token arrays) in variables declared outside
 * this method, and the returned list is that {@code result} variable.
 *
 * @param searchText
 *            source text to find matches for
 * @param requiresTranslation
 *            when {@code true}, candidates whose translation is
 *            {@code null} are skipped
 * @param fillSimilarityData
 *            when {@code true}, {@code attr} of every returned match is
 *            filled with similarity data
 * @param stop
 *            handle passed to {@code checkStopped} while iterating
 * @return the matches collected for {@code searchText}
 * @throws StoppedException
 *             declared by this method; presumably raised by
 *             {@code checkStopped} when the search is cancelled
 */
public List<NearString> search(final String searchText, final boolean requiresTranslation, final boolean fillSimilarityData, final IStopped stop) throws StoppedException {
    result = new ArrayList<>(OConsts.MAX_NEAR_STRINGS + 1);
    srcText = searchText;
    removedText = "";
    // When a remove pattern is configured, strip everything it matches from
    // the text that is compared; the stripped fragments are concatenated
    // into removedText.
    if (removePattern != null) {
        StringBuilder removedBuffer = new StringBuilder();
        Matcher removeMatcher = removePattern.matcher(srcText);
        while (removeMatcher.find()) {
            removedBuffer.append(removeMatcher.group());
        }
        srcText = removeMatcher.replaceAll("");
        removedText = removedBuffer.toString();
    }
    // get tokens for original string
    strTokensStem = tokenizeStem(srcText);
    strTokensNoStem = tokenizeNoStem(srcText);
    strTokensAll = tokenizeAll(srcText);
    // travel by project entries, including orphaned
    if (project.getProjectProperties().isSupportDefaultTranslations()) {
        project.iterateByDefaultTranslations(new DefaultTranslationsIterator() {

            public void iterate(String source, TMXEntry trans) {
                checkStopped(stop);
                if (!searchExactlyTheSame && source.equals(searchText)) {
                    // skip original==original entry comparison
                    return;
                }
                if (requiresTranslation && trans.translation == null) {
                    return;
                }
                String fileName = project.isOrphaned(source) ? ORPHANED_FILE_NAME : null;
                processEntry(null, source, trans.translation, NearString.MATCH_SOURCE.MEMORY, false, 0, fileName, trans.creator, trans.creationDate, trans.changer, trans.changeDate, null);
            }
        });
    }
    project.iterateByMultipleTranslations(new MultipleTranslationsIterator() {

        public void iterate(EntryKey source, TMXEntry trans) {
            checkStopped(stop);
            if (!searchExactlyTheSame && source.sourceText.equals(searchText)) {
                // skip original==original entry comparison
                return;
            }
            if (requiresTranslation && trans.translation == null) {
                return;
            }
            String fileName = project.isOrphaned(source) ? ORPHANED_FILE_NAME : null;
            processEntry(source, source.sourceText, trans.translation, NearString.MATCH_SOURCE.MEMORY, false, 0, fileName, trans.creator, trans.creationDate, trans.changer, trans.changeDate, null);
        }
    });
    // travel by translation memories
    for (Map.Entry<String, ExternalTMX> en : project.getTransMemories().entrySet()) {
        // The penalty is parsed from the memory's key when SEARCH_FOR_PENALTY
        // matches it; otherwise no penalty is applied.
        int penalty = 0;
        Matcher matcher = SEARCH_FOR_PENALTY.matcher(en.getKey());
        if (matcher.find()) {
            penalty = Integer.parseInt(matcher.group(1));
        }
        for (PrepareTMXEntry tmen : en.getValue().getEntries()) {
            checkStopped(stop);
            if (tmen.source == null) {
                // Not all TMX entries have a source; in that case there can be no meaningful match, so skip.
                continue;
            }
            if (requiresTranslation && tmen.translation == null) {
                continue;
            }
            processEntry(null, tmen.source, tmen.translation, NearString.MATCH_SOURCE.TM, false, penalty, en.getKey(), tmen.creator, tmen.creationDate, tmen.changer, tmen.changeDate, tmen.otherProperties);
        }
    }
    // travel by all entries for check source file translations
    for (SourceTextEntry ste : project.getAllEntries()) {
        checkStopped(stop);
        if (ste.getSourceTranslation() != null) {
            processEntry(ste.getKey(), ste.getSrcText(), ste.getSourceTranslation(), NearString.MATCH_SOURCE.MEMORY, ste.isSourceTranslationFuzzy(), 0, ste.getKey().file, "", 0, "", 0, null);
        }
    }
    if (separateSegmentMatcher != null) {
        // split paragraph even when segmentation disabled, then find matches for every segment
        List<StringBuilder> spaces = new ArrayList<>();
        List<Rule> brules = new ArrayList<>();
        Language sourceLang = project.getProjectProperties().getSourceLanguage();
        Language targetLang = project.getProjectProperties().getTargetLanguage();
        List<String> segments = Core.getSegmenter().segment(sourceLang, srcText, spaces, brules);
        if (segments.size() > 1) {
            List<String> fsrc = new ArrayList<>(segments.size());
            List<String> ftrans = new ArrayList<>(segments.size());
            // multiple segments; iterate without an index so that the loop
            // cannot overflow a narrow counter on very long paragraphs
            for (String onesrc : segments) {
                // find match for separate segment
                List<NearString> segmentMatch = separateSegmentMatcher.search(onesrc, requiresTranslation, false, stop);
                if (!segmentMatch.isEmpty() && segmentMatch.get(0).scores[0].score >= SUBSEGMENT_MATCH_THRESHOLD) {
                    fsrc.add(segmentMatch.get(0).source);
                    ftrans.add(segmentMatch.get(0).translation);
                } else {
                    // keep positions aligned with the segment list
                    fsrc.add("");
                    ftrans.add("");
                }
            }
            // glue found sources
            String foundSrc = Core.getSegmenter().glue(sourceLang, sourceLang, fsrc, spaces, brules);
            // glue found translations
            String foundTrans = Core.getSegmenter().glue(sourceLang, targetLang, ftrans, spaces, brules);
            processEntry(null, foundSrc, foundTrans, NearString.MATCH_SOURCE.TM, false, 0, "", "", 0, "", 0, null);
        }
    }
    if (fillSimilarityData) {
        // fill similarity data only for result
        for (NearString near : result) {
            // fix for bug 1586397
            byte[] similarityData = FuzzyMatcher.buildSimilarityData(strTokensAll, tokenizeAll(near.source));
            near.attr = similarityData;
        }
    }
    return result;
}
Also used : EntryKey(org.omegat.core.data.EntryKey) Matcher(java.util.regex.Matcher) FuzzyMatcher(org.omegat.core.matching.FuzzyMatcher) ArrayList(java.util.ArrayList) NearString(org.omegat.core.matching.NearString) NearString(org.omegat.core.matching.NearString) Language(org.omegat.util.Language) SourceTextEntry(org.omegat.core.data.SourceTextEntry) DefaultTranslationsIterator(org.omegat.core.data.IProject.DefaultTranslationsIterator) ExternalTMX(org.omegat.core.data.ExternalTMX) Rule(org.omegat.core.segmentation.Rule) HashMap(java.util.HashMap) Map(java.util.Map) PrepareTMXEntry(org.omegat.core.data.PrepareTMXEntry) PrepareTMXEntry(org.omegat.core.data.PrepareTMXEntry) TMXEntry(org.omegat.core.data.TMXEntry) MultipleTranslationsIterator(org.omegat.core.data.IProject.MultipleTranslationsIterator)

Example 19 with SourceTextEntry

use of org.omegat.core.data.SourceTextEntry in project omegat by omegat-org.

the class ScriptsMonitor method hookEntryEvent.

/**
 * (Re)installs the entry event listener that runs the event scripts.
 * <p>
 * A previously installed listener is unregistered first, the scripts for
 * the {@code ENTRY_ACTIVATED} and {@code NEW_FILE} events are added, and a
 * fresh listener is registered with {@code CoreEvents}. Each callback passes
 * its argument to the scripts through a single-entry binding map.
 */
private void hookEntryEvent() {
    // Make sure an older listener does not stay registered alongside the new one.
    if (m_entryEventListener != null) {
        CoreEvents.unregisterEntryEventListener(m_entryEventListener);
    }
    addEventScripts(EventType.ENTRY_ACTIVATED);
    addEventScripts(EventType.NEW_FILE);
    IEntryEventListener listener = new IEntryEventListener() {

        @Override
        public void onNewFile(String activeFileName) {
            // Scripts see the file name under the key "activeFileName".
            HashMap<String, Object> scriptVars = new HashMap<>();
            scriptVars.put("activeFileName", activeFileName);
            for (ScriptItem script : m_eventsScript.get(EventType.NEW_FILE)) {
                m_scriptingWindow.executeScriptFile(script, scriptVars);
            }
        }

        @Override
        public void onEntryActivated(SourceTextEntry newEntry) {
            // Scripts see the activated entry under the key "newEntry".
            HashMap<String, Object> scriptVars = new HashMap<>();
            scriptVars.put("newEntry", newEntry);
            for (ScriptItem script : m_eventsScript.get(EventType.ENTRY_ACTIVATED)) {
                m_scriptingWindow.executeScriptFile(script, scriptVars);
            }
        }
    };
    m_entryEventListener = listener;
    CoreEvents.registerEntryEventListener(listener);
}
Also used : HashMap(java.util.HashMap) SourceTextEntry(org.omegat.core.data.SourceTextEntry) IEntryEventListener(org.omegat.core.events.IEntryEventListener)

Example 20 with SourceTextEntry

use of org.omegat.core.data.SourceTextEntry in project omegat by omegat-org.

the class TestFilterBase method checkMultiStart.

/**
 * Starts a multi-entry check on the given file info: remembers {@code fi},
 * resets {@code fiCount} to zero and asserts, for every entry of the file,
 * that the entry key's file equals {@code file} and that the key's source
 * text equals the entry's source text.
 *
 * @param fi
 *            file info whose entries are checked
 * @param file
 *            file name expected in every entry key
 */
protected void checkMultiStart(IProject.FileInfo fi, String file) {
    this.fi = fi;
    fiCount = 0;
    for (SourceTextEntry entry : fi.entries) {
        // Each entry must belong to the expected file ...
        assertEquals(file, entry.getKey().file);
        // ... and its key must carry the same source text as the entry itself.
        assertEquals(entry.getSrcText(), entry.getKey().sourceText);
    }
}
Also used : SourceTextEntry(org.omegat.core.data.SourceTextEntry)

Aggregations

SourceTextEntry (org.omegat.core.data.SourceTextEntry): 32; Point (java.awt.Point): 14; TMXEntry (org.omegat.core.data.TMXEntry): 12; PrepareTMXEntry (org.omegat.core.data.PrepareTMXEntry): 9; ArrayList (java.util.ArrayList): 8; IProject (org.omegat.core.data.IProject): 7; HashMap (java.util.HashMap): 6; Map (java.util.Map): 6; EntryKey (org.omegat.core.data.EntryKey): 5; FileInfo (org.omegat.core.data.IProject.FileInfo): 5; Language (org.omegat.util.Language): 5; List (java.util.List): 4; Before (org.junit.Before): 4; ProtectedPart (org.omegat.core.data.ProtectedPart): 4; NearString (org.omegat.core.matching.NearString): 4; Cursor (java.awt.Cursor): 3; File (java.io.File): 2; IOException (java.io.IOException): 2; HashSet (java.util.HashSet): 2; Locale (java.util.Locale): 2