Search in sources :

Example 36 with SourceTextEntry

use of org.omegat.core.data.SourceTextEntry in project omegat by omegat-org.

the class CalcMatchStatistics method forFile.

/**
 * Computes the match statistics for one project file.
 * <p>
 * Every entry of the file is sorted into one of four buckets: already
 * translated (counted as an exact match), a repetition of a source text seen
 * in a previously processed file, a repetition of a source text seen earlier
 * in this same file, or a first occurrence. First occurrences are collected
 * and handed to {@code calcSimilarity}; whatever counts it yields are merged
 * into the result.
 * <p>
 * Side effects: resets and refills {@code alreadyProcessedInFile}, and adds
 * this file's source texts to {@code alreadyProcessedInProject} once the
 * file's entries have been scanned.
 *
 * @param fi the file whose entries are analysed
 * @return the accumulated counts for this file
 */
MatchStatCounts forFile(IProject.FileInfo fi) {
    MatchStatCounts fileCounts = new MatchStatCounts();
    alreadyProcessedInFile.clear();
    List<SourceTextEntry> firstOccurrences = new ArrayList<>();

    for (SourceTextEntry entry : fi.entries) {
        checkInterrupted();
        StatCount entryCount = new StatCount(entry);
        String sourceText = entry.getSrcText();
        // Both lookups are taken before classification, so an entry never
        // counts as a repetition of itself.
        boolean seenInThisFile = alreadyProcessedInFile.contains(sourceText);
        boolean seenInEarlierFiles = alreadyProcessedInProject.contains(sourceText);

        if (Core.getProject().getTranslationInfo(entry).isTranslated()) {
            // A translated segment is reported as an "Exact matched" one.
            fileCounts.addExact(entryCount);
            treated++;
            continue;
        }
        if (seenInEarlierFiles) {
            // Repetition of a segment from a previously processed file;
            // this takes precedence over a repetition within this file.
            fileCounts.addRepetitionFromOtherFiles(entryCount);
            entryProcessed();
            continue;
        }
        if (seenInThisFile) {
            // Repetition of a segment met earlier in this file.
            fileCounts.addRepetitionWithinThisFile(entryCount);
            entryProcessed();
            continue;
        }
        // First occurrence: defer to the similarity calculation below.
        firstOccurrences.add(entry);
        alreadyProcessedInFile.add(sourceText);
    }

    alreadyProcessedInProject.addAll(alreadyProcessedInFile);
    calcSimilarity(firstOccurrences).ifPresent(fileCounts::addCounts);
    return fileCounts;
}
Also used : SourceTextEntry(org.omegat.core.data.SourceTextEntry) ArrayList(java.util.ArrayList)

Example 37 with SourceTextEntry

use of org.omegat.core.data.SourceTextEntry in project omegat by omegat-org.

the class CalcStandardStatistics method buildProjectStats.

/**
 * Builds the textual statistics report for a project: project-wide totals
 * (all segments, remaining segments, unique segments, remaining unique
 * segments) followed by the same figures broken down per file.
 * <p>
 * Uniqueness is decided on the source text after each protected part has
 * been replaced by its uniqueness-calculation replacement. Words and
 * characters, however, are always counted from the entry itself.
 *
 * @param project  the project to analyse
 * @param hotStat  if not {@code null}, receives the total segment count, the
 *                 number of distinct translated uniqueness keys, the unique
 *                 segment count and the unique segment count per file
 * @param callback if not {@code null}, receives the project table data and
 *                 the per-file table data
 * @return the report text
 */
public static String buildProjectStats(final IProject project, final StatisticsInfo hotStat, final StatisticsPanel callback) {
    StatCount total = new StatCount();
    StatCount remaining = new StatCount();
    StatCount unique = new StatCount();
    StatCount remainingUnique = new StatCount();

    // Pass 1: remember the first entry met for every uniqueness key, and
    // which keys have at least one translated entry.
    Map<String, SourceTextEntry> firstEntryByKey = new HashMap<>();
    Set<String> translatedKeys = new HashSet<>();
    for (SourceTextEntry entry : project.getAllEntries()) {
        String key = entry.getSrcText();
        for (ProtectedPart part : entry.getProtectedParts()) {
            key = key.replace(part.getTextInSourceSegment(), part.getReplacementUniquenessCalculation());
        }
        firstEntryByKey.putIfAbsent(key, entry);
        if (project.getTranslationInfo(entry).isTranslated()) {
            translatedKeys.add(key);
        }
    }

    // Pass 2: project-wide unique / remaining-unique counts, together with
    // the number of distinct files the representative entries come from.
    Set<String> filesWithUnique = new HashSet<>();
    Set<String> filesWithRemainingUnique = new HashSet<>();
    for (Map.Entry<String, SourceTextEntry> pair : firstEntryByKey.entrySet()) {
        SourceTextEntry representative = pair.getValue();
        /* Number of words and chars calculated without all tags and protected parts. */
        StatCount segmentCount = new StatCount(representative);
        unique.add(segmentCount);
        String owningFile = representative.getKey().file;
        filesWithUnique.add(owningFile);
        if (translatedKeys.contains(pair.getKey())) {
            continue;
        }
        remainingUnique.add(segmentCount);
        filesWithRemainingUnique.add(owningFile);
    }
    unique.addFiles(filesWithUnique.size());
    remainingUnique.addFiles(filesWithRemainingUnique.size());

    // Pass 3: per-file figures plus the project-wide total / remaining.
    List<FileData> perFile = new ArrayList<>();
    Set<String> keysAlreadyCounted = new HashSet<>();
    for (FileInfo file : project.getProjectFiles()) {
        FileData fileData = new FileData();
        fileData.filename = file.filePath;
        perFile.add(fileData);

        boolean fileHasEntries = false;
        boolean fileHasUntranslated = false;
        for (SourceTextEntry entry : file.entries) {
            String key = entry.getSrcText();
            for (ProtectedPart part : entry.getProtectedParts()) {
                key = key.replace(part.getTextInSourceSegment(), part.getReplacementUniquenessCalculation());
            }
            /* Number of words and chars calculated without all tags and protected parts. */
            StatCount segmentCount = new StatCount(entry);
            boolean untranslated = !project.getTranslationInfo(entry).isTranslated();

            total.add(segmentCount);
            fileHasEntries = true;
            if (untranslated) {
                remaining.add(segmentCount);
                fileHasUntranslated = true;
            }

            fileData.total.add(segmentCount);
            // A key is credited as "unique" only to the first file (in
            // project order) in which it appears.
            if (keysAlreadyCounted.add(key)) {
                fileData.unique.add(segmentCount);
                if (untranslated) {
                    fileData.remainingUnique.add(segmentCount);
                }
            }
            if (untranslated) {
                fileData.remaining.add(segmentCount);
            }
        }
        // A file counts once if it has any entry, and once towards
        // "remaining" if it has any untranslated entry.
        total.addFiles(fileHasEntries ? 1 : 0);
        remaining.addFiles(fileHasUntranslated ? 1 : 0);
    }

    StringBuilder report = new StringBuilder();

    // Project-wide section.
    report.append(OStrings.getString("CT_STATS_Project_Statistics")).append("\n\n");
    String[][] projectTable = calcHeaderTable(new StatCount[] { total, remaining, unique, remainingUnique });
    if (callback != null) {
        callback.setProjectTableData(HT_HEADERS, projectTable);
    }
    report.append(TextUtil.showTextTable(HT_HEADERS, projectTable, HT_ALIGN)).append("\n\n");

    // Per-file section.
    report.append(OStrings.getString("CT_STATS_FILE_Statistics")).append("\n\n");
    String[][] filesTable = calcFilesTable(project.getProjectProperties(), perFile);
    if (callback != null) {
        callback.setFilesTableData(FT_HEADERS, filesTable);
    }
    report.append(TextUtil.showTextTable(FT_HEADERS, filesTable, FT_ALIGN));

    if (hotStat != null) {
        hotStat.numberOfSegmentsTotal = total.segments;
        // Size of the set of translated uniqueness keys, not a per-entry count.
        hotStat.numberofTranslatedSegments = translatedKeys.size();
        hotStat.numberOfUniqueSegments = unique.segments;
        hotStat.uniqueCountsByFile.clear();
        perFile.forEach(fd -> hotStat.uniqueCountsByFile.put(fd.filename, fd.unique.segments));
    }
    return report.toString();
}
Also used : ProtectedPart(org.omegat.core.data.ProtectedPart) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) FileInfo(org.omegat.core.data.IProject.FileInfo) SourceTextEntry(org.omegat.core.data.SourceTextEntry) Map(java.util.Map) TMXEntry(org.omegat.core.data.TMXEntry) HashSet(java.util.HashSet)

Aggregations

SourceTextEntry (org.omegat.core.data.SourceTextEntry)37 Point (java.awt.Point)14 TMXEntry (org.omegat.core.data.TMXEntry)14 ArrayList (java.util.ArrayList)9 PrepareTMXEntry (org.omegat.core.data.PrepareTMXEntry)9 IProject (org.omegat.core.data.IProject)8 HashMap (java.util.HashMap)7 Map (java.util.Map)7 EntryKey (org.omegat.core.data.EntryKey)7 FileInfo (org.omegat.core.data.IProject.FileInfo)6 ProtectedPart (org.omegat.core.data.ProtectedPart)6 NearString (org.omegat.core.matching.NearString)5 Language (org.omegat.util.Language)5 List (java.util.List)4 Cursor (java.awt.Cursor)3 HashSet (java.util.HashSet)3 Before (org.junit.Before)3 File (java.io.File)2 IOException (java.io.IOException)2 Locale (java.util.Locale)2