use of org.omegat.core.data.SourceTextEntry in project omegat by omegat-org.
the class CalcMatchStatistics method forFile.
/**
 * Computes the match statistics for the entries of one project file.
 * <p>
 * Every entry is classified in this order: entries that already have a
 * translation are added as exact matches; untranslated entries whose source
 * text is in {@code alreadyProcessedInProject} are added as repetitions from
 * other files; untranslated entries whose source text is in
 * {@code alreadyProcessedInFile} are added as repetitions within this file.
 * Remaining entries are collected and handed to {@code calcSimilarity}, whose
 * result (when present) is merged into the returned counts.
 * <p>
 * Side effects: {@code alreadyProcessedInFile} is cleared on entry and then
 * filled with the source texts first seen in this file; those texts are added
 * to {@code alreadyProcessedInProject} before the similarity calculation.
 *
 * @param fi the file whose entries are examined
 * @return the match counts accumulated for this file
 */
MatchStatCounts forFile(IProject.FileInfo fi) {
    final MatchStatCounts fileCounts = new MatchStatCounts();
    alreadyProcessedInFile.clear();
    final List<SourceTextEntry> firstOccurrences = new ArrayList<>();

    // Walk through every segment of the file.
    for (SourceTextEntry entry : fi.entries) {
        checkInterrupted();
        final StatCount entryCount = new StatCount(entry);
        final String sourceText = entry.getSrcText();
        final boolean seenInThisFile = alreadyProcessedInFile.contains(sourceText);
        final boolean seenInEarlierFiles = alreadyProcessedInProject.contains(sourceText);

        if (Core.getProject().getTranslationInfo(entry).isTranslated()) {
            // A translated segment is reported as "Exact matched".
            fileCounts.addExact(entryCount);
            // NOTE(review): this branch bumps the counter directly while the
            // repetition branches go through entryProcessed() - confirm intended.
            treated++;
            continue;
        }
        if (seenInEarlierFiles) {
            // Same source text was first seen in another file.
            fileCounts.addRepetitionFromOtherFiles(entryCount);
            entryProcessed();
            continue;
        }
        if (seenInThisFile) {
            // Same source text was first seen earlier in this file.
            fileCounts.addRepetitionWithinThisFile(entryCount);
            entryProcessed();
            continue;
        }
        // First occurrence of this source text: keep it for the similarity pass.
        firstOccurrences.add(entry);
        alreadyProcessedInFile.add(sourceText);
    }

    alreadyProcessedInProject.addAll(alreadyProcessedInFile);
    calcSimilarity(firstOccurrences).ifPresent(fileCounts::addCounts);
    return fileCounts;
}
use of org.omegat.core.data.SourceTextEntry in project omegat by omegat-org.
the class CalcStandardStatistics method buildProjectStats.
/**
 * Builds a text report with statistics about the project: the total word and
 * character counts of the project, the total number of unique segments, plus
 * the details for each file.
 * <p>
 * Two segments are treated as the same segment when their source texts are
 * equal after every protected part has been substituted by its
 * uniqueness-calculation replacement.
 *
 * @param project
 *            project whose entries, files and translations are read
 * @param hotStat
 *            if not {@code null}, receives the segment totals and the
 *            per-file unique segment counts
 * @param callback
 *            if not {@code null}, receives the data of the project table and
 *            of the per-file table
 * @return the report text: the project table followed by the per-file table
 */
public static String buildProjectStats(final IProject project, final StatisticsInfo hotStat,
        final StatisticsPanel callback) {
    StatCount total = new StatCount();
    StatCount remaining = new StatCount();
    StatCount unique = new StatCount();
    StatCount remainingUnique = new StatCount();

    // Pass 1: keep the first entry seen for every uniqueness key and remember
    // which keys have at least one translated entry.
    Map<String, SourceTextEntry> uniqueSegment = new HashMap<>();
    Set<String> translated = new HashSet<>();
    for (SourceTextEntry ste : project.getAllEntries()) {
        String src = sourceTextForUniqueness(ste);
        uniqueSegment.putIfAbsent(src, ste);
        if (project.getTranslationInfo(ste).isTranslated()) {
            translated.add(src);
        }
    }

    // Pass 2: project-wide unique / remaining-unique counts, together with the
    // number of distinct files that contribute to each of them.
    Set<String> filesUnique = new HashSet<>();
    Set<String> filesRemainingUnique = new HashSet<>();
    for (Map.Entry<String, SourceTextEntry> en : uniqueSegment.entrySet()) {
        SourceTextEntry representative = en.getValue();
        /* Number of words and chars calculated without all tags and protected parts. */
        StatCount count = new StatCount(representative);
        // add to unique
        unique.add(count);
        filesUnique.add(representative.getKey().file);
        // add to unique remaining: no entry with this key is translated
        if (!translated.contains(en.getKey())) {
            remainingUnique.add(count);
            filesRemainingUnique.add(representative.getKey().file);
        }
    }
    unique.addFiles(filesUnique.size());
    remainingUnique.addFiles(filesRemainingUnique.size());

    // Pass 3: project totals and per-file numbers. A segment is "unique" only
    // for the file in which its key shows up first, in project file order.
    List<FileData> counts = new ArrayList<>();
    Set<String> seenSegments = new HashSet<>();
    for (FileInfo file : project.getProjectFiles()) {
        FileData numbers = new FileData();
        numbers.filename = file.filePath;
        counts.add(numbers);
        // A file is counted once in total/remaining if it contributes at least
        // one (untranslated) entry.
        boolean fileHasEntries = false;
        boolean fileHasRemaining = false;
        for (SourceTextEntry ste : file.entries) {
            String src = sourceTextForUniqueness(ste);
            /* Number of words and chars calculated without all tags and protected parts. */
            StatCount count = new StatCount(ste);
            boolean untranslated = !project.getTranslationInfo(ste).isTranslated();

            // add to total
            total.add(count);
            fileHasEntries = true;
            // add to remaining
            if (untranslated) {
                remaining.add(count);
                fileHasRemaining = true;
            }
            // add to file's info
            numbers.total.add(count);
            // Set.add returns true only for the first occurrence of the key
            if (seenSegments.add(src)) {
                numbers.unique.add(count);
                if (untranslated) {
                    numbers.remainingUnique.add(count);
                }
            }
            if (untranslated) {
                numbers.remaining.add(count);
            }
        }
        total.addFiles(fileHasEntries ? 1 : 0);
        remaining.addFiles(fileHasRemaining ? 1 : 0);
    }

    StringBuilder result = new StringBuilder();
    result.append(OStrings.getString("CT_STATS_Project_Statistics"));
    result.append("\n\n");
    String[][] headerTable = calcHeaderTable(new StatCount[] { total, remaining, unique, remainingUnique });
    if (callback != null) {
        callback.setProjectTableData(HT_HEADERS, headerTable);
    }
    result.append(TextUtil.showTextTable(HT_HEADERS, headerTable, HT_ALIGN));
    result.append("\n\n");

    // STATISTICS BY FILE
    result.append(OStrings.getString("CT_STATS_FILE_Statistics"));
    result.append("\n\n");
    String[][] filesTable = calcFilesTable(project.getProjectProperties(), counts);
    if (callback != null) {
        callback.setFilesTableData(FT_HEADERS, filesTable);
    }
    result.append(TextUtil.showTextTable(FT_HEADERS, filesTable, FT_ALIGN));

    if (hotStat != null) {
        hotStat.numberOfSegmentsTotal = total.segments;
        // This is the number of distinct uniqueness keys that have a
        // translated entry, not the number of translated entries.
        hotStat.numberofTranslatedSegments = translated.size();
        hotStat.numberOfUniqueSegments = unique.segments;
        hotStat.uniqueCountsByFile.clear();
        for (FileData fd : counts) {
            hotStat.uniqueCountsByFile.put(fd.filename, fd.unique.segments);
        }
    }
    return result.toString();
}

/**
 * Returns the source text of the entry with each protected part replaced by
 * its uniqueness-calculation replacement. The result is the key used to
 * decide whether two entries are the same segment.
 */
private static String sourceTextForUniqueness(SourceTextEntry ste) {
    String src = ste.getSrcText();
    for (ProtectedPart pp : ste.getProtectedParts()) {
        src = src.replace(pp.getTextInSourceSegment(), pp.getReplacementUniquenessCalculation());
    }
    return src;
}
Aggregations