use of org.omegat.core.data.SourceTextEntry in project omegat by omegat-org.
the class CalcMatchStatistics method calcTotal.
/**
 * Computes match statistics for every entry returned by the current project.
 * <p>
 * Entries that already have a translation are counted as exact matches, entries
 * whose source text was already seen during this run are counted as repetitions,
 * and the remaining entries are handed to {@code calcSimilarity}; when that call
 * yields a value it is merged into the totals.
 *
 * @param outData
 *            when {@code true}, an interim table (before similarity calculation)
 *            and the final table are shown, and the final text is written to the
 *            match statistics file whose name is then passed to the callback
 * @return the accumulated match counts
 */
MatchStatCounts calcTotal(boolean outData) {
    MatchStatCounts totals = new MatchStatCounts();
    alreadyProcessedInProject.clear();
    final List<SourceTextEntry> pending = new ArrayList<>();

    // Walk every segment of every file in the project.
    for (SourceTextEntry entry : Core.getProject().getAllEntries()) {
        checkInterrupted();
        StatCount entryCount = new StatCount(entry);
        // add() returns false when the source text was registered earlier in this run
        boolean seenBefore = !alreadyProcessedInProject.add(entry.getSrcText());

        if (Core.getProject().getTranslationInfo(entry).isTranslated()) {
            // A translated segment is counted as "Exact matched".
            totals.addExact(entryCount);
            entryProcessed();
            continue;
        }
        if (seenBefore) {
            // Untranslated, but the same source text was met before: repetition.
            totals.addRepetition(entryCount);
            entryProcessed();
            continue;
        }
        // First occurrence of an untranslated source text: defer to similarity calculation.
        pending.add(entry);
    }

    if (outData) {
        // Interim output, produced before the similarity calculation runs.
        String[][] interimTable = totals.calcTableWithoutPercentage(rowsTotal);
        String interimText = TextUtil.showTextTable(header, interimTable, align);
        showText(interimText);
        showTable(interimTable);
    }

    calcSimilarity(pending).ifPresent(totals::addCounts);

    if (outData) {
        String[][] finalTable = totals.calcTable(rowsTotal, i -> i != 1);
        String finalText = TextUtil.showTextTable(header, finalTable, align);
        showText(finalText);
        showTable(finalTable);

        String statsFile = Core.getProject().getProjectProperties().getProjectInternal()
                + OConsts.STATS_MATCH_FILENAME;
        Statistics.writeStat(statsFile, finalText);
        callback.setDataFile(statsFile);
    }
    return totals;
}
use of org.omegat.core.data.SourceTextEntry in project omegat by omegat-org.
the class CalcStandardStatistics method buildProjectStats.
/**
 * Builds the textual statistics report for a project: a summary table with
 * total, remaining, unique and remaining-unique counts, followed by a table
 * with the counts of each project file.
 * <p>
 * Two segments are treated as the same unique segment when their source texts
 * are equal after every protected part has been replaced by its
 * uniqueness-calculation replacement.
 *
 * @param project
 *            project whose entries and files are counted
 * @param hotStat
 *            optional holder that receives segment counters; ignored when
 *            {@code null}
 * @param callback
 *            optional panel that receives the table data; ignored when
 *            {@code null}
 * @return the report as plain text
 */
public static String buildProjectStats(final IProject project, final StatisticsInfo hotStat, final StatisticsPanel callback) {
    StatCount total = new StatCount();
    StatCount remaining = new StatCount();
    StatCount unique = new StatCount();
    StatCount remainingUnique = new StatCount();

    // Pass 1: remember the first entry of every unique source text and which
    // unique source texts have a translation.
    Map<String, SourceTextEntry> firstEntryBySource = new HashMap<String, SourceTextEntry>();
    Set<String> translated = new HashSet<String>();
    for (SourceTextEntry entry : project.getAllEntries()) {
        String normalized = entry.getSrcText();
        for (ProtectedPart part : entry.getProtectedParts()) {
            normalized = normalized.replace(part.getTextInSourceSegment(),
                    part.getReplacementUniquenessCalculation());
        }
        if (!firstEntryBySource.containsKey(normalized)) {
            firstEntryBySource.put(normalized, entry);
        }
        if (project.getTranslationInfo(entry).isTranslated()) {
            translated.add(normalized);
        }
    }

    // Pass 2: accumulate the project-wide unique / remaining-unique counters.
    Set<String> filesUnique = new HashSet<String>();
    Set<String> filesRemainingUnique = new HashSet<String>();
    for (Map.Entry<String, SourceTextEntry> pair : firstEntryBySource.entrySet()) {
        SourceTextEntry representative = pair.getValue();
        /* Number of words and chars calculated without all tags and protected parts. */
        StatCount count = new StatCount(representative);
        String owningFile = representative.getKey().file;

        unique.add(count);
        filesUnique.add(owningFile);
        if (!translated.contains(pair.getKey())) {
            remainingUnique.add(count);
            filesRemainingUnique.add(owningFile);
        }
    }
    unique.addFiles(filesUnique.size());
    remainingUnique.addFiles(filesRemainingUnique.size());

    // Pass 3: per-file counters plus the project-wide total / remaining counters.
    List<FileData> counts = new ArrayList<FileData>();
    Set<String> seenSources = new HashSet<String>();
    for (FileInfo file : project.getProjectFiles()) {
        FileData numbers = new FileData();
        numbers.filename = file.filePath;
        counts.add(numbers);

        boolean hasEntries = false;
        boolean hasUntranslated = false;
        for (SourceTextEntry entry : file.entries) {
            String normalized = entry.getSrcText();
            for (ProtectedPart part : entry.getProtectedParts()) {
                normalized = normalized.replace(part.getTextInSourceSegment(),
                        part.getReplacementUniquenessCalculation());
            }
            /* Number of words and chars calculated without all tags and protected parts. */
            StatCount count = new StatCount(entry);

            total.add(count);
            numbers.total.add(count);
            hasEntries = true;

            boolean untranslated = !project.getTranslationInfo(entry).isTranslated();
            // add() returns true only the first time this source text is met across all files
            boolean firstOccurrence = seenSources.add(normalized);

            if (firstOccurrence) {
                numbers.unique.add(count);
            }
            if (untranslated) {
                remaining.add(count);
                numbers.remaining.add(count);
                hasUntranslated = true;
                if (firstOccurrence) {
                    numbers.remainingUnique.add(count);
                }
            }
        }
        // A file contributes to a file counter only when it has at least one matching entry.
        total.addFiles(hasEntries ? 1 : 0);
        remaining.addFiles(hasUntranslated ? 1 : 0);
    }

    StringBuilder report = new StringBuilder();
    report.append(OStrings.getString("CT_STATS_Project_Statistics")).append("\n\n");

    String[][] headerTable = calcHeaderTable(new StatCount[] { total, remaining, unique, remainingUnique });
    if (callback != null) {
        callback.setProjectTableData(HT_HEADERS, headerTable);
    }
    report.append(TextUtil.showTextTable(HT_HEADERS, headerTable, HT_ALIGN)).append("\n\n");

    // STATISTICS BY FILE
    report.append(OStrings.getString("CT_STATS_FILE_Statistics")).append("\n\n");
    String[][] filesTable = calcFilesTable(project.getProjectProperties(), counts);
    if (callback != null) {
        callback.setFilesTableData(FT_HEADERS, filesTable);
    }
    report.append(TextUtil.showTextTable(FT_HEADERS, filesTable, FT_ALIGN));

    if (hotStat != null) {
        hotStat.numberOfSegmentsTotal = total.segments;
        hotStat.numberofTranslatedSegments = translated.size();
        hotStat.numberOfUniqueSegments = unique.segments;
        hotStat.uniqueCountsByFile.clear();
        for (FileData perFile : counts) {
            hotStat.uniqueCountsByFile.put(perFile.filename, perFile.unique.segments);
        }
    }
    return report.toString();
}
use of org.omegat.core.data.SourceTextEntry in project omegat by omegat-org.
the class FindMatches method search.
/**
 * Looks for fuzzy matches of the given text in the project translations, the
 * external translation memories and the translations carried by source files.
 * <p>
 * Every candidate is handed to {@code processEntry}; the list stored in the
 * {@code result} field is returned (presumably filled by {@code processEntry} -
 * that method is not visible here, confirm).
 *
 * @param searchText
 *            text to find matches for
 * @param requiresTranslation
 *            when {@code true}, candidates without a translation are skipped
 * @param fillSimilarityData
 *            when {@code true}, {@code attr} of every returned match is filled with
 *            the similarity data built by {@code FuzzyMatcher.buildSimilarityData}
 * @param stop
 *            handle passed to {@code checkStopped} before each candidate is examined
 * @return the list of matches held in the {@code result} field
 * @throws StoppedException
 *             declared by this method; presumably raised by {@code checkStopped}
 *             when the search is cancelled - confirm against that method
 */
public List<NearString> search(final String searchText, final boolean requiresTranslation, final boolean fillSimilarityData, final IStopped stop) throws StoppedException {
    result = new ArrayList<>(OConsts.MAX_NEAR_STRINGS + 1);
    srcText = searchText;
    removedText = "";
    // Strip everything matched by removePattern from the text to be matched and
    // keep the stripped pieces, concatenated, in removedText.
    if (removePattern != null) {
        StringBuilder removedBuffer = new StringBuilder();
        Matcher removeMatcher = removePattern.matcher(srcText);
        while (removeMatcher.find()) {
            removedBuffer.append(removeMatcher.group());
        }
        srcText = removeMatcher.replaceAll("");
        removedText = removedBuffer.toString();
    }
    // get tokens for original string
    strTokensStem = tokenizeStem(srcText);
    strTokensNoStem = tokenizeNoStem(srcText);
    strTokensAll = tokenizeAll(srcText);
    // travel by project entries, including orphaned
    if (project.getProjectProperties().isSupportDefaultTranslations()) {
        project.iterateByDefaultTranslations(new DefaultTranslationsIterator() {
            @Override
            public void iterate(String source, TMXEntry trans) {
                checkStopped(stop);
                if (!searchExactlyTheSame && source.equals(searchText)) {
                    // skip original==original entry comparison
                    return;
                }
                if (requiresTranslation && trans.translation == null) {
                    return;
                }
                String fileName = project.isOrphaned(source) ? ORPHANED_FILE_NAME : null;
                processEntry(null, source, trans.translation, NearString.MATCH_SOURCE.MEMORY, false, 0, fileName, trans.creator, trans.creationDate, trans.changer, trans.changeDate, null);
            }
        });
    }
    project.iterateByMultipleTranslations(new MultipleTranslationsIterator() {
        @Override
        public void iterate(EntryKey source, TMXEntry trans) {
            checkStopped(stop);
            if (!searchExactlyTheSame && source.sourceText.equals(searchText)) {
                // skip original==original entry comparison
                return;
            }
            if (requiresTranslation && trans.translation == null) {
                return;
            }
            String fileName = project.isOrphaned(source) ? ORPHANED_FILE_NAME : null;
            processEntry(source, source.sourceText, trans.translation, NearString.MATCH_SOURCE.MEMORY, false, 0, fileName, trans.creator, trans.creationDate, trans.changer, trans.changeDate, null);
        }
    });
    // travel by translation memories
    for (Map.Entry<String, ExternalTMX> en : project.getTransMemories().entrySet()) {
        // The penalty is taken from the memory's key when SEARCH_FOR_PENALTY matches it.
        int penalty = 0;
        Matcher matcher = SEARCH_FOR_PENALTY.matcher(en.getKey());
        if (matcher.find()) {
            penalty = Integer.parseInt(matcher.group(1));
        }
        for (PrepareTMXEntry tmen : en.getValue().getEntries()) {
            checkStopped(stop);
            if (tmen.source == null) {
                // Not all TMX entries have a source; in that case there can be no meaningful match, so skip.
                continue;
            }
            if (requiresTranslation && tmen.translation == null) {
                continue;
            }
            processEntry(null, tmen.source, tmen.translation, NearString.MATCH_SOURCE.TM, false, penalty, en.getKey(), tmen.creator, tmen.creationDate, tmen.changer, tmen.changeDate, tmen.otherProperties);
        }
    }
    // travel by all entries for check source file translations
    for (SourceTextEntry ste : project.getAllEntries()) {
        checkStopped(stop);
        if (ste.getSourceTranslation() != null) {
            processEntry(ste.getKey(), ste.getSrcText(), ste.getSourceTranslation(), NearString.MATCH_SOURCE.MEMORY, ste.isSourceTranslationFuzzy(), 0, ste.getKey().file, "", 0, "", 0, null);
        }
    }
    if (separateSegmentMatcher != null) {
        // split paragraph even when segmentation disabled, then find matches for every segment
        List<StringBuilder> spaces = new ArrayList<>();
        List<Rule> brules = new ArrayList<>();
        Language sourceLang = project.getProjectProperties().getSourceLanguage();
        Language targetLang = project.getProjectProperties().getTargetLanguage();
        List<String> segments = Core.getSegmenter().segment(sourceLang, srcText, spaces, brules);
        if (segments.size() > 1) {
            List<String> fsrc = new ArrayList<>(segments.size());
            List<String> ftrans = new ArrayList<>(segments.size());
            // multiple segments
            // Iterate with for-each: a 'short' index would wrap to a negative value
            // past 32767 segments and make segments.get(i) fail.
            for (String onesrc : segments) {
                // find match for separate segment
                List<NearString> segmentMatch = separateSegmentMatcher.search(onesrc, requiresTranslation, false, stop);
                if (!segmentMatch.isEmpty() && segmentMatch.get(0).scores[0].score >= SUBSEGMENT_MATCH_THRESHOLD) {
                    fsrc.add(segmentMatch.get(0).source);
                    ftrans.add(segmentMatch.get(0).translation);
                } else {
                    // keep the lists aligned with 'segments' so that glue() sees one item per segment
                    fsrc.add("");
                    ftrans.add("");
                }
            }
            // glue found sources
            String foundSrc = Core.getSegmenter().glue(sourceLang, sourceLang, fsrc, spaces, brules);
            // glue found translations
            String foundTrans = Core.getSegmenter().glue(sourceLang, targetLang, ftrans, spaces, brules);
            processEntry(null, foundSrc, foundTrans, NearString.MATCH_SOURCE.TM, false, 0, "", "", 0, "", 0, null);
        }
    }
    if (fillSimilarityData) {
        // fill similarity data only for result
        for (NearString near : result) {
            // fix for bug 1586397
            byte[] similarityData = FuzzyMatcher.buildSimilarityData(strTokensAll, tokenizeAll(near.source));
            near.attr = similarityData;
        }
    }
    return result;
}
use of org.omegat.core.data.SourceTextEntry in project omegat by omegat-org.
the class ScriptsMonitor method hookEntryEvent.
/**
 * Installs the entry event listener that runs the scripts attached to the
 * {@code NEW_FILE} and {@code ENTRY_ACTIVATED} events.
 * <p>
 * A previously installed listener, if any, is unregistered first. The scripts
 * receive the event argument through a binding named {@code activeFileName}
 * or {@code newEntry} respectively.
 */
private void hookEntryEvent() {
    // Unregister the listener installed by an earlier call before replacing it.
    if (m_entryEventListener != null) {
        CoreEvents.unregisterEntryEventListener(m_entryEventListener);
    }

    addEventScripts(EventType.ENTRY_ACTIVATED);
    addEventScripts(EventType.NEW_FILE);

    m_entryEventListener = new IEntryEventListener() {
        @Override
        public void onNewFile(String activeFileName) {
            HashMap<String, Object> scriptBinding = new HashMap<String, Object>();
            scriptBinding.put("activeFileName", activeFileName);
            runScripts(EventType.NEW_FILE, scriptBinding);
        }

        @Override
        public void onEntryActivated(SourceTextEntry newEntry) {
            HashMap<String, Object> scriptBinding = new HashMap<String, Object>();
            scriptBinding.put("newEntry", newEntry);
            runScripts(EventType.ENTRY_ACTIVATED, scriptBinding);
        }

        /** Executes every script stored for the given event type with the given binding. */
        private void runScripts(EventType type, HashMap<String, Object> scriptBinding) {
            for (ScriptItem script : m_eventsScript.get(type)) {
                m_scriptingWindow.executeScriptFile(script, scriptBinding);
            }
        }
    };
    CoreEvents.registerEntryEventListener(m_entryEventListener);
}
use of org.omegat.core.data.SourceTextEntry in project omegat by omegat-org.
the class TestFilterBase method checkMultiStart.
/**
 * Begins a multi-entry check on the given file info: stores it in the
 * {@code fi} field, resets {@code fiCount} to zero and verifies the key of
 * every entry.
 *
 * @param fi
 *            file info whose entries are verified and then kept for later checks
 * @param file
 *            file name that every entry key is expected to carry
 */
protected void checkMultiStart(IProject.FileInfo fi, String file) {
    // NOTE(review): fiCount looks like the position of the next entry to check - confirm.
    fiCount = 0;
    this.fi = fi;
    for (SourceTextEntry entry : fi.entries) {
        // The key must name the expected file ...
        assertEquals(file, entry.getKey().file);
        // ... and must carry the same source text as the entry itself.
        assertEquals(entry.getSrcText(), entry.getKey().sourceText);
    }
}
Aggregations