Example use of org.omegat.core.data.ExternalTMX in the project omegat by omegat-org.
Class Searcher, method searchProject.
/**
 * Runs the configured search over the project.
 * <p>
 * Depending on the flags of {@code m_searchExpression}, this looks through the project
 * entries (optionally including orphaned translations), the external and
 * other-target-language translation memories, and the local glossary entries. Every
 * candidate is handed to {@code checkEntry}. The hit counter {@code m_numFinds} is reset at
 * the start, and the search stops as soon as it reaches
 * {@code expression.numberOfResults}.
 * <p>
 * {@code checkStop.checkInterrupted()} is called regularly so that a running search can be
 * cancelled.
 */
private void searchProject() {
    // reset the number of search hits
    m_numFinds = 0;

    // search the Memory, if requested
    if (m_searchExpression.memory) {
        // search through all project entries
        List<SourceTextEntry> allEntries = m_project.getAllEntries();
        for (int i = 0; i < allEntries.size(); i++) {
            // stop searching if the max. nr of hits has been reached
            if (m_numFinds >= expression.numberOfResults) {
                return;
            }
            // get the source and translation of the next entry
            SourceTextEntry ste = allEntries.get(i);
            TMXEntry te = m_project.getTranslationInfo(ste);
            checkEntry(ste.getSrcText(), te.translation, te.note, ste.getComment(), te, i, null);
            checkStop.checkInterrupted();
        }

        // search in orphaned
        if (!m_searchExpression.excludeOrphans) {
            // label under which orphaned hits are reported
            final String orphanFile = OStrings.getString("CT_ORPHAN_STRINGS");

            m_project.iterateByDefaultTranslations(new IProject.DefaultTranslationsIterator() {
                @Override
                public void iterate(String source, TMXEntry en) {
                    // stop searching if the max. nr of hits has been reached;
                    // returning here only skips this entry, the iteration itself goes on
                    if (m_numFinds >= expression.numberOfResults) {
                        return;
                    }
                    checkStop.checkInterrupted();
                    if (m_project.isOrphaned(source)) {
                        checkEntry(en.source, en.translation, en.note, null, en, ENTRY_ORIGIN_ORPHAN,
                                orphanFile);
                    }
                }
            });
            m_project.iterateByMultipleTranslations(new IProject.MultipleTranslationsIterator() {
                @Override
                public void iterate(EntryKey source, TMXEntry en) {
                    // stop searching if the max. nr of hits has been reached;
                    // returning here only skips this entry, the iteration itself goes on
                    if (m_numFinds >= expression.numberOfResults) {
                        return;
                    }
                    checkStop.checkInterrupted();
                    if (m_project.isOrphaned(source)) {
                        checkEntry(en.source, en.translation, en.note, null, en, ENTRY_ORIGIN_ORPHAN,
                                orphanFile);
                    }
                }
            });
        }
    }

    // search the TM, if requested
    if (m_searchExpression.tm) {
        // TM entries are only searched when no author/date criterion is active.
        // NOTE(review): presumably because that metadata is not available for TM
        // entries - confirm.
        if (!expression.searchAuthor && !expression.searchDateAfter && !expression.searchDateBefore) {
            for (Map.Entry<String, ExternalTMX> tmEn : m_project.getTransMemories().entrySet()) {
                final String fileTM = tmEn.getKey();
                // a false result aborts the whole search
                if (!searchEntries(tmEn.getValue().getEntries(), fileTM)) {
                    return;
                }
                checkStop.checkInterrupted();
            }
            for (Map.Entry<Language, ProjectTMX> tmEn : m_project.getOtherTargetLanguageTMs().entrySet()) {
                final Language langTM = tmEn.getKey();
                if (!searchEntriesAlternative(tmEn.getValue().getDefaults(), langTM.getLanguage())) {
                    return;
                }
                if (!searchEntriesAlternative(tmEn.getValue().getAlternatives(), langTM.getLanguage())) {
                    return;
                }
                checkStop.checkInterrupted();
            }
        }
    }

    // search the glossary, if requested
    if (m_searchExpression.glossary) {
        String intro = OStrings.getString("SW_GLOSSARY_RESULT");
        List<GlossaryEntry> entries = Core.getGlossaryManager().getLocalEntries();
        for (GlossaryEntry en : entries) {
            // stop searching if the max. nr of hits has been reached; checked before
            // the entry is examined so that a limit already hit earlier (e.g. while
            // scanning orphans) is honoured here as well
            if (m_numFinds >= expression.numberOfResults) {
                return;
            }
            checkEntry(en.getSrcText(), en.getLocText(), null, null, null, ENTRY_ORIGIN_GLOSSARY, intro);
            checkStop.checkInterrupted();
        }
    }
}
Example use of org.omegat.core.data.ExternalTMX in the project omegat by omegat-org.
Class FindMatches, method search.
/**
 * Collects near matches for the given text from the project's translations, the external
 * translation memories and the source-file translations.
 * <p>
 * The search text is first stripped of everything matching {@code removePattern} (when one
 * is set) and tokenized. Each candidate entry is then passed to {@code processEntry}, which
 * is expected to fill {@code result}. When a {@code separateSegmentMatcher} is configured
 * and the text splits into more than one segment, every segment is matched on its own and
 * the best sub-matches are glued into one additional candidate.
 *
 * @param searchText
 *            text for which matches are searched
 * @param requiresTranslation
 *            when {@code true}, entries whose translation is {@code null} are skipped
 * @param fillSimilarityData
 *            when {@code true}, similarity data is computed and stored in {@code attr} of
 *            every returned match
 * @param stop
 *            handle passed to {@code checkStopped} before each candidate is processed
 * @return the list of matches collected in {@code result}
 * @throws StoppedException
 *             declared by this method; presumably raised via {@code checkStopped} when the
 *             search is cancelled (helper not visible here)
 */
public List<NearString> search(final String searchText, final boolean requiresTranslation,
        final boolean fillSimilarityData, final IStopped stop) throws StoppedException {
    result = new ArrayList<>(OConsts.MAX_NEAR_STRINGS + 1);
    srcText = searchText;
    removedText = "";

    // cut everything matching removePattern out of the text to be matched and
    // remember the removed fragments
    if (removePattern != null) {
        StringBuilder removedBuffer = new StringBuilder();
        Matcher removeMatcher = removePattern.matcher(srcText);
        while (removeMatcher.find()) {
            removedBuffer.append(removeMatcher.group());
        }
        srcText = removeMatcher.replaceAll("");
        removedText = removedBuffer.toString();
    }

    // get tokens for original string
    strTokensStem = tokenizeStem(srcText);
    strTokensNoStem = tokenizeNoStem(srcText);
    strTokensAll = tokenizeAll(srcText);

    // travel by project entries, including orphaned
    if (project.getProjectProperties().isSupportDefaultTranslations()) {
        project.iterateByDefaultTranslations(new DefaultTranslationsIterator() {
            @Override
            public void iterate(String source, TMXEntry trans) {
                checkStopped(stop);
                if (!searchExactlyTheSame && source.equals(searchText)) {
                    // skip original==original entry comparison
                    return;
                }
                if (requiresTranslation && trans.translation == null) {
                    return;
                }
                String fileName = project.isOrphaned(source) ? ORPHANED_FILE_NAME : null;
                processEntry(null, source, trans.translation, NearString.MATCH_SOURCE.MEMORY, false, 0,
                        fileName, trans.creator, trans.creationDate, trans.changer, trans.changeDate, null);
            }
        });
    }
    project.iterateByMultipleTranslations(new MultipleTranslationsIterator() {
        @Override
        public void iterate(EntryKey source, TMXEntry trans) {
            checkStopped(stop);
            if (!searchExactlyTheSame && source.sourceText.equals(searchText)) {
                // skip original==original entry comparison
                return;
            }
            if (requiresTranslation && trans.translation == null) {
                return;
            }
            String fileName = project.isOrphaned(source) ? ORPHANED_FILE_NAME : null;
            processEntry(source, source.sourceText, trans.translation, NearString.MATCH_SOURCE.MEMORY, false,
                    0, fileName, trans.creator, trans.creationDate, trans.changer, trans.changeDate, null);
        }
    });

    // travel by translation memories
    for (Map.Entry<String, ExternalTMX> en : project.getTransMemories().entrySet()) {
        // a penalty may be encoded in the key of the TM
        int penalty = 0;
        Matcher matcher = SEARCH_FOR_PENALTY.matcher(en.getKey());
        if (matcher.find()) {
            penalty = Integer.parseInt(matcher.group(1));
        }
        for (PrepareTMXEntry tmen : en.getValue().getEntries()) {
            checkStopped(stop);
            if (tmen.source == null) {
                // Not all TMX entries have a source; in that case there can be no meaningful match, so skip.
                continue;
            }
            if (requiresTranslation && tmen.translation == null) {
                continue;
            }
            processEntry(null, tmen.source, tmen.translation, NearString.MATCH_SOURCE.TM, false, penalty,
                    en.getKey(), tmen.creator, tmen.creationDate, tmen.changer, tmen.changeDate,
                    tmen.otherProperties);
        }
    }

    // travel by all entries for check source file translations
    for (SourceTextEntry ste : project.getAllEntries()) {
        checkStopped(stop);
        if (ste.getSourceTranslation() != null) {
            processEntry(ste.getKey(), ste.getSrcText(), ste.getSourceTranslation(),
                    NearString.MATCH_SOURCE.MEMORY, ste.isSourceTranslationFuzzy(), 0, ste.getKey().file, "",
                    0, "", 0, null);
        }
    }

    if (separateSegmentMatcher != null) {
        // split paragraph even when segmentation disabled, then find matches for every segment
        List<StringBuilder> spaces = new ArrayList<>();
        List<Rule> brules = new ArrayList<>();
        Language sourceLang = project.getProjectProperties().getSourceLanguage();
        Language targetLang = project.getProjectProperties().getTargetLanguage();
        List<String> segments = Core.getSegmenter().segment(sourceLang, srcText, spaces, brules);
        if (segments.size() > 1) {
            List<String> fsrc = new ArrayList<>(segments.size());
            List<String> ftrans = new ArrayList<>(segments.size());
            // multiple segments; iterate directly over the list instead of using a
            // 'short' counter, which would overflow for more than Short.MAX_VALUE segments
            for (String onesrc : segments) {
                // find match for separate segment
                List<NearString> segmentMatch = separateSegmentMatcher.search(onesrc, requiresTranslation,
                        false, stop);
                if (!segmentMatch.isEmpty()
                        && segmentMatch.get(0).scores[0].score >= SUBSEGMENT_MATCH_THRESHOLD) {
                    NearString best = segmentMatch.get(0);
                    fsrc.add(best.source);
                    ftrans.add(best.translation);
                } else {
                    // keep the positions aligned with the segments when nothing good enough was found
                    fsrc.add("");
                    ftrans.add("");
                }
            }
            // glue found sources
            String foundSrc = Core.getSegmenter().glue(sourceLang, sourceLang, fsrc, spaces, brules);
            // glue found translations
            String foundTrans = Core.getSegmenter().glue(sourceLang, targetLang, ftrans, spaces, brules);
            processEntry(null, foundSrc, foundTrans, NearString.MATCH_SOURCE.TM, false, 0, "", "", 0, "", 0,
                    null);
        }
    }

    if (fillSimilarityData) {
        // fill similarity data only for result
        for (NearString near : result) {
            // fix for bug 1586397
            byte[] similarityData = FuzzyMatcher.buildSimilarityData(strTokensAll, tokenizeAll(near.source));
            near.attr = similarityData;
        }
    }
    return result;
}
Example use of org.omegat.core.data.ExternalTMX in the project omegat by omegat-org.
Class POFilterTest, method testLoad.
/**
 * Loads the multi-entry PO sample with the {@code skipHeader} option enabled, then verifies
 * the sequence of parsed entries (source, context/id and comment) and the two reference TM
 * entries produced from the file.
 */
@Test
public void testLoad() throws Exception {
    final String poFile = "test/data/filters/po/file-POFilter-multiple.po";

    Map<String, String> filterOptions = new TreeMap<>();
    filterOptions.put("skipHeader", "true");
    TestFileInfo info = loadSourceFiles(new PoFilter(), poFile, filterOptions);

    // expected comment of the first entry: translator comments, extracted comments
    // and references, each introduced by its localized heading
    StringBuilder expected = new StringBuilder();
    expected.append(OStrings.getString("POFILTER_TRANSLATOR_COMMENTS")).append("\n");
    expected.append("A valid comment\nAnother valid comment\n\n");
    expected.append(OStrings.getString("POFILTER_EXTRACTED_COMMENTS")).append("\n");
    expected.append("Some extracted comments\nMore extracted comments\n\n");
    expected.append(OStrings.getString("POFILTER_REFERENCES")).append("\n");
    expected.append("/my/source/file\n/my/source/file2\n\n");
    String firstEntryComment = expected.toString();

    // parsed entries, in file order
    checkMultiStart(info, poFile);
    checkMulti("source1", null, "some context", null, null, firstEntryComment);
    checkMulti("source2", null, "", null, null, null);
    checkMulti("source3", null, "", null, null, null);
    checkMulti("source1", null, "", null, null, null);
    checkMulti("source1", null, "other context", null, null, null);
    checkMulti("source4", null, "one more context", null, null, null);

    // plural forms carry a generated comment with the form number
    String pluralTemplate = OStrings.getString("POFILTER_PLURAL_FORM_COMMENT");
    checkMulti("source4", null, "one more context[1]", null, null,
            StringUtil.format(pluralTemplate, 1) + "\n");
    checkMulti("source4", null, "one more context[2]", null, null,
            StringUtil.format(pluralTemplate, 2) + "\n");

    checkMulti("source5", null, "", null, null, null);
    checkMulti("source6", null, "", null, null, null);
    checkMultiEnd();

    // reference TM entries collected while loading
    ExternalTMX referenceTm = info.referenceEntries;
    assertEquals(2, referenceTm.getEntries().size());

    PrepareTMXEntry firstFuzzy = referenceTm.getEntries().get(0);
    assertEquals("True fuzzy!", firstFuzzy.source);
    assertEquals("trans5", firstFuzzy.translation);

    PrepareTMXEntry secondFuzzy = referenceTm.getEntries().get(1);
    assertEquals("True fuzzy 2!", secondFuzzy.source);
    assertEquals("trans6", secondFuzzy.translation);
}
Aggregations