Search in sources :

Example 1 with ExternalTMX

use of org.omegat.core.data.ExternalTMX in project omegat by omegat-org.

the class Searcher method searchProject.

/**
 * Runs the configured search over up to three sources, in this order: the
 * project memory (including orphaned entries unless excluded), the external
 * translation memories, and the glossary. Each source is only visited when its
 * flag on {@code m_searchExpression} is set.
 *
 * <p>Every candidate is handed to {@code checkEntry}; the hit counter
 * {@code m_numFinds} is reset at the start and compared against
 * {@code expression.numberOfResults} to stop early. {@code checkStop} is polled
 * between candidates so the search can be interrupted.
 */
private void searchProject() {
    // start counting hits from zero
    m_numFinds = 0;

    // ---- project memory ----
    if (m_searchExpression.memory) {
        IProject entrySource = m_project;
        int entryIndex = 0;
        while (entryIndex < m_project.getAllEntries().size()) {
            // hit limit reached: abandon the whole search
            if (m_numFinds >= expression.numberOfResults) {
                return;
            }
            // look up the entry and whatever translation info the project holds for it
            SourceTextEntry sourceEntry = entrySource.getAllEntries().get(entryIndex);
            TMXEntry translationInfo = m_project.getTranslationInfo(sourceEntry);
            checkEntry(sourceEntry.getSrcText(), translationInfo.translation, translationInfo.note,
                    sourceEntry.getComment(), translationInfo, entryIndex, null);
            checkStop.checkInterrupted();
            entryIndex++;
        }

        // orphaned translations, first the default ones, then the alternative ones
        if (!m_searchExpression.excludeOrphans) {
            m_project.iterateByDefaultTranslations(new IProject.DefaultTranslationsIterator() {

                final String orphanLabel = OStrings.getString("CT_ORPHAN_STRINGS");

                public void iterate(String source, TMXEntry en) {
                    // hit limit reached: this only leaves the callback, not searchProject()
                    boolean limitReached = m_numFinds >= expression.numberOfResults;
                    if (limitReached) {
                        return;
                    }
                    checkStop.checkInterrupted();
                    if (!m_project.isOrphaned(source)) {
                        return;
                    }
                    checkEntry(en.source, en.translation, en.note, null, en, ENTRY_ORIGIN_ORPHAN, orphanLabel);
                }
            });
            m_project.iterateByMultipleTranslations(new IProject.MultipleTranslationsIterator() {

                final String orphanLabel = OStrings.getString("CT_ORPHAN_STRINGS");

                public void iterate(EntryKey source, TMXEntry en) {
                    // hit limit reached: this only leaves the callback, not searchProject()
                    boolean limitReached = m_numFinds >= expression.numberOfResults;
                    if (limitReached) {
                        return;
                    }
                    checkStop.checkInterrupted();
                    if (!m_project.isOrphaned(source)) {
                        return;
                    }
                    checkEntry(en.source, en.translation, en.note, null, en, ENTRY_ORIGIN_ORPHAN, orphanLabel);
                }
            });
        }
    }

    // ---- external translation memories ----
    // Skipped entirely when the search filters on author or on a date bound.
    if (m_searchExpression.tm
            && !(expression.searchAuthor || expression.searchDateAfter || expression.searchDateBefore)) {
        for (Map.Entry<String, ExternalTMX> externalTm : m_project.getTransMemories().entrySet()) {
            final String tmFileName = externalTm.getKey();
            boolean keepGoing = searchEntries(externalTm.getValue().getEntries(), tmFileName);
            if (!keepGoing) {
                return;
            }
            checkStop.checkInterrupted();
        }
        for (Map.Entry<Language, ProjectTMX> otherLangTm : m_project.getOtherTargetLanguageTMs().entrySet()) {
            final Language tmLanguage = otherLangTm.getKey();
            // defaults first, then alternatives; a false result from either ends the search
            if (!searchEntriesAlternative(otherLangTm.getValue().getDefaults(), tmLanguage.getLanguage())
                    || !searchEntriesAlternative(otherLangTm.getValue().getAlternatives(), tmLanguage.getLanguage())) {
                return;
            }
            checkStop.checkInterrupted();
        }
    }

    // ---- glossary ----
    if (m_searchExpression.glossary) {
        String glossaryLabel = OStrings.getString("SW_GLOSSARY_RESULT");
        for (GlossaryEntry term : Core.getGlossaryManager().getLocalEntries()) {
            checkEntry(term.getSrcText(), term.getLocText(), null, null, null, ENTRY_ORIGIN_GLOSSARY, glossaryLabel);
            // here the hit limit is tested after the entry has been checked
            if (m_numFinds >= expression.numberOfResults) {
                return;
            }
            checkStop.checkInterrupted();
        }
    }
}
Also used : ProjectTMX(org.omegat.core.data.ProjectTMX) EntryKey(org.omegat.core.data.EntryKey) IProject(org.omegat.core.data.IProject) Language(org.omegat.util.Language) GlossaryEntry(org.omegat.gui.glossary.GlossaryEntry) SourceTextEntry(org.omegat.core.data.SourceTextEntry) ExternalTMX(org.omegat.core.data.ExternalTMX) HashMap(java.util.HashMap) Map(java.util.Map) PrepareTMXEntry(org.omegat.core.data.PrepareTMXEntry) TMXEntry(org.omegat.core.data.TMXEntry)

Example 2 with ExternalTMX

use of org.omegat.core.data.ExternalTMX in project omegat by omegat-org.

the class FindMatches method search.

/**
 * Collects near matches for {@code searchText} from the project and returns them.
 *
 * <p>The candidates are visited in this order:
 * <ol>
 * <li>default translations of the project, only when the project properties
 * report support for default translations;</li>
 * <li>multiple (per-key) translations of the project;</li>
 * <li>entries of every external translation memory, with a penalty parsed from
 * the memory's key when {@code SEARCH_FOR_PENALTY} matches it;</li>
 * <li>source entries that carry a source translation;</li>
 * <li>when {@code separateSegmentMatcher} is set and the text splits into more
 * than one segment, a match glued together from the best per-segment matches.</li>
 * </ol>
 * Each candidate is passed to {@code processEntry}. The {@code result} field is
 * replaced by a fresh list at the start of every call.
 *
 * @param searchText          text to find matches for
 * @param requiresTranslation when {@code true}, candidates whose translation is
 *                            {@code null} are skipped
 * @param fillSimilarityData  when {@code true}, the {@code attr} of every returned
 *                            match is filled with similarity data computed against
 *                            the tokens of the (possibly stripped) search text
 * @param stop                stop indicator handed to {@code checkStopped} before
 *                            each candidate
 * @return the {@code result} list populated during this call
 * @throws StoppedException presumably raised by {@code checkStopped} when
 *                          {@code stop} signals cancellation (confirm against
 *                          {@code checkStopped})
 */
public List<NearString> search(final String searchText, final boolean requiresTranslation, final boolean fillSimilarityData, final IStopped stop) throws StoppedException {
    result = new ArrayList<>(OConsts.MAX_NEAR_STRINGS + 1);
    srcText = searchText;
    removedText = "";
    // Strip everything matched by removePattern from the text before it is
    // tokenized, and keep the stripped pieces, concatenated, in removedText.
    if (removePattern != null) {
        StringBuilder removedBuffer = new StringBuilder();
        Matcher removeMatcher = removePattern.matcher(srcText);
        while (removeMatcher.find()) {
            removedBuffer.append(removeMatcher.group());
        }
        srcText = removeMatcher.replaceAll("");
        removedText = removedBuffer.toString();
    }
    // get tokens for original string
    strTokensStem = tokenizeStem(srcText);
    strTokensNoStem = tokenizeNoStem(srcText);
    strTokensAll = tokenizeAll(srcText);
    // travel by project entries, including orphaned
    if (project.getProjectProperties().isSupportDefaultTranslations()) {
        project.iterateByDefaultTranslations(new DefaultTranslationsIterator() {

            @Override
            public void iterate(String source, TMXEntry trans) {
                checkStopped(stop);
                if (!searchExactlyTheSame && source.equals(searchText)) {
                    // skip original==original entry comparison
                    return;
                }
                if (requiresTranslation && trans.translation == null) {
                    return;
                }
                String fileName = project.isOrphaned(source) ? ORPHANED_FILE_NAME : null;
                processEntry(null, source, trans.translation, NearString.MATCH_SOURCE.MEMORY, false, 0, fileName, trans.creator, trans.creationDate, trans.changer, trans.changeDate, null);
            }
        });
    }
    project.iterateByMultipleTranslations(new MultipleTranslationsIterator() {

        @Override
        public void iterate(EntryKey source, TMXEntry trans) {
            checkStopped(stop);
            if (!searchExactlyTheSame && source.sourceText.equals(searchText)) {
                // skip original==original entry comparison
                return;
            }
            if (requiresTranslation && trans.translation == null) {
                return;
            }
            String fileName = project.isOrphaned(source) ? ORPHANED_FILE_NAME : null;
            processEntry(source, source.sourceText, trans.translation, NearString.MATCH_SOURCE.MEMORY, false, 0, fileName, trans.creator, trans.creationDate, trans.changer, trans.changeDate, null);
        }
    });
    // travel by translation memories
    for (Map.Entry<String, ExternalTMX> en : project.getTransMemories().entrySet()) {
        // the penalty, if any, is encoded in the memory's key (first capture group)
        int penalty = 0;
        Matcher matcher = SEARCH_FOR_PENALTY.matcher(en.getKey());
        if (matcher.find()) {
            penalty = Integer.parseInt(matcher.group(1));
        }
        for (PrepareTMXEntry tmen : en.getValue().getEntries()) {
            checkStopped(stop);
            if (tmen.source == null) {
                // Not all TMX entries have a source; in that case there can be no meaningful match, so skip.
                continue;
            }
            if (requiresTranslation && tmen.translation == null) {
                continue;
            }
            processEntry(null, tmen.source, tmen.translation, NearString.MATCH_SOURCE.TM, false, penalty, en.getKey(), tmen.creator, tmen.creationDate, tmen.changer, tmen.changeDate, tmen.otherProperties);
        }
    }
    // travel by all entries for check source file translations
    for (SourceTextEntry ste : project.getAllEntries()) {
        checkStopped(stop);
        if (ste.getSourceTranslation() != null) {
            processEntry(ste.getKey(), ste.getSrcText(), ste.getSourceTranslation(), NearString.MATCH_SOURCE.MEMORY, ste.isSourceTranslationFuzzy(), 0, ste.getKey().file, "", 0, "", 0, null);
        }
    }
    if (separateSegmentMatcher != null) {
        // split paragraph even when segmentation disabled, then find matches for every segment
        List<StringBuilder> spaces = new ArrayList<>();
        List<Rule> brules = new ArrayList<>();
        Language sourceLang = project.getProjectProperties().getSourceLanguage();
        Language targetLang = project.getProjectProperties().getTargetLanguage();
        List<String> segments = Core.getSegmenter().segment(sourceLang, srcText, spaces, brules);
        if (segments.size() > 1) {
            List<String> fsrc = new ArrayList<>(segments.size());
            List<String> ftrans = new ArrayList<>(segments.size());
            // multiple segments; iterate directly instead of with a short counter,
            // which would wrap to a negative index past 32767 segments
            for (String onesrc : segments) {
                // find match for separate segment
                List<NearString> segmentMatch = separateSegmentMatcher.search(onesrc, requiresTranslation, false, stop);
                if (!segmentMatch.isEmpty() && segmentMatch.get(0).scores[0].score >= SUBSEGMENT_MATCH_THRESHOLD) {
                    fsrc.add(segmentMatch.get(0).source);
                    ftrans.add(segmentMatch.get(0).translation);
                } else {
                    // keep positions aligned with the segment list
                    fsrc.add("");
                    ftrans.add("");
                }
            }
            // glue found sources
            String foundSrc = Core.getSegmenter().glue(sourceLang, sourceLang, fsrc, spaces, brules);
            // glue found translations
            String foundTrans = Core.getSegmenter().glue(sourceLang, targetLang, ftrans, spaces, brules);
            processEntry(null, foundSrc, foundTrans, NearString.MATCH_SOURCE.TM, false, 0, "", "", 0, "", 0, null);
        }
    }
    if (fillSimilarityData) {
        // fill similarity data only for result
        for (NearString near : result) {
            // fix for bug 1586397
            byte[] similarityData = FuzzyMatcher.buildSimilarityData(strTokensAll, tokenizeAll(near.source));
            near.attr = similarityData;
        }
    }
    return result;
}
Also used : EntryKey(org.omegat.core.data.EntryKey) Matcher(java.util.regex.Matcher) FuzzyMatcher(org.omegat.core.matching.FuzzyMatcher) ArrayList(java.util.ArrayList) NearString(org.omegat.core.matching.NearString) Language(org.omegat.util.Language) SourceTextEntry(org.omegat.core.data.SourceTextEntry) DefaultTranslationsIterator(org.omegat.core.data.IProject.DefaultTranslationsIterator) ExternalTMX(org.omegat.core.data.ExternalTMX) Rule(org.omegat.core.segmentation.Rule) HashMap(java.util.HashMap) Map(java.util.Map) PrepareTMXEntry(org.omegat.core.data.PrepareTMXEntry) TMXEntry(org.omegat.core.data.TMXEntry) MultipleTranslationsIterator(org.omegat.core.data.IProject.MultipleTranslationsIterator)

Example 3 with ExternalTMX

use of org.omegat.core.data.ExternalTMX in project omegat by omegat-org.

the class POFilterTest method testLoad.

/**
 * Loads the multi-entry PO fixture with the {@code skipHeader} option enabled
 * and checks two things: the sequence of source entries reported by the filter
 * (source text, context and comment), and the two entries that end up in the
 * reference translation memory.
 */
@Test
public void testLoad() throws Exception {
    final String poPath = "test/data/filters/po/file-POFilter-multiple.po";
    Map<String, String> filterOptions = new TreeMap<String, String>();
    filterOptions.put("skipHeader", "true");
    TestFileInfo loaded = loadSourceFiles(new PoFilter(), poPath, filterOptions);

    // Expected comment of the first entry: three labelled sections, each
    // followed by its lines and a blank line.
    String translatorSection = OStrings.getString("POFILTER_TRANSLATOR_COMMENTS") + "\n"
            + "A valid comment\nAnother valid comment\n\n";
    String extractedSection = OStrings.getString("POFILTER_EXTRACTED_COMMENTS") + "\n"
            + "Some extracted comments\nMore extracted comments\n\n";
    String referencesSection = OStrings.getString("POFILTER_REFERENCES") + "\n"
            + "/my/source/file\n/my/source/file2\n\n";
    String expectedComment = translatorSection + extractedSection + referencesSection;

    // Entries must come out in exactly this order.
    checkMultiStart(loaded, poPath);
    checkMulti("source1", null, "some context", null, null, expectedComment);
    checkMulti("source2", null, "", null, null, null);
    checkMulti("source3", null, "", null, null, null);
    checkMulti("source1", null, "", null, null, null);
    checkMulti("source1", null, "other context", null, null, null);
    checkMulti("source4", null, "one more context", null, null, null);
    // plural forms carry an indexed context and a generated comment
    String firstPluralComment = StringUtil.format(OStrings.getString("POFILTER_PLURAL_FORM_COMMENT"), 1) + "\n";
    checkMulti("source4", null, "one more context[1]", null, null, firstPluralComment);
    String secondPluralComment = StringUtil.format(OStrings.getString("POFILTER_PLURAL_FORM_COMMENT"), 2) + "\n";
    checkMulti("source4", null, "one more context[2]", null, null, secondPluralComment);
    checkMulti("source5", null, "", null, null, null);
    checkMulti("source6", null, "", null, null, null);
    checkMultiEnd();

    // Two entries are expected in the reference memory instead.
    ExternalTMX referenceMemory = loaded.referenceEntries;
    assertEquals(2, referenceMemory.getEntries().size());

    PrepareTMXEntry firstReference = referenceMemory.getEntries().get(0);
    assertEquals("True fuzzy!", firstReference.source);
    assertEquals("trans5", firstReference.translation);

    PrepareTMXEntry secondReference = referenceMemory.getEntries().get(1);
    assertEquals("True fuzzy 2!", secondReference.source);
    assertEquals("trans6", secondReference.translation);
}
Also used : PoFilter(org.omegat.filters2.po.PoFilter) ExternalTMX(org.omegat.core.data.ExternalTMX) TreeMap(java.util.TreeMap) PrepareTMXEntry(org.omegat.core.data.PrepareTMXEntry) Test(org.junit.Test)

Aggregations

ExternalTMX (org.omegat.core.data.ExternalTMX): 3, PrepareTMXEntry (org.omegat.core.data.PrepareTMXEntry): 3, HashMap (java.util.HashMap): 2, Map (java.util.Map): 2, EntryKey (org.omegat.core.data.EntryKey): 2, SourceTextEntry (org.omegat.core.data.SourceTextEntry): 2, TMXEntry (org.omegat.core.data.TMXEntry): 2, Language (org.omegat.util.Language): 2, ArrayList (java.util.ArrayList): 1, TreeMap (java.util.TreeMap): 1, Matcher (java.util.regex.Matcher): 1, Test (org.junit.Test): 1, IProject (org.omegat.core.data.IProject): 1, DefaultTranslationsIterator (org.omegat.core.data.IProject.DefaultTranslationsIterator): 1, MultipleTranslationsIterator (org.omegat.core.data.IProject.MultipleTranslationsIterator): 1, ProjectTMX (org.omegat.core.data.ProjectTMX): 1, FuzzyMatcher (org.omegat.core.matching.FuzzyMatcher): 1, NearString (org.omegat.core.matching.NearString): 1, Rule (org.omegat.core.segmentation.Rule): 1, PoFilter (org.omegat.filters2.po.PoFilter): 1