Search in sources :

Example 16 with Language

use of org.omegat.util.Language in project omegat by omegat-org.

the class SpellChecker method initialize.

/**
 * Sets up the spell checker for the current project's target language, then loads the project's
 * word lists.
 * <p>
 * Candidate codes are tried in order: the locale code as returned (xx_YY), the locale code with
 * '_' replaced by '-' (xx-YY), and finally the bare language code (xx). The first candidate for
 * which {@code initializeWithLanguage} yields a checker is used. If none does, a
 * {@code SpellCheckerDummy} is installed instead and a message is logged.
 */
public void initialize() {
    Language targetLanguage = Core.getProject().getProjectProperties().getTargetLanguage();
    // Full xx_YY
    String underscoreCode = targetLanguage.getLocaleCode();
    // Full xx-YY
    String hyphenCode = targetLanguage.getLocaleCode().replace('_', '-');
    // xx only
    String bareCode = targetLanguage.getLanguageCode();
    // The stream is lazy: later candidates are only attempted when earlier ones yield nothing.
    checker = Stream.of(underscoreCode, hyphenCode, bareCode)
            .map(SpellChecker::initializeWithLanguage)
            .filter(Optional::isPresent)
            .map(Optional::get)
            .findFirst()
            .orElseGet(SpellCheckerDummy::new);
    if (checker instanceof SpellCheckerDummy) {
        Log.log("No spell checker found for language " + targetLanguage);
    }
    loadWordLists();
}
Also used : Language(org.omegat.util.Language) Optional(java.util.Optional)

Example 17 with Language

use of org.omegat.util.Language in project omegat by omegat-org.

the class FindMatches method search.

/**
 * Searches the project for entries similar to {@code searchText} and returns the collected matches.
 * <p>
 * Candidates are fed to {@code processEntry} from these places, in order: the project's default
 * translations (only when the project supports them), its multiple translations, every external
 * translation memory, and every project entry that carries a source translation. When a
 * {@code separateSegmentMatcher} is configured, the text is additionally split into segments,
 * each segment is matched on its own, and the glued result is offered as one more candidate.
 *
 * @param searchText
 *            text to find matches for
 * @param requiresTranslation
 *            if true, candidates whose translation is {@code null} are skipped
 * @param fillSimilarityData
 *            if true, similarity data is computed and stored in {@code attr} of every result
 * @param stop
 *            handed to {@code checkStopped} before each candidate is examined
 * @return the list of matches accumulated in {@code result}
 * @throws StoppedException
 *             presumably raised by {@code checkStopped} when {@code stop} signals cancellation -
 *             confirm against that helper
 */
public List<NearString> search(final String searchText, final boolean requiresTranslation, final boolean fillSimilarityData, final IStopped stop) throws StoppedException {
    result = new ArrayList<>(OConsts.MAX_NEAR_STRINGS + 1);
    srcText = searchText;
    removedText = "";
    // If a removal pattern is configured, strip every match of it from the text being searched
    // and remember the stripped pieces (concatenated) in removedText.
    if (removePattern != null) {
        StringBuilder removedBuffer = new StringBuilder();
        Matcher removeMatcher = removePattern.matcher(srcText);
        while (removeMatcher.find()) {
            removedBuffer.append(removeMatcher.group());
        }
        srcText = removeMatcher.replaceAll("");
        removedText = removedBuffer.toString();
    }
    // get tokens for original string
    strTokensStem = tokenizeStem(srcText);
    strTokensNoStem = tokenizeNoStem(srcText);
    strTokensAll = tokenizeAll(srcText);
    // travel by project entries, including orphaned
    if (project.getProjectProperties().isSupportDefaultTranslations()) {
        project.iterateByDefaultTranslations(new DefaultTranslationsIterator() {

            public void iterate(String source, TMXEntry trans) {
                checkStopped(stop);
                if (!searchExactlyTheSame && source.equals(searchText)) {
                    // skip original==original entry comparison
                    return;
                }
                if (requiresTranslation && trans.translation == null) {
                    return;
                }
                String fileName = project.isOrphaned(source) ? ORPHANED_FILE_NAME : null;
                processEntry(null, source, trans.translation, NearString.MATCH_SOURCE.MEMORY, false, 0, fileName, trans.creator, trans.creationDate, trans.changer, trans.changeDate, null);
            }
        });
    }
    project.iterateByMultipleTranslations(new MultipleTranslationsIterator() {

        public void iterate(EntryKey source, TMXEntry trans) {
            checkStopped(stop);
            if (!searchExactlyTheSame && source.sourceText.equals(searchText)) {
                // skip original==original entry comparison
                return;
            }
            if (requiresTranslation && trans.translation == null) {
                return;
            }
            String fileName = project.isOrphaned(source) ? ORPHANED_FILE_NAME : null;
            processEntry(source, source.sourceText, trans.translation, NearString.MATCH_SOURCE.MEMORY, false, 0, fileName, trans.creator, trans.creationDate, trans.changer, trans.changeDate, null);
        }
    });
    // travel by translation memories
    for (Map.Entry<String, ExternalTMX> en : project.getTransMemories().entrySet()) {
        // A penalty may be encoded in the memory's key; it is extracted with SEARCH_FOR_PENALTY
        // and defaults to 0 when the key does not match.
        int penalty = 0;
        Matcher matcher = SEARCH_FOR_PENALTY.matcher(en.getKey());
        if (matcher.find()) {
            penalty = Integer.parseInt(matcher.group(1));
        }
        for (PrepareTMXEntry tmen : en.getValue().getEntries()) {
            checkStopped(stop);
            if (tmen.source == null) {
                // Not all TMX entries have a source; in that case there can be no meaningful match, so skip.
                continue;
            }
            if (requiresTranslation && tmen.translation == null) {
                continue;
            }
            processEntry(null, tmen.source, tmen.translation, NearString.MATCH_SOURCE.TM, false, penalty, en.getKey(), tmen.creator, tmen.creationDate, tmen.changer, tmen.changeDate, tmen.otherProperties);
        }
    }
    // travel by all entries for check source file translations
    for (SourceTextEntry ste : project.getAllEntries()) {
        checkStopped(stop);
        if (ste.getSourceTranslation() != null) {
            processEntry(ste.getKey(), ste.getSrcText(), ste.getSourceTranslation(), NearString.MATCH_SOURCE.MEMORY, ste.isSourceTranslationFuzzy(), 0, ste.getKey().file, "", 0, "", 0, null);
        }
    }
    if (separateSegmentMatcher != null) {
        // split paragraph even when segmentation disabled, then find matches for every segment
        List<StringBuilder> spaces = new ArrayList<>();
        List<Rule> brules = new ArrayList<>();
        Language sourceLang = project.getProjectProperties().getSourceLanguage();
        Language targetLang = project.getProjectProperties().getTargetLanguage();
        List<String> segments = Core.getSegmenter().segment(sourceLang, srcText, spaces, brules);
        if (segments.size() > 1) {
            List<String> fsrc = new ArrayList<>(segments.size());
            List<String> ftrans = new ArrayList<>(segments.size());
            // multiple segments; iterate directly rather than with a narrow index type that
            // could overflow on very long paragraphs
            for (String onesrc : segments) {
                // find match for separate segment
                List<NearString> segmentMatch = separateSegmentMatcher.search(onesrc, requiresTranslation, false, stop);
                if (!segmentMatch.isEmpty() && segmentMatch.get(0).scores[0].score >= SUBSEGMENT_MATCH_THRESHOLD) {
                    fsrc.add(segmentMatch.get(0).source);
                    ftrans.add(segmentMatch.get(0).translation);
                } else {
                    // keep positions aligned with spaces/brules by adding placeholders
                    fsrc.add("");
                    ftrans.add("");
                }
            }
            // glue found sources
            String foundSrc = Core.getSegmenter().glue(sourceLang, sourceLang, fsrc, spaces, brules);
            // glue found translations
            String foundTrans = Core.getSegmenter().glue(sourceLang, targetLang, ftrans, spaces, brules);
            processEntry(null, foundSrc, foundTrans, NearString.MATCH_SOURCE.TM, false, 0, "", "", 0, "", 0, null);
        }
    }
    if (fillSimilarityData) {
        // fill similarity data only for result
        for (NearString near : result) {
            // fix for bug 1586397
            byte[] similarityData = FuzzyMatcher.buildSimilarityData(strTokensAll, tokenizeAll(near.source));
            near.attr = similarityData;
        }
    }
    return result;
}
Also used : EntryKey(org.omegat.core.data.EntryKey) Matcher(java.util.regex.Matcher) FuzzyMatcher(org.omegat.core.matching.FuzzyMatcher) ArrayList(java.util.ArrayList) NearString(org.omegat.core.matching.NearString) NearString(org.omegat.core.matching.NearString) Language(org.omegat.util.Language) SourceTextEntry(org.omegat.core.data.SourceTextEntry) DefaultTranslationsIterator(org.omegat.core.data.IProject.DefaultTranslationsIterator) ExternalTMX(org.omegat.core.data.ExternalTMX) Rule(org.omegat.core.segmentation.Rule) HashMap(java.util.HashMap) Map(java.util.Map) PrepareTMXEntry(org.omegat.core.data.PrepareTMXEntry) PrepareTMXEntry(org.omegat.core.data.PrepareTMXEntry) TMXEntry(org.omegat.core.data.TMXEntry) MultipleTranslationsIterator(org.omegat.core.data.IProject.MultipleTranslationsIterator)

Example 18 with Language

use of org.omegat.util.Language in project omegat by omegat-org.

the class TeamTool method initTeamProject.

/**
 * Utility function to create a minimal project to serve as a base for a
 * team project. Will add/stage everything if invoked on a path already
 * containing a git working tree or svn checkout.
 * <p>
 * A completion message is printed to standard output when done.
 *
 * @param dir
 *            Directory in which to create team project
 * @param srcLang
 *            Source language
 * @param trgLang
 *            Target language
 * @throws Exception
 *             If specified dir is not a directory, is not writeable, etc.
 */
public static void initTeamProject(File dir, String srcLang, String trgLang) throws Exception {
    if (!dir.isDirectory()) {
        throw new IllegalArgumentException("Specified dir is not a directory: " + dir.getPath());
    }
    if (!dir.canWrite()) {
        throw new IOException("Specified dir is not writeable: " + dir.getPath());
    }
    // Create project properties
    ProjectProperties props = new ProjectProperties(dir);
    props.setSourceLanguage(srcLang);
    props.setTargetLanguage(trgLang);
    // Set default tokenizers
    props.setSourceTokenizer(PluginUtils.getTokenizerClassForLanguage(new Language(srcLang)));
    props.setTargetTokenizer(PluginUtils.getTokenizerClassForLanguage(new Language(trgLang)));
    // Create project internal directories
    props.autocreateDirectories();
    // Create version-controlled glossary file
    props.getWritableGlossaryFile().getAsFile().createNewFile();
    ProjectFileStorage.writeProjectFile(props);
    // Create empty project TM
    new ProjectTMX(props.getSourceLanguage(), props.getTargetLanguage(), props.isSentenceSegmentingEnabled(), null, null).save(props, new File(props.getProjectInternal(), OConsts.STATUS_EXTENSION).getPath(), false);
    // If the directory is already under version control, add/stage what was created
    // and set EOL handling correctly for cross-platform work
    if (new File(dir, ".svn").isDirectory()) {
        SVNClientManager mgr = SVNClientManager.newInstance();
        try {
            mgr.getWCClient().doSetProperty(dir, "svn:auto-props", SVNPropertyValue.create("*.txt = svn:eol-style=native\n*.tmx = svn:eol-style=native\n"), false, SVNDepth.EMPTY, null, null);
            // Add every non-hidden top-level entry of the project directory
            mgr.getWCClient().doAdd(dir.listFiles(f -> !f.getName().startsWith(".")), false, false, true, SVNDepth.fromRecurse(true), false, false, false, true);
        } finally {
            // Release the resources held by the client manager even if an SVN call fails
            mgr.dispose();
        }
    } else if (new File(dir, ".git").isDirectory()) {
        try (BufferedWriter writer = Files.newBufferedWriter(new File(dir, ".gitattributes").toPath())) {
            writer.write("* text=auto\n");
            writer.write("*.tmx text\n");
            writer.write("*.txt text\n");
        }
        // Close the repository handle once staging is done
        try (Git git = Git.open(dir)) {
            git.add().addFilepattern(".").call();
        }
    }
    System.out.println(StringUtil.format(OStrings.getString("TEAM_TOOL_INIT_COMPLETE"), srcLang, trgLang));
}
Also used : ProjectTMX(org.omegat.core.data.ProjectTMX) OConsts(org.omegat.util.OConsts) Arrays(java.util.Arrays) ProjectProperties(org.omegat.core.data.ProjectProperties) Files(java.nio.file.Files) BufferedWriter(java.io.BufferedWriter) CLIParameters(org.omegat.CLIParameters) Log(org.omegat.util.Log) SVNDepth(org.tmatesoft.svn.core.SVNDepth) IOException(java.io.IOException) Language(org.omegat.util.Language) PluginUtils(org.omegat.filters2.master.PluginUtils) SVNClientManager(org.tmatesoft.svn.core.wc.SVNClientManager) File(java.io.File) Level(java.util.logging.Level) StringUtil(org.omegat.util.StringUtil) ProjectTMX(org.omegat.core.data.ProjectTMX) OStrings(org.omegat.util.OStrings) SVNPropertyValue(org.tmatesoft.svn.core.SVNPropertyValue) ProjectFileStorage(org.omegat.util.ProjectFileStorage) Git(org.eclipse.jgit.api.Git) Collections(java.util.Collections) Preferences(org.omegat.util.Preferences) Language(org.omegat.util.Language) IOException(java.io.IOException) ProjectProperties(org.omegat.core.data.ProjectProperties) File(java.io.File) SVNClientManager(org.tmatesoft.svn.core.wc.SVNClientManager) BufferedWriter(java.io.BufferedWriter)

Example 19 with Language

use of org.omegat.util.Language in project omegat by omegat-org.

the class LanguageToolWrapper method setBridgeFromCurrentProject.

/**
 * Replaces the shared LanguageTool bridge with one matching the current project.
 * <p>
 * Any existing bridge is stopped first. A new bridge is created from preferences only when a
 * project is loaded; otherwise the (stopped) bridge reference is left as it was.
 */
public static void setBridgeFromCurrentProject() {
    if (bridge != null) {
        bridge.stop();
    }
    if (!Core.getProject().isProjectLoaded()) {
        // No project loaded, so there are no languages to build a bridge for
        return;
    }
    bridge = createBridgeFromPrefs(
            Core.getProject().getProjectProperties().getSourceLanguage(),
            Core.getProject().getProjectProperties().getTargetLanguage());
}
Also used : Language(org.omegat.util.Language)

Example 20 with Language

use of org.omegat.util.Language in project omegat by omegat-org.

the class HunspellTokenizer method populateInstalledDicts.

/**
 * Scans the spell-checker dictionary directory named in the preferences and records which
 * affix and dictionary files are installed.
 * <p>
 * Does nothing if both maps have already been populated. Each file found is registered twice:
 * under the language derived from its file name, and under that language's bare language code.
 * The languages that have both an affix file and a dictionary file are stored in
 * {@code availableDictLangs}.
 * <p>
 * NOTE(review): on every early return (no directory configured, path is not a directory,
 * directory cannot be listed) {@code availableDictLangs} is left untouched - confirm callers
 * handle that.
 */
private static synchronized void populateInstalledDicts() {
    if (affixFiles != null && dictionaryFiles != null) {
        return;
    }
    affixFiles = new HashMap<>();
    dictionaryFiles = new HashMap<>();
    String dictionaryDirPath = Preferences.getPreference(Preferences.SPELLCHECKER_DICTIONARY_DIRECTORY);
    if (dictionaryDirPath.isEmpty()) {
        return;
    }
    File dictionaryDir = new File(dictionaryDirPath);
    if (!dictionaryDir.isDirectory()) {
        return;
    }
    File[] installedFiles = dictionaryDir.listFiles();
    if (installedFiles == null) {
        // listFiles() yields null on an I/O error or if the directory vanished after the
        // check above; treat that like a missing directory instead of failing with an NPE.
        return;
    }
    for (File file : installedFiles) {
        String name = file.getName();
        if (name.endsWith(OConsts.SC_AFFIX_EXTENSION)) {
            // The language is the file name with the affix extension stripped
            Language lang = new Language(name.substring(0, name.lastIndexOf(OConsts.SC_AFFIX_EXTENSION)));
            affixFiles.put(lang, file);
            affixFiles.put(new Language(lang.getLanguageCode()), file);
        } else if (name.endsWith(OConsts.SC_DICTIONARY_EXTENSION)) {
            // The language is the file name with the dictionary extension stripped
            Language lang = new Language(name.substring(0, name.lastIndexOf(OConsts.SC_DICTIONARY_EXTENSION)));
            dictionaryFiles.put(lang, file);
            dictionaryFiles.put(new Language(lang.getLanguageCode()), file);
        }
    }
    // Only languages with both an affix file and a dictionary file are usable
    Set<Language> commonLangs = new HashSet<>(affixFiles.keySet());
    commonLangs.retainAll(dictionaryFiles.keySet());
    availableDictLangs = langsToStrings(commonLangs);
}
Also used : Language(org.omegat.util.Language) File(java.io.File) HashSet(java.util.HashSet)

Aggregations

Language (org.omegat.util.Language)43 Test (org.junit.Test)16 File (java.io.File)13 ArrayList (java.util.ArrayList)13 Map (java.util.Map)7 FilterContext (org.omegat.filters2.FilterContext)7 List (java.util.List)6 IProject (org.omegat.core.data.IProject)6 SourceTextEntry (org.omegat.core.data.SourceTextEntry)5 HashMap (java.util.HashMap)4 Before (org.junit.Before)4 EntryKey (org.omegat.core.data.EntryKey)4 ProjectTMX (org.omegat.core.data.ProjectTMX)4 TMXEntry (org.omegat.core.data.TMXEntry)4 XHTMLFilter (org.omegat.filters3.xml.xhtml.XHTMLFilter)4 DefaultTokenizer (org.omegat.tokenizer.DefaultTokenizer)4 ITokenizer (org.omegat.tokenizer.ITokenizer)4 IOException (java.io.IOException)3 Files (java.nio.file.Files)3 Matcher (java.util.regex.Matcher)3