Search in sources :

Example 36 with Language

use of org.omegat.util.Language in project omegat by omegat-org.

the class GlossaryReaderTBX method readMartif.

/**
 * Converts a parsed TBX (MARTIF) document into glossary entries for the language pair of the
 * currently loaded project.
 * <p>
 * For every term entry, the terms whose language code equals the project source language code
 * are paired with every term whose language code equals the project target language code. A
 * source term without any target term still yields one entry with an empty target so that its
 * description is kept. Only the language code of each language set is compared (case-insensitively).
 * <p>
 * The entry comment is assembled from, in order: the descriptions attached to the term entry,
 * the descriptions attached to the language sets and their terms (for every language found in
 * the entry), and the term notes of the target-language terms.
 *
 * @param tbx
 *            parsed TBX document
 * @param priorityGlossary
 *            passed through unchanged to every created {@link GlossaryEntry}
 * @param origin
 *            passed through unchanged to every created {@link GlossaryEntry}
 * @return the created entries; an empty list when the document has no text element
 * @throws Exception
 *             declared for callers; propagated from the helpers used while reading
 */
public static List<GlossaryEntry> readMartif(final Martif tbx, boolean priorityGlossary, String origin) throws Exception {
    if (tbx.getText() == null) {
        return Collections.emptyList();
    }
    String sourceCode = Core.getProject().getProjectProperties().getSourceLanguage().getLanguageCode();
    String targetCode = Core.getProject().getProjectProperties().getTargetLanguage().getLanguageCode();
    StringBuilder termNotes = new StringBuilder();
    StringBuilder entryDescription = new StringBuilder();
    StringBuilder languageDescription = new StringBuilder();
    List<GlossaryEntry> entries = new ArrayList<>();
    List<String> sourceTerms = new ArrayList<>();
    List<String> targetTerms = new ArrayList<>();
    for (TermEntry termEntry : tbx.getText().getBody().getTermEntry()) {
        // The builders are shared between iterations, so start each term entry from scratch.
        termNotes.setLength(0);
        entryDescription.setLength(0);
        languageDescription.setLength(0);
        appendDescOrNote(termEntry.getDescripOrDescripGrpOrAdmin(), entryDescription);
        for (LangSet langSet : termEntry.getLangSet()) {
            // Only the language code is compared; any further part of the tag is ignored.
            String langCode = new Language(langSet.getLang()).getLanguageCode();
            appendDescOrNote(langSet.getDescripOrDescripGrpOrAdmin(), languageDescription);
            for (Object item : langSet.getTigOrNtig()) {
                if (item instanceof Tig) {
                    Tig tig = (Tig) item;
                    if (sourceCode.equalsIgnoreCase(langCode)) {
                        sourceTerms.add(readContent(tig.getTerm().getContent()));
                    } else if (targetCode.equalsIgnoreCase(langCode)) {
                        targetTerms.add(readContent(tig.getTerm().getContent()));
                        appendDescOrNote(tig.getTermNote(), termNotes);
                    }
                    // Descriptions are collected whatever the language of the term is.
                    appendDescOrNote(tig.getDescripOrDescripGrpOrAdmin(), languageDescription);
                } else if (item instanceof Ntig) {
                    Ntig ntig = (Ntig) item;
                    if (sourceCode.equalsIgnoreCase(langCode)) {
                        sourceTerms.add(readContent(ntig.getTermGrp().getTerm().getContent()));
                    } else if (targetCode.equalsIgnoreCase(langCode)) {
                        targetTerms.add(readContent(ntig.getTermGrp().getTerm().getContent()));
                        appendDescOrNote(ntig.getTermGrp().getTermNoteOrTermNoteGrp(), termNotes);
                    }
                    // Descriptions are collected whatever the language of the term is.
                    appendDescOrNote(ntig.getDescripOrDescripGrpOrAdmin(), languageDescription);
                }
            }
        }
        StringBuilder commentBuilder = new StringBuilder();
        appendLine(commentBuilder, entryDescription.toString());
        appendLine(commentBuilder, languageDescription.toString());
        appendLine(commentBuilder, termNotes.toString());
        String commentText = commentBuilder.toString();
        for (String sourceTerm : sourceTerms) {
            if (targetTerms.isEmpty()) {
                // An entry is created just to get the definition
                entries.add(new GlossaryEntry(sourceTerm, "", commentText, priorityGlossary, origin));
                continue;
            }
            for (String targetTerm : targetTerms) {
                entries.add(new GlossaryEntry(sourceTerm, targetTerm, commentText, priorityGlossary, origin));
            }
        }
        sourceTerms.clear();
        targetTerms.clear();
    }
    return entries;
}
Also used : Ntig(gen.core.tbx.Ntig) ArrayList(java.util.ArrayList) LangSet(gen.core.tbx.LangSet) Tig(gen.core.tbx.Tig) Language(org.omegat.util.Language) TermEntry(gen.core.tbx.TermEntry)

Example 37 with Language

use of org.omegat.util.Language in project omegat by omegat-org.

the class AlignFilePickerController method main.

/**
 * Launches the alignment file picker on its own, for debugging or standalone use.
 * <p>
 * When exactly four arguments are given they pre-fill the picker; any other argument count is
 * ignored and the picker opens empty. The expected arguments are:
 * <ol>
 * <li>Source language
 * <li>Source file path
 * <li>Target language
 * <li>Target file path
 * </ol>
 *
 * @param args
 *            either nothing, or the four values listed above in that order
 * @throws Exception
 *             propagated from any of the initialization calls or from showing the picker
 */
public static void main(String[] args) throws Exception {
    System.setProperty("apple.laf.useScreenMenuBar", "true");
    Preferences.init();
    PluginUtils.loadPlugins(Collections.emptyMap());

    // Register the core services before the picker is created.
    FilterMaster defaultFilters = new FilterMaster(FilterMaster.createDefaultFiltersConfig());
    Core.setFilterMaster(defaultFilters);
    Segmenter defaultSegmenter = new Segmenter(SRX.getDefault());
    Core.setSegmenter(defaultSegmenter);

    AlignFilePickerController controller = new AlignFilePickerController();
    boolean prefillRequested = args.length == 4;
    if (prefillRequested) {
        controller.sourceLanguage = new Language(args[0]);
        controller.sourceFile = args[1];
        controller.targetLanguage = new Language(args[2]);
        controller.targetFile = args[3];
    }
    controller.show(null);
}
Also used : Language(org.omegat.util.Language) FilterMaster(org.omegat.filters2.master.FilterMaster) Segmenter(org.omegat.core.segmentation.Segmenter)

Example 38 with Language

use of org.omegat.util.Language in project omegat by omegat-org.

the class ParseEntry method addEntryWithProperties.

/**
 * Callback used by filters to hand an entry read from a source file over to OmegaT.
 * <p>
 * The source (and the translation, when present) is stripped and Unicode-normalized first. If
 * sentence segmenting is enabled and the source splits into several segments, each segment is
 * added on its own without a translation; otherwise the entry is added as a single segment
 * together with its translation.
 *
 * @param id
 *            ID of the entry, if the format supports it
 * @param source
 *            translatable source string; an empty value makes the call a no-op
 * @param translation
 *            translation of the source string, if the format supports it; may be {@code null}
 * @param isFuzzy
 *            flag for fuzzy translation. A fuzzy translation is not added to the project's TMX,
 *            but to the generated 'reference' TMX, a special TMX that is used as extra reference
 *            during translation.
 * @param props
 *            a staggered array of non-uniquely-identifying key=value properties (metadata) for
 *            the entry; may be {@code null}
 * @param path
 *            path of the entry in the file
 * @param filter
 *            filter which produced the entry (not consulted by this implementation)
 * @param protectedParts
 *            protected parts; may be {@code null}. When tag removal is enabled the list is
 *            modified in place.
 * @throws IllegalArgumentException
 *             if {@code props} has an odd number of items
 */
@Override
public void addEntryWithProperties(String id, String source, String translation, boolean isFuzzy, String[] props, String path, IFilter filter, List<ProtectedPart> protectedParts) {
    if (StringUtil.isEmpty(source)) {
        // Nothing to store for an empty source.
        return;
    }
    boolean oddPropertyCount = props != null && props.length % 2 != 0;
    if (oddPropertyCount) {
        throw new IllegalArgumentException("Entry properties must be in a key=value array with an even number of items.");
    }

    ParseEntryResult stripResult = new ParseEntryResult();
    boolean removeSpaces = Core.getFilterMaster().getConfig().isRemoveSpacesNonseg();
    String cleanSource = StringUtil.normalizeUnicode(stripSomeChars(source, stripResult, config.isRemoveTags(), removeSpaces));

    if (config.isRemoveTags() && protectedParts != null) {
        // Strip OmegaT tags from each protected part and drop the parts that become empty.
        int index = 0;
        while (index < protectedParts.size()) {
            ProtectedPart part = protectedParts.get(index);
            String withoutTags = PatternConsts.OMEGAT_TAG.matcher(part.getTextInSourceSegment()).replaceAll("");
            if (withoutTags.isEmpty()) {
                // The next element moves into this slot, so the index must not advance.
                protectedParts.remove(index);
            } else {
                part.setTextInSourceSegment(withoutTags);
                index++;
            }
        }
    }

    String cleanTranslation = translation;
    if (cleanTranslation != null) {
        cleanTranslation = StringUtil.normalizeUnicode(stripSomeChars(cleanTranslation, stripResult, config.isRemoveTags(), removeSpaces));
    }

    if (!config.isSentenceSegmentingEnabled()) {
        internalAddSegment(id, (short) 0, cleanSource, cleanTranslation, isFuzzy, props, path, protectedParts);
        return;
    }

    List<StringBuilder> spaces = new ArrayList<>();
    List<Rule> breakRules = new ArrayList<>();
    Language sourceLanguage = config.getSourceLanguage();
    List<String> segments = Core.getSegmenter().segment(sourceLanguage, cleanSource, spaces, breakRules);
    if (segments.size() == 1) {
        // A single segment keeps the translation supplied by the filter.
        internalAddSegment(id, (short) 0, segments.get(0), cleanTranslation, isFuzzy, props, path, protectedParts);
        return;
    }
    // Several segments: each one is added without a translation and with only its own protected parts.
    for (short segmentNumber = 0; segmentNumber < segments.size(); segmentNumber++) {
        String segmentSource = segments.get(segmentNumber);
        List<ProtectedPart> partsForSegment = ProtectedPart.extractFor(protectedParts, segmentSource);
        internalAddSegment(id, segmentNumber, segmentSource, null, false, props, path, partsForSegment);
    }
}
Also used : ArrayList(java.util.ArrayList) Language(org.omegat.util.Language) Rule(org.omegat.core.segmentation.Rule)

Example 39 with Language

use of org.omegat.util.Language in project omegat by omegat-org.

the class RealProject method loadOtherLanguages.

/**
 * Sets up a directory monitor on the "other languages" TM folder and loads the TMX files found
 * there, one per target language.
 * <p>
 * Only files named like {@code XX.tmx}, {@code XX-YY.tmx} or {@code XX_YY.tmx} (upper-case
 * letters) are considered; the part before the extension is used as the target language. When
 * such a file appears or changes it is (re)loaded; when it no longer exists its memory is
 * dropped. The map of loaded memories is replaced by an updated copy on every change rather than
 * modified in place.
 *
 * @throws IOException
 *             declared for callers of this method
 */
private void loadOtherLanguages() throws IOException {
    File otherLanguagesDir = new File(config.getTMOtherLangRoot());
    tmOtherLanguagesMonitor = new DirectoryMonitor(otherLanguagesDir, file -> {
        String fileName = file.getName();
        boolean namedAfterLanguage = fileName.matches("[A-Z]{2}([-_][A-Z]{2})?\\.tmx");
        if (!namedAfterLanguage) {
            // Ignore anything that is not a TMX file in XX_XX.tmx format.
            return;
        }
        String languageTag = fileName.substring(0, fileName.length() - ".tmx".length());
        Language targetLanguage = new Language(languageTag);
        // Work on a copy and publish it at the end.
        Map<Language, ProjectTMX> updatedTMs = new TreeMap<>(otherTargetLangTMs);
        if (!file.exists()) {
            updatedTMs.remove(targetLanguage);
        } else {
            try {
                ProjectTMX loadedTMX = new ProjectTMX(config.getSourceLanguage(), targetLanguage, config.isSentenceSegmentingEnabled(), file, checkOrphanedCallback);
                updatedTMs.put(targetLanguage, loadedTMX);
            } catch (Exception e) {
                // A file that cannot be loaded is reported; the previous entry, if any, stays in the copy.
                String filePath = file.getPath();
                Log.logErrorRB(e, "TF_TM_LOAD_ERROR", filePath);
                Core.getMainWindow().displayErrorRB(e, "TF_TM_LOAD_ERROR", filePath);
            }
        }
        otherTargetLangTMs = updatedTMs;
    });
    tmOtherLanguagesMonitor.checkChanges();
    tmOtherLanguagesMonitor.start();
}
Also used : OConsts(org.omegat.util.OConsts) RandomAccessFile(java.io.RandomAccessFile) SRX(org.omegat.core.segmentation.SRX) IFilter(org.omegat.filters2.IFilter) PatternConsts(org.omegat.util.PatternConsts) StaticUtils(org.omegat.util.StaticUtils) FileLock(java.nio.channels.FileLock) KnownException(org.omegat.core.KnownException) Matcher(java.util.regex.Matcher) StringUtil(org.omegat.util.StringUtil) CalcStandardStatistics(org.omegat.core.statistics.CalcStandardStatistics) Core(org.omegat.core.Core) Filters(gen.core.filters.Filters) Map(java.util.Map) ProjectFileStorage(org.omegat.util.ProjectFileStorage) Path(java.nio.file.Path) Set(java.util.Set) FilterMaster(org.omegat.filters2.master.FilterMaster) GlossaryReaderTSV(org.omegat.gui.glossary.GlossaryReaderTSV) Logger(java.util.logging.Logger) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) List(java.util.List) Writer(java.io.Writer) FilterContext(org.omegat.filters2.FilterContext) TagUtil(org.omegat.util.TagUtil) Optional(java.util.Optional) Pattern(java.util.regex.Pattern) DirectoryMonitor(org.omegat.util.DirectoryMonitor) Preferences(org.omegat.util.Preferences) StmProperties(org.madlonkay.supertmxmerge.StmProperties) ITokenizer(org.omegat.tokenizer.ITokenizer) CLIParameters(org.omegat.CLIParameters) RebaseAndCommit(org.omegat.core.team2.RebaseAndCommit) Log(org.omegat.util.Log) FileUtil(org.omegat.util.FileUtil) TMXReader2(org.omegat.util.TMXReader2) HashMap(java.util.HashMap) Stack(java.util.Stack) CoreEvents(org.omegat.core.CoreEvents) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) OStrings(org.omegat.util.OStrings) StreamUtil(org.omegat.util.StreamUtil) IAlignCallback(org.omegat.filters2.IAlignCallback) DefaultTokenizer(org.omegat.tokenizer.DefaultTokenizer) RuntimePreferences(org.omegat.util.RuntimePreferences) Segmenter(org.omegat.core.segmentation.Segmenter) Files(java.nio.file.Files) UIThreadsUtil(org.omegat.util.gui.UIThreadsUtil) 
ExternalLinked(org.omegat.core.data.TMXEntry.ExternalLinked) IProjectEventListener(org.omegat.core.events.IProjectEventListener) IOException(java.io.IOException) Language(org.omegat.util.Language) Statistics(org.omegat.core.statistics.Statistics) StatisticsInfo(org.omegat.core.statistics.StatisticsInfo) RemoteRepositoryProvider(org.omegat.core.team2.RemoteRepositoryProvider) File(java.io.File) SuperTmxMerge(org.madlonkay.supertmxmerge.SuperTmxMerge) SAXParseException(org.xml.sax.SAXParseException) TreeMap(java.util.TreeMap) Paths(java.nio.file.Paths) CommandMonitor(org.omegat.core.threads.CommandMonitor) GlossaryEntry(org.omegat.gui.glossary.GlossaryEntry) FileChannel(java.nio.channels.FileChannel) Collections(java.util.Collections) DirectoryMonitor(org.omegat.util.DirectoryMonitor) Language(org.omegat.util.Language) RandomAccessFile(java.io.RandomAccessFile) File(java.io.File) Map(java.util.Map) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) KnownException(org.omegat.core.KnownException) IOException(java.io.IOException) SAXParseException(org.xml.sax.SAXParseException)

Example 40 with Language

use of org.omegat.util.Language in project omegat by omegat-org.

the class TranslateEntry method getTranslation.

/**
 * {@inheritDoc}
 * <p>
 * Implementation notes: the source is stripped and Unicode-normalized before lookup. With
 * sentence segmenting enabled, each segment is looked up separately, untranslated segments keep
 * their source text, and the segments are glued back together. The line endings and the
 * leading/trailing characters recorded while stripping are then restored on the result.
 *
 * @return the translated text, or {@code null} when no translation exists (with segmenting
 *         enabled: when not a single segment is translated)
 */
@Override
public String getTranslation(final String id, final String origSource, final String path) {
    ParseEntry.ParseEntryResult spr = new ParseEntry.ParseEntryResult();
    // fix for bug 3487497;
    // Fetch removed tags if the options
    // has been enabled.
    String tags = null;
    if (config.isRemoveTags()) {
        tags = TagUtil.buildTagListForRemove(origSource);
    }
    boolean removeSpaces = Core.getFilterMaster().getConfig().isRemoveSpacesNonseg();
    final String source = StringUtil.normalizeUnicode(ParseEntry.stripSomeChars(origSource, spr, config.isRemoveTags(), removeSpaces));
    StringBuilder res = new StringBuilder();
    if (config.isSentenceSegmentingEnabled()) {
        boolean translated = false;
        List<StringBuilder> spaces = new ArrayList<StringBuilder>();
        List<Rule> brules = new ArrayList<Rule>();
        Language sourceLang = config.getSourceLanguage();
        Language targetLang = config.getTargetLanguage();
        List<String> segments = Core.getSegmenter().segment(sourceLang, source, spaces, brules);
        for (int i = 0; i < segments.size(); i++) {
            String onesrc = segments.get(i);
            String tr = internalGetSegmentTranslation(id, i, onesrc, path);
            if (tr == null) {
                // Keep the source text for a segment that has no translation.
                tr = onesrc;
            } else {
                translated = true;
            }
            segments.set(i, tr);
        }
        if (!translated) {
            // there is no even one translated segment
            return null;
        }
        res.append(Core.getSegmenter().glue(sourceLang, targetLang, segments, spaces, brules));
    } else {
        String tr = internalGetSegmentTranslation(id, 0, source, path);
        if (tr == null) {
            // non-translated
            return null;
        }
        res.append(tr);
    }
    String r = res.toString();
    // - Word: anything placed before the leading tag is omitted in translated document
    // https://sourceforge.net/p/omegat/bugs/634/
    // This is a Word document, Remove Tags (from Project Properties) is not checked and Remove leading and
    // trailing tags (from File Filters) is not checked
    String fileName = getCurrentFile().toLowerCase(Locale.ENGLISH);
    if ((fileName.endsWith(".docx") || fileName.endsWith(".docm")) && !config.isRemoveTags() && !Core.getFilterMaster().getConfig().isRemoveTags()) {
        // Locate the location of the first tag
        String firstTag = TagUtil.getFirstTag(r);
        if (firstTag != null) {
            int locFirstTag = r.indexOf(firstTag);
            // Is there text before that first tag?
            if (locFirstTag > 0) {
                int afterFirstTag = locFirstTag + firstTag.length();
                // Was the first tag between two words without any spaces around?
                // When the tag ends the string there is no following word, so no space is needed;
                // checking the bound first also keeps codePointAt from throwing
                // IndexOutOfBoundsException in that case.
                String addSpace = "";
                if (afterFirstTag < r.length()
                        && !Character.isWhitespace(r.codePointBefore(locFirstTag))
                        && !Character.isWhitespace(r.codePointAt(afterFirstTag))
                        && Core.getProject().getProjectProperties().getTargetLanguage().isSpaceDelimited()) {
                    addSpace = " ";
                }
                // Move that first tag before the text, adding a space if needed.
                r = firstTag + r.substring(0, locFirstTag) + addSpace + r.substring(afterFirstTag);
            }
        }
    }
    // fix for bug 3487497;
    // Append the tags collected at the beginning of this method to
    // the end of the translated string.
    if (config.isRemoveTags()) {
        r += tags;
    }
    // replacing all occurrences of LF (\n) by either single CR (\r) or CRLF
    // (\r\n)
    // this is a reversal of the process at the beginning of this method
    // fix for bug 1462566
    if (spr.crlf) {
        r = r.replace("\n", "\r\n");
    } else if (spr.cr) {
        r = r.replace("\n", "\r");
    }
    // Put back the leading and trailing characters of the original source that were counted
    // in spr while stripping.
    if (spr.spacesAtBegin > 0) {
        r = origSource.substring(0, spr.spacesAtBegin) + r;
    }
    if (spr.spacesAtEnd > 0) {
        r = r + origSource.substring(origSource.length() - spr.spacesAtEnd);
    }
    return r;
}
Also used : ArrayList(java.util.ArrayList) Language(org.omegat.util.Language) Rule(org.omegat.core.segmentation.Rule)

Aggregations

Language (org.omegat.util.Language)43 Test (org.junit.Test)16 File (java.io.File)13 ArrayList (java.util.ArrayList)13 Map (java.util.Map)7 FilterContext (org.omegat.filters2.FilterContext)7 List (java.util.List)6 IProject (org.omegat.core.data.IProject)6 SourceTextEntry (org.omegat.core.data.SourceTextEntry)5 HashMap (java.util.HashMap)4 Before (org.junit.Before)4 EntryKey (org.omegat.core.data.EntryKey)4 ProjectTMX (org.omegat.core.data.ProjectTMX)4 TMXEntry (org.omegat.core.data.TMXEntry)4 XHTMLFilter (org.omegat.filters3.xml.xhtml.XHTMLFilter)4 DefaultTokenizer (org.omegat.tokenizer.DefaultTokenizer)4 ITokenizer (org.omegat.tokenizer.ITokenizer)4 IOException (java.io.IOException)3 Files (java.nio.file.Files)3 Matcher (java.util.regex.Matcher)3