use of org.omegat.util.Language in project omegat by omegat-org.
the class SpellChecker method initialize.
/**
 * Initialize the spell checker for the current project and load the project's
 * lists of ignored and learned words.
 * <p>
 * The target language of the project is tried in three spellings, in order:
 * the full locale code ({@code xx_YY}), the same code with a hyphen
 * ({@code xx-YY}), and the bare language code ({@code xx}). The first
 * spelling for which {@code initializeWithLanguage} yields a checker is used.
 * If none does, a {@link SpellCheckerDummy} is installed and a message is
 * logged.
 */
public void initialize() {
    Language targetLanguage = Core.getProject().getProjectProperties().getTargetLanguage();

    // Full xx_YY
    String underscoreCode = targetLanguage.getLocaleCode();
    // Full xx-YY
    String hyphenCode = targetLanguage.getLocaleCode().replace('_', '-');
    // xx only
    String languageOnlyCode = targetLanguage.getLanguageCode();

    // The stream is lazy: candidates after the first successful one are never tried.
    checker = Stream.of(underscoreCode, hyphenCode, languageOnlyCode)
            .map(SpellChecker::initializeWithLanguage)
            .filter(Optional::isPresent)
            .map(Optional::get)
            .findFirst()
            .orElseGet(SpellCheckerDummy::new);

    if (checker instanceof SpellCheckerDummy) {
        Log.log("No spell checker found for language " + targetLanguage);
    }

    loadWordLists();
}
use of org.omegat.util.Language in project omegat by omegat-org.
the class FindMatches method search.
/**
 * Collects near matches for the given text from the project.
 * <p>
 * Candidates are taken, in this order, from the project's default
 * translations (only when the project supports them), its multiple
 * translations, the external translation memories, and the source
 * translations attached to project entries. When a separate segment matcher
 * is configured, the text is additionally split into segments, each segment
 * is matched on its own, and the glued result is offered as one more
 * candidate.
 *
 * @param searchText
 *            text to find matches for
 * @param requiresTranslation
 *            if true, candidates whose translation is {@code null} are
 *            skipped
 * @param fillSimilarityData
 *            if true, the {@code attr} field of every returned match is
 *            filled with data built by
 *            {@code FuzzyMatcher.buildSimilarityData}
 * @param stop
 *            handed to {@code checkStopped} before each candidate is
 *            processed (presumably lets the caller abort the search;
 *            confirm against {@code checkStopped})
 * @return the list of matches accumulated in {@code result}
 * @throws StoppedException
 *             declared by this method; presumably raised by
 *             {@code checkStopped} when the search is aborted
 */
public List<NearString> search(final String searchText, final boolean requiresTranslation,
        final boolean fillSimilarityData, final IStopped stop) throws StoppedException {
    result = new ArrayList<>(OConsts.MAX_NEAR_STRINGS + 1);
    srcText = searchText;
    removedText = "";

    // When a remove pattern is set, strip every part of the text that matches it
    // before matching. The stripped pieces are kept, concatenated, in removedText.
    if (removePattern != null) {
        StringBuilder removedBuffer = new StringBuilder();
        Matcher removeMatcher = removePattern.matcher(srcText);
        while (removeMatcher.find()) {
            removedBuffer.append(removeMatcher.group());
        }
        srcText = removeMatcher.replaceAll("");
        removedText = removedBuffer.toString();
    }

    // get tokens for original string
    strTokensStem = tokenizeStem(srcText);
    strTokensNoStem = tokenizeNoStem(srcText);
    strTokensAll = tokenizeAll(srcText);

    // travel by project entries, including orphaned
    if (project.getProjectProperties().isSupportDefaultTranslations()) {
        project.iterateByDefaultTranslations(new DefaultTranslationsIterator() {
            @Override
            public void iterate(String source, TMXEntry trans) {
                checkStopped(stop);
                if (!searchExactlyTheSame && source.equals(searchText)) {
                    // skip original==original entry comparison
                    return;
                }
                if (requiresTranslation && trans.translation == null) {
                    return;
                }
                String fileName = project.isOrphaned(source) ? ORPHANED_FILE_NAME : null;
                processEntry(null, source, trans.translation, NearString.MATCH_SOURCE.MEMORY, false, 0,
                        fileName, trans.creator, trans.creationDate, trans.changer, trans.changeDate, null);
            }
        });
    }
    project.iterateByMultipleTranslations(new MultipleTranslationsIterator() {
        @Override
        public void iterate(EntryKey source, TMXEntry trans) {
            checkStopped(stop);
            if (!searchExactlyTheSame && source.sourceText.equals(searchText)) {
                // skip original==original entry comparison
                return;
            }
            if (requiresTranslation && trans.translation == null) {
                return;
            }
            String fileName = project.isOrphaned(source) ? ORPHANED_FILE_NAME : null;
            processEntry(source, source.sourceText, trans.translation, NearString.MATCH_SOURCE.MEMORY, false,
                    0, fileName, trans.creator, trans.creationDate, trans.changer, trans.changeDate, null);
        }
    });

    // travel by translation memories
    for (Map.Entry<String, ExternalTMX> en : project.getTransMemories().entrySet()) {
        // A penalty may be encoded in the memory's key; it is read from group 1 of
        // SEARCH_FOR_PENALTY and defaults to 0 when the key does not match.
        int penalty = 0;
        Matcher matcher = SEARCH_FOR_PENALTY.matcher(en.getKey());
        if (matcher.find()) {
            penalty = Integer.parseInt(matcher.group(1));
        }
        for (PrepareTMXEntry tmen : en.getValue().getEntries()) {
            checkStopped(stop);
            if (tmen.source == null) {
                // Not all TMX entries have a source; in that case there can be no meaningful match, so skip.
                continue;
            }
            if (requiresTranslation && tmen.translation == null) {
                continue;
            }
            processEntry(null, tmen.source, tmen.translation, NearString.MATCH_SOURCE.TM, false, penalty,
                    en.getKey(), tmen.creator, tmen.creationDate, tmen.changer, tmen.changeDate,
                    tmen.otherProperties);
        }
    }

    // travel by all entries for check source file translations
    for (SourceTextEntry ste : project.getAllEntries()) {
        checkStopped(stop);
        if (ste.getSourceTranslation() != null) {
            processEntry(ste.getKey(), ste.getSrcText(), ste.getSourceTranslation(),
                    NearString.MATCH_SOURCE.MEMORY, ste.isSourceTranslationFuzzy(), 0, ste.getKey().file, "", 0,
                    "", 0, null);
        }
    }

    if (separateSegmentMatcher != null) {
        // split paragraph even when segmentation disabled, then find matches for every segment
        List<StringBuilder> spaces = new ArrayList<>();
        List<Rule> brules = new ArrayList<>();
        Language sourceLang = project.getProjectProperties().getSourceLanguage();
        Language targetLang = project.getProjectProperties().getTargetLanguage();
        List<String> segments = Core.getSegmenter().segment(sourceLang, srcText, spaces, brules);
        if (segments.size() > 1) {
            List<String> fsrc = new ArrayList<>(segments.size());
            List<String> ftrans = new ArrayList<>(segments.size());
            // multiple segments
            // A for-each is used instead of a short counter, which would wrap to a
            // negative index once there are more than Short.MAX_VALUE segments.
            for (String onesrc : segments) {
                // find match for separate segment
                List<NearString> segmentMatch = separateSegmentMatcher.search(onesrc, requiresTranslation,
                        false, stop);
                if (!segmentMatch.isEmpty()
                        && segmentMatch.get(0).scores[0].score >= SUBSEGMENT_MATCH_THRESHOLD) {
                    fsrc.add(segmentMatch.get(0).source);
                    ftrans.add(segmentMatch.get(0).translation);
                } else {
                    // Keep the lists aligned with the segments: no usable match contributes empty text.
                    fsrc.add("");
                    ftrans.add("");
                }
            }
            // glue found sources
            String foundSrc = Core.getSegmenter().glue(sourceLang, sourceLang, fsrc, spaces, brules);
            // glue found translations
            String foundTrans = Core.getSegmenter().glue(sourceLang, targetLang, ftrans, spaces, brules);
            processEntry(null, foundSrc, foundTrans, NearString.MATCH_SOURCE.TM, false, 0, "", "", 0, "", 0,
                    null);
        }
    }

    if (fillSimilarityData) {
        // fill similarity data only for result
        for (NearString near : result) {
            // fix for bug 1586397
            byte[] similarityData = FuzzyMatcher.buildSimilarityData(strTokensAll, tokenizeAll(near.source));
            near.attr = similarityData;
        }
    }
    return result;
}
use of org.omegat.util.Language in project omegat by omegat-org.
the class TeamTool method initTeamProject.
/**
 * Utility function to create a minimal project to serve as a base for a
 * team project. Will add/stage everything if invoked on a path already
 * containing a git working tree or svn checkout.
 * <p>
 * Besides the project files, this writes end-of-line settings for the
 * detected version control system: an {@code svn:auto-props} property on
 * the directory for svn, or a {@code .gitattributes} file for git. A
 * completion message is printed to standard output when done.
 *
 * @param dir
 *            Directory in which to create team project
 * @param srcLang
 *            Source language
 * @param trgLang
 *            Target language
 * @throws Exception
 *             If specified dir is not a directory, is not writeable, etc.
 */
public static void initTeamProject(File dir, String srcLang, String trgLang) throws Exception {
    if (!dir.isDirectory()) {
        throw new IllegalArgumentException("Specified dir is not a directory: " + dir.getPath());
    }
    if (!dir.canWrite()) {
        throw new IOException("Specified dir is not writeable: " + dir.getPath());
    }

    // Create project properties
    ProjectProperties props = new ProjectProperties(dir);
    props.setSourceLanguage(srcLang);
    props.setTargetLanguage(trgLang);

    // Set default tokenizers
    props.setSourceTokenizer(PluginUtils.getTokenizerClassForLanguage(new Language(srcLang)));
    props.setTargetTokenizer(PluginUtils.getTokenizerClassForLanguage(new Language(trgLang)));

    // Create project internal directories
    props.autocreateDirectories();

    // Create version-controlled glossary file
    props.getWritableGlossaryFile().getAsFile().createNewFile();
    ProjectFileStorage.writeProjectFile(props);

    // Create empty project TM
    new ProjectTMX(props.getSourceLanguage(), props.getTargetLanguage(), props.isSentenceSegmentingEnabled(),
            null, null).save(props,
                    new File(props.getProjectInternal(), OConsts.STATUS_EXTENSION).getPath(), false);

    // If the directory is already under version control, add everything that was
    // created and set EOL handling correctly for cross-platform work
    if (new File(dir, ".svn").isDirectory()) {
        SVNClientManager mgr = SVNClientManager.newInstance();
        try {
            mgr.getWCClient().doSetProperty(dir, "svn:auto-props",
                    SVNPropertyValue.create("*.txt = svn:eol-style=native\n*.tmx = svn:eol-style=native\n"),
                    false, SVNDepth.EMPTY, null, null);
            // Files whose names start with "." are not added.
            mgr.getWCClient().doAdd(dir.listFiles(f -> !f.getName().startsWith(".")), false, false, true,
                    SVNDepth.fromRecurse(true), false, false, false, true);
        } finally {
            // Release the resources held by the client manager even if an svn call fails.
            mgr.dispose();
        }
    } else if (new File(dir, ".git").isDirectory()) {
        try (BufferedWriter writer = Files.newBufferedWriter(new File(dir, ".gitattributes").toPath())) {
            writer.write("* text=auto\n");
            writer.write("*.tmx text\n");
            writer.write("*.txt text\n");
        }
        // Close the Git instance so the repository opened by Git.open is released.
        try (Git git = Git.open(dir)) {
            git.add().addFilepattern(".").call();
        }
    }
    System.out.println(StringUtil.format(OStrings.getString("TEAM_TOOL_INIT_COMPLETE"), srcLang, trgLang));
}
use of org.omegat.util.Language in project omegat by omegat-org.
the class LanguageToolWrapper method setBridgeFromCurrentProject.
/**
 * Replace the LanguageTool bridge with one matching the current project.
 * <p>
 * Any existing bridge is stopped first. A new bridge is created from the
 * project's source and target languages only when a project is loaded;
 * otherwise the bridge reference is left as it was.
 */
public static void setBridgeFromCurrentProject() {
    if (bridge != null) {
        bridge.stop();
    }

    if (!Core.getProject().isProjectLoaded()) {
        // Nothing to build the new bridge from.
        return;
    }

    bridge = createBridgeFromPrefs(
            Core.getProject().getProjectProperties().getSourceLanguage(),
            Core.getProject().getProjectProperties().getTargetLanguage());
}
use of org.omegat.util.Language in project omegat by omegat-org.
the class HunspellTokenizer method populateInstalledDicts.
/**
 * Scan the spell checker dictionary directory from the preferences and
 * record the affix and dictionary files found there, keyed by language.
 * <p>
 * Each file is registered twice: under the language derived from its file
 * name (the name minus the extension) and under that language's bare
 * language code. Languages that have both an affix file and a dictionary
 * file are then published through {@code availableDictLangs}.
 * <p>
 * The scan runs only while the two maps are still unset. If no usable
 * directory is configured, or the directory cannot be listed, the maps are
 * left empty and {@code availableDictLangs} is not assigned.
 */
private static synchronized void populateInstalledDicts() {
    if (affixFiles != null && dictionaryFiles != null) {
        // Already populated by an earlier call.
        return;
    }

    affixFiles = new HashMap<>();
    dictionaryFiles = new HashMap<>();

    String dictionaryDirPath = Preferences.getPreference(Preferences.SPELLCHECKER_DICTIONARY_DIRECTORY);
    if (dictionaryDirPath.isEmpty()) {
        return;
    }

    File dictionaryDir = new File(dictionaryDirPath);
    if (!dictionaryDir.isDirectory()) {
        return;
    }

    // listFiles() returns null when the directory cannot be read (I/O error);
    // treat that like a missing directory instead of failing in the loop below.
    File[] candidates = dictionaryDir.listFiles();
    if (candidates == null) {
        return;
    }

    for (File file : candidates) {
        String name = file.getName();
        if (name.endsWith(OConsts.SC_AFFIX_EXTENSION)) {
            Language lang = new Language(name.substring(0, name.lastIndexOf(OConsts.SC_AFFIX_EXTENSION)));
            affixFiles.put(lang, file);
            // Also reachable through the language code alone; a later file for the
            // same language code replaces an earlier one.
            affixFiles.put(new Language(lang.getLanguageCode()), file);
        } else if (name.endsWith(OConsts.SC_DICTIONARY_EXTENSION)) {
            Language lang = new Language(name.substring(0, name.lastIndexOf(OConsts.SC_DICTIONARY_EXTENSION)));
            dictionaryFiles.put(lang, file);
            dictionaryFiles.put(new Language(lang.getLanguageCode()), file);
        }
    }

    // Only languages with both an affix file and a dictionary file are usable.
    Set<Language> commonLangs = new HashSet<>(affixFiles.keySet());
    commonLangs.retainAll(dictionaryFiles.keySet());
    availableDictLangs = langsToStrings(commonLangs);
}
Aggregations