Use of org.omegat.tokenizer.ITokenizer in project omegat by omegat-org.
Class MatchesTextAreaTest, method testReplaceNumbers.
/**
 * Exercises {@code MatchesTextArea.substituteNumbers} with a table of
 * inputs. Each row supplies the current source text, the source and
 * target text of a match, and the string the call is expected to return.
 * The same {@link DefaultTokenizer} instance is passed for both tokenizer
 * arguments.
 */
@Test
public void testReplaceNumbers() {
    ITokenizer tokenizer = new DefaultTokenizer();

    // Row layout: { source, srcMatch, trgMatch, expected }.
    String[][] cases = {
        // Simple case
        { "chapter 5", "chapter 1", "foo 1", "foo 5" },
        // Double
        { "chapter 5.5", "chapter 1.1", "foo 1.1", "foo 5.5" },
        // Different order
        { "hoge 9 fuga 8 piyo 7", "foo 1 bar 2 baz 3", "bing 3 bang 2 bop 1", "bing 7 bang 8 bop 9" },
        // User-reported problem string (duplicate numbers)
        { "Point C : Recommence les \u00E9tapes 16 \u00E0 21 \u2013 pages 16 et 17",
          "Point B : Recommence les \u00E9tapes 9 \u00E0 15 \u2013 page 14 et 15",
          "Point B: Repeat steps 9 to 15 \u2013 pages 14 and 15",
          "Point B: Repeat steps 16 to 21 \u2013 pages 16 and 17" },
        // Substitution not possible: source match has fewer numbers;
        // the target match is expected back unchanged
        { "hoge 9 fuga 8 piyo 7", "foo 1 bar 2 baz", "bing 3 bang 2 bop 1", "bing 3 bang 2 bop 1" },
        // Substitution not possible: source has fewer numbers
        { "hoge 9 fuga 8 piyo", "foo 1 bar 2 baz 3", "bing 3 bang 2 bop 1", "bing 3 bang 2 bop 1" },
        // Substitution not possible: target match has fewer numbers
        { "hoge 9 fuga 8 piyo 7", "foo 1 bar 2 baz 3", "bing 3 bang 2 bop", "bing 3 bang 2 bop" },
        // Substitution not possible: source match has an extra number
        { "hoge 9 fuga 8 piyo 7", "foo 1 bar 2 baz 3 3", "bing 3 bang 2 bop 1", "bing 3 bang 2 bop 1" },
        // Substitution not possible: differing numbers
        { "hoge 9 fuga 8 piyo 7", "foo 1 bar 2 baz 33", "bing 3 bang 2 bop 1", "bing 3 bang 2 bop 1" },
    };

    for (String[] row : cases) {
        String actual = MatchesTextArea.substituteNumbers(row[0], row[1], row[2], tokenizer, tokenizer);
        assertEquals(row[3], actual);
    }
}
Use of org.omegat.tokenizer.ITokenizer in project omegat by omegat-org.
Class FindGlossaryThread, method search.
/**
 * Looks up the glossary entries whose source terms occur in the current
 * entry's source text.
 *
 * @return the matches reported by the {@code GlossarySearcher}, or an empty
 *         list when the project has no source tokenizer or the manager
 *         returns no entry list
 */
@Override
protected List<GlossaryEntry> search() {
    ITokenizer sourceTokenizer = Core.getProject().getSourceTokenizer();
    if (sourceTokenizer != null) {
        List<GlossaryEntry> candidates = manager.getGlossaryEntries(ste.getSrcText());
        if (candidates != null) {
            // The anonymous subclass routes the searcher's cancellation hook
            // to this thread's checkEntryChanged().
            GlossarySearcher glossarySearcher = new GlossarySearcher(sourceTokenizer,
                    Core.getProject().getProjectProperties().getSourceLanguage()) {
                @Override
                protected void checkCancelled() {
                    checkEntryChanged();
                }
            };
            return glossarySearcher.searchSourceMatches(ste, candidates);
        }
    }
    return Collections.emptyList();
}
Use of org.omegat.tokenizer.ITokenizer in project omegat by omegat-org.
Class GlossaryManager, method searchSourceMatches.
/**
 * Get all glossary entries with source terms found in the source text of
 * the provided entry.
 *
 * @param ste
 *            The entry whose source text is searched
 * @return A list of matching glossary entries; empty when the project has
 *         no source tokenizer or no glossary entry list is available
 */
public List<GlossaryEntry> searchSourceMatches(SourceTextEntry ste) {
    ITokenizer sourceTokenizer = Core.getProject().getSourceTokenizer();
    // Only consult the glossary when a tokenizer is available.
    List<GlossaryEntry> candidates = sourceTokenizer == null ? null : getGlossaryEntries(ste.getSrcText());
    if (candidates == null) {
        return Collections.emptyList();
    }
    return new GlossarySearcher(sourceTokenizer, Core.getProject().getProjectProperties().getSourceLanguage())
            .searchSourceMatches(ste, candidates);
}
Use of org.omegat.tokenizer.ITokenizer in project omegat by omegat-org.
Class GlossaryManager, method searchSourceMatchTokens.
/**
 * Get tokens of the source text that match the supplied glossary entry.
 *
 * @param ste
 *            The entry to search
 * @param entry
 *            The glossary entry to look for
 * @return A list of tokens matching the supplied glossary entry; empty
 *         when the project has no source tokenizer
 */
public List<Token[]> searchSourceMatchTokens(SourceTextEntry ste, GlossaryEntry entry) {
    ITokenizer sourceTokenizer = Core.getProject().getSourceTokenizer();
    if (sourceTokenizer != null) {
        return new GlossarySearcher(sourceTokenizer,
                Core.getProject().getProjectProperties().getSourceLanguage())
                .searchSourceMatchTokens(ste, entry);
    }
    return Collections.emptyList();
}
Use of org.omegat.tokenizer.ITokenizer in project omegat by omegat-org.
Class MosesTranslate, method mosesPreprocess.
/**
 * Prepares text for the Moses engine: splits it with the project's source
 * tokenizer, appends a single space after every token, and lowercases the
 * result using the given locale.
 *
 * @param text
 *            The text to preprocess
 * @param locale
 *            The locale whose case rules are used for lowercasing
 * @return The tokenized, space-separated, lowercased text (with a trailing
 *         space after the last token); if the project has no source
 *         tokenizer, the lowercased text without tokenization
 */
private String mosesPreprocess(String text, Locale locale) {
    ITokenizer tokenizer = Core.getProject().getSourceTokenizer();
    if (tokenizer == null) {
        // Other callers of getSourceTokenizer() treat null as a possible
        // result; degrade to lowercasing only instead of throwing a
        // NullPointerException.
        // NOTE(review): confirm that sending untokenized text is acceptable
        // to the Moses server in this situation.
        return text.toLowerCase(locale);
    }
    StringBuilder sb = new StringBuilder();
    for (String t : tokenizer.tokenizeVerbatimToStrings(text)) {
        sb.append(t).append(' ');
    }
    return sb.toString().toLowerCase(locale);
}
Aggregations