Use of org.omegat.tokenizer.LuceneEnglishTokenizer in the omegat project by omegat-org.
Class EditorUtilsTest, method testChangeCase.
/**
 * Checks {@code EditorUtils.doChangeCase} for each {@code CHANGE_CASE_TO}
 * mode, using {@code Locale.ENGLISH} and a {@code LuceneEnglishTokenizer}.
 *
 * <p>The first section pins the order in which repeated CYCLE conversions
 * are expected to step through the cases for a mixed-case sample. The
 * remaining sections check every mode against inputs that are already in
 * one particular case, have ambiguous case, contain no letters, consist of
 * a single token, or include digits, OmegaT tags or punctuation.
 */
@Test
public void testChangeCase() {
    final ITokenizer tokenizer = new LuceneEnglishTokenizer();
    final Locale locale = Locale.ENGLISH;

    // Mixed-case sample: successive CYCLE calls are expected to produce
    // upper, lower, sentence and title case, then wrap around to upper.
    final String mixed = "a I've GOT a {crazy} text hErE including 1 \u65e5\u672c\u8a9e!";

    final String asUpper = EditorUtils.doChangeCase(mixed, CHANGE_CASE_TO.CYCLE, locale, tokenizer);
    assertEquals("A I'VE GOT A {CRAZY} TEXT HERE INCLUDING 1 \u65e5\u672c\u8a9e!", asUpper);
    assertEquals(asUpper, EditorUtils.doChangeCase(mixed, CHANGE_CASE_TO.UPPER, locale, tokenizer));

    final String asLower = EditorUtils.doChangeCase(asUpper, CHANGE_CASE_TO.CYCLE, locale, tokenizer);
    assertEquals("a i've got a {crazy} text here including 1 \u65e5\u672c\u8a9e!", asLower);
    assertEquals(asLower, EditorUtils.doChangeCase(mixed, CHANGE_CASE_TO.LOWER, locale, tokenizer));

    final String asSentence = EditorUtils.doChangeCase(asLower, CHANGE_CASE_TO.CYCLE, locale, tokenizer);
    assertEquals("A i've got a {crazy} text here including 1 \u65e5\u672c\u8a9e!", asSentence);
    assertEquals(asSentence, EditorUtils.doChangeCase(mixed, CHANGE_CASE_TO.SENTENCE, locale, tokenizer));

    final String asTitle = EditorUtils.doChangeCase(asSentence, CHANGE_CASE_TO.CYCLE, locale, tokenizer);
    assertEquals("A I've Got A {Crazy} Text Here Including 1 \u65e5\u672c\u8a9e!", asTitle);
    assertEquals(asTitle, EditorUtils.doChangeCase(mixed, CHANGE_CASE_TO.TITLE, locale, tokenizer));

    final String wrapped = EditorUtils.doChangeCase(asTitle, CHANGE_CASE_TO.CYCLE, locale, tokenizer);
    assertEquals(asUpper, wrapped);

    // Multi-word inputs that are already in one specific case.
    // Argument order: input, LOWER, UPPER, SENTENCE, TITLE, CYCLE.
    assertCaseConversions("lower case only",
            "lower case only", "LOWER CASE ONLY", "Lower case only", "Lower Case Only", "Lower case only",
            locale, tokenizer);
    assertCaseConversions("UPPER CASE ONLY",
            "upper case only", "UPPER CASE ONLY", "Upper case only", "Upper Case Only", "upper case only",
            locale, tokenizer);
    assertCaseConversions("Title Case Only",
            "title case only", "TITLE CASE ONLY", "Title case only", "Title Case Only", "TITLE CASE ONLY",
            locale, tokenizer);
    assertCaseConversions("Sentence case string",
            "sentence case string", "SENTENCE CASE STRING", "Sentence case string", "Sentence Case String",
            "Sentence Case String",
            locale, tokenizer);
    assertCaseConversions("mIxed CaSe oNly",
            "mixed case only", "MIXED CASE ONLY", "Mixed case only", "Mixed Case Only", "MIXED CASE ONLY",
            locale, tokenizer);

    // Ambiguous only
    final String ambiguous = "A B C";
    assertEquals("a b c", EditorUtils.doChangeCase(ambiguous, CHANGE_CASE_TO.LOWER, locale, tokenizer));
    assertEquals(ambiguous, EditorUtils.doChangeCase(ambiguous, CHANGE_CASE_TO.UPPER, locale, tokenizer));
    assertEquals("A b c", EditorUtils.doChangeCase(ambiguous, CHANGE_CASE_TO.SENTENCE, locale, tokenizer));
    assertEquals(ambiguous, EditorUtils.doChangeCase(ambiguous, CHANGE_CASE_TO.TITLE, locale, tokenizer));
    // Each CYCLE step is fed the previous step's output; the third step is
    // expected to return to the starting text.
    String cycled = EditorUtils.doChangeCase(ambiguous, CHANGE_CASE_TO.CYCLE, locale, tokenizer);
    assertEquals("a b c", cycled);
    cycled = EditorUtils.doChangeCase(cycled, CHANGE_CASE_TO.CYCLE, locale, tokenizer);
    assertEquals("A b c", cycled);
    cycled = EditorUtils.doChangeCase(cycled, CHANGE_CASE_TO.CYCLE, locale, tokenizer);
    assertEquals(ambiguous, cycled);

    // No letter-containing tokens: every mode is expected to return the input unchanged.
    final String noLetters = "{!} 1 \u65e5\u672c\u8a9e";
    assertCaseConversions(noLetters, noLetters, noLetters, noLetters, noLetters, noLetters, locale, tokenizer);

    // Single tokens
    assertCaseConversions("lower", "lower", "LOWER", "Lower", "Lower", "Lower", locale, tokenizer);
    assertCaseConversions("UPPER", "upper", "UPPER", "Upper", "Upper", "upper", locale, tokenizer);
    assertCaseConversions("Title", "title", "TITLE", "Title", "Title", "TITLE", locale, tokenizer);
    assertCaseConversions("mIxed", "mixed", "MIXED", "Mixed", "Mixed", "MIXED", locale, tokenizer);

    // Ambiguous
    assertCaseConversions("A", "a", "A", "A", "A", "a", locale, tokenizer);

    // Includes number (https://sourceforge.net/p/omegat/bugs/612/)
    assertCaseConversions("MQL5", "mql5", "MQL5", "Mql5", "Mql5", "mql5", locale, tokenizer);

    // Includes OmegaT tag
    assertCaseConversions("<g0>Foo</g0>",
            "<g0>foo</g0>", "<g0>FOO</g0>", "<g0>Foo</g0>", "<g0>Foo</g0>", "<g0>FOO</g0>",
            locale, tokenizer);

    // Includes surrounding punctuation
    assertCaseConversions("\"Foo, Bar\"",
            "\"foo, bar\"", "\"FOO, BAR\"", "\"Foo, bar\"", "\"Foo, Bar\"", "\"FOO, BAR\"",
            locale, tokenizer);
}

/**
 * Asserts the result of converting {@code input} with LOWER, UPPER,
 * SENTENCE, TITLE and CYCLE, checked in that order.
 *
 * @param input     text handed to {@code EditorUtils.doChangeCase}
 * @param lower     expected result for {@code CHANGE_CASE_TO.LOWER}
 * @param upper     expected result for {@code CHANGE_CASE_TO.UPPER}
 * @param sentence  expected result for {@code CHANGE_CASE_TO.SENTENCE}
 * @param title     expected result for {@code CHANGE_CASE_TO.TITLE}
 * @param cycle     expected result for a single {@code CHANGE_CASE_TO.CYCLE}
 * @param locale    locale passed through to the conversion
 * @param tokenizer tokenizer passed through to the conversion
 */
private static void assertCaseConversions(String input, String lower, String upper, String sentence,
        String title, String cycle, Locale locale, ITokenizer tokenizer) {
    assertEquals(lower, EditorUtils.doChangeCase(input, CHANGE_CASE_TO.LOWER, locale, tokenizer));
    assertEquals(upper, EditorUtils.doChangeCase(input, CHANGE_CASE_TO.UPPER, locale, tokenizer));
    assertEquals(sentence, EditorUtils.doChangeCase(input, CHANGE_CASE_TO.SENTENCE, locale, tokenizer));
    assertEquals(title, EditorUtils.doChangeCase(input, CHANGE_CASE_TO.TITLE, locale, tokenizer));
    assertEquals(cycle, EditorUtils.doChangeCase(input, CHANGE_CASE_TO.CYCLE, locale, tokenizer));
}
Use of org.omegat.tokenizer.LuceneEnglishTokenizer in the omegat project by omegat-org.
Class EditorUtilsTest, method testReplaceGlossaryEntries.
/**
 * Checks {@code EditorUtils.replaceGlossaryEntries} with an English locale
 * and a {@code LuceneEnglishTokenizer}.
 *
 * <p>Covers single-word entries (the expected output capitalises the
 * replacement where the source token is capitalised and lower-cases it for
 * the mixed-case token), null and empty arguments, a multiword entry
 * inserted at the head of the glossary, and a match on the final token of
 * the text.
 */
@Test
public void testReplaceGlossaryEntries() {
    List<GlossaryEntry> glossary = new ArrayList<GlossaryEntry>();
    glossary.add(new GlossaryEntry("snowman", "sneeuwpop", "", false, null));
    glossary.add(new GlossaryEntry("Bob", "Blub", "", false, null));

    final ITokenizer tokenizer = new LuceneEnglishTokenizer();
    final Locale locale = Locale.ENGLISH;

    final String sentence = "Snowman Bob went to the snowman party. SnOwMaN!";
    assertEquals("Sneeuwpop Blub went to the sneeuwpop party. sneeuwpop!",
            EditorUtils.replaceGlossaryEntries(sentence, glossary, locale, tokenizer));

    // Empty cases
    assertNull(EditorUtils.replaceGlossaryEntries(null, glossary, locale, tokenizer));
    assertEquals("", EditorUtils.replaceGlossaryEntries("", glossary, locale, tokenizer));
    // With no glossary to apply, the very same String instance must come back.
    assertSame(sentence, EditorUtils.replaceGlossaryEntries(sentence, null, locale, tokenizer));
    assertSame(sentence,
            EditorUtils.replaceGlossaryEntries(sentence, new ArrayList<GlossaryEntry>(), locale, tokenizer));

    try {
        EditorUtils.replaceGlossaryEntries(sentence, glossary, null, tokenizer);
        fail("Should give NPE when given null locale");
    } catch (NullPointerException ignored) {
        // Expected: the NPE is the outcome this test requires.
    }

    try {
        EditorUtils.replaceGlossaryEntries(sentence, glossary, locale, null);
        fail("Should give NPE when given null tokenizer");
    } catch (NullPointerException ignored) {
        // Expected: the NPE is the outcome this test requires.
    }

    // Multiword entry
    glossary.add(0, new GlossaryEntry("snowman party", "sneeuwpop parti", "", false, null));
    assertEquals("Sneeuwpop Blub went to the sneeuwpop parti. sneeuwpop!",
            EditorUtils.replaceGlossaryEntries(sentence, glossary, locale, tokenizer));

    // Replace final token (see https://sourceforge.net/p/omegat/bugs/819/)
    final String endsWithMatch = "Snowman Bob went to the snowman party. SnOwMaN";
    assertEquals("Sneeuwpop Blub went to the sneeuwpop parti. sneeuwpop",
            EditorUtils.replaceGlossaryEntries(endsWithMatch, glossary, locale, tokenizer));
}
Aggregations