use of com.ibm.icu.text.BreakIterator in project eclipse.platform.text by eclipse.
the class AbstractDecoratedTextEditor method openSaveErrorDialog.
/**
 * Presents an error dialog to the user when a problem happens during save.
 * <p>
 * Overrides the default behavior by showing a more advanced error dialog in case of encoding
 * problems. When the status code is {@code CHARSET_MAPPING_FAILED} and the document provider is
 * an {@link IStorageDocumentProvider}, the dialog gets a button that switches the encoding to
 * UTF-8 and saves again, and (if the current charset is known) a button that selects the first
 * character the charset's encoder cannot encode. Every other failure is delegated to the super
 * implementation.
 * </p>
 *
 * @param title the dialog title
 * @param message the message to display
 * @param exception the exception to handle
 * @since 3.6
 */
@Override
protected void openSaveErrorDialog(String title, String message, CoreException exception) {
    IStatus status = exception.getStatus();
    final IDocumentProvider documentProvider = getDocumentProvider();
    if (!(status.getCode() == IFileBufferStatusCodes.CHARSET_MAPPING_FAILED && documentProvider instanceof IStorageDocumentProvider)) {
        super.openSaveErrorDialog(title, message, exception);
        return;
    }

    // Button ids are derived from the standard ids so they cannot collide with OK or Cancel.
    final int saveAsUTF8ButtonId = IDialogConstants.OK_ID + IDialogConstants.CANCEL_ID + 1;
    final int selectUnmappableCharButtonId = saveAsUTF8ButtonId + 1;
    final Charset charset = getCharset();

    ErrorDialog errorDialog = new ErrorDialog(getSite().getShell(), title, message, status, IStatus.ERROR) {
        @Override
        protected void createButtonsForButtonBar(Composite parent) {
            super.createButtonsForButtonBar(parent);
            createButton(parent, saveAsUTF8ButtonId, TextEditorMessages.AbstractDecoratedTextEditor_save_error_Dialog_button_saveAsUTF8, false);
            // Without a charset there is no encoder to test characters against.
            if (charset != null) {
                createButton(parent, selectUnmappableCharButtonId, TextEditorMessages.AbstractDecoratedTextEditor_save_error_Dialog_button_selectUnmappable, false);
            }
        }

        @Override
        protected void buttonPressed(int id) {
            if (id == saveAsUTF8ButtonId || id == selectUnmappableCharButtonId) {
                // Report the pressed custom button to the caller through the return code.
                setReturnCode(id);
                close();
            } else {
                super.buttonPressed(id);
            }
        }

        @Override
        protected boolean shouldShowDetailsButton() {
            return false;
        }
    };

    int returnCode = errorDialog.open();
    if (returnCode == saveAsUTF8ButtonId) {
        ((IStorageDocumentProvider) documentProvider).setEncoding(getEditorInput(), "UTF-8"); //$NON-NLS-1$
        IProgressMonitor monitor = getProgressMonitor();
        try {
            doSave(monitor);
        } finally {
            monitor.done();
        }
    } else if (returnCode == selectUnmappableCharButtonId) {
        // The button only exists when charset != null, so charset is non-null here.
        CharsetEncoder encoder = charset.newEncoder();
        IDocument document = getDocumentProvider().getDocument(getEditorInput());
        int documentLength = document.getLength();
        int offset = 0;
        BreakIterator charBreakIterator = BreakIterator.getCharacterInstance();
        charBreakIterator.setText(document.get());
        while (offset < documentLength) {
            int next = charBreakIterator.next();
            if (next == BreakIterator.DONE) {
                // No further boundary: stop instead of computing a negative length forever.
                break;
            }
            try {
                String ch = document.get(offset, next - offset);
                if (!encoder.canEncode(ch)) {
                    selectAndReveal(offset, next - offset);
                    return;
                }
            } catch (BadLocationException ex) {
                EditorsPlugin.log(ex);
                // Skip this character. Showing yet another dialog here is overkill
            }
            // Always advance, also after a failure, so the scan is guaranteed to terminate.
            offset = next;
        }
    }
}
use of com.ibm.icu.text.BreakIterator in project es6draft by anba.
the class SegmentIteratorPrototype method AdvanceSegmentIterator.
/**
 * AdvanceSegmentIterator ( iterator, direction )
 * <p>
 * Moves the break iterator of {@code iterator} to the next boundary in the requested direction,
 * then stores the resulting break type and position back on the iterator object.
 *
 * @param iterator
 *            the segment iterator object
 * @param direction
 *            the direction kind
 * @return {@code true} if iterator has hit the end of the string, otherwise {@code false}
 */
public static boolean AdvanceSegmentIterator(SegmentIteratorObject iterator, Direction direction) {
    /* steps 1-3 */
    BreakIterator breaker = iterator.getBreakIterator();
    String text = iterator.getString();
    int start = iterator.getPosition();

    /* step 4 */
    boolean pastEnd = direction == Direction.Forwards && start >= text.length();
    boolean beforeStart = direction == Direction.Backwards && start <= 0;
    if (pastEnd || beforeStart) {
        return true;
    }

    /* step 5 */
    int boundary;
    if (direction != Direction.Forwards) {
        assert direction == Direction.Backwards;
        boundary = breaker.preceding(start);
    } else {
        boundary = breaker.following(start);
    }

    /* step 6 */
    String breakType = null;
    if (boundary != BreakIterator.DONE) {
        switch (iterator.getGranularity()) {
        case "grapheme":
            // Always undefined.
            break;
        case "word": {
            int status = breaker.getRuleStatus();
            boolean isNone = BreakIterator.WORD_NONE <= status && status < BreakIterator.WORD_NONE_LIMIT;
            // Numbers, letters, kana and ideographs are all reported as "word".
            boolean isWord = (BreakIterator.WORD_NUMBER <= status && status < BreakIterator.WORD_NUMBER_LIMIT)
                    || (BreakIterator.WORD_LETTER <= status && status < BreakIterator.WORD_LETTER_LIMIT)
                    || (BreakIterator.WORD_KANA <= status && status < BreakIterator.WORD_KANA_LIMIT)
                    || (BreakIterator.WORD_IDEO <= status && status < BreakIterator.WORD_IDEO_LIMIT);
            if (isNone) {
                breakType = "none";
            } else if (isWord) {
                breakType = "word";
            }
            break;
        }
        case "line": {
            int status = breaker.getRuleStatus();
            if (LineBreakTag.SOFT <= status && status < LineBreakTag.SOFT_LIMIT) {
                breakType = "soft";
            } else if (LineBreakTag.HARD <= status && status < LineBreakTag.HARD_LIMIT) {
                breakType = "hard";
            }
            break;
        }
        case "sentence": {
            int status = breaker.getRuleStatus();
            if (SentenceBreakTag.TERM <= status && status < SentenceBreakTag.TERM_LIMIT) {
                breakType = "term";
            } else if (SentenceBreakTag.SEP <= status && status < SentenceBreakTag.SEP_LIMIT) {
                breakType = "sep";
            }
            break;
        }
        default:
            throw new AssertionError();
        }
    }
    iterator.setBreakType(breakType);

    /* step 7 */
    iterator.setPosition(breaker.current());

    /* step 8 */
    return false;
}
use of com.ibm.icu.text.BreakIterator in project elasticsearch by elastic.
the class IcuTokenizerFactory method parseRules.
/**
 * Parses a single RBBI rule file.
 * <p>
 * The file name is resolved against the environment's config directory. Lines starting with
 * {@code #} are dropped; the remaining lines are joined with {@code \n} and compiled into a
 * {@link RuleBasedBreakIterator}.
 *
 * @param filename rule file name, resolved against {@code env.configFile()}
 * @param env environment supplying the config directory
 * @return break iterator built from the rules in the file
 * @throws IOException if the file cannot be read
 */
private BreakIterator parseRules(String filename, Environment env) throws IOException {
    final Path rulesFile = env.configFile().resolve(filename);
    StringBuilder rules = new StringBuilder();
    // Track the first kept line explicitly: an empty first line must still be followed by '\n'.
    boolean first = true;
    for (String line : Files.readAllLines(rulesFile)) {
        if (line.startsWith("#")) {
            continue;
        }
        if (!first) {
            rules.append("\n");
        }
        rules.append(line);
        first = false;
    }
    return new RuleBasedBreakIterator(rules.toString());
}
use of com.ibm.icu.text.BreakIterator in project elasticsearch by elastic.
the class IcuTokenizerFactory method getIcuConfig.
/**
 * Builds a tokenizer configuration from the {@code RULE_FILES} setting.
 * <p>
 * Each entry of the setting must have the form {@code code:rulefile}. The script code is mapped
 * to its ICU script value and the rule file is parsed with {@code parseRules}. The returned
 * configuration hands out a clone of the tailored break iterator for scripts that have one and
 * defers to the default configuration for all others.
 *
 * @param env environment passed on to {@code parseRules}
 * @param settings settings holding the {@code RULE_FILES} entries
 * @return the tailored configuration, or {@code null} when no rule files are configured
 * @throws ElasticsearchException wrapping any exception raised while reading the setting or
 *         loading the rule files
 */
private ICUTokenizerConfig getIcuConfig(Environment env, Settings settings) {
    Map<Integer, String> tailored = new HashMap<>();
    try {
        // Phase 1: validate every "code:rulefile" pair before any file is touched.
        for (String pair : settings.getAsArray(RULE_FILES)) {
            int separator = pair.indexOf(':');
            boolean noSeparator = separator == -1;
            boolean noRuleFile = separator == pair.length() - 1;
            if (noSeparator || noRuleFile) {
                throw new IllegalArgumentException(RULE_FILES + " should contain comma-separated \"code:rulefile\" pairs");
            }
            String scriptCode = pair.substring(0, separator).trim();
            String ruleFile = pair.substring(separator + 1).trim();
            tailored.put(UCharacter.getPropertyValueEnum(UProperty.SCRIPT, scriptCode), ruleFile);
        }
        if (tailored.isEmpty()) {
            return null;
        }

        // Phase 2: load one break iterator per tailored script, indexed by script code.
        final BreakIterator[] breakers = new BreakIterator[UScript.CODE_LIMIT];
        for (Map.Entry<Integer, String> entry : tailored.entrySet()) {
            breakers[entry.getKey()] = parseRules(entry.getValue(), env);
        }

        // Neither cjkAsWords nor myanmarAsWords is configurable yet.
        return new DefaultICUTokenizerConfig(true, true) {
            @Override
            public BreakIterator getBreakIterator(int script) {
                BreakIterator tailoredBreaker = breakers[script];
                if (tailoredBreaker == null) {
                    return super.getBreakIterator(script);
                }
                // Hand out a clone so the stored iterator is never shared with callers.
                return (BreakIterator) tailoredBreaker.clone();
            }
        };
    } catch (Exception e) {
        throw new ElasticsearchException("failed to load ICU rule files", e);
    }
}
use of com.ibm.icu.text.BreakIterator in project es6draft by anba.
the class SegmenterObject method createBreakIterator.
/**
 * Creates the ICU break iterator matching this segmenter's locale and granularity.
 * <p>
 * For the {@code "line"} granularity the {@code lb} locale keyword is set from
 * {@code strictness} before the iterator is requested.
 *
 * @return a new break iterator for the configured granularity
 */
private BreakIterator createBreakIterator() {
    ULocale resolved = ULocale.forLanguageTag(this.locale);
    switch (granularity) {
    case "grapheme":
        return BreakIterator.getCharacterInstance(resolved);
    case "word":
        return BreakIterator.getWordInstance(resolved);
    case "sentence":
        return BreakIterator.getSentenceInstance(resolved);
    case "line":
        // "strictness" cannot be set through unicode extensions (u-lb-strict), handle here:
        return BreakIterator.getLineInstance(resolved.setKeywordValue("lb", strictness));
    default:
        throw new AssertionError();
    }
}
Aggregations