use of java.text.CharacterIterator in project robovm by robovm.
the class CollatorTest method assertGetCollationElementIteratorCharacterIterator.
/**
 * Obtains the collator for the given locale, asks it for a collation element
 * iterator over {@code s} through the {@link CharacterIterator} overload, and
 * hands that iterator together with the expected offsets to
 * {@code assertCollationElementIterator}.
 *
 * @param l locale whose collator is used (cast to {@link RuleBasedCollator})
 * @param s text wrapped in a {@link StringCharacterIterator}
 * @param offsets expected offsets, forwarded unchanged to the assertion helper
 */
private void assertGetCollationElementIteratorCharacterIterator(Locale l, String s, Integer... offsets) {
    final RuleBasedCollator collator = (RuleBasedCollator) Collator.getInstance(l);
    // Declared as CharacterIterator so the CharacterIterator overload is the one exercised.
    final CharacterIterator chars = new StringCharacterIterator(s);
    assertCollationElementIterator(
            collator.getCollationElementIterator(chars),
            offsets);
}
use of java.text.CharacterIterator in project drill by apache.
the class Text method utf8Length.
/**
 * For the given string, returns the number of UTF-8 bytes required to encode the string.
 *
 * <p>The string is scanned by index rather than through a
 * {@code CharacterIterator}: that interface signals end-of-text with the
 * sentinel {@code '\uFFFF'}, which is also a legal {@code char} inside a Java
 * string, so a sentinel-based loop stops counting at the first U+FFFF.
 *
 * <p>Counting rules: a char below 0x80 counts 1 byte, a char below 0x800
 * counts 2 bytes, a high surrogate immediately followed by a low surrogate
 * counts 4 bytes for the pair, and every other char (including an unpaired
 * surrogate) counts 3 bytes.
 *
 * @param string
 * text to encode
 * @return number of UTF-8 bytes required to encode
 * @throws NullPointerException if {@code string} is null
 */
public static int utf8Length(String string) {
    final int length = string.length();
    int size = 0;
    for (int i = 0; i < length; i++) {
        final char ch = string.charAt(i);
        if (ch < 0x80) {
            size++;
        } else if (ch < 0x800) {
            size += 2;
        } else if (Character.isHighSurrogate(ch)
                && i + 1 < length
                && Character.isLowSurrogate(string.charAt(i + 1))) {
            // valid surrogate pair: one supplementary code point, 4 bytes
            size += 4;
            // the low surrogate has been accounted for; skip it
            i++;
        } else {
            // any other BMP char, or a surrogate without its partner
            size += 3;
        }
    }
    return size;
}
use of java.text.CharacterIterator in project jdk8u_jdk by JetBrains.
the class RuleBasedBreakIterator method handlePrevious.
/**
* This method backs the iterator back up to a "safe position" in the text.
* This is a position that we know, without any context, must be a break position.
* The various calling methods then iterate forward from this safe position to
* the appropriate position to return. (For more information, see the description
* of buildBackwardsStateTable() in RuleBasedBreakIterator.Builder.)
*
* @return the index reported by the text iterator once the backward scan
* (and any forward correction) has finished
*/
protected int handlePrevious() {
CharacterIterator text = getText();
int state = START_STATE;
// category of the character examined in the current loop pass
int category = 0;
// category of the character examined in the pass before the current one
int lastCategory = 0;
int c = getCurrent();
// loop until we reach the beginning of the text or transition to STOP_STATE
while (c != CharacterIterator.DONE && state != STOP_STATE) {
// save the last character's category and look up the current
// character's category
lastCategory = category;
category = lookupCategory(c);
// if the current character isn't an ignore character, look up a
// state transition in the backwards state table
if (category != IGNORE) {
state = lookupBackwardState(state, category);
}
// then advance one character backwards
c = getPrevious();
}
// if the loop stopped because the state machine reached STOP_STATE (c is
// not DONE), step forward again: one getNext() compensates for the
// getPrevious() call made at the end of the last loop pass, and a second
// getNext() is issued when the previously examined character was not an
// ignore character. After that the iterator sits on the break position.
if (c != CharacterIterator.DONE) {
if (lastCategory != IGNORE) {
getNext();
getNext();
} else {
getNext();
}
}
return text.getIndex();
}
use of java.text.CharacterIterator in project jdk8u_jdk by JetBrains.
the class RuleBasedBreakIterator method handleNext.
//=======================================================================
// implementation
//=======================================================================
/**
* This method is the actual implementation of the next() method. All iteration
* vectors through here. This method initializes the state machine to state 1
* and advances through the text character by character until we reach the end
* of the text or the state machine transitions to state 0. We update our return
* value every time the state machine passes through a possible end state.
*
* @return the index of the break position that was found (the text iterator is
* left positioned at that index), or BreakIterator.DONE if the text iterator
* was already at its end index when the method was called
*/
protected int handleNext() {
// if we're already at the end of the text, return DONE.
CharacterIterator text = getText();
if (text.getIndex() == text.getEndIndex()) {
return BreakIterator.DONE;
}
// no matter what, we always advance at least one character forward
int result = getNextIndex();
// most recently saved lookahead position; stays 0 until a lookahead state
// that is not also an end state has been entered
int lookaheadResult = 0;
// begin in state 1
int state = START_STATE;
int category;
int c = getCurrent();
// loop until we reach the end of the text or transition to state 0
while (c != CharacterIterator.DONE && state != STOP_STATE) {
// look up the current character's character category (which tells us
// which column in the state table to look at)
category = lookupCategory(c);
// if the current character isn't an ignore character, look up a state
// transition in the state table
if (category != IGNORE) {
state = lookupState(state, category);
}
// if the state we've just transitioned to is a lookahead state (but not
// also an end state), save its position. If it's both a lookahead state
// and an end state, update the break position
// to the last saved lookup-state position
if (lookaheadStates[state]) {
if (endStates[state]) {
result = lookaheadResult;
} else {
lookaheadResult = getNextIndex();
}
} else // otherwise, if the state we've just transitioned to is an accepting
// state, update the break position to be the current iteration position
{
if (endStates[state]) {
result = getNextIndex();
}
}
c = getNext();
}
// if we've run off the end of the text (c is DONE) and the saved lookahead
// position is the end of the text, use the lookahead position as the break
// position (the theory here is that if there are no characters at all after
// the lookahead position, that always matches the lookahead criteria)
if (c == CharacterIterator.DONE && lookaheadResult == text.getEndIndex()) {
result = lookaheadResult;
}
// leave the text iterator on the break position that is being returned
text.setIndex(result);
return result;
}
use of java.text.CharacterIterator in project jdk8u_jdk by JetBrains.
the class RuleBasedBreakIterator method preceding.
/**
 * Moves the iterator to the last boundary position that comes before the
 * given position.
 *
 * @param offset The position to begin searching for a break from.
 * @return The position of the last boundary before the starting position.
 */
@Override
public int preceding(int offset) {
    // Seeding the current iteration position with the caller's offset
    // (after it has been passed through checkOffset) lets previous()
    // carry out the actual search.
    final CharacterIterator source = getText();
    checkOffset(offset, source);
    source.setIndex(offset);
    final int boundary = previous();
    return boundary;
}
Aggregations