use of sun.text.CompactByteArray in project jdk8u_jdk by JetBrains.
the class BreakDictionary method readDictionaryFile.
/**
 * Loads the named dictionary resource from {@code /sun/text/resources/} and
 * initializes this dictionary's lookup tables from its contents.
 * <p>
 * The resource starts with an 8-byte prefix (a 4-byte version followed by a
 * 4-byte body length) and is followed by the body. The body is a sequence of
 * length-prefixed arrays, read in this order: the BMP column map (index array,
 * then data array), {@code numCols}, {@code numColGroups}, the row index, the
 * row-index flags index, the row-index flags, the row-index shifts, the state
 * table, and the supplementary-character column map.
 *
 * @param dictionaryName name of the dictionary resource, relative to
 *                       {@code /sun/text/resources/}
 * @throws IOException if reading or closing the resource stream fails
 * @throws MissingResourceException if the prefix or body is shorter than
 *         declared, the declared body length is negative, or the version is
 *         not {@code supportedVersion}
 */
private void readDictionaryFile(final String dictionaryName) throws IOException, MissingResourceException {
    byte[] buf = loadDictionaryBody(dictionaryName);
    int l;
    int offset = 0;

    // read in the column map for BMP characters (this is serialized in
    // its internal form: an index array followed by a data array)
    l = RuleBasedBreakIterator.getInt(buf, offset);
    offset += 4;
    short[] temp = new short[l];
    for (int i = 0; i < l; i++, offset += 2) {
        temp[i] = RuleBasedBreakIterator.getShort(buf, offset);
    }
    l = RuleBasedBreakIterator.getInt(buf, offset);
    offset += 4;
    byte[] temp2 = new byte[l];
    System.arraycopy(buf, offset, temp2, 0, l);
    offset += l;
    columnMap = new CompactByteArray(temp, temp2);

    // read in numCols and numColGroups
    numCols = RuleBasedBreakIterator.getInt(buf, offset);
    offset += 4;
    numColGroups = RuleBasedBreakIterator.getInt(buf, offset);
    offset += 4;

    // read in the row-number index
    l = RuleBasedBreakIterator.getInt(buf, offset);
    offset += 4;
    rowIndex = new short[l];
    for (int i = 0; i < l; i++, offset += 2) {
        rowIndex[i] = RuleBasedBreakIterator.getShort(buf, offset);
    }

    // load in the populated-cells bitmap: index first, then bitmap list
    l = RuleBasedBreakIterator.getInt(buf, offset);
    offset += 4;
    rowIndexFlagsIndex = new short[l];
    for (int i = 0; i < l; i++, offset += 2) {
        rowIndexFlagsIndex[i] = RuleBasedBreakIterator.getShort(buf, offset);
    }
    l = RuleBasedBreakIterator.getInt(buf, offset);
    offset += 4;
    rowIndexFlags = new int[l];
    for (int i = 0; i < l; i++, offset += 4) {
        rowIndexFlags[i] = RuleBasedBreakIterator.getInt(buf, offset);
    }

    // load in the row-shift index
    l = RuleBasedBreakIterator.getInt(buf, offset);
    offset += 4;
    rowIndexShifts = new byte[l];
    System.arraycopy(buf, offset, rowIndexShifts, 0, l);
    offset += l;

    // load in the actual state table
    l = RuleBasedBreakIterator.getInt(buf, offset);
    offset += 4;
    table = new short[l];
    for (int i = 0; i < l; i++, offset += 2) {
        table[i] = RuleBasedBreakIterator.getShort(buf, offset);
    }

    // finally, prepare the column map for supplementary characters
    l = RuleBasedBreakIterator.getInt(buf, offset);
    offset += 4;
    int[] temp3 = new int[l];
    for (int i = 0; i < l; i++, offset += 4) {
        temp3[i] = RuleBasedBreakIterator.getInt(buf, offset);
    }
    supplementaryCharColumnMap = new SupplementaryCharacterData(temp3);
}

/**
 * Opens the dictionary resource, validates its 8-byte prefix and returns the
 * body bytes. The stream is always closed, including when validation fails.
 */
private byte[] loadDictionaryBody(final String dictionaryName) throws IOException, MissingResourceException {
    BufferedInputStream in;
    try {
        in = AccessController.doPrivileged(new PrivilegedExceptionAction<BufferedInputStream>() {
            @Override
            public BufferedInputStream run() throws Exception {
                return new BufferedInputStream(getClass().getResourceAsStream("/sun/text/resources/" + dictionaryName));
            }
        });
    } catch (PrivilegedActionException e) {
        throw new InternalError(e.toString(), e);
    }

    try (BufferedInputStream stream = in) {
        byte[] prefix = new byte[8];
        if (fillFromStream(stream, prefix) != prefix.length) {
            throw new MissingResourceException("Wrong data length", dictionaryName, "");
        }

        // check version
        int version = RuleBasedBreakIterator.getInt(prefix, 0);
        if (version != supportedVersion) {
            throw new MissingResourceException("Dictionary version(" + version + ") is unsupported", dictionaryName, "");
        }

        // get data size; a negative value can only come from corrupt data
        int len = RuleBasedBreakIterator.getInt(prefix, 4);
        if (len < 0) {
            throw new MissingResourceException("Wrong data length", dictionaryName, "");
        }
        byte[] body = new byte[len];
        if (fillFromStream(stream, body) != len) {
            throw new MissingResourceException("Wrong data length", dictionaryName, "");
        }
        return body;
    }
}

/**
 * Reads from {@code in} until {@code dest} is full or end of stream is reached.
 * A single {@code read} call may return fewer bytes than requested even when
 * more data follows, so the read is repeated.
 *
 * @return the number of bytes actually stored in {@code dest}
 */
private static int fillFromStream(BufferedInputStream in, byte[] dest) throws IOException {
    int total = 0;
    while (total < dest.length) {
        int n = in.read(dest, total, dest.length - total);
        if (n < 0) {
            break;
        }
        total += n;
    }
    return total;
}
use of sun.text.CompactByteArray in project jdk8u_jdk by JetBrains.
the class RuleBasedBreakIterator method readTables.
/**
 * Reads the named data file and populates the iterator's tables from it.
 * The data file is laid out as follows:
 * <pre>
 *   BreakIteratorData {
 *       u1           magic[7];
 *       u1           version;
 *       u4           totalDataSize;
 *       header_info  header;
 *       body         value;
 *   }
 * </pre>
 * <code>totalDataSize</code> is the combined size, in bytes, of
 * <code>header_info</code> and <code>body</code>.
 * <p>
 * Every field of <code>header</code> except <code>checksum</code> gives the
 * length of the corresponding array in <code>body</code>. Because
 * <code>BMPindices</code> always has 512 entries, its length is not stored
 * in <code>header</code>. <code>checksum</code> is documented as a CRC32
 * value over <code>body</code>; this method stores it without verifying it.
 * <pre>
 *   header_info {
 *       u4           stateTableLength;
 *       u4           backwardsStateTableLength;
 *       u4           endStatesLength;
 *       u4           lookaheadStatesLength;
 *       u4           BMPdataLength;
 *       u4           nonBMPdataLength;
 *       u4           additionalDataLength;
 *       u8           checksum;
 *   }
 * </pre>
 * <p>
 * <code>BMPindices</code> and <code>BMPdata</code> together become
 * <code>charCategoryTable</code>; <code>nonBMPdata</code> becomes
 * <code>supplementaryCharCategoryTable</code>.
 * <pre>
 *   body {
 *       u2           stateTable[stateTableLength];
 *       u2           backwardsStateTable[backwardsStateTableLength];
 *       u1           endStates[endStatesLength];
 *       u1           lookaheadStates[lookaheadStatesLength];
 *       u2           BMPindices[512];
 *       u1           BMPdata[BMPdataLength];
 *       u4           nonBMPdata[nonBMPdataLength];
 *       u1           additionalData[additionalDataLength];
 *   }
 * </pre>
 * The buffer returned by <code>readFile</code> is indexed from the start of
 * <code>header_info</code>; the body is read starting at
 * <code>HEADER_LENGTH</code>.
 *
 * @param datafile name of the data file, passed to <code>readFile</code>
 * @throws IOException propagated from <code>readFile</code>
 * @throws MissingResourceException propagated from <code>readFile</code>
 */
protected final void readTables(String datafile) throws IOException, MissingResourceException {
    final byte[] data = readFile(datafile);

    /* header_info: seven u4 lengths, then the u8 checksum. */
    final int forwardCount = getInt(data, 0);
    final int backwardCount = getInt(data, 4);
    final int endStateCount = getInt(data, 8);
    final int lookaheadCount = getInt(data, 12);
    final int bmpDataCount = getInt(data, 16);
    final int nonBmpCount = getInt(data, 20);
    final int extraCount = getInt(data, 24);
    checksum = getLong(data, 28);

    /* stateTable[numCategories * numRows] */
    stateTable = new short[forwardCount];
    int pos = HEADER_LENGTH;
    for (int k = 0; k < forwardCount; k++) {
        stateTable[k] = getShort(data, pos);
        pos += 2;
    }

    /* backwardsStateTable[numCategories * numRows] */
    backwardsStateTable = new short[backwardCount];
    for (int k = 0; k < backwardCount; k++) {
        backwardsStateTable[k] = getShort(data, pos);
        pos += 2;
    }

    /* endStates[numRows]: a byte value of 1 means true */
    endStates = new boolean[endStateCount];
    for (int k = 0; k < endStateCount; k++) {
        endStates[k] = (data[pos] == 1);
        pos++;
    }

    /* lookaheadStates[numRows]: a byte value of 1 means true */
    lookaheadStates = new boolean[lookaheadCount];
    for (int k = 0; k < lookaheadCount; k++) {
        lookaheadStates[k] = (data[pos] == 1);
        pos++;
    }

    /* Category table for BMP characters: fixed-size indices, then data. */
    final short[] bmpIndices = new short[BMP_INDICES_LENGTH];
    for (int k = 0; k < BMP_INDICES_LENGTH; k++) {
        bmpIndices[k] = getShort(data, pos);
        pos += 2;
    }
    final byte[] bmpData = new byte[bmpDataCount];
    System.arraycopy(data, pos, bmpData, 0, bmpDataCount);
    pos += bmpDataCount;
    charCategoryTable = new CompactByteArray(bmpIndices, bmpData);

    /* Category table for non-BMP characters. */
    final int[] nonBmpData = new int[nonBmpCount];
    for (int k = 0; k < nonBmpCount; k++) {
        nonBmpData[k] = getInt(data, pos);
        pos += 4;
    }
    supplementaryCharCategoryTable = new SupplementaryCharacterData(nonBmpData);

    /* Optional trailing data; the field is left untouched when absent. */
    if (extraCount > 0) {
        additionalData = new byte[extraCount];
        System.arraycopy(data, pos, additionalData, 0, extraCount);
    }

    /* The state table holds numCategories columns for each end-state row. */
    numCategories = stateTable.length / endStates.length;
}
use of sun.text.CompactByteArray in project Bytecoder by mirkosertic.
the class RuleBasedBreakIterator method setupTables.
/**
 * Populates the iterator's tables from rule data held in a buffer.
 * The overall data format is:
 * <pre>
 *   BreakIteratorData {
 *       u1           magic[7];
 *       u1           version;
 *       u4           totalDataSize;
 *       header_info  header;
 *       body         value;
 *   }
 * </pre>
 * <code>totalDataSize</code> is the combined size, in bytes, of
 * <code>header_info</code> and <code>body</code>.
 * <p>
 * Every field of <code>header</code> except <code>checksum</code> gives the
 * length of the corresponding array in <code>body</code>. Because
 * <code>BMPindices</code> always has 512 entries, its length is not stored
 * in <code>header</code>. <code>checksum</code> is documented as a CRC32
 * value over <code>body</code>; this method stores it without verifying it.
 * <pre>
 *   header_info {
 *       u4           stateTableLength;
 *       u4           backwardsStateTableLength;
 *       u4           endStatesLength;
 *       u4           lookaheadStatesLength;
 *       u4           BMPdataLength;
 *       u4           nonBMPdataLength;
 *       u4           additionalDataLength;
 *       u8           checksum;
 *   }
 * </pre>
 * <p>
 * <code>BMPindices</code> and <code>BMPdata</code> together become
 * <code>charCategoryTable</code>; <code>nonBMPdata</code> becomes
 * <code>supplementaryCharCategoryTable</code>.
 * <pre>
 *   body {
 *       u2           stateTable[stateTableLength];
 *       u2           backwardsStateTable[backwardsStateTableLength];
 *       u1           endStates[endStatesLength];
 *       u1           lookaheadStates[lookaheadStatesLength];
 *       u2           BMPindices[512];
 *       u1           BMPdata[BMPdataLength];
 *       u4           nonBMPdata[nonBMPdataLength];
 *       u1           additionalData[additionalDataLength];
 *   }
 * </pre>
 * Reading starts at the buffer's current position, which is expected to be
 * the first byte of <code>header_info</code>.
 *
 * @param ruleFile name of the rule file the data came from (not used by
 *                 this method)
 * @param bb       buffer positioned at the start of <code>header_info</code>
 * @throws BufferUnderflowException if the end-of-data is reached before
 *         setting up all the tables
 */
private void setupTables(String ruleFile, ByteBuffer bb) {
    /* header_info: seven u4 lengths, then the u8 checksum. */
    final int forwardCount = bb.getInt();
    final int backwardCount = bb.getInt();
    final int endStateCount = bb.getInt();
    final int lookaheadCount = bb.getInt();
    final int bmpDataCount = bb.getInt();
    final int nonBmpCount = bb.getInt();
    final int extraCount = bb.getInt();
    checksum = bb.getLong();

    /* stateTable[numCategories * numRows] */
    stateTable = new short[forwardCount];
    int k = 0;
    while (k < forwardCount) {
        stateTable[k] = bb.getShort();
        k++;
    }

    /* backwardsStateTable[numCategories * numRows] */
    backwardsStateTable = new short[backwardCount];
    k = 0;
    while (k < backwardCount) {
        backwardsStateTable[k] = bb.getShort();
        k++;
    }

    /* endStates[numRows]: a byte value of 1 means true */
    endStates = new boolean[endStateCount];
    k = 0;
    while (k < endStateCount) {
        endStates[k] = (bb.get() == 1);
        k++;
    }

    /* lookaheadStates[numRows]: a byte value of 1 means true */
    lookaheadStates = new boolean[lookaheadCount];
    k = 0;
    while (k < lookaheadCount) {
        lookaheadStates[k] = (bb.get() == 1);
        k++;
    }

    /* Category table for BMP characters: fixed-size indices, then data. */
    final short[] bmpIndices = new short[BMP_INDICES_LENGTH];
    k = 0;
    while (k < BMP_INDICES_LENGTH) {
        bmpIndices[k] = bb.getShort();
        k++;
    }
    final byte[] bmpData = new byte[bmpDataCount];
    bb.get(bmpData);
    charCategoryTable = new CompactByteArray(bmpIndices, bmpData);

    /* Category table for non-BMP characters. */
    final int[] nonBmpData = new int[nonBmpCount];
    k = 0;
    while (k < nonBmpCount) {
        nonBmpData[k] = bb.getInt();
        k++;
    }
    supplementaryCharCategoryTable = new SupplementaryCharacterData(nonBmpData);

    /* Optional trailing data; the field is left untouched when absent. */
    if (extraCount > 0) {
        additionalData = new byte[extraCount];
        bb.get(additionalData);
    }

    /* Everything in the buffer should have been consumed by now. */
    assert bb.position() == bb.limit();

    /* The state table holds numCategories columns for each end-state row. */
    numCategories = stateTable.length / endStates.length;
}
use of sun.text.CompactByteArray in project Bytecoder by mirkosertic.
the class BreakDictionary method setupDictionary.
/**
 * Initializes this dictionary's lookup tables from raw dictionary data.
 * <p>
 * The data starts with a 4-byte version and a 4-byte length that must equal
 * the number of bytes following it. The remainder is a sequence of
 * length-prefixed arrays, read in this order: the BMP column map (index array,
 * then data array), {@code numCols}, {@code numColGroups}, the row index, the
 * row-index flags index, the row-index flags, the row-index shifts, the state
 * table, and the supplementary-character column map.
 *
 * @param dictionaryName name of the dictionary, used in error reporting
 * @param dictionaryData complete contents of the dictionary
 * @throws MissingResourceException if the version is not
 *         {@code supportedVersion} or the declared size does not match the
 *         amount of data supplied
 */
private void setupDictionary(String dictionaryName, byte[] dictionaryData) {
    final ByteBuffer bb = ByteBuffer.wrap(dictionaryData);

    // check version
    final int version = bb.getInt();
    if (version != supportedVersion) {
        throw new MissingResourceException("Dictionary version(" + version + ") is unsupported", dictionaryName, "");
    }

    // the declared size must account for exactly the bytes that remain
    final int declaredSize = bb.getInt();
    if (bb.remaining() != declaredSize) {
        throw new MissingResourceException("Dictionary size is wrong: " + bb.limit(), dictionaryName, "");
    }

    // column map for BMP characters, serialized in its internal form:
    // an index array followed by a data array
    final int columnIndexCount = bb.getInt();
    final short[] columnIndices = new short[columnIndexCount];
    int k = 0;
    while (k < columnIndexCount) {
        columnIndices[k] = bb.getShort();
        k++;
    }
    final int columnDataCount = bb.getInt();
    final byte[] columnData = new byte[columnDataCount];
    bb.get(columnData);
    columnMap = new CompactByteArray(columnIndices, columnData);

    // numCols and numColGroups
    numCols = bb.getInt();
    numColGroups = bb.getInt();

    // row-number index
    final int rowIndexCount = bb.getInt();
    rowIndex = new short[rowIndexCount];
    k = 0;
    while (k < rowIndexCount) {
        rowIndex[k] = bb.getShort();
        k++;
    }

    // populated-cells bitmap: index first, then bitmap list
    final int flagsIndexCount = bb.getInt();
    rowIndexFlagsIndex = new short[flagsIndexCount];
    k = 0;
    while (k < flagsIndexCount) {
        rowIndexFlagsIndex[k] = bb.getShort();
        k++;
    }
    final int flagsCount = bb.getInt();
    rowIndexFlags = new int[flagsCount];
    k = 0;
    while (k < flagsCount) {
        rowIndexFlags[k] = bb.getInt();
        k++;
    }

    // row-shift index
    final int shiftCount = bb.getInt();
    rowIndexShifts = new byte[shiftCount];
    bb.get(rowIndexShifts);

    // the actual state table
    final int tableCount = bb.getInt();
    table = new short[tableCount];
    k = 0;
    while (k < tableCount) {
        table[k] = bb.getShort();
        k++;
    }

    // finally, the column map for supplementary characters
    final int supplementaryCount = bb.getInt();
    final int[] supplementaryData = new int[supplementaryCount];
    k = 0;
    while (k < supplementaryCount) {
        supplementaryData[k] = bb.getInt();
        k++;
    }

    // everything in the buffer should have been consumed by now
    assert bb.position() == bb.limit();
    supplementaryCharColumnMap = new SupplementaryCharacterData(supplementaryData);
}
use of sun.text.CompactByteArray in project jdk8u_jdk by JetBrains.
the class RuleBasedBreakIteratorBuilder method buildCharCategories.
/**
 * Builds the character category tables from the rule list.
 * <p>
 * The method works in four phases:
 * <ol>
 * <li>Scan every rule and record each literal character and each [] expression
 * in <code>expressions</code>, mapped to the CharSet it denotes.</li>
 * <li>Split those CharSets into the list of categories held in
 * <code>categories</code>; position 0 is reserved for the ignore characters.</li>
 * <li>Replace each value in <code>expressions</code> with a String of encoded
 * category numbers (<code>0x100 + category index</code>).</li>
 * <li>Fill <code>charCategoryTable</code> (BMP characters) and
 * <code>supplementaryCharCategoryTable</code> (supplementary characters) from
 * the category list and set <code>numCategories</code>.</li>
 * </ol>
 * On exit the <code>ignoreChars</code> field has been reset to null. This method
 * only reads <code>tempRuleList</code>; the category numbers are stored in
 * <code>expressions</code>, not written back into the rule strings here.
 *
 * @param tempRuleList the break rules, expected to have had variable names
 *                     substituted already
 */
@SuppressWarnings("fallthrough")
protected void buildCharCategories(Vector<String> tempRuleList) {
int bracketLevel = 0;
int p = 0;
int lineNum = 0;
// build hash table of every literal character or [] expression in the rule list
// and use CharSet.parseString() to derive a CharSet object representing the
// characters each refers to
expressions = new Hashtable<>();
while (lineNum < tempRuleList.size()) {
String line = tempRuleList.elementAt(lineNum);
p = 0;
while (p < line.length()) {
int c = line.codePointAt(p);
switch(c) {
// skip over all syntax characters except [
case '{':
case '}':
case '(':
case ')':
case '*':
case '.':
case '/':
case '|':
case ';':
case '?':
case '!':
break;
// for a [] expression, scan forward to the matching ] (tracking nested
// brackets and stepping over backslash-escaped characters)
// and add the whole expression to the expression list
case '[':
int q = p + 1;
++bracketLevel;
while (q < line.length() && bracketLevel != 0) {
c = line.codePointAt(q);
switch(c) {
case '\\':
// extra step so the position after the backslash is not examined
q++;
break;
case '[':
++bracketLevel;
break;
case ']':
--bracketLevel;
break;
}
q = q + Character.charCount(c);
}
// only parse the expression if it has not been seen before
if (expressions.get(line.substring(p, q)) == null) {
expressions.put(line.substring(p, q), CharSet.parseString(line.substring(p, q)));
}
// leave p on the last char of the expression; the advance after the
// switch then steps past it
p = q - 1;
break;
// a backslash escapes the next character: step over the backslash and treat
// it as a single character
case '\\':
++p;
c = line.codePointAt(p);
// no break here: deliberately falls through into "default"
// for an isolated single character, add it to the expression list
// NOTE(review): substring(p, p + 1) is one UTF-16 code unit, while the
// advance below uses charCount(); confirm the intended handling of a
// supplementary literal character
default:
expressions.put(line.substring(p, p + 1), CharSet.parseString(line.substring(p, p + 1)));
break;
}
// advance past the code point at p
p += Character.charCount(line.codePointAt(p));
}
++lineNum;
}
// dump CharSet's internal expression cache
CharSet.releaseExpressionCache();
// create the temporary category table (which is a vector of CharSet objects)
// position 0 holds the ignore characters, or an empty set if there are none
categories = new Vector<>();
if (ignoreChars != null) {
categories.addElement(ignoreChars);
} else {
categories.addElement(new CharSet());
}
ignoreChars = null;
// this is a hook to allow subclasses to add categories on their own
mungeExpressionList(expressions);
// for each expression in the expressions list, do...
for (Enumeration<Object> iter = expressions.elements(); iter.hasMoreElements(); ) {
// initialize the working char set to the chars in the current expression
CharSet e = (CharSet) iter.nextElement();
// for each category in the category list, do...
// (scans from the last category down to index 1, so position 0 is never
// split, and stops early once the working set is empty)
for (int j = categories.size() - 1; !e.empty() && j > 0; j--) {
// if there's overlap between the current working set of chars
// and the current category...
CharSet that = categories.elementAt(j);
if (!that.intersection(e).empty()) {
// add a new category for the characters that were in the
// current category but not in the working char set
CharSet temp = that.difference(e);
if (!temp.empty()) {
categories.addElement(temp);
}
// remove those characters from the working char set and replace
// the current category with the characters that it did
// have in common with the current working char set
temp = e.intersection(that);
e = e.difference(that);
if (!temp.equals(that)) {
categories.setElementAt(temp, j);
}
}
}
// if any characters are left in the working char set after checking every
// category, add a new category containing them
if (!e.empty()) {
categories.addElement(e);
}
}
// we have the ignore characters stored in position 0. Make an extra pass through
// the character category list and remove anything from the ignore list that shows
// up in some other category
CharSet allChars = new CharSet();
for (int i = 1; i < categories.size(); i++) {
allChars = allChars.union(categories.elementAt(i));
}
// note: this local shadows the ignoreChars field, which was set to null above
CharSet ignoreChars = categories.elementAt(0);
ignoreChars = ignoreChars.difference(allChars);
categories.setElementAt(ignoreChars, 0);
// now replace each expression's CharSet with a String listing the categories
// whose characters it contains
for (Enumeration<String> iter = expressions.keys(); iter.hasMoreElements(); ) {
String key = iter.nextElement();
CharSet cs = (CharSet) expressions.get(key);
StringBuffer cats = new StringBuffer();
// for each category...
for (int j = 0; j < categories.size(); j++) {
// if the current expression contains characters in that category...
CharSet temp = cs.intersection(categories.elementAt(j));
if (!temp.empty()) {
// then add the encoded category number to the String for this
// expression (the encoding is the category index plus 0x100)
cats.append((char) (0x100 + j));
// stop as soon as a category's overlap equals the whole expression
if (temp.equals(cs)) {
break;
}
}
}
// once we've finished building the encoded String for this expression,
// replace the CharSet object with it
expressions.put(key, cats.toString());
}
// and finally, we turn the temporary category table into a permanent category
// table, which is a CompactByteArray for BMP characters plus a
// SupplementaryCharacterData for supplementary characters. Both are created
// with 0, presumably as the default value for characters that are never set
// explicitly -- TODO confirm against those classes
charCategoryTable = new CompactByteArray((byte) 0);
supplementaryCharCategoryTable = new SupplementaryCharacterData((byte) 0);
// for each category...
for (int i = 0; i < categories.size(); i++) {
CharSet chars = categories.elementAt(i);
// go through the character ranges in the category one by one...
// (each range is an int[] whose [0] and [1] are its two end code points)
Enumeration<int[]> enum_ = chars.getChars();
while (enum_.hasMoreElements()) {
int[] range = enum_.nextElement();
// and set the corresponding elements in the CompactArray accordingly:
// a range entirely in the BMP goes to charCategoryTable, a range entirely
// above it goes to supplementaryCharCategoryTable, and a range that
// straddles the boundary is split at 0xFFFF between the two tables
if (i != 0) {
if (range[0] < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
if (range[1] < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
charCategoryTable.setElementAt((char) range[0], (char) range[1], (byte) i);
} else {
charCategoryTable.setElementAt((char) range[0], (char) 0xFFFF, (byte) i);
supplementaryCharCategoryTable.appendElement(Character.MIN_SUPPLEMENTARY_CODE_POINT, range[1], (byte) i);
}
} else {
supplementaryCharCategoryTable.appendElement(range[0], range[1], (byte) i);
}
} else // (category 0 is special-- it's the hiding place for the ignore
// characters, whose real category number in the CompactArray is
// -1 [this is because category 0 contains all characters not
// specifically mentioned anywhere in the rules] )
{
// same three-way split as above, but storing IGNORE instead of the index
if (range[0] < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
if (range[1] < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
charCategoryTable.setElementAt((char) range[0], (char) range[1], IGNORE);
} else {
charCategoryTable.setElementAt((char) range[0], (char) 0xFFFF, IGNORE);
supplementaryCharCategoryTable.appendElement(Character.MIN_SUPPLEMENTARY_CODE_POINT, range[1], IGNORE);
}
} else {
supplementaryCharCategoryTable.appendElement(range[0], range[1], IGNORE);
}
}
}
}
// once we've populated the CompactArray, compact it
charCategoryTable.compact();
// And, complete the category table for supplementary characters
supplementaryCharCategoryTable.complete();
// initialize numCategories
numCategories = categories.size();
}
Aggregations