use of com.hankcs.hanlp.corpus.io.ByteArray in project HanLP by hankcs.
the class CharType method generate.
/**
 * Builds the character-type table and writes it to {@code HanLP.Config.CharTypePath}.
 *
 * <p>Every value from 0 to {@code Character.MAX_VALUE} is classified with
 * {@code TextUtility.charType}. Consecutive characters that share a type are collapsed
 * into one {@code [first, last, type]} range. Each range is written as two chars followed
 * by one byte, and the file is then read back with {@code ByteArray.createByteArray}.
 *
 * @return whatever {@code ByteArray.createByteArray} returns for the freshly written file
 * @throws IOException if the output file cannot be opened, written or closed
 */
private static ByteArray generate() throws IOException {
    // Seed for the "previous type" before any character has been examined.
    // NOTE(review): if charType((char) 0) is not 5, the first range emitted is [0, -1, 5];
    // confirm that 5 matches the type of character 0.
    int preType = 5;
    int preChar = 0;
    List<int[]> typeList = new LinkedList<int[]>();
    for (int i = 0; i <= Character.MAX_VALUE; ++i) {
        int type = TextUtility.charType((char) i);
        if (type != preType) {
            // The run that started at preChar ended on the previous character.
            typeList.add(new int[]{preChar, i - 1, preType});
            preChar = i;
        }
        preType = type;
    }
    // Flush the final run, which always extends to the last character.
    typeList.add(new int[]{preChar, Character.MAX_VALUE, preType});

    DataOutputStream out = new DataOutputStream(new FileOutputStream(HanLP.Config.CharTypePath));
    try {
        for (int[] range : typeList) {
            out.writeChar(range[0]);
            out.writeChar(range[1]);
            out.writeByte(range[2]);
        }
    } finally {
        // Release the file handle even when a write fails part-way through.
        out.close();
    }
    return ByteArray.createByteArray(HanLP.Config.CharTypePath);
}
use of com.hankcs.hanlp.corpus.io.ByteArray in project HanLP by hankcs.
the class WordNatureDependencyModel method loadDat.
/**
 * Loads the model from the binary file at {@code path + Predefine.BIN_EXT}.
 *
 * <p>The file is read as: an int attribute count, then for each attribute an int length
 * followed by (string, float) pairs that fill {@code dependencyRelation} and {@code p}.
 * The remaining bytes and the attribute array are handed to {@code trie.load}.
 *
 * @param path base path of the model; {@code Predefine.BIN_EXT} is appended to it
 * @return {@code false} if the binary file could not be opened as a {@code ByteArray},
 *         otherwise the result of {@code trie.load}
 */
boolean loadDat(String path) {
    ByteArray data = ByteArray.createByteArray(path + Predefine.BIN_EXT);
    if (data == null) {
        return false;
    }
    Attribute[] attributes = new Attribute[data.nextInt()];
    for (int index = 0; index < attributes.length; ++index) {
        Attribute current = new Attribute(data.nextInt());
        // Iterate over the array the Attribute actually holds, not over the length just read.
        int relationCount = current.dependencyRelation.length;
        for (int k = 0; k < relationCount; ++k) {
            current.dependencyRelation[k] = data.nextString();
            current.p[k] = data.nextFloat();
        }
        attributes[index] = current;
    }
    return trie.load(data, attributes);
}
use of com.hankcs.hanlp.corpus.io.ByteArray in project HanLP by hankcs.
the class PinyinDictionary method loadDat.
/**
 * Loads the pinyin dictionary from the binary file at {@code path + Predefine.BIN_EXT}.
 *
 * <p>The file is read as: an int entry count, then for each entry an int length followed
 * by that many int indices into the {@code pinyins} table. The remaining bytes and the
 * resolved value array are handed to {@code trie.load}.
 *
 * @param path base path of the dictionary; {@code Predefine.BIN_EXT} is appended to it
 * @return {@code false} if the binary file could not be opened as a {@code ByteArray},
 *         otherwise the result of {@code trie.load}
 */
static boolean loadDat(String path) {
    ByteArray data = ByteArray.createByteArray(path + Predefine.BIN_EXT);
    if (data == null) {
        return false;
    }
    int entryCount = data.nextInt();
    Pinyin[][] values = new Pinyin[entryCount][];
    for (int row = 0; row < entryCount; ++row) {
        Pinyin[] readings = new Pinyin[data.nextInt()];
        for (int col = 0; col < readings.length; ++col) {
            // Each stored int is an index into the shared pinyins table.
            readings[col] = pinyins[data.nextInt()];
        }
        values[row] = readings;
    }
    return trie.load(data, values);
}
use of com.hankcs.hanlp.corpus.io.ByteArray in project HanLP by hankcs.
the class BaseChineseDictionary method loadDat.
/**
 * Loads a dictionary from the binary file at {@code path + Predefine.BIN_EXT} into
 * the supplied trie.
 *
 * <p>The file is read as: an int value count followed by that many strings. The
 * remaining bytes and the value array are handed to {@code trie.load}.
 *
 * @param path base path of the dictionary; {@code Predefine.BIN_EXT} is appended to it
 * @param trie the trie that receives the loaded data
 * @return {@code false} if the binary file could not be opened as a {@code ByteArray},
 *         {@code true} otherwise
 */
static boolean loadDat(String path, AhoCorasickDoubleArrayTrie<String> trie) {
    ByteArray data = ByteArray.createByteArray(path + Predefine.BIN_EXT);
    if (data != null) {
        String[] values = new String[data.nextInt()];
        int idx = 0;
        while (idx < values.length) {
            values[idx++] = data.nextString();
        }
        // NOTE(review): the outcome of trie.load is not inspected here; success is
        // reported whenever the file could be opened.
        trie.load(data, values);
        return true;
    }
    return false;
}
use of com.hankcs.hanlp.corpus.io.ByteArray in project HanLP by hankcs.
the class BigramDependencyModel method loadDat.
/**
 * Loads the model from the binary file at {@code path}, used exactly as given.
 *
 * <p>The file is read as: an int value count followed by that many strings read with
 * {@code nextUTF}. The remaining bytes and the value array are handed to {@code trie.load}.
 *
 * @param path path of the binary file
 * @return {@code false} if the file could not be opened as a {@code ByteArray},
 *         otherwise the result of {@code trie.load}
 */
private static boolean loadDat(String path) {
    ByteArray data = ByteArray.createByteArray(path);
    if (data == null) {
        return false;
    }
    String[] values = new String[data.nextInt()];
    int cursor = 0;
    while (cursor < values.length) {
        values[cursor] = data.nextUTF();
        ++cursor;
    }
    return trie.load(data, values);
}
Aggregations