Search in sources :

Example 6 with ByteArray

use of com.hankcs.hanlp.corpus.io.ByteArray in project HanLP by hankcs.

the class CharType method generate.

/**
 * Builds the character-type table and writes it to {@code HanLP.Config.CharTypePath}.
 * <p>
 * Every char from 0 to {@code Character.MAX_VALUE} is classified with
 * {@code TextUtility.charType}. Consecutive chars that share a type are merged into a
 * single run {@code {firstChar, lastChar, type}}. Each run is serialized as two chars
 * followed by one byte, and the written file is then read back through
 * {@code ByteArray.createByteArray}.
 *
 * @return whatever {@code ByteArray.createByteArray} yields for the freshly written file
 * @throws IOException if the output file cannot be opened, written or closed
 */
private static ByteArray generate() throws IOException {
    // NOTE(review): 5 is the type assumed for the position before char 0; presumably it
    // equals charType((char) 0) so that no empty leading run is emitted - confirm.
    int preType = 5;
    int preChar = 0;
    List<int[]> typeList = new LinkedList<int[]>();
    for (int i = 0; i <= Character.MAX_VALUE; ++i) {
        int type = TextUtility.charType((char) i);
        if (type != preType) {
            // The type changed at i: close the run that ended at i - 1.
            typeList.add(new int[] { preChar, i - 1, preType });
            preChar = i;
        }
        preType = type;
    }
    // Flush the final run, which always ends at Character.MAX_VALUE.
    typeList.add(new int[] { preChar, (int) Character.MAX_VALUE, preType });

    DataOutputStream out = new DataOutputStream(new FileOutputStream(HanLP.Config.CharTypePath));
    try {
        for (int[] array : typeList) {
            out.writeChar(array[0]);
            out.writeChar(array[1]);
            out.writeByte(array[2]);
        }
    } finally {
        // Always release the file handle, even when a write fails.
        out.close();
    }
    return ByteArray.createByteArray(HanLP.Config.CharTypePath);
}
Also used : DataOutputStream(java.io.DataOutputStream) FileOutputStream(java.io.FileOutputStream) ByteArray(com.hankcs.hanlp.corpus.io.ByteArray) LinkedList(java.util.LinkedList)

Example 7 with ByteArray

use of com.hankcs.hanlp.corpus.io.ByteArray in project HanLP by hankcs.

the class WordNatureDependencyModel method loadDat.

/**
 * Restores the model from the binary cache located at {@code path + Predefine.BIN_EXT}.
 * <p>
 * Layout read from the cache: an int attribute count, then for each attribute an int
 * length followed by that attribute's (string relation, float value) pairs. The remaining
 * bytes are handed to {@code trie.load} together with the decoded attributes.
 *
 * @param path base path of the model; the binary extension is appended to it
 * @return {@code false} if the cache could not be opened, otherwise the result of
 *         {@code trie.load}
 */
boolean loadDat(String path) {
    final ByteArray data = ByteArray.createByteArray(path + Predefine.BIN_EXT);
    if (data == null) {
        return false;
    }
    final Attribute[] attributes = new Attribute[data.nextInt()];
    for (int index = 0; index < attributes.length; index++) {
        final Attribute current = new Attribute(data.nextInt());
        final int relationCount = current.dependencyRelation.length;
        for (int k = 0; k < relationCount; k++) {
            // Each entry is stored as a relation string immediately followed by its float.
            current.dependencyRelation[k] = data.nextString();
            current.p[k] = data.nextFloat();
        }
        attributes[index] = current;
    }
    return trie.load(data, attributes);
}
Also used : ByteArray(com.hankcs.hanlp.corpus.io.ByteArray)

Example 8 with ByteArray

use of com.hankcs.hanlp.corpus.io.ByteArray in project HanLP by hankcs.

the class PinyinDictionary method loadDat.

/**
 * Restores the pinyin dictionary from the binary cache at {@code path + Predefine.BIN_EXT}.
 * <p>
 * Layout read from the cache: an int entry count, then for each entry an int length
 * followed by that many int indices into {@code pinyins}. The remaining bytes are handed
 * to {@code trie.load} together with the decoded values.
 *
 * @param path base path of the dictionary; the binary extension is appended to it
 * @return {@code false} if the cache could not be opened or the trie failed to load,
 *         {@code true} otherwise
 */
static boolean loadDat(String path) {
    final ByteArray data = ByteArray.createByteArray(path + Predefine.BIN_EXT);
    if (data == null) {
        return false;
    }
    final int entryCount = data.nextInt();
    final Pinyin[][] values = new Pinyin[entryCount][];
    for (int row = 0; row < entryCount; row++) {
        final Pinyin[] readings = new Pinyin[data.nextInt()];
        for (int col = 0; col < readings.length; col++) {
            // The cache stores each reading as an index into the pinyins table.
            readings[col] = pinyins[data.nextInt()];
        }
        values[row] = readings;
    }
    return trie.load(data, values);
}
Also used : ByteArray(com.hankcs.hanlp.corpus.io.ByteArray)

Example 9 with ByteArray

use of com.hankcs.hanlp.corpus.io.ByteArray in project HanLP by hankcs.

the class BaseChineseDictionary method loadDat.

/**
 * Fills {@code trie} from the binary cache at {@code path + Predefine.BIN_EXT}.
 * <p>
 * Layout read from the cache: an int value count followed by that many strings. The
 * remaining bytes are handed to {@code trie.load} together with the decoded values.
 *
 * @param path base path of the dictionary; the binary extension is appended to it
 * @param trie the trie to populate
 * @return {@code false} if the cache could not be opened, {@code true} otherwise
 */
static boolean loadDat(String path, AhoCorasickDoubleArrayTrie<String> trie) {
    final ByteArray data = ByteArray.createByteArray(path + Predefine.BIN_EXT);
    if (data == null) {
        return false;
    }
    final String[] values = new String[data.nextInt()];
    int index = 0;
    while (index < values.length) {
        values[index++] = data.nextString();
    }
    // NOTE(review): the outcome of trie.load is not inspected, so this method reports
    // success whenever the cache file could be opened - confirm that this is intended.
    trie.load(data, values);
    return true;
}
Also used : ByteArray(com.hankcs.hanlp.corpus.io.ByteArray)

Example 10 with ByteArray

use of com.hankcs.hanlp.corpus.io.ByteArray in project HanLP by hankcs.

the class BigramDependencyModel method loadDat.

/**
 * Restores the model from the binary cache at {@code path} (used as given, no extension
 * is appended).
 * <p>
 * Layout read from the cache: an int value count followed by that many strings read with
 * {@code nextUTF}. The remaining bytes are handed to {@code trie.load} together with the
 * decoded values.
 *
 * @param path path of the binary cache file
 * @return {@code false} if the cache could not be opened, otherwise the result of
 *         {@code trie.load}
 */
private static boolean loadDat(String path) {
    final ByteArray data = ByteArray.createByteArray(path);
    if (data == null) {
        return false;
    }
    final String[] values = new String[data.nextInt()];
    int index = 0;
    while (index < values.length) {
        values[index] = data.nextUTF();
        index++;
    }
    return trie.load(data, values);
}
Also used : ByteArray(com.hankcs.hanlp.corpus.io.ByteArray)

Aggregations

ByteArray (com.hankcs.hanlp.corpus.io.ByteArray): 21, DataOutputStream (java.io.DataOutputStream): 5, FileOutputStream (java.io.FileOutputStream): 5, Nature (com.hankcs.hanlp.corpus.tag.Nature): 2, DoubleArrayTrie (com.hankcs.hanlp.collection.trie.DoubleArrayTrie): 1, LinkedList (java.util.LinkedList): 1