Use of com.hankcs.hanlp.corpus.io.ByteArray in project HanLP by hankcs.
From the class TestBytesArray, method testBenchmark.
/**
 * Rough timing comparison of loading the MaxEnt model through
 * {@code ByteArray.createByteArray} versus
 * {@code ByteArrayFileStream.createByteArrayFileStream}.
 * Each loader is run once untimed first, then once more with wall-clock
 * timing printed to standard output.
 *
 * @throws Exception propagated from model loading
 */
public void testBenchmark() throws Exception {
    final String modelPath = HanLP.Config.MaxEntModelPath + Predefine.BIN_EXT;
    ByteArray source;
    long begin;
    long elapsed;

    // Untimed first pass over both loaders.
    source = ByteArray.createByteArray(modelPath);
    MaxEntModel.create(source);
    source = ByteArrayFileStream.createByteArrayFileStream(modelPath);
    MaxEntModel.create(source);

    // Timed pass: ByteArray.createByteArray.
    begin = System.currentTimeMillis();
    source = ByteArray.createByteArray(modelPath);
    MaxEntModel.create(source);
    elapsed = System.currentTimeMillis() - begin;
    System.out.printf("ByteArray: %d ms\n", elapsed);

    // Timed pass: ByteArrayFileStream.createByteArrayFileStream.
    begin = System.currentTimeMillis();
    source = ByteArrayFileStream.createByteArrayFileStream(modelPath);
    MaxEntModel.create(source);
    elapsed = System.currentTimeMillis() - begin;
    System.out.printf("ByteArrayStream: %d ms\n", elapsed);

    // Sample output recorded by the original author:
    // ByteArray: 2626 ms
    // ByteArrayStream: 4165 ms
}
Use of com.hankcs.hanlp.corpus.io.ByteArray in project HanLP by hankcs.
From the class TestBytesArray, method testLoadByteArrayStream.
/**
 * Loads the MaxEnt model through
 * {@code ByteArrayFileStream.createByteArrayFileStream}.
 * Per the original author's note, this is able to run under
 * {@code -Xms512m -Xmx512m -Xmn256m}.
 *
 * @throws Exception propagated from model loading
 */
public void testLoadByteArrayStream() throws Exception {
    final String modelPath = HanLP.Config.MaxEntModelPath + Predefine.BIN_EXT;
    // Keep the ByteArray static type so the same MaxEntModel.create overload is chosen.
    final ByteArray stream = ByteArrayFileStream.createByteArrayFileStream(modelPath);
    MaxEntModel.create(stream);
}
Use of com.hankcs.hanlp.corpus.io.ByteArray in project HanLP by hankcs.
From the class CRFModel, method loadBin.
/**
 * Loads a CRF++ model stored in Bin form.<br>
 * Note that this Bin form is not CRF++'s own binary model; it is HanLP's
 * private format, converted from the CRF++ text model.
 *
 * @param path location passed to {@code ByteArray.createByteArray}
 * @return the loaded model, or {@code null} when the byte array could not be
 *         created or {@code CRFModel.load} reports failure
 */
public static CRFModel loadBin(String path) {
    final ByteArray data = ByteArray.createByteArray(path);
    if (data != null) {
        final CRFModel candidate = new CRFModel();
        if (candidate.load(data)) {
            return candidate;
        }
    }
    // Either the file could not be read or the model rejected its content.
    return null;
}
Use of com.hankcs.hanlp.corpus.io.ByteArray in project HanLP by hankcs.
From the class CoreDictionary, method loadDat.
/**
 * Loads the double array from disk.
 * <p>
 * Reads {@code path + Predefine.BIN_EXT}: first an entry count, then for each
 * entry a total frequency, a nature count, and that many
 * (nature index, frequency) pairs; the remainder is handed to
 * {@code trie.load}.
 *
 * @param path dictionary path without the binary extension
 * @return {@code true} only if the byte array was created, {@code trie.load}
 *         succeeded and no bytes were left over; {@code false} otherwise,
 *         including when any exception is raised (which is logged as a warning)
 */
static boolean loadDat(String path) {
    try {
        final ByteArray data = ByteArray.createByteArray(path + Predefine.BIN_EXT);
        if (data == null) {
            return false;
        }

        final int entryCount = data.nextInt();
        final CoreDictionary.Attribute[] attributes = new CoreDictionary.Attribute[entryCount];
        final Nature[] natureByIndex = Nature.values();

        for (int entry = 0; entry < entryCount; ++entry) {
            // Stored order: total frequency first, then the number of natures.
            final int totalFrequency = data.nextInt();
            final int natureCount = data.nextInt();

            final CoreDictionary.Attribute attribute = new CoreDictionary.Attribute(natureCount);
            attributes[entry] = attribute;
            attribute.totalFrequency = totalFrequency;

            for (int slot = 0; slot < natureCount; ++slot) {
                attribute.nature[slot] = natureByIndex[data.nextInt()];
                attribute.frequency[slot] = data.nextInt();
            }
        }

        // Success requires the trie to load and the input to be fully consumed;
        // hasMore() is only consulted when the trie load succeeded.
        return trie.load(data, attributes) && !data.hasMore();
    } catch (Exception e) {
        logger.warning("读取失败,问题发生在" + e);
        return false;
    }
}
Use of com.hankcs.hanlp.corpus.io.ByteArray in project HanLP by hankcs.
From the class BiGramDictionary, method loadDat.
/**
 * Loads the sorted trie from the dat file.
 * <p>
 * Reads {@code path + Predefine.BIN_EXT}: an element count followed by that
 * many int values, then passes the remaining data and the values to
 * {@code trie.load}.
 * <p>
 * NOTE(review): any exception is swallowed and reported only through the
 * {@code false} return value; nothing is logged here.
 *
 * @param path dictionary path without the binary extension
 * @return {@code true} if the byte array was created and {@code trie.load}
 *         succeeded; {@code false} otherwise, including on any exception
 */
private static boolean loadDat(String path) {
    try {
        final ByteArray data = ByteArray.createByteArray(path + Predefine.BIN_EXT);
        if (data == null) {
            return false;
        }

        final int count = data.nextInt();
        final Integer[] values = new Integer[count];
        int index = 0;
        while (index < count) {
            values[index] = data.nextInt();
            index++;
        }

        return trie.load(data, values);
    } catch (Exception e) {
        return false;
    }
}
Aggregations