Use of org.nlpcn.commons.lang.tire.domain.Forest in project ansj_seg by NLPchina.
The class DicLibrary, method init.
/**
 * Loads a user-defined dictionary into a {@link Forest} and caches it on {@code kv}.
 *
 * <p>If {@code kv} already carries a forest, that forest is returned unchanged. Otherwise the
 * resource named by {@code kv.getK()} is read as UTF-8, one entry per non-blank line. Each line
 * is trimmed and split on tab characters, and its first field (the word) is lower-cased. A line
 * with exactly three fields is inserted with its second and third fields; any other line is
 * inserted with {@code DEFAULT_NATURE} and {@code DEFAULT_FREQ_STR} in their place.
 *
 * <p>On any exception the error is logged, the entry registered under {@code key} is removed
 * from {@code DIC}, and {@code null} is returned.
 *
 * @param key name of the dictionary entry in {@code DIC}; removed when loading fails
 * @param kv  pair whose key is the dictionary path and whose value is the cached forest
 *            ({@code null} until the dictionary has been loaded)
 * @return the cached or newly loaded forest, or {@code null} if loading failed
 */
private static synchronized Forest init(String key, KV<String, Forest> kv) {
    Forest forest = kv.getV();
    if (forest != null) {
        // Already loaded: reuse the cached instance.
        return forest;
    }
    try {
        forest = new Forest();
        LOG.debug("begin init dic !");
        long start = System.currentTimeMillis();
        try (BufferedReader br = IOUtil.getReader(PathToStream.stream(kv.getK()), "UTF-8")) {
            String line;
            while ((line = br.readLine()) != null) {
                if (!StringUtil.isNotBlank(line)) {
                    continue;
                }
                String[] fields = StringUtil.trim(line).split("\t");
                fields[0] = fields[0].toLowerCase();
                // Skip the word if it already exists in the core dictionary.
                if (MyStaticValue.isSkipUserDefine && DATDictionary.getId(fields[0]) > 0) {
                    continue;
                }
                Value value;
                if (fields.length != 3) {
                    // Anything other than a three-field line falls back to the defaults.
                    value = new Value(fields[0], DEFAULT_NATURE, DEFAULT_FREQ_STR);
                } else {
                    value = new Value(fields[0], fields[1], fields[2]);
                }
                Library.insertWord(forest, value);
            }
        }
        LOG.info("load dic use time:" + (System.currentTimeMillis() - start) + " path is : " + kv.getK());
        // Publish the forest on the entry only after the whole file loaded successfully.
        kv.setV(forest);
        return forest;
    } catch (Exception e) {
        // The message used to say "ambiguity library", which pointed readers at the wrong library.
        LOG.error("Init dic library error :" + e.getMessage() + ", path: " + kv.getK());
        DIC.remove(key);
        return null;
    }
}
Use of org.nlpcn.commons.lang.tire.domain.Forest in project ansj_seg by NLPchina.
The class ReloadUserLibrary, method loadFormFile.
/**
 * Builds a fresh forest from the library file and swaps it in for the old one under
 * {@code DicLibrary.DEFAULT}.
 *
 * @throws Exception if building the forest or registering it fails
 */
private static void loadFormFile() throws Exception {
    // Resolve the path of the new library file, then build the new dictionary tree from it.
    String libraryPath = new File("new_Library_Path").getPath();
    Forest rebuilt = Library.makeForest(libraryPath);

    // Replace the old dictionary tree with the newly built one.
    DicLibrary.put(DicLibrary.DEFAULT, DicLibrary.DEFAULT, rebuilt);
}
Use of org.nlpcn.commons.lang.tire.domain.Forest in project ansj_seg by NLPchina.
The class NlpDemoTest, method main.
/**
 * Demo: runs {@code NlpAnalysis} with the forest returned by {@code DicLibrary.get()} over a
 * sample sentence and prints each term's real name and name, separated by {@code "\t|\t"}.
 *
 * @param args unused
 * @throws IOException if reading the content fails
 */
public static void main(String[] args) throws IOException {
    NlpAnalysis fresh = new NlpAnalysis();
    NlpAnalysis analyzer = (NlpAnalysis) fresh.setForests(new Forest[] { DicLibrary.get() });
    analyzer.resetContent(new StringReader("2015年无锡市突发环境事件"));

    // Pull terms one at a time until the analyzer reports no more (null).
    for (Term current = analyzer.next(); current != null; current = analyzer.next()) {
        System.out.println(current.getRealName() + "\t|\t" + current.getName());
    }
}
Use of org.nlpcn.commons.lang.tire.domain.Forest in project ansj_seg by NLPchina.
The class ReloadAmbiguityLibrary, method loadFormFile.
/**
 * Builds a fresh forest from the library file and swaps it in for the old one under
 * {@code AmbiguityLibrary.DEFAULT}.
 *
 * @throws Exception if building the forest or registering it fails
 */
private static void loadFormFile() throws Exception {
    // Build the new dictionary tree from the new library path.
    String libraryPath = "new_Library_Path";
    Forest rebuilt = Library.makeForest(libraryPath);

    // Replace the old dictionary tree with the newly built one.
    AmbiguityLibrary.put(AmbiguityLibrary.DEFAULT, AmbiguityLibrary.DEFAULT, rebuilt);
}
Use of org.nlpcn.commons.lang.tire.domain.Forest in project ansj_seg by NLPchina.
The class Test, method main.
/**
 * Demo of dictionary manipulation: adds and deletes a user-defined word, inserts an ambiguity
 * rule, and parses with several ad-hoc dictionaries, printing the result of each step.
 *
 * <p>The unused {@code Forest} that was loaded from {@code library/default.dic} and then
 * immediately overwritten has been removed; it was never read, and loading it only added file
 * I/O that could fail the demo.
 *
 * @param args unused
 * @throws Exception if any dictionary or analysis call fails
 */
public static void main(String[] args) throws Exception {
    // Add a new word to the default user dictionary (word, nature, frequency).
    DicLibrary.insert(DicLibrary.DEFAULT, "ansj中文分词", "userDefine", 1000);
    Result terms = ToAnalysis.parse("我觉得Ansj中文分词是一个不错的系统!我是王婆!");
    System.out.println("增加新词例子:" + terms);

    // Delete the word again; only words in the user-defined dictionary can be deleted.
    DicLibrary.delete(DicLibrary.DEFAULT, "ansj中文分词");
    terms = ToAnalysis.parse("我觉得ansj中文分词是一个不错的系统!我是王婆!");
    System.out.println("删除用户自定义词典例子:" + terms);

    // Ambiguity rule: parse the same text before and after inserting the rule.
    Value value = new Value("济南下车", "济南", "n", "下车", "v");
    System.out.println(ToAnalysis.parse("我经济南下车到广州.中国经济南下势头迅猛!"));
    AmbiguityLibrary.insert(AmbiguityLibrary.DEFAULT, value);
    System.out.println(ToAnalysis.parse("我经济南下车到广州.中国经济南下势头迅猛!"));

    // Multiple user dictionaries: first parse without any extra dictionary.
    String str = "神探夏洛克这部电影作者.是一个dota迷";
    System.out.println(ToAnalysis.parse(str));

    // Two words, "神探夏洛克" and "dota迷", each placed in its own dictionary.
    Forest dic1 = new Forest();
    Library.insertWord(dic1, new Value("神探夏洛克", "define", "1000"));
    Forest dic2 = new Forest();
    Library.insertWord(dic2, new Value("dota迷", "define", "1000"));
    System.out.println(ToAnalysis.parse(str, dic1, dic2));
}
Aggregations