use of org.nlpcn.commons.lang.tire.domain.Value in project ansj_seg by NLPchina.
the class DicLibrary method init.
/**
 * Loads the user-defined dictionary referenced by {@code kv} and caches the result on it.
 *
 * <p>If {@code kv} already carries a forest, that forest is returned unchanged. Otherwise the
 * resource located by {@code kv.getK()} is read as UTF-8, one entry per non-blank line, with
 * tab-separated fields. A line with exactly three fields is inserted as given (the first field
 * lower-cased); any other line is inserted using only its first field together with
 * {@code DEFAULT_NATURE} and {@code DEFAULT_FREQ_STR}.
 *
 * @param key name of the dictionary in {@code DIC}; the entry is removed when loading fails
 * @param kv  pair holding the dictionary path (key) and the cached forest (value)
 * @return the cached or freshly loaded forest, or {@code null} when loading failed
 */
private static synchronized Forest init(String key, KV<String, Forest> kv) {
    Forest forest = kv.getV();
    if (forest != null) {
        return forest;
    }
    try {
        forest = new Forest();
        LOG.debug("begin init dic !");
        long start = System.currentTimeMillis();
        try (BufferedReader br = IOUtil.getReader(PathToStream.stream(kv.getK()), "UTF-8")) {
            String temp;
            while ((temp = br.readLine()) != null) {
                if (StringUtil.isNotBlank(temp)) {
                    temp = StringUtil.trim(temp);
                    String[] strs = temp.split("\t");
                    strs[0] = strs[0].toLowerCase();
                    // When skipping is enabled, drop words the core dictionary already has
                    // (a positive id is treated as "present").
                    if (MyStaticValue.isSkipUserDefine && DATDictionary.getId(strs[0]) > 0) {
                        continue;
                    }
                    Value value;
                    if (strs.length != 3) {
                        // Anything other than keyword/nature/frequency falls back to the defaults.
                        value = new Value(strs[0], DEFAULT_NATURE, DEFAULT_FREQ_STR);
                    } else {
                        value = new Value(strs[0], strs[1], strs[2]);
                    }
                    Library.insertWord(forest, value);
                }
            }
        }
        LOG.info("load dic use time:" + (System.currentTimeMillis() - start) + " path is : " + kv.getK());
        // Cache only after a complete, successful load.
        kv.setV(forest);
        return forest;
    } catch (Exception e) {
        // This is the user dictionary loader, so the message must not mention the ambiguity library.
        LOG.error("Init dic library error :" + e.getMessage() + ", path: " + kv.getK());
        DIC.remove(key);
        return null;
    }
}
use of org.nlpcn.commons.lang.tire.domain.Value in project ansj_seg by NLPchina.
the class ReloadAmbiguityLibrary method main.
/**
 * Demonstrates reloading the ambiguity dictionary and then adding and removing one entry.
 *
 * @param args unused
 * @throws Exception propagated from the reload helpers or the library calls
 */
public static void main(String[] args) throws Exception {
    // Reload from a file.
    loadFormFile();

    // Reload from an in-memory string.
    loadFormStr();

    final String keyword = "三个和尚";

    // Add a new entry to the ambiguity dictionary.
    Value ambiguityEntry = new Value(keyword, "三个", "m", "和尚", "n");
    Library.insertWord(AmbiguityLibrary.get(), ambiguityEntry);

    // Remove the same entry from the ambiguity dictionary.
    Library.removeWord(AmbiguityLibrary.get(), keyword);
}
use of org.nlpcn.commons.lang.tire.domain.Value in project ansj_seg by NLPchina.
the class NlpDemoTest method insertWord.
/**
 * Inserts one word into the given forest.
 *
 * @param forest  forest that receives the word
 * @param keyword the word itself
 * @param nature  nature tag stored as the first parameter of the entry
 * @param freq    frequency, stored in its decimal string form as the second parameter
 */
private static void insertWord(Forest forest, String keyword, String nature, int freq) {
    String[] attributes = new String[] {nature, String.valueOf(freq)};
    Library.insertWord(forest, new Value(keyword, attributes));
}
use of org.nlpcn.commons.lang.tire.domain.Value in project ansj_seg by NLPchina.
the class Test method main.
/**
 * Walks through the dictionary features: adding and deleting a user-defined word, registering
 * an ambiguity rule, and parsing with several ad-hoc dictionaries. Results are printed to
 * standard output.
 *
 * @param args unused
 * @throws Exception propagated from the library calls
 */
public static void main(String[] args) throws Exception {
    // Add a new word to the default user dictionary.
    DicLibrary.insert(DicLibrary.DEFAULT, "ansj中文分词", "userDefine", 1000);
    Result terms = ToAnalysis.parse("我觉得Ansj中文分词是一个不错的系统!我是王婆!");
    System.out.println("增加新词例子:" + terms);

    // Delete the word again; only words in the user-defined dictionary can be deleted.
    DicLibrary.delete(DicLibrary.DEFAULT, "ansj中文分词");
    terms = ToAnalysis.parse("我觉得ansj中文分词是一个不错的系统!我是王婆!");
    System.out.println("删除用户自定义词典例子:" + terms);

    // Ambiguity rule: print the parse before and after registering it.
    Value value = new Value("济南下车", "济南", "n", "下车", "v");
    System.out.println(ToAnalysis.parse("我经济南下车到广州.中国经济南下势头迅猛!"));
    AmbiguityLibrary.insert(AmbiguityLibrary.DEFAULT, value);
    System.out.println(ToAnalysis.parse("我经济南下车到广州.中国经济南下势头迅猛!"));

    // Multiple user dictionaries: first parse without them.
    String str = "神探夏洛克这部电影作者.是一个dota迷";
    System.out.println(ToAnalysis.parse(str));

    // Two words, each in its own forest: 神探夏洛克 and dota迷.
    Forest dic1 = new Forest();
    Library.insertWord(dic1, new Value("神探夏洛克", "define", "1000"));
    Forest dic2 = new Forest();
    Library.insertWord(dic2, new Value("dota迷", "define", "1000"));
    System.out.println(ToAnalysis.parse(str, dic1, dic2));
}
use of org.nlpcn.commons.lang.tire.domain.Value in project ansj_seg by NLPchina.
the class DicAnalysisTest method test1.
/**
 * Exercises {@code DicAnalysis} against the default user dictionary: prints a few sample
 * parses, then checks that an inserted word is recognised with its nature, that a word inserted
 * directly into the forest is recognised, and that a deleted word no longer appears.
 */
@Test
public void test1() {
    DicLibrary.insert(DicLibrary.DEFAULT, "金水区", "ad", 1000);
    DicLibrary.insert(DicLibrary.DEFAULT, "渝北区", "ad", 1000);
    DicLibrary.insert(DicLibrary.DEFAULT, "金童路", "ad", 1000);
    DicLibrary.insert(DicLibrary.DEFAULT, "奥山", "ad", 1000);
    DicLibrary.insert(DicLibrary.DEFAULT, "来自大", "ab", 1000);
    DicLibrary.insert(DicLibrary.DEFAULT, "自大学", "ab", 2000);
    DicLibrary.insert(DicLibrary.DEFAULT, "网大学", "ab", 1000);
    System.out.println(DicAnalysis.parse("重庆重庆市渝北区金童路奥山别墅162"));
    System.out.println(DicAnalysis.parse("河南省郑州市金水区金水区农科路与文博西路交叉口向东200米路南"));
    System.out.println(DicAnalysis.parse("来自大学生小说网大学"));

    String newWord = "爸爸去哪儿";
    String nature = "aaaaa";
    String str = "上海电力2012年财务报表如下怎爸爸去哪儿么办";

    // Add new words.
    DicLibrary.insert(DicLibrary.DEFAULT, newWord, nature, 1000);
    DicLibrary.insert(DicLibrary.DEFAULT, "上海电力", nature, 1000);
    List<Term> parse = DicAnalysis.parse(str).getTerms();
    HashMap<String, Term> hs = new HashMap<String, Term>();
    for (Term term : parse) {
        hs.put(term.getName(), term);
    }
    Assert.assertTrue(hs.containsKey(newWord));
    // Expected value goes first so a failure message reports the right way round.
    Assert.assertEquals(nature, hs.get(newWord).natrue().natureStr);

    // A word inserted straight into the default forest must be picked up as well.
    Library.insertWord(DicLibrary.get(), new Value("北京卡", "UserDefined", "1000"));
    Assert.assertEquals("北京卡", DicAnalysis.parse("北京卡机场服务").get(0).getName());

    // Delete the word and verify it is no longer produced.
    DicLibrary.delete(DicLibrary.DEFAULT, newWord);
    parse = DicAnalysis.parse(str).getTerms();
    hs = new HashMap<String, Term>();
    for (Term term : parse) {
        hs.put(term.getName(), term);
    }
    Assert.assertFalse(hs.containsKey(newWord));
}
Aggregations