Search in sources :

Example 1 with Value

use of org.nlpcn.commons.lang.tire.domain.Value in project ansj_seg by NLPchina.

the class DicLibrary method init.

/**
 * Loads a user-defined dictionary into a {@link Forest} and caches it on the given entry.
 *
 * <p>If {@code kv} already holds a forest, that forest is returned unchanged. Otherwise the
 * resource named by {@code kv.getK()} is read as UTF-8, one entry per non-blank line, with
 * tab-separated fields. A line with exactly three fields supplies word, nature and frequency;
 * any other field count keeps only the first field as the word and falls back to
 * {@code DEFAULT_NATURE} and {@code DEFAULT_FREQ_STR}. Words are lower-cased before insertion.
 *
 * @param key name under which the dictionary is registered in {@code DIC}; the registration
 *            is removed when loading fails
 * @param kv  pair whose key is the dictionary path and whose value is the cached forest;
 *            the value is set once loading succeeds
 * @return the cached or freshly loaded forest, or {@code null} if loading failed
 */
private static synchronized Forest init(String key, KV<String, Forest> kv) {
    Forest forest = kv.getV();
    if (forest != null) {
        // Already loaded earlier; reuse the cached instance.
        return forest;
    }
    try {
        forest = new Forest();
        LOG.debug("begin init dic !");
        long start = System.currentTimeMillis();
        try (BufferedReader br = IOUtil.getReader(PathToStream.stream(kv.getK()), "UTF-8")) {
            String line;
            while ((line = br.readLine()) != null) {
                if (!StringUtil.isNotBlank(line)) {
                    continue;
                }
                String[] fields = StringUtil.trim(line).split("\t");
                fields[0] = fields[0].toLowerCase();
                // Skip the word when skipping is enabled and the core dictionary already has it.
                if (MyStaticValue.isSkipUserDefine && DATDictionary.getId(fields[0]) > 0) {
                    continue;
                }
                Value value;
                if (fields.length != 3) {
                    value = new Value(fields[0], DEFAULT_NATURE, DEFAULT_FREQ_STR);
                } else {
                    value = new Value(fields[0], fields[1], fields[2]);
                }
                Library.insertWord(forest, value);
            }
        }
        LOG.info("load dic use time:" + (System.currentTimeMillis() - start) + " path is : " + kv.getK());
        // Publish the forest on the entry only after the whole file was read successfully.
        kv.setV(forest);
        return forest;
    } catch (Exception e) {
        // This method loads the user dictionary, so the message names it as such
        // (the previous text wrongly referred to the ambiguity library).
        LOG.error("Init dic library error :" + e.getMessage() + ", path: " + kv.getK());
        DIC.remove(key);
        return null;
    }
}
Also used : MyStaticValue(org.ansj.util.MyStaticValue) Value(org.nlpcn.commons.lang.tire.domain.Value) BufferedReader(java.io.BufferedReader) Forest(org.nlpcn.commons.lang.tire.domain.Forest)

Example 2 with Value

use of org.nlpcn.commons.lang.tire.domain.Value in project ansj_seg by NLPchina.

the class ReloadAmbiguityLibrary method main.

/**
 * Demo entry point: reloads the ambiguity library two ways, then adds and removes one entry.
 *
 * @param args unused
 * @throws Exception propagated from the reload helpers or the library calls
 */
public static void main(String[] args) throws Exception {
    // Reload from a file.
    loadFormFile();
    // Reload from an in-memory string.
    loadFormStr();

    // Add a new entry to the ambiguity library ...
    final String phrase = "三个和尚";
    Value entry = new Value(phrase, "三个", "m", "和尚", "n");
    Library.insertWord(AmbiguityLibrary.get(), entry);

    // ... and remove that entry again.
    Library.removeWord(AmbiguityLibrary.get(), phrase);
}
Also used : Value(org.nlpcn.commons.lang.tire.domain.Value)

Example 3 with Value

use of org.nlpcn.commons.lang.tire.domain.Value in project ansj_seg by NLPchina.

the class NlpDemoTest method insertWord.

/**
 * Inserts a single word into the given forest.
 *
 * @param forest  forest receiving the word
 * @param keyword the word itself
 * @param nature  first parameter stored with the word
 * @param freq    second parameter stored with the word, passed on as its decimal string
 */
private static void insertWord(Forest forest, String keyword, String nature, int freq) {
    // The Value carries its parameters as strings: nature first, then the frequency.
    String[] wordParams = { nature, String.valueOf(freq) };
    Library.insertWord(forest, new Value(keyword, wordParams));
}
Also used : Value(org.nlpcn.commons.lang.tire.domain.Value)

Example 4 with Value

use of org.nlpcn.commons.lang.tire.domain.Value in project ansj_seg by NLPchina.

the class Test method main.

/**
 * Demo entry point showing user-dictionary inserts and deletes, an ambiguity-library
 * insert, and parsing with several ad-hoc dictionaries. Results are printed to stdout.
 *
 * @param args unused
 * @throws Exception propagated from the library calls
 */
public static void main(String[] args) throws Exception {
    // Build a user dictionary.
    // NOTE(review): the forest loaded here is replaced on the next line and never read
    // afterwards - confirm whether this load is still needed.
    Forest forest = Library.makeForest("library/default.dic");
    forest = new Forest();

    // Add a new word to the default user dictionary.
    DicLibrary.insert(DicLibrary.DEFAULT, "ansj中文分词", "userDefine", 1000);
    Result parsed = ToAnalysis.parse("我觉得Ansj中文分词是一个不错的系统!我是王婆!");
    System.out.println("增加新词例子:" + parsed);

    // Delete the word again; only words from the user-defined dictionary can be deleted.
    DicLibrary.delete(DicLibrary.DEFAULT, "ansj中文分词");
    parsed = ToAnalysis.parse("我觉得ansj中文分词是一个不错的系统!我是王婆!");
    System.out.println("删除用户自定义词典例子:" + parsed);

    // Ambiguity entry: parse the same sentence before and after registering it.
    final String ambiguousSentence = "我经济南下车到广州.中国经济南下势头迅猛!";
    Value ambiguity = new Value("济南下车", "济南", "n", "下车", "v");
    System.out.println(ToAnalysis.parse(ambiguousSentence));
    AmbiguityLibrary.insert(AmbiguityLibrary.DEFAULT, ambiguity);
    System.out.println(ToAnalysis.parse(ambiguousSentence));

    // Multiple user dictionaries: first parse without any extra dictionary.
    String sentence = "神探夏洛克这部电影作者.是一个dota迷";
    System.out.println(ToAnalysis.parse(sentence));

    // Two words, each placed in its own forest.
    Forest movieDic = new Forest();
    Library.insertWord(movieDic, new Value("神探夏洛克", "define", "1000"));
    Forest gameDic = new Forest();
    Library.insertWord(gameDic, new Value("dota迷", "define", "1000"));
    System.out.println(ToAnalysis.parse(sentence, movieDic, gameDic));
}
Also used : Value(org.nlpcn.commons.lang.tire.domain.Value) Forest(org.nlpcn.commons.lang.tire.domain.Forest) Result(org.ansj.domain.Result)

Example 5 with Value

use of org.nlpcn.commons.lang.tire.domain.Value in project ansj_seg by NLPchina.

the class DicAnalysisTest method test1.

/**
 * Exercises DicAnalysis with words added to and removed from the default user dictionary:
 * prints a few sample parses, then asserts that an inserted word is recognised with its
 * nature, that a word inserted straight into the forest is matched first, and that a
 * deleted word no longer appears.
 */
@Test
public void test1() {
    DicLibrary.insert(DicLibrary.DEFAULT, "金水区", "ad", 1000);
    DicLibrary.insert(DicLibrary.DEFAULT, "渝北区", "ad", 1000);
    DicLibrary.insert(DicLibrary.DEFAULT, "金童路", "ad", 1000);
    DicLibrary.insert(DicLibrary.DEFAULT, "奥山", "ad", 1000);
    DicLibrary.insert(DicLibrary.DEFAULT, "来自大", "ab", 1000);
    DicLibrary.insert(DicLibrary.DEFAULT, "自大学", "ab", 2000);
    DicLibrary.insert(DicLibrary.DEFAULT, "网大学", "ab", 1000);

    System.out.println(DicAnalysis.parse("重庆重庆市渝北区金童路奥山别墅162"));
    System.out.println(DicAnalysis.parse("河南省郑州市金水区金水区农科路与文博西路交叉口向东200米路南"));
    System.out.println(DicAnalysis.parse("来自大学生小说网大学"));

    String newWord = "爸爸去哪儿";
    String nature = "aaaaa";
    String str = "上海电力2012年财务报表如下怎爸爸去哪儿么办";

    // Add new words, then check that the parse contains the new word with its nature.
    DicLibrary.insert(DicLibrary.DEFAULT, newWord, nature, 1000);
    DicLibrary.insert(DicLibrary.DEFAULT, "上海电力", nature, 1000);
    HashMap<String, Term> termsByName = indexByName(DicAnalysis.parse(str).getTerms());
    Assert.assertTrue(termsByName.containsKey(newWord));
    Assert.assertEquals(termsByName.get(newWord).natrue().natureStr, nature);

    // A word inserted directly into the default forest is matched as the first term.
    Library.insertWord(DicLibrary.get(), new Value("北京卡", "UserDefined", "1000"));
    Assert.assertEquals(DicAnalysis.parse("北京卡机场服务").get(0).getName(), "北京卡");

    // Delete the word and check that it is gone from the parse.
    DicLibrary.delete(DicLibrary.DEFAULT, newWord);
    termsByName = indexByName(DicAnalysis.parse(str).getTerms());
    Assert.assertFalse(termsByName.containsKey(newWord));
}

/** Maps each term's name to the term; a later term with the same name replaces an earlier one. */
private static HashMap<String, Term> indexByName(List<Term> terms) {
    HashMap<String, Term> byName = new HashMap<String, Term>();
    for (Term term : terms) {
        byName.put(term.getName(), term);
    }
    return byName;
}
Also used : HashMap(java.util.HashMap) Value(org.nlpcn.commons.lang.tire.domain.Value) Term(org.ansj.domain.Term) CorpusTest(org.ansj.CorpusTest) Test(org.junit.Test)

Aggregations

Value (org.nlpcn.commons.lang.tire.domain.Value): 7, Forest (org.nlpcn.commons.lang.tire.domain.Forest): 4, MyStaticValue (org.ansj.util.MyStaticValue): 2, BufferedReader (java.io.BufferedReader): 1, HashMap (java.util.HashMap): 1, CorpusTest (org.ansj.CorpusTest): 1, Result (org.ansj.domain.Result): 1, Term (org.ansj.domain.Term): 1, Test (org.junit.Test): 1