Search in sources :

Example 6 with Forest

use of org.nlpcn.commons.lang.tire.domain.Forest in project ansj_seg by NLPchina.

the class DicLibrary method init.

/**
 * Loads a user-defined dictionary into a {@link Forest} and caches it in {@code kv}.
 *
 * <p>If {@code kv} already holds a forest, that instance is returned untouched. Otherwise the
 * resource named by {@code kv.getK()} is read as UTF-8, one entry per non-blank line, with
 * fields separated by tabs. A line with exactly three fields supplies word, nature and
 * frequency; any other line contributes only its first field, paired with
 * {@code DEFAULT_NATURE} and {@code DEFAULT_FREQ_STR}. Words are lower-cased before insertion.
 *
 * @param key the key passed to {@code DIC.remove} when loading fails
 * @param kv  pair of dictionary source path (K) and cached forest (V); V is set on success
 * @return the cached or newly loaded forest, or {@code null} if loading failed
 */
private static synchronized Forest init(String key, KV<String, Forest> kv) {
    Forest forest = kv.getV();
    if (forest != null) {
        // Already loaded: reuse the cached forest.
        return forest;
    }
    try {
        forest = new Forest();
        LOG.debug("begin init dic !");
        long start = System.currentTimeMillis();
        try (BufferedReader br = IOUtil.getReader(PathToStream.stream(kv.getK()), "UTF-8")) {
            String line;
            while ((line = br.readLine()) != null) {
                if (!StringUtil.isNotBlank(line)) {
                    continue;
                }
                String[] fields = StringUtil.trim(line).split("\t");
                fields[0] = fields[0].toLowerCase();
                // Skip the word when it already exists in the core dictionary
                // and skipping of user definitions is enabled.
                if (MyStaticValue.isSkipUserDefine && DATDictionary.getId(fields[0]) > 0) {
                    continue;
                }
                Value value;
                if (fields.length != 3) {
                    // Not a full word/nature/frequency triple: fall back to the defaults.
                    value = new Value(fields[0], DEFAULT_NATURE, DEFAULT_FREQ_STR);
                } else {
                    value = new Value(fields[0], fields[1], fields[2]);
                }
                Library.insertWord(forest, value);
            }
        }
        LOG.info("load dic use time:" + (System.currentTimeMillis() - start) + " path is : " + kv.getK());
        // Publish the forest only after the whole file has been read successfully.
        kv.setV(forest);
        return forest;
    } catch (Exception e) {
        // This is the user dictionary loader; the message previously said "ambiguity library".
        LOG.error("Init dic library error :" + e.getMessage() + ", path: " + kv.getK());
        DIC.remove(key);
        return null;
    }
}
Also used : MyStaticValue(org.ansj.util.MyStaticValue) Value(org.nlpcn.commons.lang.tire.domain.Value) BufferedReader(java.io.BufferedReader) Forest(org.nlpcn.commons.lang.tire.domain.Forest)

Example 7 with Forest

use of org.nlpcn.commons.lang.tire.domain.Forest in project ansj_seg by NLPchina.

the class ReloadUserLibrary method loadFormFile.

/**
 * Builds a forest from the file at "new_Library_Path" and stores it via
 * {@code DicLibrary.put} under {@code DicLibrary.DEFAULT}.
 *
 * @throws Exception if building the forest or registering it fails
 */
private static void loadFormFile() throws Exception {
    // Build a fresh forest from the new library file.
    String libraryPath = new File("new_Library_Path").getPath();
    Forest rebuilt = Library.makeForest(libraryPath);

    // Swap the newly built dictionary tree in for the old one.
    DicLibrary.put(DicLibrary.DEFAULT, DicLibrary.DEFAULT, rebuilt);
}
Also used : Forest(org.nlpcn.commons.lang.tire.domain.Forest) File(java.io.File)

Example 8 with Forest

use of org.nlpcn.commons.lang.tire.domain.Forest in project ansj_seg by NLPchina.

the class NlpDemoTest method main.

/**
 * Segments a fixed sample sentence with {@code NlpAnalysis}, using the forest returned by
 * {@code DicLibrary.get()}, and prints each term's real name and name separated by "\t|\t".
 *
 * @param args unused
 * @throws IOException if reading the next term fails
 */
public static void main(String[] args) throws IOException {
    NlpAnalysis analyzer = (NlpAnalysis) new NlpAnalysis().setForests(new Forest[] { DicLibrary.get() });
    analyzer.resetContent(new StringReader("2015年无锡市突发环境事件"));

    // Pull terms one at a time until the analyzer is exhausted.
    for (Term current = analyzer.next(); current != null; current = analyzer.next()) {
        System.out.println(current.getRealName() + "\t|\t" + current.getName());
    }
}
Also used : StringReader(java.io.StringReader) Forest(org.nlpcn.commons.lang.tire.domain.Forest) NlpAnalysis(org.ansj.splitWord.analysis.NlpAnalysis) Term(org.ansj.domain.Term)

Example 9 with Forest

use of org.nlpcn.commons.lang.tire.domain.Forest in project ansj_seg by NLPchina.

the class ReloadAmbiguityLibrary method loadFormFile.

/**
 * Builds a forest from "new_Library_Path" and stores it via {@code AmbiguityLibrary.put}
 * under {@code AmbiguityLibrary.DEFAULT}.
 *
 * @throws Exception if building the forest or registering it fails
 */
private static void loadFormFile() throws Exception {
    // Build a fresh forest from the new library file.
    Forest rebuilt = Library.makeForest("new_Library_Path");

    // Swap the newly built dictionary tree in for the old one.
    AmbiguityLibrary.put(AmbiguityLibrary.DEFAULT, AmbiguityLibrary.DEFAULT, rebuilt);
}
Also used : Forest(org.nlpcn.commons.lang.tire.domain.Forest)

Example 10 with Forest

use of org.nlpcn.commons.lang.tire.domain.Forest in project ansj_seg by NLPchina.

the class Test method main.

/**
 * Demonstrates user-dictionary and ambiguity-dictionary operations: inserting and deleting a
 * user-defined word, inserting an ambiguity rule, and parsing with several ad-hoc forests.
 * Each step prints the segmentation result to standard output.
 *
 * @param args unused
 * @throws Exception if any dictionary or parse operation fails
 */
public static void main(String[] args) throws Exception {
    // Add a new word (with nature and frequency) to the default user dictionary.
    DicLibrary.insert(DicLibrary.DEFAULT, "ansj中文分词", "userDefine", 1000);
    Result terms = ToAnalysis.parse("我觉得Ansj中文分词是一个不错的系统!我是王婆!");
    System.out.println("增加新词例子:" + terms);

    // Delete the word again; only user-defined dictionary entries can be deleted.
    DicLibrary.delete(DicLibrary.DEFAULT, "ansj中文分词");
    terms = ToAnalysis.parse("我觉得ansj中文分词是一个不错的系统!我是王婆!");
    System.out.println("删除用户自定义词典例子:" + terms);

    // Ambiguity rule: parse the same sentence before and after inserting the rule.
    Value value = new Value("济南下车", "济南", "n", "下车", "v");
    System.out.println(ToAnalysis.parse("我经济南下车到广州.中国经济南下势头迅猛!"));
    AmbiguityLibrary.insert(AmbiguityLibrary.DEFAULT, value);
    System.out.println(ToAnalysis.parse("我经济南下车到广州.中国经济南下势头迅猛!"));

    // Multiple user dictionaries: parse first without them, then with both.
    String str = "神探夏洛克这部电影作者.是一个dota迷";
    System.out.println(ToAnalysis.parse(str));

    // Two single-word dictionaries, one for each custom word in the sentence above.
    Forest dic1 = new Forest();
    Library.insertWord(dic1, new Value("神探夏洛克", "define", "1000"));
    Forest dic2 = new Forest();
    Library.insertWord(dic2, new Value("dota迷", "define", "1000"));
    System.out.println(ToAnalysis.parse(str, dic1, dic2));
}
Also used : Value(org.nlpcn.commons.lang.tire.domain.Value) Forest(org.nlpcn.commons.lang.tire.domain.Forest) Result(org.ansj.domain.Result)

Aggregations

Forest (org.nlpcn.commons.lang.tire.domain.Forest)20 ArrayList (java.util.ArrayList)4 Term (org.ansj.domain.Term)4 Value (org.nlpcn.commons.lang.tire.domain.Value)4 AsianPersonRecognition (org.ansj.recognition.arrimpl.AsianPersonRecognition)3 ForeignPersonRecognition (org.ansj.recognition.arrimpl.ForeignPersonRecognition)3 NumRecognition (org.ansj.recognition.arrimpl.NumRecognition)3 NlpAnalysis (org.ansj.splitWord.analysis.NlpAnalysis)3 Graph (org.ansj.util.Graph)3 SmartForest (org.nlpcn.commons.lang.tire.domain.SmartForest)3 BufferedReader (java.io.BufferedReader)2 List (java.util.List)2 AnsjTokenizer (org.ansj.lucene.util.AnsjTokenizer)2 UserDefineRecognition (org.ansj.recognition.arrimpl.UserDefineRecognition)2 StopRecognition (org.ansj.recognition.impl.StopRecognition)2 SynonymsRecgnition (org.ansj.recognition.impl.SynonymsRecgnition)2 Analysis (org.ansj.splitWord.Analysis)2 BaseAnalysis (org.ansj.splitWord.analysis.BaseAnalysis)2 DicAnalysis (org.ansj.splitWord.analysis.DicAnalysis)2 IndexAnalysis (org.ansj.splitWord.analysis.IndexAnalysis)2