Example use of com.hankcs.hanlp.corpus.dictionary.DictionaryMaker in the project HanLP by hankcs.
From the class WordNatureWeightModelMaker, method makeModel.
/**
 * Builds a dependency-weight dictionary from a CoNLL corpus and saves it as text.
 *
 * <p>For every word in the corpus, four (dependent, head) pairs are recorded through
 * {@code addPair} together with the word's dependency relation: word/word, word/head-POS,
 * POS/head-word and POS/head-POS, where POS tags are passed through {@code wrapTag}.
 * Every POS tag encountered is also collected and written, one {@code case "TAG":} line
 * per tag in sorted order, to {@code data/model/dependency/pos-thu.txt}.
 *
 * @param corpusLoadPath path of the CoNLL corpus handed to {@code CoNLLLoader.loadSentenceList}
 * @param modelSavePath  path handed to {@code DictionaryMaker.saveTxtTo} for the resulting model
 * @return the result of {@code dictionaryMaker.saveTxtTo(modelSavePath)}
 */
public static boolean makeModel(String corpusLoadPath, String modelSavePath) {
    Set<String> posSet = new TreeSet<String>();
    DictionaryMaker dictionaryMaker = new DictionaryMaker();

    // Parse the corpus a single time; the previous code re-read and re-parsed the same
    // file just to run an identical second loop.
    Iterable<CoNLLSentence> sentenceList = CoNLLLoader.loadSentenceList(corpusLoadPath);

    // NOTE(review): every pair is deliberately still added twice so the saved model is
    // unchanged. The duplicated pass looks like a copy-paste leftover - confirm whether
    // downstream consumers rely on the doubled counts before reducing this to one pass.
    for (int pass = 0; pass < 2; ++pass) {
        for (CoNLLSentence sentence : sentenceList) {
            for (CoNLLWord word : sentence.word) {
                addPair(word.NAME, word.HEAD.NAME, word.DEPREL, dictionaryMaker);
                addPair(word.NAME, wrapTag(word.HEAD.POSTAG), word.DEPREL, dictionaryMaker);
                addPair(wrapTag(word.POSTAG), word.HEAD.NAME, word.DEPREL, dictionaryMaker);
                addPair(wrapTag(word.POSTAG), wrapTag(word.HEAD.POSTAG), word.DEPREL, dictionaryMaker);
                posSet.add(word.POSTAG);
            }
        }
    }

    // Dump the sorted POS tag set as ready-to-paste switch labels.
    StringBuilder sb = new StringBuilder();
    for (String pos : posSet) {
        sb.append("case \"").append(pos).append("\":\n");
    }
    IOUtil.saveTxt("data/model/dependency/pos-thu.txt", sb.toString());
    return dictionaryMaker.saveTxtTo(modelSavePath);
}
Example use of com.hankcs.hanlp.corpus.dictionary.DictionaryMaker in the project HanLP by hankcs.
From the class NRCorpusLoader, method load.
/**
 * Reads a UTF-8 text file line by line, splits each accepted two- or three-character
 * line into single-character {@code Word}s tagged with {@code NR} labels, adds them to a
 * {@code DictionaryMaker}, and saves the result to {@code data/dictionary/person/name.txt}.
 *
 * <p>Lines matching the rejection pattern (punctuation, the listed symbols, whitespace,
 * ASCII letters or digits) are skipped, as are lines of any other length.
 *
 * @param path path of the file to read
 * @return {@code true} if the file was processed without an exception being thrown,
 *         {@code false} otherwise (the failure is logged as a warning)
 */
public static boolean load(String path) {
    try {
        DictionaryMaker dictionaryMaker = new DictionaryMaker();
        // Compile the rejection pattern once instead of once per line via String.matches.
        java.util.regex.Pattern rejectPattern =
                java.util.regex.Pattern.compile(".*[\\p{P}+~$`^=|<>~`$^+=|<>¥×|\\s|a-z0-9A-Z]+.*");
        BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8"));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                if (rejectPattern.matcher(line).matches()) {
                    continue;
                }
                // Only two- and three-character names are loaded.
                switch (line.length()) {
                    case 2: {
                        dictionaryMaker.add(new Word(line.substring(0, 1), NR.B.toString()));
                        dictionaryMaker.add(new Word(line.substring(1), NR.E.toString()));
                        break;
                    }
                    case 3: {
                        dictionaryMaker.add(new Word(line.substring(0, 1), NR.B.toString()));
                        dictionaryMaker.add(new Word(line.substring(1, 2), NR.C.toString()));
                        dictionaryMaker.add(new Word(line.substring(2, 3), NR.D.toString()));
                        break;
                    }
                    default:
                        // Any other length is ignored.
                        break;
                }
            }
        } finally {
            // Always release the file handle, even when reading or parsing fails.
            // try/finally (not try-with-resources) keeps this compatible with older Java.
            br.close();
        }
        logger.info(dictionaryMaker.toString());
        // NOTE(review): this filter returns false for every item; if onSave decides whether
        // an item is written, nothing reaches name.txt - confirm this is intended.
        dictionaryMaker.saveTxtTo("data/dictionary/person/name.txt", new DictionaryMaker.Filter() {
            @Override
            public boolean onSave(Item item) {
                return false;
            }
        });
    } catch (Exception e) {
        // Include the exception so the cause of the failure is not lost.
        logger.warning("读取" + path + "发生错误" + e);
        return false;
    }
    return true;
}
Aggregations