Use of com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord in project HanLP by hankcs.
The class MaxEntDependencyModelMaker, method makeModel.
/**
 * Writes one training event per ordered word pair of every sentence in a CoNLL corpus.
 *
 * <p>For each sentence, every ordered pair {@code (i, j)} with {@code i != j} taken from the
 * array returned by {@code getWordArrayWithRoot()} produces one output line: the
 * space-separated context features of the pair followed by the label {@code edgeArray[i][j]}.
 * Pairs with no edge are still written; their label is the literal text {@code null}.
 * Progress is printed to standard output.
 *
 * @param corpusLoadPath path of the corpus passed to {@code CoNLLLoader.loadSentenceList}
 * @param modelSavePath  path of the output file opened through {@code IOUtil.newOutputStream}
 * @return always {@code true} when the method completes normally
 * @throws IOException if writing to or closing the output fails
 */
public static boolean makeModel(String corpusLoadPath, String modelSavePath) throws IOException {
    // NOTE(review): no charset is specified, so the platform default encoding is used for the
    // output file -- confirm that whatever reads this file makes the same assumption.
    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(IOUtil.newOutputStream(modelSavePath)));
    try {
        LinkedList<CoNLLSentence> sentenceList = CoNLLLoader.loadSentenceList(corpusLoadPath);
        // The list is not modified below, so its size is read once for the progress display.
        final int total = sentenceList.size();
        int id = 1;
        for (CoNLLSentence sentence : sentenceList) {
            System.out.printf("%d / %d...", id++, total);
            String[][] edgeArray = sentence.getEdgeArray();
            CoNLLWord[] word = sentence.getWordArrayWithRoot();
            for (int i = 0; i < word.length; ++i) {
                for (int j = 0; j < word.length; ++j) {
                    if (i == j) {
                        continue;
                    }
                    // This is one edge instance going from i to j. The edge may or may not
                    // exist; a missing edge (null) is still emitted as an instance.
                    List<String> contextList = new LinkedList<String>();
                    // First generate the atomic features of i and of j.
                    contextList.addAll(generateSingleWordContext(word, i, "i"));
                    contextList.addAll(generateSingleWordContext(word, j, "j"));
                    // Then generate the features of the (i, j) pair.
                    contextList.addAll(generateUniContext(word, i, j));
                    // Serialize the features, each followed by a single space.
                    for (String f : contextList) {
                        bw.write(f);
                        bw.write(' ');
                    }
                    // The event name is the dependency relation; a null entry is written as
                    // the text "null".
                    bw.write(String.valueOf(edgeArray[i][j]));
                    bw.newLine();
                }
            }
            System.out.println("done.");
        }
    } finally {
        // Close (and thereby flush) the writer even when loading or writing fails, so the
        // underlying output stream is never leaked.
        bw.close();
    }
    return true;
}
Use of com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord in project HanLP by hankcs.
The class WordNatureWeightModelMaker, method makeModel.
/**
 * Collects dependency-relation pairs from a CoNLL corpus into a dictionary and saves it.
 *
 * <p>For every word, four pairs are registered with the word's dependency relation:
 * word/head, word/head-tag, tag/head and tag/head-tag, where tags are passed through
 * {@code wrapTag}. All part-of-speech tags seen are additionally written, sorted, as
 * {@code case "<tag>":} lines to {@code data/model/dependency/pos-thu.txt}.
 *
 * <p>NOTE(review): the corpus is loaded and processed twice, so every pair is registered
 * twice. This looks like a copy-paste duplication, but it is kept as-is because it affects
 * the saved counts -- confirm whether the second pass is intended.
 *
 * @param corpusLoadPath path of the corpus passed to {@code CoNLLLoader.loadSentenceList}
 * @param modelSavePath  path handed to {@code DictionaryMaker.saveTxtTo}
 * @return the result of {@code dictionaryMaker.saveTxtTo(modelSavePath)}
 */
public static boolean makeModel(String corpusLoadPath, String modelSavePath) {
    Set<String> posSet = new TreeSet<String>();
    DictionaryMaker dictionaryMaker = new DictionaryMaker();

    // Two full passes over the corpus, each with a fresh load.
    for (int pass = 0; pass < 2; ++pass) {
        for (CoNLLSentence sentence : CoNLLLoader.loadSentenceList(corpusLoadPath)) {
            for (CoNLLWord dependent : sentence.word) {
                CoNLLWord head = dependent.HEAD;
                String relation = dependent.DEPREL;
                // word -> head word
                addPair(dependent.NAME, head.NAME, relation, dictionaryMaker);
                // word -> head tag
                addPair(dependent.NAME, wrapTag(head.POSTAG), relation, dictionaryMaker);
                // tag -> head word
                addPair(wrapTag(dependent.POSTAG), head.NAME, relation, dictionaryMaker);
                // tag -> head tag
                addPair(wrapTag(dependent.POSTAG), wrapTag(head.POSTAG), relation, dictionaryMaker);
                posSet.add(dependent.POSTAG);
            }
        }
    }

    // Emit one switch-case label line per distinct tag, in the set's sorted order.
    StringBuilder caseLines = new StringBuilder();
    for (String tag : posSet) {
        caseLines.append("case \"").append(tag).append("\":\n");
    }
    IOUtil.saveTxt("data/model/dependency/pos-thu.txt", caseLines.toString());

    return dictionaryMaker.saveTxtTo(modelSavePath);
}
Aggregations