use of com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord in project HanLP by hankcs.
the class MaxEntDependencyModelMaker method generateUniContext.
/**
 * Builds the feature strings describing the ordered pair
 * ({@code word[i]}, {@code word[j]}).
 *
 * <p>Eight features are emitted, in this order: the form pair, the POS-tag
 * pair, both pairs again with the signed distance {@code i - j} appended,
 * and four variants in which the word preceding position {@code i} or
 * {@code j} is prefixed with {@code '@'}. When there is no preceding word
 * (position 0) {@link CoNLLWord#NULL} is used in its place.
 *
 * @param word the words of the sentence
 * @param i    index of the word written on the left of the arrow
 * @param j    index of the word written on the right of the arrow
 * @return the generated feature strings, in a fixed order
 */
public static Collection<String> generateUniContext(CoNLLWord[] word, int i, int j) {
    final CoNLLWord left = word[i];
    final CoNLLWord right = word[j];
    final int distance = i - j;

    // Left neighbours, falling back to the NULL placeholder at the sentence start.
    final CoNLLWord leftPrev = (i >= 1) ? word[i - 1] : CoNLLWord.NULL;
    final CoNLLWord rightPrev = (j >= 1) ? word[j - 1] : CoNLLWord.NULL;

    final String namePair = left.NAME + '→' + right.NAME;
    final String tagPair = left.POSTAG + '→' + right.POSTAG;

    Collection<String> features = new LinkedList<String>();
    features.add(namePair);
    features.add(tagPair);
    features.add(namePair + distance);
    features.add(tagPair + distance);
    features.add(leftPrev.NAME + '@' + namePair);
    features.add(left.NAME + '→' + rightPrev.NAME + '@' + right.NAME);
    features.add(leftPrev.POSTAG + '@' + tagPair);
    features.add(left.POSTAG + '→' + rightPrev.POSTAG + '@' + right.POSTAG);
    return features;
}
use of com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord in project HanLP by hankcs.
the class MaxEntDependencyModelMaker method generateSingleWordContext.
/**
 * Builds the window features around a single word.
 *
 * <p>For every position from {@code index - 2} to {@code index + 2} two
 * features are emitted: the word form and the POS tag, each followed by
 * {@code mark} and the position's offset relative to {@code index}.
 * Positions outside the array are represented by {@link CoNLLWord#NULL}.
 *
 * @param word  the words of the sentence
 * @param index centre of the window
 * @param mark  marker inserted between the value and the relative offset
 * @return the generated feature strings, in window order
 */
public static Collection<String> generateSingleWordContext(CoNLLWord[] word, int index, String mark) {
    Collection<String> features = new LinkedList<String>();
    for (int position = index - 2; position < index + 2 + 1; ++position) {
        CoNLLWord current;
        if (position < 0 || position >= word.length) {
            current = CoNLLWord.NULL;
        } else {
            current = word[position];
        }
        // Tag the tail of each feature with the marker and offset; otherwise features would collide.
        String suffix = mark + (position - index);
        features.add(current.NAME + suffix);
        features.add(current.POSTAG + suffix);
    }
    return features;
}
use of com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord in project HanLP by hankcs.
the class TestParse method testEvaluate.
/**
 * Runs {@code testParse()} and then parses every sentence of the dev corpus
 * with {@code CRFDependencyParser}, handing each (loaded sentence, parser
 * output) pair to an {@code Evaluator} and printing per-sentence timing and
 * the evaluator itself at the end.
 *
 * <p>NOTE(review): the corpus location is a hard-coded local Windows path.
 *
 * @throws Exception propagated from {@code testParse()} or the calls below
 */
public void testEvaluate() throws Exception {
    testParse();
    LinkedList<CoNLLSentence> corpus = CoNLLLoader.loadSentenceList("D:\\Doc\\语料库\\依存分析训练数据\\THU\\dev.conll");
    Evaluator evaluator = new Evaluator();
    int processed = 0;
    for (CoNLLSentence reference : corpus) {
        ++processed;
        System.out.printf("%d / %d...", processed, corpus.size());
        long begin = System.currentTimeMillis();

        // Rebuild the parser input from the lemma and POS tag of each loaded word.
        List<Term> terms = new LinkedList<Term>();
        for (CoNLLWord token : reference.word) {
            Nature nature = Nature.valueOf(token.POSTAG);
            terms.add(new Term(token.LEMMA, nature));
        }

        CoNLLSentence parsed = CRFDependencyParser.compute(terms);
        evaluator.e(reference, parsed);

        long elapsed = System.currentTimeMillis() - begin;
        System.out.println("done in " + elapsed + " ms.");
    }
    System.out.println(evaluator);
}
use of com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord in project HanLP by hankcs.
the class AbstractDependencyParser method parse.
/**
 * Segments {@code sentence} and parses the resulting terms, optionally
 * translating each word's dependency relation label.
 *
 * <p>Translation is applied only when {@code enableDeprelTranslater} is set
 * and {@code deprelTranslater} is non-null. A label for which the translator
 * returns {@code null} is left unchanged rather than being overwritten with
 * {@code null}.
 *
 * @param sentence the raw sentence text; must not be {@code null}
 * @return the parsed sentence
 */
@Override
public CoNLLSentence parse(String sentence) {
    assert sentence != null;
    CoNLLSentence output = parse(segment.seg(sentence.toCharArray()));
    if (enableDeprelTranslater && deprelTranslater != null) {
        for (CoNLLWord word : output) {
            String translatedDeprel = deprelTranslater.get(word.DEPREL);
            // No entry for this label: keep the original instead of nulling it out.
            if (translatedDeprel != null) {
                word.DEPREL = translatedDeprel;
            }
        }
    }
    return output;
}
use of com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord in project HanLP by hankcs.
the class CRFDependencyParser method parse.
/**
 * Parses a list of terms into a {@link CoNLLSentence} using the CRF model.
 *
 * <p>Steps: (1) fill a four-column table with one row per term, (2) let
 * {@code crfModel} tag the table, (3) create one {@link CoNLLWord} per row,
 * (4) resolve each word's head from the tag in column 3, and (5) look up
 * each word's dependency relation via {@code BigramDependencyModel}.
 *
 * @param termList the terms to parse
 * @return the sentence built from the tagged rows
 */
@Override
public CoNLLSentence parse(List<Term> termList) {
    Table table = new Table();
    table.v = new String[termList.size()][4];

    // Column 0: word, column 2: compiled POS, column 1: first character of that POS.
    int row = 0;
    for (Term term : termList) {
        String[] cells = table.v[row++];
        cells[0] = term.word;
        String compiledPos = POSUtil.compilePOS(term.nature);
        cells[2] = compiledPos;
        cells[1] = compiledPos.substring(0, 1);
    }

    crfModel.tag(table);
    if (HanLP.Config.DEBUG) {
        System.out.println(table);
    }

    // All words must exist before heads are linked, since a head may come later in the sentence.
    final int wordCount = table.size();
    CoNLLWord[] words = new CoNLLWord[wordCount];
    for (int k = 0; k < wordCount; k++) {
        String[] cells = table.v[k];
        words[k] = new CoNLLWord(k + 1, cells[0], cells[2], cells[1]);
    }

    // Column 3 holds the tag from which the head is resolved.
    final String[][] rows = table.v;
    for (int k = 0; k < rows.length; k++) {
        CRFModelForDependency.DTag dTag = new CRFModelForDependency.DTag(rows[k][3]);
        if (dTag.pos.endsWith("ROOT")) {
            words[k].HEAD = CoNLLWord.ROOT;
            continue;
        }
        int headIndex = convertOffset2Index(dTag, table, k);
        // -1 means no head index was resolved; use the NULL placeholder word.
        words[k].HEAD = (headIndex == -1) ? CoNLLWord.NULL : words[headIndex];
    }

    for (CoNLLWord current : words) {
        CoNLLWord head = current.HEAD;
        current.DEPREL = BigramDependencyModel.get(current.NAME, current.POSTAG, head.NAME, head.POSTAG);
    }
    return new CoNLLSentence(words);
}
Aggregations