Use of com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence in the HanLP project by hankcs.
From class TestParse, method testEvaluate:
/**
 * Evaluates the CRF dependency parser against the THU dev corpus and prints
 * the aggregate evaluation result.
 *
 * @throws Exception if the corpus cannot be loaded or parsing fails
 */
public void testEvaluate() throws Exception {
    // Run the basic parse test first so the parser/model is initialized.
    testParse();
    LinkedList<CoNLLSentence> sentenceList = CoNLLLoader.loadSentenceList("D:\\Doc\\语料库\\依存分析训练数据\\THU\\dev.conll");
    Evaluator evaluator = new Evaluator();
    int index = 1;
    for (CoNLLSentence sentence : sentenceList) {
        System.out.printf("%d / %d...", index, sentenceList.size());
        ++index;
        long startMillis = System.currentTimeMillis();
        // Rebuild a term list from the gold sentence's word forms and POS tags.
        List<Term> termList = new LinkedList<Term>();
        for (CoNLLWord word : sentence.word) {
            Term term = new Term(word.LEMMA, Nature.valueOf(word.POSTAG));
            termList.add(term);
        }
        CoNLLSentence parsed = CRFDependencyParser.compute(termList);
        // Score the parser output against the gold-standard sentence.
        evaluator.e(sentence, parsed);
        long elapsedMillis = System.currentTimeMillis() - startMillis;
        System.out.println("done in " + elapsedMillis + " ms.");
    }
    System.out.println(evaluator);
}
Use of com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence in the HanLP project by hankcs.
From class DemoDependencyParser, method main:
/**
 * Demonstrates dependency parsing: iterate the sentence, walk the word array
 * in reverse, and climb a subtree from one node up to the virtual root.
 */
public static void main(String[] args) {
    CoNLLSentence sentence = HanLP.parseDependency("徐先生还具体帮助他确定了把画雄鹰、松鼠和麻雀作为主攻目标。");
    System.out.println(sentence);
    // The sentence is iterable, so each word can be visited directly.
    for (CoNLLWord word : sentence) {
        System.out.printf("%s --(%s)--> %s\n", word.LEMMA, word.DEPREL, word.HEAD.LEMMA);
    }
    // Alternatively, fetch the backing array and traverse it in any order — here reversed.
    CoNLLWord[] wordArray = sentence.getWordArray();
    for (int index = wordArray.length - 1; index >= 0; index--) {
        CoNLLWord current = wordArray[index];
        System.out.printf("%s --(%s)--> %s\n", current.LEMMA, current.DEPREL, current.HEAD.LEMMA);
    }
    // Climb the dependency chain from one node all the way to the virtual root.
    CoNLLWord node = wordArray[12];
    for (node = node.HEAD; node != null; node = node.HEAD) {
        if (node == CoNLLWord.ROOT) {
            System.out.println(node.LEMMA);
        } else {
            System.out.printf("%s --(%s)--> ", node.LEMMA, node.DEPREL);
        }
    }
}
Use of com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence in the HanLP project by hankcs.
From class TestDependencyCorpus, method testMakeCRF:
/**
 * 导出CRF训练语料 — exports the dev corpus in CRF++ training format:
 * one token per line (NAME, CPOSTAG, POSTAG, head-distance label, tab-separated),
 * with a blank line between sentences.
 *
 * @throws Exception if the corpus cannot be loaded or the output cannot be written
 */
public void testMakeCRF() throws Exception {
    // NOTE(review): uses the platform default charset; presumably should be UTF-8 — confirm
    // against how CRF++ reads this file before changing.
    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("D:\\Tools\\CRF++-0.58\\example\\dependency\\dev.txt")));
    try {
        LinkedList<CoNLLSentence> coNLLSentences = CoNLLLoader.loadSentenceList("D:\\Doc\\语料库\\依存分析训练数据\\THU\\dev.conll.fixed.txt");
        for (CoNLLSentence coNLLSentence : coNLLSentences) {
            for (CoNLLWord coNLLWord : coNLLSentence.word) {
                bw.write(coNLLWord.NAME);
                bw.write('\t');
                bw.write(coNLLWord.CPOSTAG);
                bw.write('\t');
                bw.write(coNLLWord.POSTAG);
                bw.write('\t');
                // Signed token distance from this word to its head (IDs are 1-based).
                int d = coNLLWord.HEAD.ID - coNLLWord.ID;
                // posDistance = 1 + number of words strictly between this word and its
                // head whose coarse POS equals the head's coarse POS.
                int posDistance = 1;
                if (d > 0) {
                    // 在后面 — the head lies after this word
                    for (int i = 1; i < d; ++i) {
                        if (coNLLSentence.word[coNLLWord.ID - 1 + i].CPOSTAG.equals(coNLLWord.HEAD.CPOSTAG)) {
                            ++posDistance;
                        }
                    }
                } else {
                    // 在前面 — the head lies before this word
                    for (int i = 1; i < -d; ++i) {
                        if (coNLLSentence.word[coNLLWord.ID - 1 - i].CPOSTAG.equals(coNLLWord.HEAD.CPOSTAG)) {
                            ++posDistance;
                        }
                    }
                }
                // Label such as "+2_n": direction, same-CPOS distance, head's coarse POS.
                bw.write((d > 0 ? "+" : "-") + posDistance + "_" + coNLLWord.HEAD.CPOSTAG);
                bw.newLine();
            }
            // Blank line terminates each sentence in CRF++ format.
            bw.newLine();
        }
    } finally {
        // Always release the file handle, even if loading or writing throws.
        bw.close();
    }
}
Use of com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence in the HanLP project by hankcs.
From class AbstractDependencyParser, method parse:
/**
 * Parses a raw sentence by segmenting it into terms and delegating to the
 * term-list overload, optionally translating each dependency-relation label.
 */
@Override
public CoNLLSentence parse(String sentence) {
    assert sentence != null;
    // Segment first, then run the actual dependency parse on the term list.
    CoNLLSentence result = parse(segment.seg(sentence.toCharArray()));
    // Optionally rewrite each DEPREL through the translation table.
    if (enableDeprelTranslater && deprelTranslater != null) {
        for (CoNLLWord word : result) {
            word.DEPREL = deprelTranslater.get(word.DEPREL);
        }
    }
    return result;
}
Use of com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence in the HanLP project by hankcs.
From class CRFDependencyParser, method parse:
// Tags each term's dependency head with a CRF model, then assigns relation
// labels from a bigram dependency model, returning the assembled CoNLL sentence.
@Override
public CoNLLSentence parse(List<Term> termList) {
// Build the CRF feature table: one row per term.
// Columns: [0] word form, [2] POS string from the term's nature,
// [1] coarse POS (first character of [2]); [3] is filled by the tagger below.
Table table = new Table();
table.v = new String[termList.size()][4];
Iterator<Term> iterator = termList.iterator();
for (String[] line : table.v) {
Term term = iterator.next();
line[0] = term.word;
line[2] = POSUtil.compilePOS(term.nature);
line[1] = line[2].substring(0, 1);
}
// Run the CRF model; the predicted head tag lands in each row's last column.
crfModel.tag(table);
if (HanLP.Config.DEBUG) {
System.out.println(table);
}
// CoNLL word IDs are 1-based; constructor takes (id, form, POS, coarse POS).
CoNLLWord[] coNLLWordArray = new CoNLLWord[table.size()];
for (int i = 0; i < coNLLWordArray.length; i++) {
coNLLWordArray[i] = new CoNLLWord(i + 1, table.v[i][0], table.v[i][2], table.v[i][1]);
}
// Resolve each predicted head tag to an actual word, the virtual ROOT, or NULL.
int i = 0;
for (String[] line : table.v) {
CRFModelForDependency.DTag dTag = new CRFModelForDependency.DTag(line[3]);
if (dTag.pos.endsWith("ROOT")) {
coNLLWordArray[i].HEAD = CoNLLWord.ROOT;
} else {
// convertOffset2Index maps the tag's relative offset to an array index;
// -1 indicates no resolvable head for this word.
int index = convertOffset2Index(dTag, table, i);
if (index == -1)
coNLLWordArray[i].HEAD = CoNLLWord.NULL;
else
coNLLWordArray[i].HEAD = coNLLWordArray[index];
}
++i;
}
// Label each (word, head) pair with a relation from the bigram dependency model.
for (i = 0; i < coNLLWordArray.length; i++) {
coNLLWordArray[i].DEPREL = BigramDependencyModel.get(coNLLWordArray[i].NAME, coNLLWordArray[i].POSTAG, coNLLWordArray[i].HEAD.NAME, coNLLWordArray[i].HEAD.POSTAG);
}
return new CoNLLSentence(coNLLWordArray);
}
Aggregations