Search in sources:

Example 1 with CoNLLSentence

Use of com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence in project HanLP by hankcs.

In class TestParse, method testEvaluate.

/**
 * Runs {@code testParse()}, then parses every sentence of the dev corpus with
 * {@code CRFDependencyParser.compute} and feeds each (gold, parsed) pair to an {@code Evaluator}.
 * Progress and per-sentence wall-clock time go to standard output, followed by the evaluator itself.
 *
 * @throws Exception propagated from {@code testParse()}, corpus loading or parsing
 */
public void testEvaluate() throws Exception {
    testParse();
    LinkedList<CoNLLSentence> goldSentences = CoNLLLoader.loadSentenceList("D:\\Doc\\语料库\\依存分析训练数据\\THU\\dev.conll");
    Evaluator evaluator = new Evaluator();
    int progress = 0;
    for (CoNLLSentence gold : goldSentences) {
        ++progress;
        System.out.printf("%d / %d...", progress, goldSentences.size());
        long startedAt = System.currentTimeMillis();
        // Rebuild the parser input from the gold words: lemma plus the nature named by the POS tag.
        List<Term> terms = new LinkedList<Term>();
        for (CoNLLWord goldWord : gold.word) {
            Nature nature = Nature.valueOf(goldWord.POSTAG);
            terms.add(new Term(goldWord.LEMMA, nature));
        }
        CoNLLSentence parsed = CRFDependencyParser.compute(terms);
        evaluator.e(gold, parsed);
        long elapsedMillis = System.currentTimeMillis() - startedAt;
        System.out.println("done in " + elapsedMillis + " ms.");
    }
    System.out.println(evaluator);
}
Also used : CoNLLWord(com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord) CoNLLSentence(com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence) Term(com.hankcs.hanlp.seg.common.Term) Evaluator(com.hankcs.hanlp.corpus.dependency.CoNll.Evaluator) LinkedList(java.util.LinkedList)

Example 2 with CoNLLSentence

Use of com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence in project HanLP by hankcs.

In class DemoDependencyParser, method main.

/**
 * Demo: parses one fixed sentence with {@code HanLP.parseDependency} and prints it three ways —
 * by iterating the sentence, by walking its word array backwards, and by following HEAD links
 * upwards from the word at array index 12.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    CoNLLSentence parsed = HanLP.parseDependency("徐先生还具体帮助他确定了把画雄鹰、松鼠和麻雀作为主攻目标。");
    System.out.println(parsed);
    // The sentence can be iterated directly, word by word.
    for (CoNLLWord token : parsed) {
        System.out.printf("%s --(%s)--> %s\n", token.LEMMA, token.DEPREL, token.HEAD.LEMMA);
    }
    // The word array is also available and can be walked in any order; here from last to first.
    CoNLLWord[] tokens = parsed.getWordArray();
    int position = tokens.length;
    while (--position >= 0) {
        CoNLLWord token = tokens[position];
        System.out.printf("%s --(%s)--> %s\n", token.LEMMA, token.DEPREL, token.HEAD.LEMMA);
    }
    // A subtree path can be followed too: start at one node and climb HEAD links up to the virtual root.
    CoNLLWord ancestor = tokens[12].HEAD;
    while (ancestor != null) {
        if (ancestor != CoNLLWord.ROOT) {
            System.out.printf("%s --(%s)--> ", ancestor.LEMMA, ancestor.DEPREL);
        } else {
            System.out.println(ancestor.LEMMA);
        }
        ancestor = ancestor.HEAD;
    }
}
Also used : CoNLLWord(com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord) CoNLLSentence(com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence)

Example 3 with CoNLLSentence

Use of com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence in project HanLP by hankcs.

In class TestDependencyCorpus, method testMakeCRF.

/**
 * Exports the dependency corpus as CRF training data.
 * <p>
 * One line is written per word: NAME, CPOSTAG, POSTAG and a head tag, separated by tabs. The head tag
 * has the form {@code <sign><n>_<head CPOSTAG>}: the sign is {@code +} when the head's ID is greater
 * than the word's ID and {@code -} otherwise, and {@code n} is 1 plus the number of words strictly
 * between the word and its head whose CPOSTAG equals the head's CPOSTAG. An empty line follows each
 * sentence.
 *
 * @throws Exception propagated from opening the output file, loading the corpus or writing; once the
 *                   writer has been opened it is closed whether or not the export succeeds
 */
public void testMakeCRF() throws Exception {
    // NOTE(review): OutputStreamWriter without an explicit charset uses the platform default encoding —
    // confirm that is what the consumer of dev.txt expects.
    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("D:\\Tools\\CRF++-0.58\\example\\dependency\\dev.txt")));
    try {
        LinkedList<CoNLLSentence> coNLLSentences = CoNLLLoader.loadSentenceList("D:\\Doc\\语料库\\依存分析训练数据\\THU\\dev.conll.fixed.txt");
        for (CoNLLSentence coNLLSentence : coNLLSentences) {
            for (CoNLLWord coNLLWord : coNLLSentence.word) {
                bw.write(coNLLWord.NAME);
                bw.write('\t');
                bw.write(coNLLWord.CPOSTAG);
                bw.write('\t');
                bw.write(coNLLWord.POSTAG);
                bw.write('\t');
                // Signed ID distance to the head: positive means the head comes later in the sentence.
                int d = coNLLWord.HEAD.ID - coNLLWord.ID;
                int step = d > 0 ? 1 : -1;
                int span = Math.abs(d);
                int posDistance = 1;
                // Walk the words strictly between this word and its head (word array index = ID - 1)
                // and count those sharing the head's coarse POS tag.
                for (int i = 1; i < span; ++i) {
                    if (coNLLSentence.word[coNLLWord.ID - 1 + step * i].CPOSTAG.equals(coNLLWord.HEAD.CPOSTAG)) {
                        ++posDistance;
                    }
                }
                bw.write((d > 0 ? "+" : "-") + posDistance + "_" + coNLLWord.HEAD.CPOSTAG);
                bw.newLine();
            }
            bw.newLine();
        }
    } finally {
        // Release the file handle even when loading or writing fails part-way through.
        bw.close();
    }
}
Also used : CoNLLWord(com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord) FileOutputStream(java.io.FileOutputStream) CoNLLSentence(com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence) OutputStreamWriter(java.io.OutputStreamWriter) BufferedWriter(java.io.BufferedWriter)

Example 4 with CoNLLSentence

Use of com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence in project HanLP by hankcs.

In class AbstractDependencyParser, method parse.

/**
 * Segments the raw sentence with {@code segment}, parses the resulting terms, and — when
 * {@code enableDeprelTranslater} is set and a {@code deprelTranslater} is present — replaces each
 * word's DEPREL with its translation.
 * <p>
 * A word whose DEPREL has no translation (the lookup returns {@code null}) keeps its original label
 * instead of having it overwritten with {@code null}.
 *
 * @param sentence the raw sentence; must not be {@code null} (checked only by an {@code assert})
 * @return the parsed sentence, with relation labels translated where a translation exists
 */
@Override
public CoNLLSentence parse(String sentence) {
    assert sentence != null;
    CoNLLSentence output = parse(segment.seg(sentence.toCharArray()));
    if (enableDeprelTranslater && deprelTranslater != null) {
        for (CoNLLWord word : output) {
            String translatedDeprel = deprelTranslater.get(word.DEPREL);
            // Only overwrite when a translation was actually found.
            if (translatedDeprel != null) {
                word.DEPREL = translatedDeprel;
            }
        }
    }
    return output;
}
Also used : CoNLLWord(com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord) CoNLLSentence(com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence)

Example 5 with CoNLLSentence

Use of com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence in project HanLP by hankcs.

In class CRFDependencyParser, method parse.

/**
 * Parses a list of terms into a dependency sentence.
 * <p>
 * Steps: fill a four-column table (column 0 = word, column 2 = {@code POSUtil.compilePOS} of the
 * term's nature, column 1 = first character of column 2), run {@code crfModel.tag} on it, build one
 * {@code CoNLLWord} per row, resolve each word's head from the tag read out of column 3, and finally
 * label each arc with {@code BigramDependencyModel.get}.
 *
 * @param termList the terms to parse
 * @return the sentence built from the resulting word array
 */
@Override
public CoNLLSentence parse(List<Term> termList) {
    Table table = new Table();
    String[][] features = new String[termList.size()][4];
    table.v = features;
    Iterator<Term> terms = termList.iterator();
    for (int row = 0; row < features.length; row++) {
        Term term = terms.next();
        features[row][0] = term.word;
        String compiledPos = POSUtil.compilePOS(term.nature);
        features[row][2] = compiledPos;
        features[row][1] = compiledPos.substring(0, 1);
    }

    crfModel.tag(table);
    if (HanLP.Config.DEBUG) {
        System.out.println(table);
    }

    // One word per table row, with 1-based ids.
    int wordCount = table.size();
    CoNLLWord[] words = new CoNLLWord[wordCount];
    for (int k = 0; k < wordCount; k++) {
        String[] cells = table.v[k];
        words[k] = new CoNLLWord(k + 1, cells[0], cells[2], cells[1]);
    }

    // Resolve heads: a tag whose pos ends with "ROOT" attaches to the virtual root; an offset that
    // cannot be converted to an index (-1) attaches to the NULL placeholder word.
    int position = 0;
    for (String[] cells : table.v) {
        CRFModelForDependency.DTag dTag = new CRFModelForDependency.DTag(cells[3]);
        CoNLLWord head;
        if (dTag.pos.endsWith("ROOT")) {
            head = CoNLLWord.ROOT;
        } else {
            int headIndex = convertOffset2Index(dTag, table, position);
            head = (headIndex == -1) ? CoNLLWord.NULL : words[headIndex];
        }
        words[position].HEAD = head;
        position++;
    }

    // Label every arc from the (word, tag) pairs of the dependent and its head.
    for (CoNLLWord word : words) {
        word.DEPREL = BigramDependencyModel.get(word.NAME, word.POSTAG, word.HEAD.NAME, word.HEAD.POSTAG);
    }
    return new CoNLLSentence(words);
}
Also used : Table(com.hankcs.hanlp.model.crf.Table) CoNLLWord(com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord) CoNLLSentence(com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence) Term(com.hankcs.hanlp.seg.common.Term)

Aggregations

CoNLLSentence (com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence) 10, CoNLLWord (com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord) 10, Term (com.hankcs.hanlp.seg.common.Term) 4, DictionaryMaker (com.hankcs.hanlp.corpus.dictionary.DictionaryMaker) 2, Evaluator (com.hankcs.hanlp.corpus.dependency.CoNll.Evaluator) 1, Item (com.hankcs.hanlp.corpus.dictionary.item.Item) 1, Edge (com.hankcs.hanlp.dependency.common.Edge) 1, Node (com.hankcs.hanlp.dependency.common.Node) 1, State (com.hankcs.hanlp.dependency.common.State) 1, Table (com.hankcs.hanlp.model.crf.Table) 1, BufferedWriter (java.io.BufferedWriter) 1, FileOutputStream (java.io.FileOutputStream) 1, OutputStreamWriter (java.io.OutputStreamWriter) 1, ArrayList (java.util.ArrayList) 1, LinkedList (java.util.LinkedList) 1, PriorityQueue (java.util.PriorityQueue) 1, TreeSet (java.util.TreeSet) 1