Search in sources :

Example 1 with Table

use of com.hankcs.hanlp.model.crf.Table in project HanLP by hankcs.

the class TestCRF method testLoadModelWithBiGramFeature.

/**
 * Loads the binary CRF segmentation model, tags a sample sentence one
 * character per row, and prints the resulting table.
 *
 * @throws Exception propagated from model loading or tagging
 */
public void testLoadModelWithBiGramFeature() throws Exception {
    // The binary model path is the configured model path plus the binary extension.
    final String modelPath = HanLP.Config.CRFSegmentModelPath + Predefine.BIN_EXT;
    final CRFModel crf = new CRFModel(new BinTrie<FeatureFunction>());
    crf.load(ByteArray.createByteArray(modelPath));

    final Table table = new Table();
    final String sample = "人民生活进一步改善了";
    final String[][] rows = new String[sample.length()][2];
    // Column 0 receives a single character; column 1 is left unset here.
    int row = 0;
    while (row < sample.length()) {
        rows[row][0] = String.valueOf(sample.charAt(row));
        row++;
    }
    table.v = rows;

    crf.tag(table);
    System.out.println(table);
}
Also used : CRFModel(com.hankcs.hanlp.model.crf.CRFModel) Table(com.hankcs.hanlp.model.crf.Table) FeatureFunction(com.hankcs.hanlp.model.crf.FeatureFunction)

Example 2 with Table

use of com.hankcs.hanlp.model.crf.Table in project HanLP by hankcs.

the class TestCRF method testLoadFromTxt.

/**
 * Loads a CRF model from a text model file, tags a five-character table
 * whose second column is the placeholder "?", and prints the result.
 *
 * @throws Exception propagated from model loading or tagging
 */
public void testLoadFromTxt() throws Exception {
    final String modelFile = "D:\\Tools\\CRF++-0.58\\example\\seg_cn\\model.txt";
    final CRFModel crf = CRFModel.loadTxt(modelFile);

    // One row per character: { character, placeholder }.
    final String[][] rows = {
        { "商", "?" },
        { "品", "?" },
        { "和", "?" },
        { "服", "?" },
        { "务", "?" }
    };
    final Table table = new Table();
    table.v = rows;

    crf.tag(table);
    System.out.println(table);
}
Also used : CRFModel(com.hankcs.hanlp.model.crf.CRFModel) Table(com.hankcs.hanlp.model.crf.Table)

Example 3 with Table

use of com.hankcs.hanlp.model.crf.Table in project HanLP by hankcs.

the class CRFDependencyParser method parse.

/**
 * Builds a dependency-annotated sentence from a list of terms.
 *
 * <p>Each term becomes one table row: column 0 is the word, column 2 the
 * compiled part-of-speech string, column 1 the first character of that
 * string. After {@code crfModel.tag(table)} runs, column 3 is read as the
 * dependency tag for the row.
 *
 * @param termList the terms to parse, one per word
 * @return a sentence whose words carry HEAD and DEPREL assignments
 */
@Override
public CoNLLSentence parse(List<Term> termList) {
    Table table = new Table();
    table.v = new String[termList.size()][4];

    // Fill the feature columns, pulling terms in list order.
    Iterator<Term> terms = termList.iterator();
    for (int row = 0; row < table.v.length; row++) {
        String[] cells = table.v[row];
        Term term = terms.next();
        cells[0] = term.word;
        cells[2] = POSUtil.compilePOS(term.nature);
        cells[1] = cells[2].substring(0, 1);
    }

    crfModel.tag(table);
    if (HanLP.Config.DEBUG) {
        System.out.println(table);
    }

    // First pass: create every word so that heads can reference any position.
    CoNLLWord[] words = new CoNLLWord[table.size()];
    for (int pos = 0; pos < words.length; pos++) {
        String[] cells = table.v[pos];
        words[pos] = new CoNLLWord(pos + 1, cells[0], cells[2], cells[1]);
    }

    // Second pass: resolve each word's head from its tag in column 3.
    for (int pos = 0; pos < table.v.length; pos++) {
        CRFModelForDependency.DTag tag = new CRFModelForDependency.DTag(table.v[pos][3]);
        if (tag.pos.endsWith("ROOT")) {
            words[pos].HEAD = CoNLLWord.ROOT;
            continue;
        }
        int headIndex = convertOffset2Index(tag, table, pos);
        // -1 means no head index was found; fall back to the NULL word.
        words[pos].HEAD = (headIndex == -1) ? CoNLLWord.NULL : words[headIndex];
    }

    // Third pass: look up the relation label from the (word, head) pair.
    for (CoNLLWord word : words) {
        word.DEPREL = BigramDependencyModel.get(word.NAME, word.POSTAG, word.HEAD.NAME, word.HEAD.POSTAG);
    }
    return new CoNLLSentence(words);
}
Also used : Table(com.hankcs.hanlp.model.crf.Table) CoNLLWord(com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord) CoNLLSentence(com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence) Term(com.hankcs.hanlp.seg.common.Term)

Example 4 with Table

use of com.hankcs.hanlp.model.crf.Table in project HanLP by hankcs.

the class TestCRF method testTemplate.

/**
 * Creates a feature template from a template string, generates the
 * parameter for row 0 of a three-row table, and prints it.
 *
 * @throws Exception propagated from template creation or generation
 */
public void testTemplate() throws Exception {
    final FeatureTemplate template = FeatureTemplate.create("U05:%x[-2,0]/%x[-1,0]/%x[0,0]");

    // Rows are { character, tag } pairs.
    final String[][] rows = {
        { "那", "S" },
        { "音", "B" },
        { "韵", "E" }
    };
    final Table table = new Table();
    table.v = rows;

    // Kept as char[] so println prints the characters rather than an object reference.
    final char[] generated = template.generateParameter(table, 0);
    System.out.println(generated);
}
Also used : FeatureTemplate(com.hankcs.hanlp.model.crf.FeatureTemplate) Table(com.hankcs.hanlp.model.crf.Table)

Example 5 with Table

use of com.hankcs.hanlp.model.crf.Table in project HanLP by hankcs.

the class CRFSegment method segSentence.

/**
 * Segments one sentence into terms using the CRF model.
 *
 * <p>The characters are first passed through {@code CharTable.convert} and
 * the converted form is what gets tagged, but every term's text is sliced
 * from the original {@code sentence} array, so the output keeps the
 * caller's characters.
 *
 * <p>In the tagged table, column 1 of a row is the text that row covers
 * (its length drives the offset) and column 2 is the tag; only the first
 * character of the tag is inspected ('B' and 'E' are treated specially).
 *
 * @param sentence the characters of the sentence to segment
 * @return the segmented terms; an empty list when {@code sentence} is empty
 */
@Override
protected List<Term> segSentence(char[] sentence) {
    if (sentence.length == 0)
        return Collections.emptyList();
    char[] sentenceConverted = CharTable.convert(sentence);
    Table table = new Table();
    table.v = atomSegmentToTable(sentenceConverted);
    crfModel.tag(table);
    List<Term> termList = new LinkedList<Term>();
    if (HanLP.Config.DEBUG) {
        System.out.println("CRF标注结果");
        System.out.println(table);
    }
    // offset is the position in `sentence` of the current row's first character.
    // The loop update advances it by the current row's text length before moving on.
    int offset = 0;
    OUTER: for (int i = 0; i < table.v.length; offset += table.v[i][1].length(), ++i) {
        String[] line = table.v[i];
        switch(line[2].charAt(0)) {
            case 'B':
                {
                    // Remember where this term starts, then scan forward to the first row tagged 'E'.
                    int begin = offset;
                    while (table.v[i][2].charAt(0) != 'E') {
                        offset += table.v[i][1].length();
                        ++i;
                        if (i == table.v.length) {
                            break;
                        }
                    }
                    if (i == table.v.length) {
                        // No 'E' row was found: emit everything from begin to the end of the table.
                        // break OUTER skips the loop update, which would otherwise index table.v[i] out of range.
                        termList.add(new Term(new String(sentence, begin, offset - begin), null));
                        break OUTER;
                    } else
                        // i now points at the 'E' row; include its text in the term.
                        // The loop update then moves offset past that row.
                        termList.add(new Term(new String(sentence, begin, offset - begin + table.v[i][1].length()), null));
                }
                break;
            default:
                {
                    // Any other tag: the row becomes a term on its own.
                    termList.add(new Term(new String(sentence, offset, table.v[i][1].length()), null));
                }
                break;
        }
    }
    if (config.speechTagging) {
        List<Vertex> vertexList = toVertexList(termList, true);
        Viterbi.compute(vertexList, CoreDictionaryTransformMatrixDictionary.transformMatrixDictionary);
        int i = 0;
        for (Term term : termList) {
            // NOTE(review): terms are created above with a null nature, so this update only
            // applies if toVertexList assigns a nature to some terms — confirm against toVertexList.
            // The i + 1 index presumably skips a start vertex added because of the `true` argument — confirm.
            if (term.nature != null)
                term.nature = vertexList.get(i + 1).guessNature();
            ++i;
        }
    }
    if (config.useCustomDictionary) {
        // Rebuild the term list from the vertices after combineByCustomDictionary has processed them.
        List<Vertex> vertexList = toVertexList(termList, false);
        combineByCustomDictionary(vertexList);
        termList = toTermList(vertexList, config.offset);
    }
    return termList;
}
Also used : Vertex(com.hankcs.hanlp.seg.common.Vertex) Table(com.hankcs.hanlp.model.crf.Table) CharTable(com.hankcs.hanlp.dictionary.other.CharTable) Term(com.hankcs.hanlp.seg.common.Term)

Aggregations

Table (com.hankcs.hanlp.model.crf.Table): 5; CRFModel (com.hankcs.hanlp.model.crf.CRFModel): 2; Term (com.hankcs.hanlp.seg.common.Term): 2; CoNLLSentence (com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence): 1; CoNLLWord (com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord): 1; CharTable (com.hankcs.hanlp.dictionary.other.CharTable): 1; FeatureFunction (com.hankcs.hanlp.model.crf.FeatureFunction): 1; FeatureTemplate (com.hankcs.hanlp.model.crf.FeatureTemplate): 1; Vertex (com.hankcs.hanlp.seg.common.Vertex): 1