Use of com.hankcs.hanlp.model.crf.Table in the HanLP project by hankcs.
Class TestCRF, method testLoadModelWithBiGramFeature.
/**
 * Loads the binary CRF segmentation model, tags a sample sentence with one
 * character per table row, and prints the resulting table.
 *
 * @throws Exception if loading the model or tagging fails
 */
public void testLoadModelWithBiGramFeature() throws Exception {
    // The binary model sits next to the configured model path, with the binary extension appended.
    final String modelPath = HanLP.Config.CRFSegmentModelPath + Predefine.BIN_EXT;
    final CRFModel crfModel = new CRFModel(new BinTrie<FeatureFunction>());
    crfModel.load(ByteArray.createByteArray(modelPath));

    // Column 0 carries the character; column 1 is left unset before tagging.
    final Table input = new Table();
    final String sentence = "人民生活进一步改善了";
    final int length = sentence.length();
    final String[][] rows = new String[length][2];
    int row = 0;
    while (row < length) {
        rows[row][0] = String.valueOf(sentence.charAt(row));
        row++;
    }
    input.v = rows;

    crfModel.tag(input);
    System.out.println(input);
}
Use of com.hankcs.hanlp.model.crf.Table in the HanLP project by hankcs.
Class TestCRF, method testLoadFromTxt.
/**
 * Loads a CRF model from a text-format model file, tags a five-character
 * sample whose second column is filled with "?" placeholders, and prints the table.
 *
 * @throws Exception if the model cannot be loaded or tagging fails
 */
public void testLoadFromTxt() throws Exception {
    final CRFModel crfModel = CRFModel.loadTxt("D:\\Tools\\CRF++-0.58\\example\\seg_cn\\model.txt");

    // One row per character: { character, placeholder }.
    final String[][] rows = {
        { "商", "?" },
        { "品", "?" },
        { "和", "?" },
        { "服", "?" },
        { "务", "?" }
    };
    final Table input = new Table();
    input.v = rows;

    crfModel.tag(input);
    System.out.println(input);
}
Use of com.hankcs.hanlp.model.crf.Table in the HanLP project by hankcs.
Class CRFDependencyParser, method parse.
/**
 * Builds a CoNLL sentence from a list of terms by tagging them with the CRF model.
 *
 * <p>Each term becomes one table row of four cells: the word, the first
 * character of its compiled POS, the full compiled POS, and a slot for the tag
 * read back after {@code crfModel.tag}. The tag of every row is then turned
 * into a head reference, and finally a dependency relation is looked up for
 * each word/head pair.
 *
 * @param termList the terms of the sentence, in order
 * @return the sentence as an array of CoNLL words wrapped in a {@link CoNLLSentence}
 */
@Override
public CoNLLSentence parse(List<Term> termList) {
    // Pass 0: fill the CRF input table, one row per term.
    final Table table = new Table();
    final int termCount = termList.size();
    table.v = new String[termCount][4];
    final Iterator<Term> termIterator = termList.iterator();
    for (int row = 0; row < termCount; row++) {
        final String[] cells = table.v[row];
        final Term term = termIterator.next();
        cells[0] = term.word;
        cells[2] = POSUtil.compilePOS(term.nature);
        cells[1] = cells[2].substring(0, 1);
    }

    crfModel.tag(table);
    if (HanLP.Config.DEBUG) {
        System.out.println(table);
    }

    // Pass 1: create the words (IDs are 1-based).
    final CoNLLWord[] words = new CoNLLWord[table.size()];
    int position = 0;
    while (position < words.length) {
        final String[] cells = table.v[position];
        words[position] = new CoNLLWord(position + 1, cells[0], cells[2], cells[1]);
        position++;
    }

    // Pass 2: resolve each row's tag (cell 3) into a head word.
    final String[][] rows = table.v;
    for (int row = 0; row < rows.length; row++) {
        final CRFModelForDependency.DTag dTag = new CRFModelForDependency.DTag(rows[row][3]);
        if (dTag.pos.endsWith("ROOT")) {
            words[row].HEAD = CoNLLWord.ROOT;
            continue;
        }
        final int headIndex = convertOffset2Index(dTag, table, row);
        if (headIndex != -1) {
            words[row].HEAD = words[headIndex];
        } else {
            // No head could be located for this row.
            words[row].HEAD = CoNLLWord.NULL;
        }
    }

    // Pass 3: look up the dependency relation for every word/head pair.
    for (CoNLLWord word : words) {
        word.DEPREL = BigramDependencyModel.get(word.NAME, word.POSTAG, word.HEAD.NAME, word.HEAD.POSTAG);
    }
    return new CoNLLSentence(words);
}
Use of com.hankcs.hanlp.model.crf.Table in the HanLP project by hankcs.
Class TestCRF, method testTemplate.
/**
 * Creates a feature template from a template string, generates its parameter
 * for row 0 of a three-row sample table, and prints the generated characters.
 *
 * @throws Exception if template creation or parameter generation fails
 */
public void testTemplate() throws Exception {
    final FeatureTemplate template = FeatureTemplate.create("U05:%x[-2,0]/%x[-1,0]/%x[0,0]");

    // Sample rows: { character, tag }.
    final String[][] rows = {
        { "那", "S" },
        { "音", "B" },
        { "韵", "E" }
    };
    final Table sample = new Table();
    sample.v = rows;

    final char[] generated = template.generateParameter(sample, 0);
    System.out.println(generated);
}
Use of com.hankcs.hanlp.model.crf.Table in the HanLP project by hankcs.
Class CRFSegment, method segSentence.
/**
 * Segments one sentence into terms using the CRF model.
 *
 * <p>The sentence is converted with {@code CharTable.convert}, turned into a
 * table by {@code atomSegmentToTable}, and tagged by {@code crfModel}. Rows are
 * then merged into terms according to the first character of the tag in
 * column 2. Term text is always cut from the ORIGINAL {@code sentence}, not
 * from the converted copy. Optional post-processing (POS tagging, custom
 * dictionary merging) runs when the corresponding config flags are set.
 *
 * @param sentence the characters of the sentence to segment
 * @return the list of terms; an empty list when {@code sentence} is empty
 */
@Override
protected List<Term> segSentence(char[] sentence) {
// Nothing to segment.
if (sentence.length == 0)
return Collections.emptyList();
char[] sentenceConverted = CharTable.convert(sentence);
Table table = new Table();
// As used below: column 1 of each row supplies the length that advances the
// character offset, and column 2 holds the tag.
// NOTE(review): presumably column 1 is the atom's text — confirm in atomSegmentToTable.
table.v = atomSegmentToTable(sentenceConverted);
crfModel.tag(table);
List<Term> termList = new LinkedList<Term>();
if (HanLP.Config.DEBUG) {
// The literal below reads "CRF tagging result".
System.out.println("CRF标注结果");
System.out.println(table);
}
// Running character position inside `sentence` of the current row.
int offset = 0;
// The for-update clause advances `offset` by the current row's length and then
// moves to the next row; the 'B' branch additionally advances both itself.
OUTER: for (int i = 0; i < table.v.length; offset += table.v[i][1].length(), ++i) {
String[] line = table.v[i];
switch(line[2].charAt(0)) {
case 'B':
{
// Start of a multi-row term: remember where it begins, then scan
// forward until a row whose tag starts with 'E'.
int begin = offset;
while (table.v[i][2].charAt(0) != 'E') {
offset += table.v[i][1].length();
++i;
if (i == table.v.length) {
// Ran off the end of the table without seeing an 'E' row;
// this break leaves only the while loop.
break;
}
}
if (i == table.v.length) {
// Unterminated term: emit everything from `begin` to the end of the
// scanned rows and stop the outer loop (skipping its update clause,
// which would otherwise index past the table).
termList.add(new Term(new String(sentence, begin, offset - begin), null));
break OUTER;
} else
// `offset` points at the start of the 'E' row, so its length is added
// to include it; the for-update then steps past that row.
termList.add(new Term(new String(sentence, begin, offset - begin + table.v[i][1].length()), null));
}
break;
default:
{
// Any other tag: the row forms a term on its own.
termList.add(new Term(new String(sentence, offset, table.v[i][1].length()), null));
}
break;
}
}
if (config.speechTagging) {
List<Vertex> vertexList = toVertexList(termList, true);
Viterbi.compute(vertexList, CoreDictionaryTransformMatrixDictionary.transformMatrixDictionary);
int i = 0;
for (Term term : termList) {
// Vertex i + 1 is paired with term i.
// NOTE(review): presumably toVertexList(..., true) prepends a start vertex — confirm.
// NOTE(review): every Term above is created with a null nature, so this guard
// only passes if toVertexList assigned one — confirm that is intended.
if (term.nature != null)
term.nature = vertexList.get(i + 1).guessNature();
++i;
}
}
if (config.useCustomDictionary) {
// Rebuild the term list from the vertices after custom-dictionary merging.
List<Vertex> vertexList = toVertexList(termList, false);
combineByCustomDictionary(vertexList);
termList = toTermList(vertexList, config.offset);
}
return termList;
}
Aggregations