Use of com.hankcs.hanlp.seg.common.Term in project HanLP by hankcs.
The class TestSegment, method testViterbi.
public void testViterbi() throws Exception {
    HanLP.Config.enableDebug(true);        // print debug information for this segmentation run
    CustomDictionary.add("网剧");           // register "网剧" (web series) as a user-dictionary word
    Segment seg = new DijkstraSegment();
    List<Term> termList = seg.seg("优酷总裁魏明介绍了优酷2015年的内容战略,表示要以“大电影、大网剧、大综艺”为关键词");
    System.out.println(termList);
}
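For comparison, a minimal sketch (not from the HanLP test suite) of the same custom-dictionary effect through the HanLP.segment convenience entry point; the sample sentence is purely illustrative.

// Minimal sketch, assuming the default tokenizer: a CustomDictionary entry
// also influences HanLP.segment. The sentence below is illustrative only.
CustomDictionary.add("网剧");
for (Term term : HanLP.segment("大网剧时代来了")) {
    System.out.println(term.word + "/" + term.nature);
}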
Use of com.hankcs.hanlp.seg.common.Term in project HanLP by hankcs.
The class TestSegment, method testWrapper.
public void testWrapper() throws Exception {
    // Wrap a multi-line reader so the segmenter's output can be consumed term by term.
    SegmentWrapper wrapper = new SegmentWrapper(new BufferedReader(new StringReader("中科院预测科学研究中心学术委员会\nhaha")), StandardTokenizer.SEGMENT);
    Term fullTerm;
    while ((fullTerm = wrapper.next()) != null) {
        System.out.println(fullTerm);
    }
}
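A hedged sketch of driving SegmentWrapper from a file instead of an in-memory string; the path "corpus.txt" is a placeholder, and the fragment assumes java.io.FileReader is imported and an enclosing method declares throws IOException.

// Sketch: stream-segment a text file term by term. "corpus.txt" is a placeholder path.
BufferedReader reader = new BufferedReader(new FileReader("corpus.txt"));
SegmentWrapper wrapper = new SegmentWrapper(reader, StandardTokenizer.SEGMENT);
Term term;
while ((term = wrapper.next()) != null) {
    System.out.println(term.word);
}
reader.close();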
Use of com.hankcs.hanlp.seg.common.Term in project HanLP by hankcs.
The class TestSegment, method testIssue199.
public void testIssue199() throws Exception {
    Segment segment = new CRFSegment();
    // disable the custom dictionary so the CRF model segments on its own
    segment.enableCustomDictionary(false);
    segment.enablePartOfSpeechTagging(true);
    List<Term> termList = segment.seg("更多采购");
    System.out.println(termList);
    for (Term term : termList) {
        if (term.nature == null) {
            // a null nature marks a word the CRF model produced outside the core dictionary
            System.out.println("识别到新词:" + term.word);    // "new word recognized:" + term.word
        }
    }
}
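A small follow-up sketch: the same null-nature check can collect the recognized new words into a list; the collection name and the java.util.LinkedList import are assumptions for illustration.

// Sketch: gather CRF-recognized new words (nature == null) for later inspection.
List<String> newWords = new LinkedList<String>();
for (Term term : segment.seg("更多采购")) {
    if (term.nature == null) {
        newWords.add(term.word);
    }
}
System.out.println("new words: " + newWords);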
Use of com.hankcs.hanlp.seg.common.Term in project HanLP by hankcs.
The class Occurrence, method addAll.
public void addAll(List<Term> resultList) {
    // System.out.println(resultList);
    // Copy the surface form of every term into a plain String array,
    // then delegate to the String[] overload.
    String[] termList = new String[resultList.size()];
    int i = 0;
    for (Term word : resultList) {
        termList[i] = word.word;
        ++i;
    }
    addAll(termList);
}
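A hedged sketch of how a caller could reach this addAll(List<Term>) overload: segment a sentence with HanLP.segment and hand the term list to an Occurrence instance. The compute() call mirrors HanLP's phrase-extraction usage, and the sentence is illustrative.

// Sketch: feed segmentation output into Occurrence through the overload above.
Occurrence occurrence = new Occurrence();
occurrence.addAll(HanLP.segment("大数据时代的机器学习"));   // illustrative sentence
occurrence.compute();                                       // aggregate the collected co-occurrence statistics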
Use of com.hankcs.hanlp.seg.common.Term in project HanLP by hankcs.
The class CRFDependencyParser, method parse.
@Override
public CoNLLSentence parse(List<Term> termList) {
    // Build the CRF feature table: word, coarse POS (first character of the fine tag) and fine POS per row.
    Table table = new Table();
    table.v = new String[termList.size()][4];
    Iterator<Term> iterator = termList.iterator();
    for (String[] line : table.v) {
        Term term = iterator.next();
        line[0] = term.word;
        line[2] = POSUtil.compilePOS(term.nature);
        line[1] = line[2].substring(0, 1);
    }
    crfModel.tag(table);
    if (HanLP.Config.DEBUG) {
        System.out.println(table);
    }
    // Convert every tagged row into a CoNLLWord (IDs are 1-based).
    CoNLLWord[] coNLLWordArray = new CoNLLWord[table.size()];
    for (int i = 0; i < coNLLWordArray.length; i++) {
        coNLLWordArray[i] = new CoNLLWord(i + 1, table.v[i][0], table.v[i][2], table.v[i][1]);
    }
    // Resolve each predicted head offset into an actual CoNLLWord reference.
    int i = 0;
    for (String[] line : table.v) {
        CRFModelForDependency.DTag dTag = new CRFModelForDependency.DTag(line[3]);
        if (dTag.pos.endsWith("ROOT")) {
            coNLLWordArray[i].HEAD = CoNLLWord.ROOT;
        } else {
            int index = convertOffset2Index(dTag, table, i);
            if (index == -1)
                coNLLWordArray[i].HEAD = CoNLLWord.NULL;
            else
                coNLLWordArray[i].HEAD = coNLLWordArray[index];
        }
        ++i;
    }
    // Label each arc with the most likely dependency relation from the bigram model.
    for (i = 0; i < coNLLWordArray.length; i++) {
        coNLLWordArray[i].DEPREL = BigramDependencyModel.get(coNLLWordArray[i].NAME, coNLLWordArray[i].POSTAG, coNLLWordArray[i].HEAD.NAME, coNLLWordArray[i].HEAD.POSTAG);
    }
    return new CoNLLSentence(coNLLWordArray);
}
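For orientation, a minimal sketch of consuming a CoNLLSentence like the one returned above; it goes through the HanLP.parseDependency convenience method, which is not necessarily backed by this CRF parser, purely to obtain a sentence to walk.

// Sketch: iterate over a CoNLLSentence and print each dependency arc.
CoNLLSentence sentence = HanLP.parseDependency("徐先生还具体帮助他确定了把画雄鹰、松鼠和麻雀作为主攻目标。");
for (CoNLLWord word : sentence) {
    System.out.printf("%s --(%s)--> %s%n", word.LEMMA, word.DEPREL, word.HEAD.LEMMA);
}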