use of org.ansj.domain.Term in project ansj_seg by NLPchina.
the class ForeignPersonRecognition method recognition.
/**
 * Walks the term array collecting consecutive candidate terms into {@code tempList} and,
 * when a run of more than one candidate is interrupted by a non-candidate term, hands the
 * run to {@code TermUtil.insertTerm} with {@code TermNatures.NR}.
 *
 * <p>A term is a candidate when its natures are exactly {@code TermNatures.NR} or
 * {@code TermNatures.NW} (reference comparison), or when its name is a single character;
 * candidates are only collected if {@code validate} accepts the name.
 *
 * @param terms the terms to scan; {@code null} slots are skipped. The array is also stored
 *              in {@code this.terms}.
 */
public void recognition(Term[] terms) {
    this.terms = terms;
    reset();
    for (int idx = 0; idx < terms.length; idx++) {
        final Term current = terms[idx];
        if (current == null) {
            continue;
        }

        // If the would-be start of a name is a person-name prefix or suffix, ignore it.
        if (tempList.isEmpty()) {
            if (current.termNatures().personAttr.end > 10) {
                continue;
            }
            final String first = current.getName();
            if (first.length() == 1 && ISNOTFIRST.contains(first.charAt(0))) {
                continue;
            }
        }

        final String word = current.getName();
        final TermNatures natures = current.termNatures();
        final boolean candidate = natures == TermNatures.NR || natures == TermNatures.NW || word.length() == 1;
        if (candidate) {
            if (validate(word)) {
                tempList.add(current);
            }
            continue;
        }

        // A non-candidate term ends the current run: a run of one is discarded,
        // a longer run is inserted before the buffer is cleared.
        final int collected = tempList.size();
        if (collected > 1) {
            TermUtil.insertTerm(terms, tempList, TermNatures.NR);
        }
        if (collected >= 1) {
            reset();
        }
    }
}
use of org.ansj.domain.Term in project ansj_seg by NLPchina.
the class NewWordRecognition method makeNewTerm.
/**
 * Creates a term from the current buffer contents ({@code sb}) at offset {@code offe} with
 * the nature {@code tempNature}, links it between {@code from} and {@code to}, inserts it
 * into {@code terms} and finally lets {@code TermUtil.parseNature} process it.
 */
private void makeNewTerm() {
    final String word = sb.toString();
    final Term created = new Term(word, offe, tempNature.natureStr, 1);
    created.selfScore(score);
    created.setNature(tempNature);

    // Only buffers longer than three characters get sub-terms attached.
    final boolean needsSubTerms = sb.length() > 3;
    if (needsSubTerms) {
        created.setSubTerm(TermUtil.getSubTerm(from, to));
    }

    // Link the new term between its neighbours: from -> created -> to.
    TermUtil.termLink(from, created);
    TermUtil.termLink(created, to);

    TermUtil.insertTerm(terms, created, InsertTermType.SCORE_ADD_SORT);
    TermUtil.parseNature(created);
}
use of org.ansj.domain.Term in project ansj_seg by NLPchina.
the class BookRecognition method recognition.
/**
 * Merges every span of terms delimited by an opening mark and its matching closing mark
 * (as configured in {@code ruleMap}: opening name -> closing name) into a single term
 * carrying {@code nature}, then stores the resulting list back into {@code result}.
 *
 * <p>Terms outside any span are kept unchanged. If an opening mark is never closed, the
 * terms collected since that mark are appended unmerged so that no term is lost.
 *
 * @param result the segmentation result whose term list is read and then replaced
 */
public void recognition(Result result) {
    List<Term> terms = result.getTerms();
    String end = null;
    LinkedList<Term> mergeList = null;
    List<Term> list = new LinkedList<Term>();

    for (Term term : terms) {
        String name = term.getName();
        if (end == null) {
            // Not inside a span: check whether this term opens one.
            end = ruleMap.get(name);
            if (end != null) {
                mergeList = new LinkedList<Term>();
                mergeList.add(term);
            } else {
                list.add(term);
            }
        } else {
            // Inside a span: collect until the expected closing mark shows up.
            mergeList.add(term);
            if (end.equals(name)) {
                Term ft = mergeList.pollFirst();
                for (Term sub : mergeList) {
                    ft.merage(sub);
                }
                ft.setNature(nature);
                list.add(ft);
                mergeList = null;
                end = null;
            }
        }
    }

    // An opening mark without its closing mark: keep the collected terms as they were.
    // (Previously this iterated 'list' while adding to 'list', which either threw
    // ConcurrentModificationException or silently dropped the pending terms.)
    if (mergeList != null) {
        list.addAll(mergeList);
    }

    result.setTerms(list);
}
use of org.ansj.domain.Term in project ansj_seg by NLPchina.
the class NatureRecognition method guessNature.
/**
 * Guesses the nature (part of speech) of a word using rules.
 *
 * <p>The word is matched backwards, character by character, against
 * {@code SUFFIX_FOREST}; the nature string of the last matching node with status 2 or 3
 * is used to decide between {@code NT} and {@code NS}. Otherwise words shorter than five
 * characters are segmented with {@code ToAnalysis} and classified as {@code NR} if any
 * resulting term has nature string "nr"; longer words are checked with
 * {@code ForeignPersonRecognition.isFName}. The fallback is {@code NW}.
 *
 * @param word the word to classify
 * @return the guessed {@code TermNatures}; {@code TermNatures.NW} when no rule applies
 */
public static TermNatures guessNature(String word) {
    String suffixNature = null;
    int matched = 0;

    SmartForest<String[]> node = SUFFIX_FOREST;
    int pos = word.length() - 1;
    while (pos >= 0) {
        node = node.get(word.charAt(pos));
        if (node == null) {
            break;
        }
        matched++;
        final int status = node.getStatus();
        if (status == 2) {
            // Remember this nature but keep looking for a longer suffix.
            suffixNature = node.getParam()[0];
        } else if (status == 3) {
            suffixNature = node.getParam()[0];
            break;
        }
        pos--;
    }

    if ("nt".equals(suffixNature) && (matched > 1 || word.length() > 3)) {
        return TermNatures.NT;
    }
    if ("ns".equals(suffixNature)) {
        return TermNatures.NS;
    }
    if (word.length() < 5) {
        // Short word: segment it and see whether any piece is tagged as a person name.
        final Result parsed = ToAnalysis.parse(word);
        for (Term piece : parsed.getTerms()) {
            if ("nr".equals(piece.getNatureStr())) {
                return TermNatures.NR;
            }
        }
        return TermNatures.NW;
    }
    return ForeignPersonRecognition.isFName(word) ? TermNatures.NRF : TermNatures.NW;
}
use of org.ansj.domain.Term in project ansj_seg by NLPchina.
the class NatureRecognition method recognition.
/**
 * Takes a list of words and performs nature (part-of-speech) tagging on them.
 *
 * <p>Each word becomes a {@code Term} whose offset is {@code offe} plus the combined
 * length of all preceding words; the terms are then passed, wrapped in a {@code Result},
 * to a fresh {@code NatureRecognition} instance.
 *
 * @param words the words to tag, in order
 * @param offe  the offset assigned to the first word
 * @return the list of terms built from {@code words}
 */
public List<Term> recognition(List<String> words, int offe) {
    final List<Term> terms = new ArrayList<Term>(words.size());

    // Running offset: starts at the base offset and advances by each word's length.
    int position = offe;
    for (String word : words) {
        terms.add(new Term(word, position, getTermNatures(word)));
        position += word.length();
    }

    final Result wrapped = new Result(terms);
    new NatureRecognition().recognition(wrapped);
    return terms;
}
Aggregations