use of org.ansj.domain.Term in project ansj_seg by NLPchina.
the class Graph method optimalRoot.
/**
 * Walks backwards from {@code end} along the {@code from()} links and
 * rewrites the {@code terms} array so that it holds the terms on that path.
 *
 * <p>For every predecessor reached, the array slots strictly between it and
 * its successor are cleared, the predecessor is stored at its own offset
 * (when that offset is non-negative), its {@code next} link is dropped, its
 * {@code to} link is pointed at the successor, and its score is cleared.
 *
 * @return the {@code root} term of this graph
 */
protected Term optimalRoot() {
    Term successor = end;
    successor.clearScore();

    Term current = successor.from();
    while (current != null) {
        final int offset = current.getOffe();

        // Wipe every slot lying strictly between this term and its successor.
        for (int slot = offset + 1; slot < successor.getOffe(); slot++) {
            terms[slot] = null;
        }

        // Terms with a negative offset have no slot of their own in the array.
        if (offset >= 0) {
            terms[offset] = current;
        }

        // Drop the sideways chain to save memory, then wire the forward link.
        current.setNext(null);
        current.setTo(successor);
        current.clearScore();

        successor = current;
        current = successor.from();
    }
    return root;
}
use of org.ansj.domain.Term in project ansj_seg by NLPchina.
the class Graph method rmLittlePath.
/**
 * Prunes the {@code terms} array so that, wherever no crossing is detected,
 * only the term returned by {@code getMaxTerm} survives for the span it
 * covers.
 *
 * <p>For each index (the last slot is never used as a starting point):
 * <ul>
 * <li>a missing max term or a one-character name is skipped;</li>
 * <li>a two-character name whose following slot is already empty is
 * skipped together with that slot;</li>
 * <li>otherwise the span up to {@code toValue()} is scanned; if a max term
 * inside it reaches further, the span is treated as crossing and the scan
 * resumes after the extended end without modifying anything;</li>
 * <li>if nothing crosses, the max term becomes the only entry at its index
 * and every slot inside the span is cleared.</li>
 * </ul>
 *
 * NOTE(review): {@code getMaxTerm} and {@code toValue} are defined outside
 * this excerpt; presumably they yield the longest term starting at an index
 * and a term's end offset respectively. Confirm against their definitions.
 */
public void rmLittlePath() {
    final int lastIndex = terms.length - 1;
    for (int pos = 0; pos < lastIndex; pos++) {
        final Term longest = getMaxTerm(pos);
        if (longest == null) {
            continue;
        }
        int reach = longest.toValue();

        // Length shortcuts: a single character has nothing to prune; a
        // two-character name with an empty next slot is already clean, so
        // resume after that slot.
        final int nameLength = longest.getName().length();
        if (nameLength == 1) {
            continue;
        }
        if (nameLength == 2 && terms[pos + 1] == null) {
            pos++;
            continue;
        }

        // Crossing check: does a max term that starts inside the span end
        // beyond it? The bound grows while scanning.
        boolean crossing = false;
        for (int inner = pos + 1; inner < reach; inner++) {
            final Term candidate = getMaxTerm(inner);
            if (candidate != null && reach < candidate.toValue()) {
                reach = candidate.toValue();
                crossing = true;
            }
        }

        if (crossing) {
            // Leave the crossing region untouched; the loop increment moves
            // on to the first index after it.
            pos = reach - 1;
            continue;
        }

        // No crossing: keep only the max term and clear the covered slots.
        longest.setNext(null);
        terms[pos] = longest;
        for (int covered = pos + 1; covered < reach; covered++) {
            terms[covered] = null;
        }
        // FIXME: in theory the back links should be reset here. It has run
        // for a long time without errors, so the code presumably does not
        // depend on the two-way links. Needs confirmation whether the
        // following (disabled) code is useful:
        // // point the `from` of the following terms back at the kept term
        // temp = terms[i+maxTerm.getName().length()] ;
        // do{
        // temp.setFrom(maxTerm) ;
        // }while((temp=temp.next())!=null) ;
    }
}
use of org.ansj.domain.Term in project ansj_seg by NLPchina.
the class Graph method rmLittlePathByScore.
/**
 * Removes terms that start inside the span of a covering term, based on
 * score.
 *
 * <p>For every non-empty slot a starting term is picked from the slot's
 * chain. That term and each term after it in the chain are then treated in
 * turn as the covering term. The slots after the start index and before the
 * covering term's {@code toValue()} are cleared only when no term found in
 * the span (start slot included) ends beyond the covering term or has a
 * score lower than the covering term's score.
 *
 * <p>Covering terms whose {@code toValue()} is exactly one past the start
 * index are skipped, as is every term starting in the last slot.
 */
public void rmLittlePathByScore() {
    for (int start = 0; start < terms.length; start++) {
        final Term head = terms[start];
        if (head == null) {
            continue;
        }

        // Choose the chain entry from which the pruning pass below begins.
        // NOTE(review): the reference score stays at 0 for this whole scan
        // (the original never updates it while selecting). Confirm whether
        // it was meant to track the score of the currently chosen term.
        final double reference = 0;
        Term chosen = null;
        for (Term cand = head; cand != null; cand = cand.next()) {
            final boolean take = chosen == null
                    || reference > cand.score()
                    || (reference == cand.score()
                            && chosen.getName().length() < cand.getName().length());
            if (take) {
                chosen = cand;
            }
        }

        // Try the chosen term and every later chain entry as covering term.
        for (Term cover = chosen; cover != null; cover = cover.next()) {
            final int spanEnd = cover.toValue();
            final double coverScore = cover.score();
            if (spanEnd - start == 1 || start + 1 == terms.length) {
                continue;
            }

            // Removal is allowed until a term in the span vetoes it.
            boolean removable = true;
            for (int slot = start; removable && slot < spanEnd; slot++) {
                for (Term inner = terms[slot]; inner != null; inner = inner.next()) {
                    if (inner.toValue() > spanEnd || inner.score() < coverScore) {
                        removable = false;
                        break;
                    }
                }
            }

            // Check passed: clear everything that starts inside the span.
            if (removable) {
                for (int slot = start + 1; slot < spanEnd; slot++) {
                    terms[slot] = null;
                }
            }
        }
    }
}
use of org.ansj.domain.Term in project ansj_seg by NLPchina.
the class TermUtil method insertTerm.
/**
 * Inserts {@code term} into the chain stored at {@code terms[term.getOffe()]}.
 *
 * <p>The chain is kept ordered by name length, shorter names first. When
 * the chain already holds a term whose name has the same length, {@code type}
 * decides what happens:
 * <ul>
 * <li>{@code REPLACE} - the new term takes the existing term's place;</li>
 * <li>{@code SCORE_ADD_SORT} - the new term's {@code score} and
 * {@code selfScore} are added onto the existing term;</li>
 * <li>any other value - the chain is left unchanged.</li>
 * </ul>
 *
 * @param terms array of chain heads, indexed by term offset
 * @param term the term to insert
 * @param type how to treat an existing term with the same name length
 */
public static void insertTerm(Term[] terms, Term term, InsertTermType type) {
    final int slot = term.getOffe();
    Term current = terms[slot];

    // Empty slot: the new term becomes the whole chain.
    if (current == null) {
        terms[slot] = term;
        return;
    }

    final int len = term.getName().length();
    Term previous = null; // stays null while `current` is the chain head

    while (current != null) {
        final int currentLen = current.getName().length();

        if (currentLen == len) {
            // Same length already present: act according to the insert type.
            if (type == InsertTermType.REPLACE) {
                term.setNext(current.next());
                if (previous == null) {
                    terms[slot] = term;
                } else {
                    previous.setNext(term);
                }
            } else if (type == InsertTermType.SCORE_ADD_SORT) {
                current.score(current.score() + term.score());
                current.selfScore(current.selfScore() + term.selfScore());
            }
            return;
        }

        if (currentLen > len) {
            // First longer entry found: splice the new term in before it.
            if (previous == null) {
                term.setNext(current);
                terms[slot] = term;
            } else {
                previous.setNext(term);
                term.setNext(current);
            }
            return;
        }

        previous = current;
        current = current.next();
    }

    // Every existing entry is shorter: append at the tail.
    previous.setNext(term);
}
use of org.ansj.domain.Term in project ansj_seg by NLPchina.
the class TermUtil method makeNewTermNum.
/**
 * Builds a new term from two existing ones.
 *
 * <p>The new term's name is the concatenation of both names, its offset is
 * that of {@code from}, and its {@code numAttr} is copied from
 * {@code from}. It is then linked to {@code to.to()} through
 * {@code TermUtil.termLink}.
 *
 * NOTE(review): the second {@code termLink} call reads {@code from()} on
 * the term that was just constructed, not on the {@code from} parameter.
 * Confirm whether {@code from.from()} was intended.
 * NOTE(review): {@code numAttr} is assigned through
 * {@code termNatures()} of the new term; if the constructor keeps the
 * passed {@code termNatures} by reference this also changes the caller's
 * object. Verify against the {@code Term} constructor.
 *
 * @param from the first of the two terms to merge
 * @param to the second of the two terms to merge
 * @param termNatures natures handed to the new term's constructor
 * @return the newly created term
 */
public static Term makeNewTermNum(Term from, Term to, TermNatures termNatures) {
    final String mergedName = from.getName() + to.getName();
    final Term merged = new Term(mergedName, from.getOffe(), termNatures);
    merged.termNatures().numAttr = from.termNatures().numAttr;

    TermUtil.termLink(merged, to.to());
    TermUtil.termLink(merged.from(), merged);
    return merged;
}
Aggregations