Search in sources :

Example 46 with Term

use of org.ansj.domain.Term in project ansj_seg by NLPchina.

the class Graph method optimalRoot.

/**
 * Walks backwards from {@code end} along the {@code from()} links and rewrites
 * {@code terms} so that it only holds the terms visited on that walk.
 *
 * <p>For every visited predecessor: the slots strictly between its offset and the
 * offset of its successor are set to {@code null}; the predecessor is stored at its
 * own offset when that offset is not negative; its {@code next} link is cleared; its
 * {@code to} link is pointed at the successor; and its score is cleared.
 *
 * @return the {@code root} field of this graph
 */
protected Term optimalRoot() {
    Term successor = end;
    successor.clearScore();
    for (Term predecessor = successor.from(); predecessor != null; predecessor = successor.from()) {
        final int predecessorOffset = predecessor.getOffe();
        final int successorOffset = successor.getOffe();
        // Discard every candidate that starts strictly between the two visited terms.
        for (int slot = predecessorOffset + 1; slot < successorOffset; slot++) {
            terms[slot] = null;
        }
        if (predecessorOffset >= 0) {
            terms[predecessorOffset] = predecessor;
        }
        // Cut the sideways (next) chain to save memory.
        predecessor.setNext(null);
        predecessor.setTo(successor);
        predecessor.clearScore();
        successor = predecessor;
    }
    return root;
}
Also used : Term(org.ansj.domain.Term)

Example 47 with Term

use of org.ansj.domain.Term in project ansj_seg by NLPchina.

the class Graph method rmLittlePath.

/**
 * Removes the shorter candidates that lie inside a longer candidate, as long as no
 * other candidate crosses the longer one's end.
 *
 * <p>Every index of {@code terms} except the last one is used as a start position.
 * The term returned by {@code getMaxTerm(start)} is taken as the covering term:
 * <ul>
 *   <li>a covering term whose name has length 1 is skipped;</li>
 *   <li>a covering term whose name has length 2 is skipped, together with the next
 *       index, when the next slot of {@code terms} is {@code null};</li>
 *   <li>otherwise, if some {@code getMaxTerm(j)} inside the covered span ends beyond
 *       the current end, the end is extended, nothing is removed, and scanning resumes
 *       at the extended end;</li>
 *   <li>otherwise the covering term becomes the only entry at {@code start} (its
 *       {@code next} link is cleared) and all slots up to its end are set to
 *       {@code null}.</li>
 * </ul>
 */
public void rmLittlePath() {
    final int lastIndex = terms.length - 1;
    for (int start = 0; start < lastIndex; start++) {
        final Term covering = getMaxTerm(start);
        if (covering == null) {
            continue;
        }
        int reach = covering.toValue();
        final int nameLength = covering.getName().length();

        // Length shortcut: a single character has nothing to cover.
        if (nameLength == 1) {
            continue;
        }
        // Two characters with an empty second slot: skip past the second slot as well.
        if (nameLength == 2 && terms[start + 1] == null) {
            start = start + 1;
            continue;
        }

        // Look for a candidate inside the span that ends beyond the current end.
        // The bound grows while scanning, so newly covered positions are checked too.
        boolean crossed = false;
        for (int inner = start + 1; inner < reach; inner++) {
            final Term candidate = getMaxTerm(inner);
            if (candidate != null && reach < candidate.toValue()) {
                reach = candidate.toValue();
                crossed = true;
            }
        }

        if (crossed) {
            // Overlap found: leave the span untouched and resume at its extended end
            // (the loop increment moves on to index `reach`).
            start = reach - 1;
            continue;
        }

        covering.setNext(null);
        terms[start] = covering;
        for (int covered = start + 1; covered < reach; covered++) {
            terms[covered] = null;
        }
        // FIXME: in theory the from-links of the following terms should be reset here,
        // but this has run for a long time without errors, so presumably nothing relies
        // on the doubly-linked structure. Needs confirmation whether this code is useful.
        // // point the `from` of the following `to` terms back to the covering term
        // temp = terms[i+maxTerm.getName().length()] ;
        // do{
        // temp.setFrom(maxTerm) ;
        // }while((temp=temp.next())!=null) ;
    }
}
Also used : Term(org.ansj.domain.Term)

Example 48 with Term

use of org.ansj.domain.Term in project ansj_seg by NLPchina.

the class Graph method rmLittlePathByScore.

/**
 * Removes smaller candidates that are covered by a larger candidate, provided that
 * none of the covered candidates has a numerically smaller score than the covering
 * one.
 *
 * <p>For every non-empty slot {@code i} of {@code terms}:
 * <ol>
 *   <li>the chain starting at {@code terms[i]} is scanned for the term with the
 *       numerically smallest {@code score()}; on equal scores the term with the
 *       longer name wins;</li>
 *   <li>starting from that term and following {@code next()}, each term is tried as
 *       a covering term. A covering term that spans a single position, or that sits
 *       in the last slot, is skipped. Otherwise all chains in slots
 *       {@code i .. toValue()-1} are inspected; if no term there ends beyond the
 *       covering term and none has a smaller score, slots
 *       {@code i+1 .. toValue()-1} are set to {@code null}.</li>
 * </ol>
 *
 * <p>NOTE(review): the comparisons treat a numerically smaller score as the better
 * one — confirm against the scoring code.
 */
public void rmLittlePathByScore() {
    for (int i = 0; i < terms.length; i++) {
        if (terms[i] == null) {
            continue;
        }

        // Step 1: pick the best-scoring (then longest) term of the chain at slot i.
        Term maxTerm = null;
        double maxScore = 0;
        Term term = terms[i];
        do {
            final double score = term.score();
            final boolean better = maxTerm == null
                    || maxScore > score
                    || (maxScore == score && maxTerm.getName().length() < term.getName().length());
            if (better) {
                maxTerm = term;
                // Keep the reference score in sync with the selected term; the original
                // never updated it, so every candidate was compared against 0.
                maxScore = score;
            }
        } while ((term = term.next()) != null);

        // Step 2: try the selected term and every term chained after it as a cover.
        term = maxTerm;
        do {
            final int maxTo = term.toValue();
            maxScore = term.score();
            if (maxTo - i == 1 || i + 1 == terms.length) {
                // Nothing can be covered; `continue` jumps to the loop condition.
                continue;
            }
            // Assume removable until a covered term contradicts it.
            boolean removable = true;
            scan:
            for (int j = i; j < maxTo; j++) {
                Term covered = terms[j];
                if (covered == null) {
                    continue;
                }
                do {
                    if (covered.toValue() > maxTo || covered.score() < maxScore) {
                        removable = false;
                        break scan;
                    }
                } while ((covered = covered.next()) != null);
            }
            // Check passed: drop everything that starts inside the covering term.
            if (removable) {
                for (int j = i + 1; j < maxTo; j++) {
                    terms[j] = null;
                }
            }
        } while ((term = term.next()) != null);
    }
}
Also used : Term(org.ansj.domain.Term)

Example 49 with Term

use of org.ansj.domain.Term in project ansj_seg by NLPchina.

the class TermUtil method insertTerm.

/**
 * Inserts {@code term} into the chain stored at {@code terms[term.getOffe()]}.
 *
 * <p>The chain is linked through {@code next()} and this method keeps it ordered by
 * increasing name length. If the slot is empty the term simply becomes its head. If
 * the chain already holds a term whose name has the same length, {@code type}
 * decides what happens:
 * <ul>
 *   <li>{@code InsertTermType.REPLACE} — the existing term is replaced by
 *       {@code term};</li>
 *   <li>{@code InsertTermType.SCORE_ADD_SORT} — {@code term}'s {@code score} and
 *       {@code selfScore} are added onto the existing term;</li>
 *   <li>any other value — the chain is left unchanged.</li>
 * </ul>
 *
 * @param terms array of chain heads, indexed by term offset
 * @param term  the term to insert
 * @param type  how to treat an existing term with the same name length
 */
public static void insertTerm(Term[] terms, Term term, InsertTermType type) {
    final Term head = terms[term.getOffe()];
    if (head == null) {
        terms[term.getOffe()] = term;
        return;
    }

    final int newLength = term.getName().length();
    Term previous = null;
    for (Term current = head; current != null; previous = current, current = current.next()) {
        final int currentLength = current.getName().length();

        if (currentLength == newLength) {
            // A term of the same length already exists at this offset.
            if (type == InsertTermType.REPLACE) {
                term.setNext(current.next());
                if (previous == null) {
                    terms[term.getOffe()] = term;
                } else {
                    previous.setNext(term);
                }
            } else if (type == InsertTermType.SCORE_ADD_SORT) {
                current.score(current.score() + term.score());
                current.selfScore(current.selfScore() + term.selfScore());
            }
            return;
        }

        if (currentLength > newLength) {
            // First longer term found: the new term goes right in front of it.
            if (previous == null) {
                term.setNext(current);
                terms[term.getOffe()] = term;
            } else {
                previous.setNext(term);
                term.setNext(current);
            }
            return;
        }
    }

    // Every existing term is shorter: append at the tail.
    previous.setNext(term);
}
Also used : Term(org.ansj.domain.Term)

Example 50 with Term

use of org.ansj.domain.Term in project ansj_seg by NLPchina.

the class TermUtil method makeNewTermNum.

/**
 * Builds a new term whose name is the concatenation of the names of {@code from}
 * and {@code to}, positioned at {@code from}'s offset.
 *
 * <p>The {@code numAttr} of the new term's natures is taken from {@code from}, and
 * the new term is linked to {@code to.to()} via {@code TermUtil.termLink}.
 *
 * @param from        the first term; supplies the offset and {@code numAttr}
 * @param to          the second term; its {@code to()} becomes the new term's successor
 * @param termNatures the natures passed to the new term's constructor
 * @return the newly created term
 */
public static Term makeNewTermNum(Term from, Term to, TermNatures termNatures) {
    final String mergedName = from.getName() + to.getName();
    final Term merged = new Term(mergedName, from.getOffe(), termNatures);
    merged.termNatures().numAttr = from.termNatures().numAttr;
    TermUtil.termLink(merged, to.to());
    // NOTE(review): this passes the from() of the freshly built term, not from.from();
    // confirm whether linking to the predecessor of `from` was intended.
    TermUtil.termLink(merged.from(), merged);
    return merged;
}
Also used : Term(org.ansj.domain.Term)

Aggregations

Term (org.ansj.domain.Term)55 ArrayList (java.util.ArrayList)10 Result (org.ansj.domain.Result)8 Test (org.junit.Test)8 TermNatures (org.ansj.domain.TermNatures)5 AsianPersonRecognition (org.ansj.recognition.arrimpl.AsianPersonRecognition)4 ForeignPersonRecognition (org.ansj.recognition.arrimpl.ForeignPersonRecognition)4 NumRecognition (org.ansj.recognition.arrimpl.NumRecognition)4 Graph (org.ansj.util.Graph)4 Forest (org.nlpcn.commons.lang.tire.domain.Forest)4 LinkedList (java.util.LinkedList)3 NewWord (org.ansj.domain.NewWord)3 UserDefineRecognition (org.ansj.recognition.arrimpl.UserDefineRecognition)3 NatureRecognition (org.ansj.recognition.impl.NatureRecognition)3 GetWord (org.nlpcn.commons.lang.tire.GetWord)3 BufferedReader (java.io.BufferedReader)2 HashMap (java.util.HashMap)2 TermNature (org.ansj.domain.TermNature)2 ToAnalysis (org.ansj.splitWord.analysis.ToAnalysis)2 Analyzer (org.apache.lucene.analysis.Analyzer)2