Search in sources :

Example 6 with Term

use of org.ansj.domain.Term in project ansj_seg by NLPchina.

the class FileDemo method main.

/**
 * Throughput demo: streams a text file through {@code BaseAnalysis} term by term and
 * prints the elapsed milliseconds and the characters-per-second rate.
 *
 * <p>Stops once more than 30,000,000 characters of term names have been counted, or when
 * the analyzer returns {@code null}. The input reader is always closed, including when
 * analysis throws.
 *
 * @param args unused
 * @throws IOException if opening, reading, or closing the input file fails
 */
public static void main(String[] args) throws IOException {
    // MyStaticValue.isRealName = true;
    BufferedReader reader = IOUtil.getReader("/home/ansj/temp/360baikeData/360tag_all.txt", "utf-8");
    try {
        // NOTE(review): presumably a warm-up call so one-time initialization is not timed — confirm.
        ToAnalysis.parse("test 123 孙");
        Analysis na = new BaseAnalysis(reader);
        long start = System.currentTimeMillis();
        int allCount = 0;
        Term term = null;
        while ((term = na.next()) != null) {
            // Progress line for terms whose offset is a multiple of 10000.
            if (term.getOffe() % 10000 == 0) {
                System.out.println(term.getOffe() + "\t" + term.getName());
            }
            allCount += term.getName().length();
            if (allCount > 30000000) {
                break;
            }
        }
        long end = System.currentTimeMillis();
        System.out.println(end - start);
        System.out.println("共 " + allCount + " 个字符,每秒处理了:" + (allCount * 1000.0 / (end - start)));
    } finally {
        // The original never closed the reader; release the file handle on every path.
        reader.close();
    }
}
Also used : BaseAnalysis(org.ansj.splitWord.analysis.BaseAnalysis) ToAnalysis(org.ansj.splitWord.analysis.ToAnalysis) Analysis(org.ansj.splitWord.Analysis) BaseAnalysis(org.ansj.splitWord.analysis.BaseAnalysis) BufferedReader(java.io.BufferedReader) Term(org.ansj.domain.Term)

Example 7 with Term

use of org.ansj.domain.Term in project ansj_seg by NLPchina.

the class NatureTagDemo method main.

/**
 * Demo: runs {@code NatureRecognition} over a fixed, already-segmented word list and
 * prints the resulting terms.
 *
 * @param args unused
 */
public static void main(String[] args) {
    List<String> words = Arrays.asList("对", "非", "ansj", "的", "分词", "结果", "进行", "词性", "标注");
    NatureRecognition recognizer = new NatureRecognition();
    List<Term> tagged = recognizer.recognition(words, 0);
    System.out.println(tagged);
}
Also used : NatureRecognition(org.ansj.recognition.impl.NatureRecognition) Term(org.ansj.domain.Term)

Example 8 with Term

use of org.ansj.domain.Term in project ansj_seg by NLPchina.

the class NlpDemoTest method main.

/**
 * Demo: feeds a fixed sentence to an {@code NlpAnalysis} configured with the forest from
 * {@code DicLibrary.get()} and prints each term's real name and name, separated by
 * {@code "\t|\t"}.
 *
 * @param args unused
 * @throws IOException declared by the original signature
 */
public static void main(String[] args) throws IOException {
    Forest[] forests = new Forest[] { DicLibrary.get() };
    NlpAnalysis nlp = (NlpAnalysis) new NlpAnalysis().setForests(forests);
    nlp.resetContent(new StringReader("2015年无锡市突发环境事件"));
    // Pull terms until the analyzer signals exhaustion with null.
    for (Term term = nlp.next(); term != null; term = nlp.next()) {
        System.out.println(term.getRealName() + "\t|\t" + term.getName());
    }
}
Also used : StringReader(java.io.StringReader) Forest(org.nlpcn.commons.lang.tire.domain.Forest) NlpAnalysis(org.ansj.splitWord.analysis.NlpAnalysis) Term(org.ansj.domain.Term)

Example 9 with Term

use of org.ansj.domain.Term in project ansj_seg by NLPchina.

the class AsianPersonRecognition method recogntion_.

/**
 * Scans {@code terms} for candidate person names and returns every candidate that
 * {@code nameFind} accepts.
 *
 * <p>For each non-null term whose person attribute is flagged, the term's scores are reset
 * and {@code nameFind} is tried with sizes 2, 1, 0 (in that order). When a candidate is
 * found and the {@code skip} field is set, the covered terms have their scores reset and
 * scanning resumes at the candidate's {@code toValue()}.
 *
 * @return the accepted candidate terms, in discovery order; never {@code null}
 */
private List<Term> recogntion_() {
    List<Term> found = new ArrayList<Term>();
    int beginFreq = 10;
    for (int i = 0; i < terms.length; i++) {
        Term current = terms[i];
        if (current == null || !current.termNatures().personAttr.flag) {
            continue;
        }
        current.score(0);
        current.selfScore(0);
        for (int size = 2; size >= 0; size--) {
            int freq = current.termNatures().personAttr.getFreq(size, 0);
            // NOTE(review): the second clause is subsumed by the first (both require freq > 10);
            // kept verbatim because the intended threshold is not visible here.
            boolean frequentEnough = (freq > 10) || (current.getName().length() == 2 && freq > 10);
            if (!frequentEnough) {
                continue;
            }
            Term candidate = nameFind(i, beginFreq, size);
            if (candidate == null) {
                continue;
            }
            found.add(candidate);
            // Only an undisputed recognition consumes the covered terms.
            if (!skip) {
                continue;
            }
            for (int k = i; k < candidate.toValue(); k++) {
                Term covered = terms[k];
                if (covered != null) {
                    covered.score(0);
                    covered.selfScore(0);
                }
            }
            // The outer loop's i++ lands on candidate.toValue().
            i = candidate.toValue() - 1;
            break;
        }
        // Uses the term the scan started from, even if i was advanced above.
        beginFreq = current.termNatures().personAttr.begin + 1;
    }
    return found;
}
Also used : ArrayList(java.util.ArrayList) Term(org.ansj.domain.Term)

Example 10 with Term

use of org.ansj.domain.Term in project ansj_seg by NLPchina.

the class AsianPersonRecognition method nameFind.

/**
 * Person-name recognition: tries to assemble a name term starting at index {@code offe}
 * of {@code terms}.
 *
 * <p>Concatenates the names of up to {@code size + 2} non-null terms while accumulating a
 * log-based score from each term's frequencies, then adjusts the score using the next
 * non-null term (or a fixed value when the array is exhausted) and {@code beginFreq}.
 * As a side effect the {@code skip} field is reset to {@code false} on entry and, only when
 * a term is returned, set to whether no scanned term had {@code personAttr.allFreq > 0}.
 *
 * @param offe      index in {@code terms} to start scanning from; also passed as the
 *                  second constructor argument of the returned term
 * @param beginFreq value whose logarithm is subtracted from the score
 * @param size      first argument to {@code PersonNatureAttr.getFreq} and index into
 *                  {@code FACTORY}; the scan stops after {@code size + 2} terms
 * @return a new term with nature {@code TermNatures.NR} carrying the computed self score,
 *         or {@code null} if the candidate is rejected
 */
private Term nameFind(int offe, int beginFreq, int size) {
    StringBuilder sb = new StringBuilder();
    // Counts scanned terms whose personAttr.allFreq is positive.
    int undefinite = 0;
    skip = false;
    PersonNatureAttr pna = null;
    // Position of the current term within the candidate name (second argument to getFreq).
    int index = 0;
    int freq = 0;
    double allFreq = 0;
    Term term = null;
    int i = offe;
    for (; i < terms.length; i++) {
        // Original note: a name is recognized on reaching the end. Null slots are skipped.
        if (terms[i] == null) {
            continue;
        }
        term = terms[i];
        pna = term.termNatures().personAttr;
        // Frequency for this name length at this position; if it is zero the candidate is
        // impossible, so give up immediately.
        if ((freq = pna.getFreq(size, index)) == 0) {
            return null;
        }
        if (pna.allFreq > 0) {
            undefinite++;
        }
        sb.append(term.getName());
        allFreq += Math.log(term.termNatures().allFreq + 1);
        allFreq += -Math.log((freq));
        index++;
        // Stop once size + 2 terms have been collected.
        if (index == size + 2) {
            break;
        }
    }
    // NOTE(review): if the array runs out before size + 2 terms are collected, the partial
    // concatenation is still scored below — confirm this is intended.
    double score = -Math.log(FACTORY[size]);
    score += allFreq;
    double endFreq = 0;
    // Start looking for the ending word: the next non-null term after the name.
    boolean flag = true;
    while (flag) {
        i++;
        if (i >= terms.length) {
            // No following term: fall back to a fixed end frequency.
            endFreq = 10;
            flag = false;
        } else if (terms[i] != null) {
            // Reject when the last name term and the following term co-occur with frequency > 3.
            int twoWordFreq = NgramLibrary.getTwoWordFreq(term, terms[i]);
            if (twoWordFreq > 3) {
                return null;
            }
            endFreq = terms[i].termNatures().personAttr.end + 1;
            flag = false;
        }
    }
    score -= Math.log(endFreq);
    score -= Math.log(beginFreq);
    // Candidates whose score is above -3 are rejected.
    if (score > -3) {
        return null;
    }
    if (allFreq > 0 && undefinite > 0) {
        return null;
    }
    skip = undefinite == 0;
    term = new Term(sb.toString(), offe, TermNatures.NR);
    term.selfScore(score);
    return term;
}
Also used : PersonNatureAttr(org.ansj.domain.PersonNatureAttr) Term(org.ansj.domain.Term)

Aggregations

Term (org.ansj.domain.Term)55 ArrayList (java.util.ArrayList)10 Result (org.ansj.domain.Result)8 Test (org.junit.Test)8 TermNatures (org.ansj.domain.TermNatures)5 AsianPersonRecognition (org.ansj.recognition.arrimpl.AsianPersonRecognition)4 ForeignPersonRecognition (org.ansj.recognition.arrimpl.ForeignPersonRecognition)4 NumRecognition (org.ansj.recognition.arrimpl.NumRecognition)4 Graph (org.ansj.util.Graph)4 Forest (org.nlpcn.commons.lang.tire.domain.Forest)4 LinkedList (java.util.LinkedList)3 NewWord (org.ansj.domain.NewWord)3 UserDefineRecognition (org.ansj.recognition.arrimpl.UserDefineRecognition)3 NatureRecognition (org.ansj.recognition.impl.NatureRecognition)3 GetWord (org.nlpcn.commons.lang.tire.GetWord)3 BufferedReader (java.io.BufferedReader)2 HashMap (java.util.HashMap)2 TermNature (org.ansj.domain.TermNature)2 ToAnalysis (org.ansj.splitWord.analysis.ToAnalysis)2 Analyzer (org.apache.lucene.analysis.Analyzer)2