Search in sources :

Example 51 with Term

use of org.ansj.domain.Term in project ansj_seg by NLPchina.

the class TermUtil method parseNature.

/**
 * Refines the nature (part-of-speech tag) of a term that is currently tagged as a new word.
 *
 * <p>The term is left untouched when its nature is not {@code Nature.NW} or when its name
 * is three characters or shorter. Otherwise it is retagged in place:
 * <ul>
 *   <li>{@code "nrf"} when {@code ForeignPersonRecognition.isFName} accepts the name;</li>
 *   <li>{@code "nt"} when the {@code companyMap} entry for the last sub-term has a value
 *       greater than 1000 at index 1.</li>
 * </ul>
 *
 * @param term the term to inspect; its nature may be modified
 */
public static void parseNature(Term term) {
    if (!Nature.NW.equals(term.natrue())) {
        return;
    }
    String name = term.getName();
    if (name.length() <= 3) {
        return;
    }
    // Is it a foreign person name?
    if (ForeignPersonRecognition.isFName(name)) {
        term.setNature(NatureLibrary.getNature("nrf"));
        return;
    }
    List<Term> subTerm = term.getSubTerm();
    // Kept from the original: the sub-term list is written straight back onto the term.
    term.setSubTerm(subTerm);
    // Without sub-terms there is no last word to score; bail out instead of throwing.
    if (subTerm == null || subTerm.isEmpty()) {
        return;
    }
    // Decide whether it is an organisation name, based on the last sub-term only.
    // NOTE(review): the original also looked up the first sub-term in companyMap but never
    // used the result; that dead lookup is removed here. Confirm whether a prefix score
    // was meant to contribute to the total.
    Term last = subTerm.get(subTerm.size() - 1);
    int[] lastStats = companyMap.get(last.getName());
    int score = (lastStats == null) ? 0 : lastStats[1];
    if (score > 1000) {
        term.setNature(NatureLibrary.getNature("nt"));
    }
}
Also used : Term(org.ansj.domain.Term)

Example 52 with Term

use of org.ansj.domain.Term in project ansj_seg by NLPchina.

the class AsianPersonRecognition method getNewWords.

/**
 * Stores the given terms on this instance, runs {@code recogntion_()} and wraps every
 * returned term in a {@code NewWord} carrying that term's name and {@code Nature.NR}.
 *
 * @param terms the term array to store on this instance before recognition runs
 * @return one {@code NewWord} per term returned by {@code recogntion_()}, in the same order
 */
public List<NewWord> getNewWords(Term[] terms) {
    this.terms = terms;
    List<NewWord> newWords = new ArrayList<NewWord>();
    for (Term recognized : recogntion_()) {
        String recognizedName = recognized.getName();
        newWords.add(new NewWord(recognizedName, Nature.NR));
    }
    return newWords;
}
Also used : ArrayList(java.util.ArrayList) Term(org.ansj.domain.Term) NewWord(org.ansj.domain.NewWord)

Example 53 with Term

use of org.ansj.domain.Term in project ansj_seg by NLPchina.

the class RealWordDemo method main.

/**
 * Parses the same sentence twice with {@code ToAnalysis}: first printing the result as-is,
 * then, after setting {@code MyStaticValue.isRealName} to {@code true}, printing each
 * term's real name followed by a space.
 *
 * @param args unused
 */
public static void main(String[] args) {
    final String sentence = "Hello word是每个程序员必经之路";

    // Default mode: print the whole parse result.
    Result defaultResult = ToAnalysis.parse(sentence);
    System.out.println(defaultResult);

    // Real-name mode: flip the global flag, parse again and print each term's real name.
    MyStaticValue.isRealName = true;
    Result realNameResult = ToAnalysis.parse(sentence);
    for (Term each : realNameResult) {
        String realName = each.getRealName();
        System.out.print(realName + " ");
    }
}
Also used : Term(org.ansj.domain.Term) Result(org.ansj.domain.Result)

Example 54 with Term

use of org.ansj.domain.Term in project ansj_seg by NLPchina.

the class DicLibraryTest method insertTest2.

/**
 * Inserts a keyword into the dictionary keyed by {@code DicLibrary.DEFAULT} and asserts
 * that parsing a sentence containing it with {@code DicAnalysis} yields a term whose
 * name equals that keyword.
 */
@Test
public void insertTest2() {
    final String keyword = "增加新词";
    DicLibrary.insert(DicLibrary.DEFAULT, keyword);

    Result parsed = DicAnalysis.parse("这是用户自定义词典增加新词的例子");
    System.out.println(parsed);

    // Walk every term; once a match is seen, later terms are no longer compared.
    boolean found = false;
    for (Term candidate : parsed) {
        if (!found) {
            found = keyword.equals(candidate.getName());
        }
    }
    Assert.assertTrue(found);
}
Also used : Term(org.ansj.domain.Term) Result(org.ansj.domain.Result) Test(org.junit.Test)

Example 55 with Term

use of org.ansj.domain.Term in project ansj_seg by NLPchina.

the class DicLibraryTest method insertTest.

/**
 * Inserts a keyword together with a nature string and a frequency of 1000 into the
 * dictionary keyed by {@code DicLibrary.DEFAULT}, then asserts that parsing a sentence
 * containing it with {@code DicAnalysis} yields a term whose name equals that keyword.
 */
@Test
public void insertTest() {
    final String keyword = "增加新词";
    DicLibrary.insert(DicLibrary.DEFAULT, keyword, "我是词性", 1000);

    Result parsed = DicAnalysis.parse("这是用户自定义词典增加新词的例子");
    System.out.println(parsed);

    // Walk every term; once a match is seen, later terms are no longer compared.
    boolean found = false;
    for (Term candidate : parsed) {
        if (!found) {
            found = keyword.equals(candidate.getName());
        }
    }
    Assert.assertTrue(found);
}
Also used : Term(org.ansj.domain.Term) Result(org.ansj.domain.Result) Test(org.junit.Test)

Aggregations

Term (org.ansj.domain.Term)55 ArrayList (java.util.ArrayList)10 Result (org.ansj.domain.Result)8 Test (org.junit.Test)8 TermNatures (org.ansj.domain.TermNatures)5 AsianPersonRecognition (org.ansj.recognition.arrimpl.AsianPersonRecognition)4 ForeignPersonRecognition (org.ansj.recognition.arrimpl.ForeignPersonRecognition)4 NumRecognition (org.ansj.recognition.arrimpl.NumRecognition)4 Graph (org.ansj.util.Graph)4 Forest (org.nlpcn.commons.lang.tire.domain.Forest)4 LinkedList (java.util.LinkedList)3 NewWord (org.ansj.domain.NewWord)3 UserDefineRecognition (org.ansj.recognition.arrimpl.UserDefineRecognition)3 NatureRecognition (org.ansj.recognition.impl.NatureRecognition)3 GetWord (org.nlpcn.commons.lang.tire.GetWord)3 BufferedReader (java.io.BufferedReader)2 HashMap (java.util.HashMap)2 TermNature (org.ansj.domain.TermNature)2 ToAnalysis (org.ansj.splitWord.analysis.ToAnalysis)2 Analyzer (org.apache.lucene.analysis.Analyzer)2