use of org.ansj.domain.Term in project ansj_seg by NLPchina.
the class SynonymsRecgnitionTest method test.
/**
 * Prints each segmented term together with its synonyms, before and after
 * the default synonym library is modified through insert, append and remove.
 * The method makes no assertions; its output is meant to be inspected by hand.
 */
@Test
public void test() {
    // Use the default synonym dictionary.
    SynonymsRecgnition recognizer = new SynonymsRecgnition();
    String sentence = "我国中国就是华夏,也是天朝";

    // Plain segmentation of a shorter sentence, without synonym recognition applied.
    printTermsWithSynonyms(ToAnalysis.parse("我国中国就是华夏"));

    System.out.println("-------------init library------------------");
    printTermsWithSynonyms(ToAnalysis.parse(sentence).recognition(recognizer));

    System.out.println("---------------insert----------------");
    SynonymsLibrary.insert(SynonymsLibrary.DEFAULT, new String[] { "中国", "我国" });
    printTermsWithSynonyms(ToAnalysis.parse(sentence).recognition(recognizer));

    System.out.println("---------------append----------------");
    SynonymsLibrary.append(SynonymsLibrary.DEFAULT, new String[] { "中国", "华夏", "天朝" });
    printTermsWithSynonyms(ToAnalysis.parse(sentence).recognition(recognizer));

    System.out.println("---------------remove----------------");
    SynonymsLibrary.remove(SynonymsLibrary.DEFAULT, "我国");
    printTermsWithSynonyms(ToAnalysis.parse(sentence).recognition(recognizer));
}

/**
 * Writes one line per term to standard output: the term name, a tab, then its synonyms.
 *
 * @param terms the terms to print, in iteration order
 */
private static void printTermsWithSynonyms(Iterable<? extends Term> terms) {
    for (Term current : terms) {
        System.out.println(current.getName() + "\t" + current.getSynonyms());
    }
}
use of org.ansj.domain.Term in project ansj_seg by NLPchina.
the class DicAnalysisTest method test1.
/**
 * Checks that words inserted into the default user dictionary are produced by
 * {@code DicAnalysis}, carry the nature they were inserted with, and are no
 * longer produced once deleted from the dictionary.
 */
@Test
public void test1() {
    // Seed the default dictionary, including overlapping candidates ("来自大" / "自大学").
    DicLibrary.insert(DicLibrary.DEFAULT, "金水区", "ad", 1000);
    DicLibrary.insert(DicLibrary.DEFAULT, "渝北区", "ad", 1000);
    DicLibrary.insert(DicLibrary.DEFAULT, "金童路", "ad", 1000);
    DicLibrary.insert(DicLibrary.DEFAULT, "奥山", "ad", 1000);
    DicLibrary.insert(DicLibrary.DEFAULT, "来自大", "ab", 1000);
    DicLibrary.insert(DicLibrary.DEFAULT, "自大学", "ab", 2000);
    DicLibrary.insert(DicLibrary.DEFAULT, "网大学", "ab", 1000);

    // Printed for manual inspection only; nothing is asserted on these results.
    System.out.println(DicAnalysis.parse("重庆重庆市渝北区金童路奥山别墅162"));
    System.out.println(DicAnalysis.parse("河南省郑州市金水区金水区农科路与文博西路交叉口向东200米路南"));
    System.out.println(DicAnalysis.parse("来自大学生小说网大学"));

    String newWord = "爸爸去哪儿";
    String nature = "aaaaa";
    String str = "上海电力2012年财务报表如下怎爸爸去哪儿么办";

    // Add new words.
    DicLibrary.insert(DicLibrary.DEFAULT, newWord, nature, 1000);
    DicLibrary.insert(DicLibrary.DEFAULT, "上海电力", nature, 1000);

    HashMap<String, Term> hs = indexByName(DicAnalysis.parse(str).getTerms());
    Assert.assertTrue(hs.containsKey(newWord));
    // JUnit's contract is assertEquals(expected, actual); the expected value goes first
    // so that a failure message reports the two values the right way round.
    Assert.assertEquals(nature, hs.get(newWord).natrue().natureStr);

    Library.insertWord(DicLibrary.get(), new Value("北京卡", "UserDefined", "1000"));
    Assert.assertEquals("北京卡", DicAnalysis.parse("北京卡机场服务").get(0).getName());

    // Delete the word and verify it is no longer segmented as a single term.
    DicLibrary.delete(DicLibrary.DEFAULT, newWord);
    hs = indexByName(DicAnalysis.parse(str).getTerms());
    Assert.assertFalse(hs.containsKey(newWord));
}

/**
 * Indexes terms by name; when several terms share a name the last one wins.
 *
 * @param terms the terms to index
 * @return a map from term name to term
 */
private static HashMap<String, Term> indexByName(List<Term> terms) {
    HashMap<String, Term> byName = new HashMap<String, Term>();
    for (Term term : terms) {
        byName.put(term.getName(), term);
    }
    return byName;
}
use of org.ansj.domain.Term in project ansj_seg by NLPchina.
the class SpeedTest method main.
/**
 * Rough throughput benchmark: streams a fixed corpus file through {@code ToAnalysis}
 * and prints the elapsed milliseconds and the number of characters processed per second.
 *
 * @param args unused
 * @throws IOException if the corpus cannot be read
 */
public static void main(String[] args) throws IOException {
    // One parse before timing starts; presumably a warm-up so one-time initialisation
    // is not counted in the measurement — TODO confirm.
    ToAnalysis.parse("test---aaaa中国孙健测试");

    // Open the corpus once and hand this same reader to the analyzer, so that no
    // second, never-closed reader is created on the file.
    BufferedReader reader = IOUtil.getReader("/home/ansj/data/allSportsArticle", IOUtil.UTF8);
    try {
        long start = System.currentTimeMillis();
        long allCount = 0;

        ToAnalysis toAnalysis = new ToAnalysis(reader);
        Term term;
        while ((term = toAnalysis.next()) != null) {
            allCount += term.getName().length();
        }

        // Elapsed time is end - start; the reverse order printed a negative number.
        long elapsedMillis = System.currentTimeMillis() - start;
        System.out.println(elapsedMillis);

        // Clamp to 1 ms so a very fast run cannot trigger a division by zero.
        long divisor = Math.max(1L, elapsedMillis);
        System.out.println("共 " + allCount + " 个字符,每秒处理了:" + (allCount * 1000 / divisor));
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
}
use of org.ansj.domain.Term in project ansj_seg by NLPchina.
the class StopRecognition method recognition.
/**
 * Removes from the result's term list every term for which {@code filter} returns true.
 * Removal goes through the list's iterator, so the list is modified in place.
 *
 * @param result the result whose terms are filtered
 */
@Override
public void recognition(Result result) {
    List<Term> terms = result.getTerms();
    for (Iterator<Term> cursor = terms.iterator(); cursor.hasNext();) {
        Term candidate = cursor.next();
        if (!filter(candidate)) {
            continue;
        }
        cursor.remove();
    }
}
use of org.ansj.domain.Term in project ansj_seg by NLPchina.
the class Analysis method analysisStr.
/**
 * Segments one whole sentence, giving user-configured ambiguity rules priority.
 * <p>
 * When an ambiguity forest is set, each match it yields is added to the graph as
 * fixed terms built from the match's (word, nature) parameter pairs; the text between
 * matches, and any text after the last match, is passed to {@code analysis}.
 *
 * @param temp the sentence to segment
 * @return the terms obtained from {@code getResult} for the populated graph
 */
private List<Term> analysisStr(String temp) {
    Graph graph = new Graph(temp);
    // Offset of the first character that has not been handled yet.
    int cursor = 0;

    if (this.ambiguityForest != null) {
        GetWord matcher = new GetWord(this.ambiguityForest, graph.chars);
        while (matcher.getFrontWords() != null) {
            // Segment the gap between the previous position and this match.
            if (cursor < matcher.offe) {
                analysis(graph, cursor, matcher.offe);
            }

            // Parameters are read as consecutive (word, nature) pairs.
            String[] pairs = matcher.getParams();
            cursor = matcher.offe;
            int idx = 0;
            while (idx < pairs.length) {
                String word = pairs[idx];
                TermNature nature = new TermNature(pairs[idx + 1], 1);
                graph.addTerm(new Term(word, cursor, new TermNatures(nature)));
                cursor += word.length();
                idx += 2;
            }
        }
    }

    // Segment whatever remains after the last ambiguity match (or the whole sentence).
    if (cursor < graph.chars.length) {
        analysis(graph, cursor, graph.chars.length);
    }

    return this.getResult(graph);
}
Aggregations