use of org.ansj.domain.Term in project ansj_seg by NLPchina.
the class Analysis method analysis.
/**
 * Walks the characters of {@code gp} in the range {@code [startOffe, endOffe)} and adds
 * terms for them to the graph.
 *
 * <p>Characters are grouped by the value returned from {@code status(char)}:
 * <ul>
 * <li>a run of status-4 characters becomes one term built by
 * {@code WordAlert.alertEnglish} with {@code TermNatures.EN};</li>
 * <li>a run of status-5 characters becomes one term built by
 * {@code WordAlert.alertNumber} with {@code TermNatures.M};</li>
 * <li>any other character starts a segment that extends until the next character whose
 * status is 4 or greater (or until {@code endOffe}); that segment is handed to
 * {@code gwi}, every word it reports is added as a term, and every position the
 * reported words leave uncovered is added as a single-character
 * {@code TermNatures.NULL} term.</li>
 * </ul>
 *
 * @param gp        graph that supplies the characters and receives the terms
 * @param startOffe index of the first character to process (inclusive)
 * @param endOffe   index at which processing stops (exclusive)
 */
private void analysis(Graph gp, int startOffe, int endOffe) {
    final char[] text = gp.chars;
    int pos = startOffe;
    while (pos < endOffe) {
        final int begin = pos;
        final int kind = status(text[begin]);

        if (kind == 4 || kind == 5) {
            // Consume the whole run of characters that share this status.
            int runEnd = begin + 1;
            while (runEnd < endOffe && status(text[runEnd]) == kind) {
                runEnd++;
            }
            final int length = runEnd - begin;
            if (kind == 4) {
                String word = WordAlert.alertEnglish(text, begin, length);
                gp.addTerm(new Term(word, begin, TermNatures.EN));
            } else {
                String word = WordAlert.alertNumber(text, begin, length);
                gp.addTerm(new Term(word, begin, TermNatures.M));
            }
            pos = runEnd;
            continue;
        }

        // The segment always contains its first character; it then grows while the
        // following characters have a status below 4.
        int segmentEnd = begin + 1;
        while (segmentEnd < endOffe && status(text[segmentEnd]) < 4) {
            segmentEnd++;
        }

        gwi.setChars(text, begin, segmentEnd);
        // First position not yet accounted for by a term reported so far.
        int covered = begin;
        String word;
        while ((word = gwi.allWords()) != null) {
            Term found = new Term(word, gwi.offe, gwi.getItem());
            // Fill the gap in front of this word with single-character terms.
            while (covered < found.getOffe()) {
                gp.addTerm(new Term(String.valueOf(text[covered]), covered, TermNatures.NULL));
                covered++;
            }
            gp.addTerm(found);
            covered = found.toValue();
        }
        // Fill whatever remains after the last reported word.
        while (covered < segmentEnd) {
            gp.addTerm(new Term(String.valueOf(text[covered]), covered, TermNatures.NULL));
            covered++;
        }
        pos = segmentEnd;
    }
}
use of org.ansj.domain.Term in project ansj_seg by NLPchina.
the class Analysis method parse.
/**
 * Collects the complete segmentation result from the reader that was passed in through
 * the constructor.
 *
 * @return a {@code Result} wrapping every term returned by {@code next()}, in the order
 *         they were produced
 * @throws IOException if {@code next()} fails while reading
 */
public Result parse() throws IOException {
    List<Term> collected = new ArrayList<Term>();
    // next() signals the end of input by returning null.
    for (Term current = next(); current != null; current = next()) {
        collected.add(current);
    }
    return new Result(collected);
}
use of org.ansj.domain.Term in project ansj_seg by NLPchina.
the class Graph method merger.
/**
 * Scoring step of the traversal: applies {@code setPathScore(fromTerm, relationMap)} to
 * every term stored at position {@code to}.
 *
 * <p>If no term exists at {@code to}, a single-character term is created from
 * {@code chars[to]}, stored in {@code terms[to]}, and scored instead.
 *
 * @param fromTerm    the preceding term that the terms at {@code to} are scored against
 * @param to          index into {@code terms} / {@code chars} of the target position
 * @param relationMap relation scores forwarded unchanged to {@code setPathScore}
 */
private void merger(Term fromTerm, int to, Map<String, Double> relationMap) {
    Term candidate = terms[to];

    if (candidate == null) {
        // Nothing registered at this position: fall back to a one-character term.
        char single = chars[to];
        TermNatures natures = DATDictionary.getItem(single).termNatures;
        if (natures == null) {
            natures = TermNatures.NULL;
        }
        Term filler = new Term(String.valueOf(single), to, natures);
        terms[to] = filler;
        filler.setPathScore(fromTerm, relationMap);
        return;
    }

    // Relation: to.set(from) for every term chained at this position.
    for (; candidate != null; candidate = candidate.next()) {
        candidate.setPathScore(fromTerm, relationMap);
    }
}
use of org.ansj.domain.Term in project ansj_seg by NLPchina.
the class AnsjTokenizer method incrementToken.
/**
 * Moves to the next queued item and publishes it through the token attributes.
 *
 * <p>When {@code result} is {@code null}, {@code parse()} is called first. Items are then
 * taken from the head of {@code result}:
 * <ul>
 * <li>a {@code Term} is emitted after skipping every term for which
 * {@code filterTerm} returns {@code true}; if it has synonyms, the first synonym is
 * emitted as the token text and the remaining ones are pushed back onto the head of
 * the queue;</li>
 * <li>any other object (the pushed-back synonyms) is emitted using its
 * {@code toString()} value.</li>
 * </ul>
 *
 * <p>A non-null but empty synonym list is treated the same as no synonyms, so the term's
 * own name is emitted instead of failing on {@code synonyms.get(0)}.
 *
 * @return {@code true} if a token was published, {@code false} once the queue is
 *         exhausted (in which case {@code result} is reset to {@code null})
 * @throws IOException declared by the overridden method
 */
@Override
public final boolean incrementToken() throws IOException {
    if (result == null) {
        parse();
    }
    Object obj = result.pollFirst();
    if (obj == null) {
        result = null;
        return false;
    }
    if (obj instanceof Term) {
        clearAttributes();
        Term term = (Term) obj;
        while (filterTerm(term)) {
            // Stop word: skip it, but still count it in the position.
            term = (Term) result.pollFirst();
            if (term == null) {
                result = null;
                return false;
            }
            position++;
        }
        position++;
        // Fetch the synonyms of the accepted term.
        List<String> synonyms = term.getSynonyms();
        String rName;
        if (synonyms != null && !synonyms.isEmpty()) {
            // Queue synonyms 1..n so the following calls emit them; synonym 0 goes out now.
            for (int i = 1; i < synonyms.size(); i++) {
                result.addFirst(synonyms.get(i));
            }
            rName = synonyms.get(0);
        } else {
            rName = term.getName();
        }
        // Offsets always describe the original term text, even when a synonym is emitted.
        offsetAtt.setOffset(term.getOffe(), term.getOffe() + term.getName().length());
        typeAtt.setType(term.getNatureStr());
        positionAttr.setPositionIncrement(position);
        termAtt.setEmpty().append(rName);
    } else {
        // Previously queued synonym: only position and text are set here.
        positionAttr.setPositionIncrement(position);
        termAtt.setEmpty().append(obj.toString());
    }
    return true;
}
use of org.ansj.domain.Term in project ansj_seg by NLPchina.
the class IndexAndTest method test.
/**
 * Indexes one sentence into an in-memory directory using the Ansj index analyzer, then
 * runs a quoted search for each term that {@code IndexAnalysis.parse} produces from the
 * same sentence.
 *
 * @throws Exception if loading the dictionary, indexing, or searching fails
 */
@Test
public void test() throws Exception {
    DicLibrary.put(DicLibrary.DEFAULT, "../../library/default.dic");
    PerFieldAnalyzerWrapper indexAnalyzer = new PerFieldAnalyzerWrapper(new AnsjAnalyzer(TYPE.index_ansj));
    IndexWriterConfig writerConfig = new IndexWriterConfig(indexAnalyzer);

    String sentence = "旅游和服务是最好的";
    System.out.println(IndexAnalysis.parse(sentence));

    // Build the in-memory index.
    Directory ramIndex = new RAMDirectory();
    IndexWriter writer = new IndexWriter(ramIndex, writerConfig);
    addContent(writer, sentence);
    writer.commit();
    writer.close();
    System.out.println("索引建立完毕");

    Analyzer searchAnalyzer = new AnsjAnalyzer(AnsjAnalyzer.TYPE.index_ansj);
    System.out.println("index ok to search!");
    // Search for every segmented term as a quoted query.
    for (Term token : IndexAnalysis.parse(sentence)) {
        String word = token.getName();
        System.out.println(word);
        search(searchAnalyzer, ramIndex, "\"" + word + "\"");
    }
}
Aggregations