Use of org.ansj.domain.AnsjItem in project ansj_seg by NLPchina.
Class MyStaticValue, method initBigramTables.
/**
 * Loads the word-to-word association (bigram) table from {@code bigramdict.dic}.
 *
 * <p>Each non-blank line is read as {@code from@to<TAB>frequency}. Both words are looked up
 * through {@code DATDictionary.getItem}. A word that is not found but contains {@code '#'}
 * is mapped to {@code AnsjItem.BEGIN} (left side) or {@code AnsjItem.END} (right side).
 * Pairs where either side still resolves to {@code AnsjItem.NULL} are skipped. For every
 * remaining pair the frequency is stored in the left item's {@code bigramEntryMap}, keyed
 * by the right item's index.
 *
 * <p>Malformed lines (missing tab, missing {@code '@'}, or a non-numeric frequency) are
 * logged and skipped individually, so a single bad line no longer aborts the whole load.
 * I/O and encoding failures are logged as warnings and end the load.
 */
public static void initBigramTables() {
    try (BufferedReader reader = IOUtil.getReader(DicReader.getInputStream("bigramdict.dic"), "UTF-8")) {
        String line;
        while ((line = reader.readLine()) != null) {
            if (StringUtil.isBlank(line)) {
                continue;
            }

            // Expected layout: "<from>@<to>\t<freq>". Validate before indexing into the arrays.
            String[] fields = line.split("\t");
            String[] pair = fields[0].split("@");
            if (fields.length < 2 || pair.length < 2) {
                LOG.warn("bigramdict.dic: malformed line skipped: " + line, new IllegalArgumentException(line));
                continue;
            }

            int freq;
            try {
                freq = Integer.parseInt(fields[1]);
            } catch (NumberFormatException e) {
                // Only this line is unusable; keep loading the rest of the table.
                LOG.warn("数字格式异常", e);
                continue;
            }

            AnsjItem fromItem = DATDictionary.getItem(pair[0]);
            AnsjItem toItem = DATDictionary.getItem(pair[1]);

            // Words absent from the dictionary that carry '#' stand for the sentence boundaries.
            if (fromItem == AnsjItem.NULL && pair[0].contains("#")) {
                fromItem = AnsjItem.BEGIN;
            }
            if (toItem == AnsjItem.NULL && pair[1].contains("#")) {
                toItem = AnsjItem.END;
            }

            // Pairs that reference a word unknown to the dictionary cannot be stored.
            if (fromItem == AnsjItem.NULL || toItem == AnsjItem.NULL) {
                continue;
            }

            // The per-item map is created lazily on first use.
            if (fromItem.bigramEntryMap == null) {
                fromItem.bigramEntryMap = new HashMap<Integer, Integer>();
            }
            fromItem.bigramEntryMap.put(toItem.getIndex(), freq);
        }
    } catch (UnsupportedEncodingException e) {
        LOG.warn("不支持的编码", e);
    } catch (IOException e) {
        LOG.warn("IO异常", e);
    }
}
Use of org.ansj.domain.AnsjItem in project ansj_seg by NLPchina.
Class NatureRecognition, method getTermNatures.
/**
 * Returns the part-of-speech information for a single word.
 *
 * <p>Lookup order: the system dictionary ({@code DATDictionary}) first, then the
 * parameters returned by {@code getParams} (the user-defined dictionary, per the original
 * comment), then the English and number checks of {@code WordAlert}.
 *
 * @param word the word to look up
 * @return the matching {@code TermNatures}; {@code TermNatures.NULL} when nothing matches
 */
public TermNatures getTermNatures(String word) {
    // System dictionary takes precedence.
    final AnsjItem dictEntry = DATDictionary.getItem(word);
    if (dictEntry != AnsjItem.NULL) {
        return dictEntry.termNatures;
    }

    // Next, fall back to the parameters registered for this word, if any.
    final String[] wordParams = getParams(word);
    if (wordParams != null) {
        return new TermNatures(new TermNature(wordParams[0], 1));
    }

    // Finally classify by surface form.
    if (WordAlert.isEnglish(word)) {
        return TermNatures.EN;
    }
    if (WordAlert.isNumber(word)) {
        return TermNatures.M;
    }
    return TermNatures.NULL;
}
Use of org.ansj.domain.AnsjItem in project ansj_seg by NLPchina.
Class CoreLibraryMaker, method insertToArray.
/**
 * Creates an {@code AnsjItem} for a single character and stores it in {@code dat} at the
 * slot whose index equals the character's code.
 *
 * @param dat    the array that receives the new item at position {@code c}
 * @param c      the character; used as the item's name, its index and the array slot
 * @param status the status value set on the item
 * @param param  the value assigned to the item's {@code param} field
 */
private static void insertToArray(Item[] dat, char c, byte status, String param) {
    final AnsjItem charItem = new AnsjItem();
    charItem.setName(Character.toString(c));
    charItem.setIndex(c);
    charItem.setCheck(-1);
    charItem.setStatus(status);
    charItem.param = param;

    dat[c] = charItem;
}
Use of org.ansj.domain.AnsjItem in project ansj_seg by NLPchina.
Class DATDictionary, method personNameFull.
/**
 * Attaches person-name attributes from {@code PersonAttrLibrary} to the items of the given
 * double-array trie.
 *
 * <p>For each entry of the person map:
 * <ul>
 *   <li>a single-character key whose slot in {@code dat.getDAT()} is empty gets a newly
 *       created {@code AnsjItem} (base 0, check -1, status 3) stored in that slot;</li>
 *   <li>any other key is resolved through {@code dat.getItem}; keys that resolve to
 *       {@code null} are skipped;</li>
 *   <li>an item without {@code termNatures} receives {@code TermNatures.NULL} when the key
 *       is a single character below code 256, otherwise a new {@code TermNatures} built
 *       from {@code TermNature.NR};</li>
 *   <li>finally the entry's {@code PersonNatureAttr} is set on the item's term natures.</li>
 * </ul>
 *
 * @param dat the trie whose items are supplemented
 * @throws NumberFormatException declared by the signature; not thrown directly in this body
 * @throws IOException           declared by the signature; not thrown directly in this body
 */
private static void personNameFull(DoubleArrayTire dat) throws NumberFormatException, IOException {
    final HashMap<String, PersonNatureAttr> personMap = new PersonAttrLibrary().getPersonMap();
    final char initialBase = 0;

    // Supplement person-name natures.
    for (Entry<String, PersonNatureAttr> personEntry : personMap.entrySet()) {
        final String name = personEntry.getKey();
        final boolean singleChar = name.length() == 1;

        AnsjItem item = null;
        if (singleChar) {
            item = (AnsjItem) dat.getDAT()[name.charAt(0)];
        }

        if (singleChar && item == null) {
            // No item exists yet for this character: create one and register it in its slot.
            item = new AnsjItem();
            item.setBase(initialBase);
            item.setCheck(-1);
            item.setStatus((byte) 3);
            item.setName(name);
            dat.getDAT()[name.charAt(0)] = item;
        } else {
            item = dat.getItem(name);
        }

        if (item == null) {
            continue;
        }

        if (item.termNatures == null) {
            final boolean lowCodeChar = singleChar && name.charAt(0) < 256;
            if (lowCodeChar) {
                // NOTE(review): TermNatures.NULL looks like a shared constant, and the
                // setPersonNatureAttr call below is then applied to it - confirm intended.
                item.termNatures = TermNatures.NULL;
            } else {
                item.termNatures = new TermNatures(TermNature.NR);
            }
        }
        item.termNatures.setPersonNatureAttr(personEntry.getValue());
    }
}
Aggregations