Usage of com.hankcs.hanlp.corpus.synonym.Synonym in the HanLP project by hankcs.
Method load of class CommonSynonymDictionary.
/**
 * Loads a synonym dictionary from the given stream and builds the lookup trie.
 *
 * <p>Each line is split on single spaces and handed to {@code Synonym.create};
 * the last character of the first token is used as the type of every synonym
 * on that line. Every synonym's {@code realWord} is mapped to a
 * {@code SynonymItem} holding the synonym, its whole line of synonyms and the
 * type. A word that appears on several lines keeps only the entry of the last
 * such line, because the map is keyed by the word.
 *
 * <p>The reader (and therefore the underlying stream) is always closed before
 * this method returns, on success as well as on failure.
 *
 * @param inputStream UTF-8 encoded dictionary text
 * @return {@code true} if the dictionary was read and the trie was built,
 *         {@code false} if reading failed or the trie build reported a
 *         non-zero result code
 */
public boolean load(InputStream inputStream) {
    trie = new DoubleArrayTrie<SynonymItem>();
    TreeMap<String, SynonymItem> treeMap = new TreeMap<String, SynonymItem>();
    String line = null;
    BufferedReader reader = null;
    try {
        reader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"));
        ArrayList<Synonym> synonymList = null;
        while ((line = reader.readLine()) != null) {
            String[] args = line.split(" ");
            synonymList = Synonym.create(args);
            // The trailing character of the id token encodes the relation type of the line.
            char type = args[0].charAt(args[0].length() - 1);
            for (Synonym synonym : synonymList) {
                treeMap.put(synonym.realWord, new SynonymItem(synonym, synonymList, type));
            }
        }
        // Derive the id span from the last parsed line.
        // NOTE(review): presumably the dictionary is sorted by id so the last line
        // carries the largest id - confirm against the dictionary format.
        if (synonymList != null && synonymList.size() > 0) {
            maxSynonymItemIdDistance = synonymList.get(synonymList.size() - 1).id - SynonymHelper.convertString2IdWithIndex("Aa01A01", 0) + 1;
        }
        int resultCode = trie.build(treeMap);
        if (resultCode != 0) {
            logger.warning("构建" + inputStream + "失败,错误码" + resultCode);
            return false;
        }
    } catch (Exception e) {
        // Include the exception itself so the cause is not lost.
        logger.warning("读取" + inputStream + "失败,可能由行" + line + "造成" + e);
        return false;
    } finally {
        // Release the stream on every path, including failures while parsing.
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception ignored) {
                // Best-effort close: the outcome of the load is already decided.
            }
        }
    }
    return true;
}
Usage of com.hankcs.hanlp.corpus.synonym.Synonym in the HanLP project by hankcs.
Method load of class CommonSynonymDictionaryEx.
/**
 * Loads a synonym dictionary from the given stream and builds a trie that maps
 * each word to the ids of all synonym entries it appears in.
 *
 * <p>Each line is split on single spaces and handed to {@code Synonym.create}.
 * The ids of every synonym are collected per {@code realWord} in a
 * {@code TreeSet}, so each word ends up with a sorted, duplicate-free
 * {@code Long[]} of ids.
 *
 * <p>The reader (and therefore the underlying stream) is always closed before
 * this method returns, on success as well as on failure.
 *
 * @param inputStream UTF-8 encoded dictionary text
 * @return {@code true} if the dictionary was read and the trie was built,
 *         {@code false} if reading failed or the trie build reported a
 *         non-zero result code
 */
public boolean load(InputStream inputStream) {
    trie = new DoubleArrayTrie<Long[]>();
    TreeMap<String, Set<Long>> treeMap = new TreeMap<String, Set<Long>>();
    String line = null;
    BufferedReader reader = null;
    try {
        reader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"));
        while ((line = reader.readLine()) != null) {
            String[] args = line.split(" ");
            List<Synonym> synonymList = Synonym.create(args);
            for (Synonym synonym : synonymList) {
                Set<Long> idSet = treeMap.get(synonym.realWord);
                if (idSet == null) {
                    idSet = new TreeSet<Long>();
                    treeMap.put(synonym.realWord, idSet);
                }
                idSet.add(synonym.id);
            }
        }
        // Keys and values are taken from the same sorted map, so index i of
        // keyList corresponds to index i of valueList.
        List<String> keyList = new ArrayList<String>(treeMap.keySet());
        List<Long[]> valueList = new ArrayList<Long[]>(treeMap.size());
        for (Set<Long> idSet : treeMap.values()) {
            valueList.add(idSet.toArray(new Long[0]));
        }
        int resultCode = trie.build(keyList, valueList);
        if (resultCode != 0) {
            logger.warning("构建" + inputStream + "失败,错误码" + resultCode);
            return false;
        }
    } catch (Exception e) {
        logger.warning("读取" + inputStream + "失败,可能由行" + line + "造成" + e);
        return false;
    } finally {
        // Release the stream on every path, including failures while parsing.
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception ignored) {
                // Best-effort close: the outcome of the load is already decided.
            }
        }
    }
    return true;
}
Usage of com.hankcs.hanlp.corpus.synonym.Synonym in the HanLP project by hankcs.
Method rewrite of class CommonSynonymDictionary.
/**
 * Rewrites a text by segmenting it and swapping each term for a randomly
 * chosen synonym of type {@code Type.EQUAL} when one is available.
 *
 * <p>Terms without a dictionary entry, or for which no synonym is returned,
 * are copied through unchanged. The word passed to the synonym picker as
 * context is the compiled form of the previous term's nature and word, or
 * {@code Predefine.TAG_BIGIN} for the first term.
 *
 * @param text the text to rewrite
 * @return the rewritten text
 */
public String rewrite(String text) {
    List<Term> terms = StandardTokenizer.segment(text.toCharArray());
    StringBuilder rewritten = new StringBuilder((int) (text.length() * 1.2));
    String previous = Predefine.TAG_BIGIN;
    for (Term current : terms) {
        Synonym replacement = null;
        SynonymItem entry = get(current.word);
        if (entry != null) {
            replacement = entry.randomSynonym(Type.EQUAL, previous);
        }
        if (replacement == null) {
            rewritten.append(current.word);
        } else {
            rewritten.append(replacement.realWord);
        }
        // The context for the next term is always derived from the original term.
        previous = PosTagCompiler.compile(current.nature.toString(), current.word);
    }
    return rewritten.toString();
}
Usage of com.hankcs.hanlp.corpus.synonym.Synonym in the HanLP project by hankcs.
Method rewriteQuickly of class CommonSynonymDictionary.
/**
 * Rewrites a text without segmenting it, by scanning the trie directly for the
 * longest dictionary match at each position.
 *
 * <p>At every position the trie is walked from state {@code 1}. The trie's
 * output is consulted only after consuming the second and later characters,
 * so a match always spans at least two characters. When a match is found, a
 * synonym of type {@code Type.EQUAL} is requested with the previously emitted
 * word as context. If one is returned it replaces the matched span; otherwise
 * the matched span is copied through. Positions with no match emit the single
 * character.
 *
 * @param text the text to rewrite; must not be {@code null}
 * @return the rewritten text
 */
public String rewriteQuickly(String text) {
    assert text != null;
    final int length = text.length();
    StringBuilder rewritten = new StringBuilder((int) (length * 1.2));
    String previous = Predefine.TAG_BIGIN;
    int pos = 0;
    while (pos < length) {
        SynonymItem longest = null;
        int matchEnd = -1;
        int state = trie.transition(text.charAt(pos), 1);
        if (state > 0) {
            // Extend the walk and remember the last position that produced an output.
            for (int cursor = pos + 1; cursor < length; ++cursor) {
                state = trie.transition(text.charAt(cursor), state);
                if (state < 0) {
                    break;
                }
                SynonymItem candidate = trie.output(state);
                if (candidate != null) {
                    longest = candidate;
                    matchEnd = cursor + 1;
                }
            }
        }
        if (longest == null) {
            // No dictionary word starts here: pass the character through.
            char single = text.charAt(pos);
            previous = String.valueOf(single);
            rewritten.append(single);
            ++pos;
            continue;
        }
        Synonym choice = longest.randomSynonym(Type.EQUAL, previous);
        if (choice == null) {
            previous = text.substring(pos, matchEnd);
            rewritten.append(previous);
        } else {
            rewritten.append(choice.realWord);
            previous = choice.realWord;
        }
        // Resume scanning right after the matched span.
        pos = matchEnd;
    }
    return rewritten.toString();
}
Aggregations