use of com.hankcs.hanlp.corpus.dictionary.item.Item in project HanLP by hankcs.
the class DictionaryMaker method normalizeFrequency.
/**
 * Normalizes label frequencies by replacing each one with its rank.
 * <p>
 * For every item, the labels are ordered by ascending current frequency and each label's
 * frequency is overwritten with its 1-based position in that ordering. The items are
 * modified in place.
 *
 * @param itemList the items whose label maps are to be rewritten
 * @return the same list instance that was passed in
 */
public static List<Item> normalizeFrequency(List<Item> itemList) {
    // Orders label entries by ascending frequency.
    Comparator<Map.Entry<String, Integer>> byFrequency = new Comparator<Map.Entry<String, Integer>>() {
        @Override
        public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
            return left.getValue().compareTo(right.getValue());
        }
    };
    for (Item item : itemList) {
        // Copy the entries into a list so they can be sorted independently of the map.
        ArrayList<Map.Entry<String, Integer>> ranked = new ArrayList<Map.Entry<String, Integer>>(item.labelMap.entrySet());
        Collections.sort(ranked, byFrequency);
        for (int position = 0; position < ranked.size(); ++position) {
            // Ranks start at 1, hence position + 1.
            item.labelMap.put(ranked.get(position).getKey(), position + 1);
        }
    }
    return itemList;
}
use of com.hankcs.hanlp.corpus.dictionary.item.Item in project HanLP by hankcs.
the class DictionaryMaker method add.
/**
 * Inserts an entry, merging it into the existing one when the key is already present.
 * <p>
 * If the trie holds no item under {@code item.key}, the given item is stored as is;
 * otherwise {@code combine} is called on the stored item with the given one.
 *
 * @param item the entry to insert or merge
 */
public void add(Item item) {
    Item existing = trie.get(item.key);
    if (existing != null) {
        existing.combine(item);
        return;
    }
    trie.put(item.key, item);
}
use of com.hankcs.hanlp.corpus.dictionary.item.Item in project HanLP by hankcs.
the class DictionaryMaker method addNotCombine.
/**
 * Inserts an entry without merging: if the trie already holds an item under the same key,
 * the given item is ignored.
 *
 * @param item the entry to insert
 */
public void addNotCombine(Item item) {
    if (trie.get(item.key) != null) {
        // Key already present: keep the stored item untouched.
        return;
    }
    trie.put(item.key, item);
}
use of com.hankcs.hanlp.corpus.dictionary.item.Item in project HanLP by hankcs.
the class DictionaryMaker method loadAsItemList.
/**
 * Reads every entry of a dictionary file.
 * <p>
 * The input is decoded as UTF-8 and each line is parsed with {@code Item.create}. The stream
 * is obtained from {@code IOAdapter} when it is non-null, otherwise from a
 * {@code FileInputStream} on the given path. The reader is closed on every exit path.
 *
 * @param path path of the dictionary file
 * @return the parsed items in file order, or {@code null} if reading failed or any line
 *         could not be turned into an item
 */
public static List<Item> loadAsItemList(String path) {
    List<Item> itemList = new LinkedList<Item>();
    BufferedReader br = null;
    try {
        br = new BufferedReader(new InputStreamReader(IOAdapter == null ? new FileInputStream(path) : IOAdapter.open(path), "UTF-8"));
        String line;
        while ((line = br.readLine()) != null) {
            Item item = Item.create(line);
            if (item == null) {
                // A single malformed line aborts the whole load instead of being skipped.
                logger.warning("使用【" + line + "】创建Item失败");
                return null;
            }
            itemList.add(item);
        }
    } catch (Exception e) {
        logger.warning("读取词典" + path + "发生异常" + e);
        return null;
    } finally {
        // Release the underlying stream on every path (success, parse failure, exception).
        if (br != null) {
            try {
                br.close();
            } catch (Exception ignored) {
                // Best effort: a failure while closing a read-only stream must not
                // change the result that was already determined above.
            }
        }
    }
    return itemList;
}
use of com.hankcs.hanlp.corpus.dictionary.item.Item in project HanLP by hankcs.
the class TestCustomDictionary method testRemoveJunkWord.
/**
 * Loads the custom dictionary and saves it back to the same path through a filter whose
 * {@code onSave} returns {@code false} for items carrying any of the labels
 * {@code mq}, {@code m} or {@code t}.
 * NOTE(review): presumably items rejected by the filter are left out of the saved file;
 * confirm against DictionaryMaker.saveTxtTo.
 */
public void testRemoveJunkWord() throws Exception {
    final String dictionaryPath = "data/dictionary/custom/CustomDictionary.txt";
    DictionaryMaker maker = DictionaryMaker.load(dictionaryPath);
    DictionaryMaker.Filter junkFilter = new DictionaryMaker.Filter() {
        @Override
        public boolean onSave(Item item) {
            // True only when the item has none of the junk labels.
            return !(item.containsLabel("mq") || item.containsLabel("m") || item.containsLabel("t"));
        }
    };
    maker.saveTxtTo(dictionaryPath, junkFilter);
}
Aggregations