use of com.hankcs.hanlp.corpus.occurrence.TermFrequency in project HanLP by hankcs.
Example: the add method of the class TFDictionary.
/**
 * Records one occurrence of {@code key}: bumps the existing counter,
 * or registers a fresh counter for a first-time key.
 *
 * @param key the term to count
 */
public void add(String key) {
    TermFrequency existing = trie.get(key);
    if (existing != null) {
        // Seen before: increment its count.
        existing.increase();
    } else {
        // First occurrence: store a new counter keyed by the term.
        trie.put(key, new TermFrequency(key));
    }
}
use of com.hankcs.hanlp.corpus.occurrence.TermFrequency in project HanLP by hankcs.
Example: the testViewNGramDictionary method of the class TestAdjustCoreDictionary.
/**
 * Loads the core n-gram dictionary and prints the frequency entry of
 * every word whose key contains the "##" separator.
 *
 * @throws Exception if the dictionary file cannot be loaded
 */
public void testViewNGramDictionary() throws Exception {
    TFDictionary dictionary = new TFDictionary();
    dictionary.load("data/dictionary/CoreNatureDictionary.ngram.txt");
    for (Map.Entry<String, TermFrequency> item : dictionary.entrySet()) {
        TermFrequency tf = item.getValue();
        // Only dump entries whose key embeds the "##" marker.
        if (item.getKey().contains("##")) {
            System.out.println(tf);
        }
    }
}
use of com.hankcs.hanlp.corpus.occurrence.TermFrequency in project HanLP by hankcs.
Example: the main method of the class DemoOccurrence.
/**
 * Demo: feeds a sample text into the co-occurrence counter, then prints
 * all unigrams, plus the bigrams and trigrams flagged as "right".
 *
 * @param args unused
 */
public static void main(String[] args) {
    Occurrence occurrence = new Occurrence();
    occurrence.addAll("在计算机音视频和图形图像技术等二维信息算法处理方面目前比较先进的视频处理算法");
    occurrence.compute();

    // Unigrams: print every term with its frequency.
    for (Map.Entry<String, TermFrequency> e : occurrence.getUniGram()) {
        System.out.println(e.getValue());
    }

    // Bigrams: only those passing the isRight() filter.
    for (Map.Entry<String, PairFrequency> e : occurrence.getBiGram()) {
        PairFrequency pair = e.getValue();
        if (pair.isRight()) {
            System.out.println(pair);
        }
    }

    // Trigrams: same filter as bigrams.
    for (Map.Entry<String, TriaFrequency> e : occurrence.getTriGram()) {
        TriaFrequency tria = e.getValue();
        if (tria.isRight()) {
            System.out.println(tria);
        }
    }
}
use of com.hankcs.hanlp.corpus.occurrence.TermFrequency in project HanLP by hankcs.
Example: the combine method of the class TFDictionary.
/**
 * Merges several frequency dictionaries into the first (main) one and
 * writes the merged result back to the main dictionary's path.
 * For the distinction between the main dictionary and the others, see
 * com.hankcs.hanlp.corpus.dictionary.TFDictionary#combine(com.hankcs.hanlp.corpus.dictionary.TFDictionary, int, boolean)
 *
 * @param path paths of the dictionaries; the first element is the main dictionary
 * @return the number of entries added to the main dictionary, or -1 on failure
 */
public static int combine(String... path) {
    TFDictionary dictionaryMain = new TFDictionary();
    dictionaryMain.load(path[0]);
    int preSize = dictionaryMain.trie.size();
    // Fold every secondary dictionary into the main one.
    for (int i = 1; i < path.length; ++i) {
        TFDictionary dictionary = new TFDictionary();
        dictionary.load(path[i]);
        dictionaryMain.combine(dictionary, 1, true);
    }
    // try-with-resources guarantees the writer is flushed and closed even
    // if writing fails — the previous version leaked the stream on exception.
    try (BufferedWriter bw = new BufferedWriter(
            new OutputStreamWriter(IOUtil.newOutputStream(path[0]), "UTF-8"))) {
        for (Map.Entry<String, TermFrequency> entry : dictionaryMain.trie.entrySet()) {
            bw.write(entry.getKey());
            bw.write(' ');
            bw.write(String.valueOf(entry.getValue().getValue()));
            bw.newLine();
        }
    } catch (Exception e) {
        // Broad catch kept deliberately: any failure reports -1 to the caller.
        e.printStackTrace();
        return -1;
    }
    return dictionaryMain.trie.size() - preSize;
}
Aggregations