Use of com.hankcs.hanlp.collection.trie.DoubleArrayTrie in project HanLP by hankcs.
The class CRFDependencyParser, method loadDat.
boolean loadDat(String path) {
    ByteArray byteArray = ByteArray.createByteArray(path);
    if (byteArray == null)
        return false;
    // Back the dependency CRF model's feature functions with a DoubleArrayTrie
    crfModel = new CRFModelForDependency(new DoubleArrayTrie<FeatureFunction>());
    return crfModel.load(byteArray);
}
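The notable detail here is that the CRF model's feature functions are stored in a DoubleArrayTrie<FeatureFunction>. For background, here is a minimal, self-contained sketch of building and querying HanLP's DoubleArrayTrie directly; the keys and values are made up for illustration, and build(TreeMap)/get(String) are assumed to behave as a sorted-key build and exact-match lookup, as in HanLP 1.x:

import com.hankcs.hanlp.collection.trie.DoubleArrayTrie;
import java.util.TreeMap;

public class DoubleArrayTrieDemo {
    public static void main(String[] args) {
        // build() expects its keys in sorted order, hence the TreeMap
        TreeMap<String, Integer> map = new TreeMap<String, Integer>();
        map.put("feature:a", 1);
        map.put("feature:b", 2);
        DoubleArrayTrie<Integer> trie = new DoubleArrayTrie<Integer>();
        trie.build(map);
        System.out.println(trie.get("feature:a")); // 1
        System.out.println(trie.get("missing"));   // null on a miss (assumed)
    }
}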
Use of com.hankcs.hanlp.collection.trie.DoubleArrayTrie in project HanLP by hankcs.
The class WordBasedGenerativeModelSegment, method GenerateWordNet.
/**
 * Generate the unigram word net
 *
 * @param wordNetStorage
 */
protected void GenerateWordNet(final WordNet wordNetStorage) {
    final char[] charArray = wordNetStorage.charArray;
    // Core dictionary lookup
    DoubleArrayTrie<CoreDictionary.Attribute>.Searcher searcher = CoreDictionary.trie.getSearcher(charArray, 0);
    while (searcher.next()) {
        wordNetStorage.add(searcher.begin + 1, new Vertex(new String(charArray, searcher.begin, searcher.length), searcher.value, searcher.index));
    }
    // Custom (user) dictionary lookup
    // if (config.useCustomDictionary)
    // {
    //     searcher = CustomDictionary.dat.getSearcher(charArray, 0);
    //     while (searcher.next())
    //     {
    //         wordNetStorage.add(searcher.begin + 1, new Vertex(new String(charArray, searcher.begin, searcher.length), searcher.value));
    //     }
    // }
    // Atomic segmentation, to keep the graph connected
    LinkedList<Vertex>[] vertexes = wordNetStorage.getVertexes();
    for (int i = 1; i < vertexes.length; ) {
        if (vertexes[i].isEmpty()) {
            // Find the next position that already has a vertex...
            int j = i + 1;
            for (; j < vertexes.length - 1; ++j) {
                if (!vertexes[j].isEmpty())
                    break;
            }
            // ...and bridge the gap with atomic (character-level) terms
            wordNetStorage.add(i, quickAtomSegment(charArray, i - 1, j - 1));
            i = j;
        } else
            i += vertexes[i].getLast().realWord.length();
    }
}
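The Searcher loop above is the core DoubleArrayTrie scanning idiom: next() advances to each dictionary hit in the text, and begin/length/value describe the current hit. A minimal sketch against a toy trie, assuming getSearcher and the Searcher fields behave as in the snippet above (the keys and payloads are illustrative):

import com.hankcs.hanlp.collection.trie.DoubleArrayTrie;
import java.util.TreeMap;

public class SearcherDemo {
    public static void main(String[] args) {
        TreeMap<String, String> map = new TreeMap<String, String>();
        map.put("he", "PRONOUN");
        map.put("hello", "GREETING");
        map.put("world", "NOUN");
        DoubleArrayTrie<String> trie = new DoubleArrayTrie<String>();
        trie.build(map);
        char[] text = "helloworld".toCharArray();
        DoubleArrayTrie<String>.Searcher searcher = trie.getSearcher(text, 0);
        // Prints every dictionary word found anywhere in the text:
        // he -> PRONOUN, hello -> GREETING, world -> NOUN
        while (searcher.next()) {
            System.out.println(new String(text, searcher.begin, searcher.length) + " -> " + searcher.value);
        }
    }
}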
Use of com.hankcs.hanlp.collection.trie.DoubleArrayTrie in project HanLP by hankcs.
The class DoubleArrayTrieSegment, method segSentence.
@Override
protected List<Term> segSentence(char[] sentence) {
    char[] charArray = sentence;
    // wordNet[i] holds the length of the longest word starting at i (default 1)
    final int[] wordNet = new int[charArray.length];
    Arrays.fill(wordNet, 1);
    final Nature[] natureArray = config.speechTagging ? new Nature[charArray.length] : null;
    // Longest-match scan against the core dictionary
    DoubleArrayTrie<CoreDictionary.Attribute>.Searcher searcher = CoreDictionary.trie.getSearcher(sentence, 0);
    while (searcher.next()) {
        int length = searcher.length;
        if (length > wordNet[searcher.begin]) {
            wordNet[searcher.begin] = length;
            if (config.speechTagging) {
                natureArray[searcher.begin] = searcher.value.nature[0];
            }
        }
    }
    // Let longer custom-dictionary hits override core-dictionary matches
    if (config.useCustomDictionary) {
        CustomDictionary.parseText(charArray, new AhoCorasickDoubleArrayTrie.IHit<CoreDictionary.Attribute>() {
            @Override
            public void hit(int begin, int end, CoreDictionary.Attribute value) {
                int length = end - begin;
                if (length > wordNet[begin]) {
                    wordNet[begin] = length;
                    if (config.speechTagging) {
                        natureArray[begin] = value.nature[0];
                    }
                }
            }
        });
    }
    LinkedList<Term> termList = new LinkedList<Term>();
    if (config.speechTagging) {
        // Fill untagged gaps with atomic segments (numbers, letters, single chars)
        for (int i = 0; i < natureArray.length; ) {
            if (natureArray[i] == null) {
                int j = i + 1;
                for (; j < natureArray.length; ++j) {
                    if (natureArray[j] != null)
                        break;
                }
                List<AtomNode> atomNodeList = quickAtomSegment(charArray, i, j);
                for (AtomNode atomNode : atomNodeList) {
                    if (atomNode.sWord.length() >= wordNet[i]) {
                        wordNet[i] = atomNode.sWord.length();
                        natureArray[i] = atomNode.getNature();
                        i += wordNet[i];
                    }
                }
                i = j;
            } else {
                ++i;
            }
        }
    }
    // Emit terms by jumping from word start to word start
    for (int i = 0; i < wordNet.length; ) {
        Term term = new Term(new String(charArray, i, wordNet[i]), config.speechTagging ? (natureArray[i] == null ? Nature.nz : natureArray[i]) : null);
        term.offset = i;
        termList.add(term);
        i += wordNet[i];
    }
    return termList;
}
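To exercise this method, the segmenter can be driven through the public Segment API. A hedged usage sketch, assuming HanLP 1.x where DoubleArrayTrieSegment lives under com.hankcs.hanlp.seg.Other (the package path may differ between versions):

import com.hankcs.hanlp.seg.Other.DoubleArrayTrieSegment;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;
import java.util.List;

public class SegmentDemo {
    public static void main(String[] args) {
        // Enabling speech tagging drives the natureArray branch in segSentence above
        Segment segment = new DoubleArrayTrieSegment().enablePartOfSpeechTagging(true);
        List<Term> termList = segment.seg("我爱自然语言处理");
        for (Term term : termList) {
            System.out.println(term.word + "/" + term.nature + " @" + term.offset);
        }
    }
}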