use of com.hankcs.hanlp.corpus.document.sentence.Sentence in project HanLP by hankcs.
the class Document method toString.
/**
 * Renders the document as the string forms of its sentences joined by single spaces.
 *
 * @return the space-separated sentences, or an empty string when there are none
 */
@Override
public String toString() {
    StringBuilder joined = new StringBuilder();
    boolean needsSeparator = false;
    for (Sentence current : sentenceList) {
        // Emit the separator before every sentence except the first,
        // so no trailing space ever has to be trimmed afterwards.
        if (needsSeparator) {
            joined.append(' ');
        }
        joined.append(current);
        needsSeparator = true;
    }
    return joined.toString();
}
use of com.hankcs.hanlp.corpus.document.sentence.Sentence in project HanLP by hankcs.
the class Document method create.
/**
 * Splits raw text into sentence chunks. A chunk is the shortest non-empty run of
 * characters that ends at one of: "。/w"; an exclamation mark tagged "/w" and followed
 * by a space; a question mark tagged "/w" and followed by a space; a newline; or the
 * end of the input.
 * <p>
 * The question mark is matched through a character class because an unescaped
 * "(?" starts a special group construct in java.util.regex and made the previous
 * pattern fail to compile. Both the ASCII and the full-width punctuation forms
 * are accepted.
 */
private static final Pattern SENTENCE_PATTERN =
        Pattern.compile(".+?((。/w)|([!！]/w )|([?？]/w )|\\n|$)");

/**
 * Builds a document from a string by cutting it into sentence chunks and
 * parsing each chunk with {@code Sentence.create}.
 *
 * @param param the text to split into sentences
 * @return the document holding every parsed sentence in order of appearance, or
 *         {@code null} if {@code Sentence.create} returns {@code null} for any chunk
 *         (a warning naming the offending chunk is logged first)
 */
public static Document create(String param) {
    Matcher matcher = SENTENCE_PATTERN.matcher(param);
    List<Sentence> sentenceList = new LinkedList<Sentence>();
    while (matcher.find()) {
        String single = matcher.group();
        Sentence sentence = Sentence.create(single);
        if (sentence == null) {
            // One unparsable chunk invalidates the whole document.
            logger.warning("使用" + single + "构建句子失败");
            return null;
        }
        sentenceList.add(sentence);
    }
    return new Document(sentenceList);
}
use of com.hankcs.hanlp.corpus.document.sentence.Sentence in project HanLP by hankcs.
the class Document method getSimpleSentenceList.
/**
 * Gets the sentences as lists of simple words; every compound word is
 * replaced by the words in its inner list.
 *
 * @return one word list per sentence, in sentence order
 */
public List<List<Word>> getSimpleSentenceList() {
    List<List<Word>> result = new LinkedList<List<Word>>();
    for (Sentence current : sentenceList) {
        List<Word> flattened = new LinkedList<Word>();
        for (IWord token : current.wordList) {
            if (!(token instanceof CompoundWord)) {
                // Anything that is not a compound word is added as-is.
                flattened.add((Word) token);
                continue;
            }
            CompoundWord compound = (CompoundWord) token;
            for (Word part : compound.innerList) {
                flattened.add(part);
            }
        }
        result.add(flattened);
    }
    return result;
}
use of com.hankcs.hanlp.corpus.document.sentence.Sentence in project HanLP by hankcs.
the class Document method getSimpleSentenceList.
/**
 * Gets the sentences as lists of simple words.
 *
 * @param spilt if {@code true}, each compound word is replaced by the words in its
 *              inner list; otherwise it is converted to a single word via {@code toWord()}
 * @return one word list per sentence, in sentence order
 */
public List<List<Word>> getSimpleSentenceList(boolean spilt) {
    List<List<Word>> result = new LinkedList<List<Word>>();
    for (Sentence current : sentenceList) {
        List<Word> words = new LinkedList<Word>();
        for (IWord token : current.wordList) {
            if (!(token instanceof CompoundWord)) {
                // Anything that is not a compound word is added as-is.
                words.add((Word) token);
            } else if (!spilt) {
                words.add(((CompoundWord) token).toWord());
            } else {
                for (Word part : ((CompoundWord) token).innerList) {
                    words.add(part);
                }
            }
        }
        result.add(words);
    }
    return result;
}
use of com.hankcs.hanlp.corpus.document.sentence.Sentence in project HanLP by hankcs.
the class Document method getSimpleSentenceList.
/**
 * Gets the sentences as lists of simple words; a compound word is replaced by the
 * words in its inner list only when its label is contained in the given set,
 * otherwise it is converted to a single word via {@code toWord()}.
 *
 * @param labelSet labels of the compound words that should be split
 * @return one word list per sentence, in sentence order
 */
public List<List<Word>> getSimpleSentenceList(Set<String> labelSet) {
    List<List<Word>> result = new LinkedList<List<Word>>();
    for (Sentence current : sentenceList) {
        List<Word> words = new LinkedList<Word>();
        for (IWord token : current.wordList) {
            if (!(token instanceof CompoundWord)) {
                // Anything that is not a compound word is added as-is.
                words.add((Word) token);
                continue;
            }
            CompoundWord compound = (CompoundWord) token;
            boolean shouldSplit = labelSet.contains(token.getLabel());
            if (!shouldSplit) {
                words.add(compound.toWord());
                continue;
            }
            for (Word part : compound.innerList) {
                words.add(part);
            }
        }
        result.add(words);
    }
    return result;
}
Aggregations