Use of com.chenlb.mmseg4j.MMSeg in project java-basic by tzuyichao.
Class TestMMSeg4J, method main.
/**
 * Segments a fixed sample sentence with {@code ComplexSeg} and prints every
 * resulting word on its own line to standard output.
 *
 * @param args command-line arguments (not used)
 * @throws IOException if {@code MMSeg.next()} fails while reading the input
 */
public static void main(String[] args) throws IOException {
    Dictionary dictionary = Dictionary.getInstance();
    MMSeg mmSeg = new MMSeg(
            new StringReader("上一堂課之後跑18km與2500rpm的挑戰"),
            new ComplexSeg(dictionary));
    // next() returns null once the input has been fully consumed.
    for (Word word = mmSeg.next(); word != null; word = mmSeg.next()) {
        System.out.println(word.getString());
    }
}
Use of com.chenlb.mmseg4j.MMSeg in project jstarcraft-nlp by HongZhaoHua.
Class MmsegSegmentFactory, method getNlpTokenizer.
/**
 * Creates a tokenizer backed by the segmenter that {@link #build(Map)}
 * produces for the given configuration.
 *
 * @param configurations configuration entries forwarded unchanged to {@code build}
 * @return a new {@code MmsegTokenizer} wrapping the built {@code MMSeg}
 */
@Override
protected NlpTokenizer<? extends NlpToken> getNlpTokenizer(Map<String, String> configurations) {
    return new MmsegTokenizer(build(configurations));
}
Use of com.chenlb.mmseg4j.MMSeg in project jstarcraft-nlp by HongZhaoHua.
Class MmsegSegmentFactory, method build.
/**
 * Builds an {@code MMSeg} segmenter from the given configuration.
 *
 * <p>Two entries are read through the {@code get} helper:
 * <ul>
 *   <li>{@code "dictionaryPath"} - when blank, {@code Dictionary.getInstance()} is used;
 *       otherwise the dictionary is obtained from {@code Dictionary.getInstance(File)}
 *       for that path.</li>
 *   <li>{@code "mode"} - one of {@code "Complex"}, {@code "Simple"} or {@code "MaxWord"};
 *       {@code "MaxWord"} is passed to {@code get} as the fallback argument.</li>
 * </ul>
 *
 * @param configurations configuration entries to read from
 * @return a new {@code MMSeg} created over an empty {@code StringReader}
 *         (NOTE(review): presumably the caller supplies the real input later - confirm)
 * @throws IllegalArgumentException if the mode is not one of the supported values
 */
@Override
public MMSeg build(Map<String, String> configurations) {
    Dictionary dictionary;
    String dictionaryPath = get(configurations, "dictionaryPath");
    if (StringUtility.isBlank(dictionaryPath)) {
        dictionary = Dictionary.getInstance();
    } else {
        dictionary = Dictionary.getInstance(new File(dictionaryPath));
    }

    String mode = get(configurations, "mode", "MaxWord");
    Seg seg;
    switch (mode) {
        case "Complex":
            seg = new ComplexSeg(dictionary);
            break;
        case "Simple":
            seg = new SimpleSeg(dictionary);
            break;
        case "MaxWord":
            seg = new MaxWordSeg(dictionary);
            break;
        default:
            // Name the offending value so a misconfiguration can be diagnosed.
            throw new IllegalArgumentException(
                    "Unsupported mmseg mode '" + mode + "'; expected Complex, Simple or MaxWord");
    }
    return new MMSeg(new StringReader(""), seg);
}
Use of com.chenlb.mmseg4j.MMSeg in project jstarcraft-nlp by HongZhaoHua.
Class MmsegTokenizerTestCase, method getTokenizer.
/**
 * Supplies the tokenizer under test: an {@code MmsegTokenizer} wrapping an
 * {@code MMSeg} that uses {@code ComplexSeg} with the dictionary returned by
 * {@code Dictionary.getInstance()} and an empty initial input.
 *
 * @return a freshly constructed {@code MmsegTokenizer}
 */
@Override
protected NlpTokenizer<? extends NlpToken> getTokenizer() {
    ComplexSeg algorithm = new ComplexSeg(Dictionary.getInstance());
    return new MmsegTokenizer(new MMSeg(new StringReader(""), algorithm));
}
Use of com.chenlb.mmseg4j.MMSeg in project incubator-hugegraph by apache.
Class MMSeg4JAnalyzer, method segment.
/**
 * Splits {@code text} into words using this analyzer's {@code seg} and
 * collects the word strings into a set created by
 * {@code InsertionOrderUtil.newSet()}.
 *
 * @param text the text to segment
 * @return the set of word strings produced by the segmenter
 * @throws HugeException if any exception is raised while iterating the words;
 *         the original exception is passed along as the cause argument
 */
@Override
public Set<String> segment(String text) {
    Set<String> result = InsertionOrderUtil.newSet();
    MMSeg mmSeg = new MMSeg(new StringReader(text), this.seg);
    try {
        // next() yields null when there are no more words.
        for (Word token = mmSeg.next(); token != null; token = mmSeg.next()) {
            result.add(token.getString());
        }
    } catch (Exception e) {
        throw new HugeException("MMSeg4j segment text '%s' failed", e, text);
    }
    return result;
}
Aggregations