use of org.lionsoul.jcseg.tokenizer.core.ISegment in project jstarcraft-nlp by HongZhaoHua.
the class JcsegTokenizerTestCase method getTokenizer.
/**
 * Creates the tokenizer under test: a {@code JcsegTokenizer} wrapping a Jcseg
 * segment built in complex mode.
 *
 * @return a new {@code JcsegTokenizer} instance
 * @throws RuntimeException wrapping any exception raised while building the segment
 */
@Override
protected NlpTokenizer<? extends NlpToken> getTokenizer() {
    try {
        // Automatically locate the jcseg.properties configuration.
        JcsegTaskConfig taskConfig = new JcsegTaskConfig(true);
        // Default singleton dictionary.
        ADictionary lexicon = DictionaryFactory.createSingletonDictionary(taskConfig);
        // Build the ISegment from the given JcsegTaskConfig and ADictionary.
        Object[] arguments = new Object[] { taskConfig, lexicon };
        ISegment complexSegment = SegmentFactory.createJcseg(JcsegTaskConfig.COMPLEX_MODE, arguments);
        return new JcsegTokenizer(complexSegment);
    } catch (Exception failure) {
        throw new RuntimeException(failure);
    }
}
use of org.lionsoul.jcseg.tokenizer.core.ISegment in project jstarcraft-nlp by HongZhaoHua.
the class JcsegSegmentFactory method build.
/**
 * Builds a Jcseg {@code ISegment} for the segmentation mode named in the
 * configuration map.
 *
 * <p>The mode is read through {@code get(configurations, "mode", "nlp")}
 * (presumably the "mode" entry with "nlp" as the fallback; confirm against the
 * helper). Accepted values are {@code simple}, {@code complex}, {@code detect},
 * {@code search}, {@code delimiter} and {@code nlp}. The historical misspelling
 * {@code detech} is still accepted so existing configurations keep working.
 *
 * @param configurations settings for the factory; only "mode" is read here
 * @return the segment created for the selected mode
 * @throws IllegalArgumentException if the mode name is not one of the accepted values
 * @throws RuntimeException wrapping any exception raised by {@code SegmentFactory.createJcseg}
 */
@Override
public ISegment build(Map<String, String> configurations) {
    String configuration = get(configurations, "mode", "nlp");
    int mode;
    switch (configuration) {
    case "simple":
        mode = JcsegTaskConfig.SIMPLE_MODE;
        break;
    case "complex":
        mode = JcsegTaskConfig.COMPLEX_MODE;
        break;
    // "detech" is a legacy typo kept for backward compatibility; "detect" is the correct spelling.
    case "detech":
    case "detect":
        mode = JcsegTaskConfig.DETECT_MODE;
        break;
    case "search":
        mode = JcsegTaskConfig.SEARCH_MODE;
        break;
    case "delimiter":
        mode = JcsegTaskConfig.DELIMITER_MODE;
        break;
    case "nlp":
        mode = JcsegTaskConfig.NLP_MODE;
        break;
    default:
        // Name the rejected value so a misconfiguration is easy to diagnose.
        throw new IllegalArgumentException("Unsupported Jcseg mode '" + configuration + "'; expected one of: simple, complex, detect, search, delimiter, nlp");
    }
    JcsegTaskConfig config = new JcsegTaskConfig(true);
    ADictionary dictionary = DictionaryFactory.createSingletonDictionary(config);
    try {
        return SegmentFactory.createJcseg(mode, new Object[] { config, dictionary });
    } catch (Exception exception) {
        throw new RuntimeException(exception);
    }
}
use of org.lionsoul.jcseg.tokenizer.core.ISegment in project jstarcraft-nlp by HongZhaoHua.
the class JcsegSegmentFactory method getNlpTokenizer.
/**
 * Creates an {@code NlpTokenizer} by wrapping the segment produced by
 * {@code build(configurations)} in a {@code JcsegTokenizer}.
 *
 * @param configurations settings forwarded unchanged to {@code build}
 * @return a new {@code JcsegTokenizer} around the built segment
 */
@Override
protected NlpTokenizer<? extends NlpToken> getNlpTokenizer(Map<String, String> configurations) {
    return new JcsegTokenizer(build(configurations));
}
use of org.lionsoul.jcseg.tokenizer.core.ISegment in project incubator-hugegraph by apache.
the class JcsegAnalyzer method segment.
/**
 * Splits the given text into words with Jcseg and returns the word values.
 *
 * <p>A new segment is created for each call from a {@code StringReader} over
 * the text together with {@code CONFIG} and {@code DIC}, using this analyzer's
 * {@code segMode}. Words are added to the result set in the order the segment
 * yields them.
 *
 * @param text the text to segment
 * @return the set of word values produced by the segment
 * @throws HugeException if creating the segment or reading words from it fails
 */
@Override
public Set<String> segment(String text) {
    Set<String> words = InsertionOrderUtil.newSet();
    try {
        StringReader input = new StringReader(text);
        ISegment segmenter = SegmentFactory.createJcseg(this.segMode, new Object[] { input, CONFIG, DIC });
        // next() returning null marks the end of the word stream.
        for (IWord token = segmenter.next(); token != null; token = segmenter.next()) {
            words.add(token.getValue());
        }
    } catch (Exception e) {
        throw new HugeException("Jcseg segment text '%s' failed", e, text);
    }
    return words;
}
Aggregations