Example use of com.hankcs.hanlp.seg.Dijkstra.DijkstraSegment in the HanLP project by hankcs.
From class TestMakeCompanyCorpus, method testMake.
/**
 * Builds a bracketed company-name corpus from a raw company dictionary.
 *
 * <p>Each line of {@code D:\Doc\语料库\company.dic} is segmented with a
 * {@link DijkstraSegment}; the last term of the result is re-tagged as
 * {@code Nature.nis}, and the terms are written to
 * {@code data/test/nt/company.txt} as one record per line in the form
 * {@code [term1 term2 ... termN]/ntc}.
 *
 * <p>Lines are skipped when they end with {@code ")"}, are shorter than four
 * characters, contain "个体" or "个人", or produce an empty segmentation.
 *
 * @throws Exception if either file cannot be opened, read or written, or if
 *                   segmentation fails
 */
public void testMake() throws Exception {
    DijkstraSegment segment = new DijkstraSegment();
    // NOTE(review): both streams use the platform default charset because no
    // charset is passed to the reader/writer — confirm this matches the
    // encoding of company.dic and of the consumers of company.txt.
    BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream("D:\\Doc\\语料库\\company.dic")));
    try {
        BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("data/test/nt/company.txt")));
        try {
            // Upper bound on the number of input lines read; lower it to
            // produce a smaller sample.
            int limit = Integer.MAX_VALUE;
            String line;
            while ((line = reader.readLine()) != null && limit-- > 0) {
                if (line.endsWith(")")) {
                    continue;
                }
                if (line.length() < 4) {
                    continue;
                }
                if (line.contains("个体") || line.contains("个人")) {
                    continue;
                }
                List<Term> termList = segment.seg(line);
                if (termList.isEmpty()) {
                    continue;
                }
                // Re-tag the final term before it is serialised below.
                Term last = termList.get(termList.size() - 1);
                last.nature = Nature.nis;

                writer.write("[");
                for (int i = 0; i < termList.size(); i++) {
                    if (i > 0) {
                        // Single space between terms, none after the last.
                        writer.write(" ");
                    }
                    writer.write(termList.get(i).toString());
                }
                writer.write("]/ntc");
                writer.newLine();
            }
        } finally {
            // close() flushes the buffer, so no per-record flush is needed,
            // and the file handle is released even when an exception occurs.
            writer.close();
        }
    } finally {
        reader.close();
    }
}
Example use of com.hankcs.hanlp.seg.Dijkstra.DijkstraSegment in the HanLP project by hankcs.
From class TestMakeTranslateName, method testHeadNRF.
/**
 * Prints every entry of {@code data/dictionary/person/nrf.txt} whose first
 * segmented term is not tagged {@code Nature.nrf}.
 *
 * <p>Translated-name recognition is switched off on the segmenter before the
 * entries are processed. Each offending entry is printed as
 * {@code name : termList}.
 *
 * @throws Exception if reading the dictionary or segmentation fails
 */
public void testHeadNRF() throws Exception {
    DijkstraSegment segment = new DijkstraSegment();
    segment.enableTranslatedNameRecognize(false);
    for (String name : IOUtil.readLineList("data/dictionary/person/nrf.txt")) {
        List<Term> termList = segment.seg(name);
        // An empty segmentation has no head term; report the entry instead of
        // letting get(0) throw IndexOutOfBoundsException and abort the scan.
        if (termList.isEmpty() || termList.get(0).nature != Nature.nrf) {
            System.out.println(name + " : " + termList);
        }
    }
}
Example use of com.hankcs.hanlp.seg.Dijkstra.DijkstraSegment in the HanLP project by hankcs.
From class TestDijkstra, method testFixResult.
/**
 * Segments the text "2014年" with a {@link DijkstraSegment} while HanLP debug
 * mode is enabled, and prints the resulting term list to standard output.
 *
 * @throws Exception if segmentation fails
 */
public void testFixResult() throws Exception {
    final DijkstraSegment dijkstra = new DijkstraSegment();
    HanLP.Config.enableDebug(true);
    final String text = "2014年";
    final List<Term> terms = dijkstra.seg(text);
    System.out.println(terms);
}
Example use of com.hankcs.hanlp.seg.Dijkstra.DijkstraSegment in the HanLP project by hankcs.
From class TestDijkstra, method testNameRecognize.
/**
 * Segments a sample sentence containing a person name with a
 * {@link DijkstraSegment} while HanLP debug mode is enabled, and prints the
 * resulting term list to standard output.
 *
 * @throws Exception if segmentation fails
 */
public void testNameRecognize() throws Exception {
    final DijkstraSegment dijkstra = new DijkstraSegment();
    HanLP.Config.enableDebug(true);
    final String sentence = "妈蛋,你认识波多野结衣老师吗?";
    final List<Term> terms = dijkstra.seg(sentence);
    System.out.println(terms);
}
Example use of com.hankcs.hanlp.seg.Dijkstra.DijkstraSegment in the HanLP project by hankcs.
From class TestPersonRecognition, method testJPName.
/**
 * Enables HanLP debug mode, builds a {@link DijkstraSegment} with Japanese
 * name recognition switched on, segments a sample sentence, and prints the
 * resulting term list to standard output.
 *
 * @throws Exception if segmentation fails
 */
public void testJPName() throws Exception {
    HanLP.Config.enableDebug();
    final DijkstraSegment dijkstra = new DijkstraSegment();
    final Segment segment = dijkstra.enableJapaneseNameRecognize(true);
    final String sentence = "北川景子参演了林诣彬导演";
    final List<Term> terms = segment.seg(sentence);
    System.out.println(terms);
}
Aggregations