Search in sources :

Example 1 with Term

use of org.ansj.domain.Term in project ansj_seg by NLPchina.

the class IndexAndTest method test.

/**
 * Indexes one sentence into an in-memory Lucene directory with the Ansj index analyzer,
 * then passes every term name produced by {@code IndexAnalysis.parse} for that sentence,
 * wrapped in double quotes, to {@code search}.
 *
 * @throws Exception propagated from any of the indexing or search calls
 */
@Test
public void test() throws Exception {
    DicLibrary.put(DicLibrary.DEFAULT, "../../library/default.dic");
    PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new AnsjAnalyzer(TYPE.index_ansj));
    IndexWriterConfig ic = new IndexWriterConfig(analyzer);
    String text = "旅游和服务是最好的";
    System.out.println(IndexAnalysis.parse(text));
    // Build the in-memory index.
    Directory directory = new RAMDirectory();
    // try-with-resources releases the writer even when adding content or committing throws;
    // the original only closed it on the success path.
    try (IndexWriter iwriter = new IndexWriter(directory, ic)) {
        addContent(iwriter, text);
        iwriter.commit();
    }
    System.out.println("索引建立完毕");
    Analyzer queryAnalyzer = new AnsjAnalyzer(AnsjAnalyzer.TYPE.index_ansj);
    System.out.println("index ok to search!");
    for (Term t : IndexAnalysis.parse(text)) {
        System.out.println(t.getName());
        // The term name is quoted before being handed to search.
        search(queryAnalyzer, directory, "\"" + t.getName() + "\"");
    }
}
Also used : AnsjAnalyzer(org.ansj.lucene6.AnsjAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) Term(org.ansj.domain.Term) Analyzer(org.apache.lucene.analysis.Analyzer) RAMDirectory(org.apache.lucene.store.RAMDirectory) PerFieldAnalyzerWrapper(org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Example 2 with Term

use of org.ansj.domain.Term in project ansj_seg by NLPchina.

the class KeyWordComputer method computeArticleTfidf.

/**
 * Collects one {@code Keyword} per distinct term name found in the given text.
 * <p>
 * Terms whose weight (from {@code getWeight}) is zero are skipped. The first occurrence
 * of a name creates the keyword; each further occurrence calls {@code updateWeight(1)}
 * on it. The keywords are then passed through a {@code TreeSet} and at most
 * {@code nKeyword} of them are returned.
 *
 * @param content     the body text to analyse
 * @param titleLength forwarded unchanged to {@code getWeight} for every term
 * @return the keywords in {@code TreeSet} iteration order, limited to the first
 *         {@code nKeyword} entries
 */
private List<Keyword> computeArticleTfidf(String content, int titleLength) {
    Map<String, Keyword> keywordsByName = new HashMap<String, Keyword>();
    for (Term term : analysisType.parseStr(content).getTerms()) {
        double weight = getWeight(term, content.length(), titleLength);
        if (weight != 0) {
            Keyword existing = keywordsByName.get(term.getName());
            if (existing != null) {
                // Repeated term: bump the keyword that is already registered.
                existing.updateWeight(1);
            } else {
                Keyword created = new Keyword(term.getName(), term.natrue().allFrequency, weight);
                keywordsByName.put(term.getName(), created);
            }
        }
    }
    TreeSet<Keyword> ordered = new TreeSet<Keyword>(keywordsByName.values());
    List<Keyword> ranked = new ArrayList<Keyword>(ordered);
    // Return everything when there are few enough keywords, otherwise only the head.
    return ordered.size() <= nKeyword ? ranked : ranked.subList(0, nKeyword);
}
Also used : HashMap(java.util.HashMap) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) Term(org.ansj.domain.Term)

Example 3 with Term

use of org.ansj.domain.Term in project ansj_seg by NLPchina.

the class SummaryComputer method toSummary.

/**
 * Computes a summary from the user's query string.
 * <p>
 * The query is parsed with {@code NlpAnalysis}; every term whose nature string is not
 * in {@code FILTER_SET} becomes a {@code Keyword}, and the resulting list is handed to
 * the keyword-list overload of {@code toSummary}.
 *
 * @param query the user's query string
 * @return the summary produced by {@code toSummary(List<Keyword>)}
 */
public Summary toSummary(String query) {
    List<Keyword> keywords = new ArrayList<Keyword>();
    for (Term term : NlpAnalysis.parse(query).getTerms()) {
        boolean filteredOut = FILTER_SET.contains(term.natrue().natureStr);
        if (!filteredOut) {
            keywords.add(new Keyword(term.getName(), term.termNatures().allFreq, 1));
        }
    }
    return toSummary(keywords);
}
Also used : Keyword(org.ansj.app.keyword.Keyword) ArrayList(java.util.ArrayList) Term(org.ansj.domain.Term)

Example 4 with Term

use of org.ansj.domain.Term in project ansj_seg by NLPchina.

the class DicLibraryTest method delete.

/**
 * Deletes a keyword from the default dictionary and asserts that the deleted word no
 * longer appears as a term when the sample sentence is parsed with {@code DicAnalysis}.
 */
@Test
public void delete() {
    insertTest();
    DicLibrary.delete(DicLibrary.DEFAULT, "增加新词");
    Result parse = DicAnalysis.parse("这是用户自定义词典增加新词的例子");
    System.out.println(parse);
    boolean found = false;
    for (Term term : parse) {
        // Once a match has been seen the name is no longer inspected.
        if (!found && "增加新词".equals(term.getName())) {
            found = true;
        }
    }
    Assert.assertFalse(found);
}
Also used : Term(org.ansj.domain.Term) Result(org.ansj.domain.Result) Test(org.junit.Test)

Example 5 with Term

use of org.ansj.domain.Term in project ansj_seg by NLPchina.

the class FilterRecognitionTest method test.

/**
 * Parses a sample sentence, applies a {@code StopRecognition} configured with stop
 * natures, a stop word and a stop regex, and asserts that none of the remaining terms
 * carries one of the stopped natures or names.
 */
@Test
public void test() {
    String str = "我的小鸡鸡丢了!";
    Result parse = ToAnalysis.parse(str);
    System.out.println(parse);
    StopRecognition filter = new StopRecognition();
    filter.insertStopNatures("uj");
    filter.insertStopNatures("ul");
    filter.insertStopNatures("null");
    filter.insertStopWords("我");
    filter.insertStopRegexes("小.*?");
    Result modifResult = parse.recognition(filter);
    for (Term term : modifResult) {
        // assertNotEquals compares by value. The previous assertNotSame only compared
        // references, so it did not reliably detect an equal string that survived the
        // filter. Arguments are (unexpected, actual).
        Assert.assertNotEquals("uj", term.getNatureStr());
        Assert.assertNotEquals("ul", term.getNatureStr());
        Assert.assertNotEquals("null", term.getNatureStr());
        Assert.assertNotEquals("我", term.getName());
        Assert.assertNotEquals("小鸡鸡", term.getName());
    }
    System.out.println(modifResult);
}
Also used : Term(org.ansj.domain.Term) Result(org.ansj.domain.Result) Test(org.junit.Test)

Aggregations

Term (org.ansj.domain.Term): 55, ArrayList (java.util.ArrayList): 10, Result (org.ansj.domain.Result): 8, Test (org.junit.Test): 8, TermNatures (org.ansj.domain.TermNatures): 5, AsianPersonRecognition (org.ansj.recognition.arrimpl.AsianPersonRecognition): 4, ForeignPersonRecognition (org.ansj.recognition.arrimpl.ForeignPersonRecognition): 4, NumRecognition (org.ansj.recognition.arrimpl.NumRecognition): 4, Graph (org.ansj.util.Graph): 4, Forest (org.nlpcn.commons.lang.tire.domain.Forest): 4, LinkedList (java.util.LinkedList): 3, NewWord (org.ansj.domain.NewWord): 3, UserDefineRecognition (org.ansj.recognition.arrimpl.UserDefineRecognition): 3, NatureRecognition (org.ansj.recognition.impl.NatureRecognition): 3, GetWord (org.nlpcn.commons.lang.tire.GetWord): 3, BufferedReader (java.io.BufferedReader): 2, HashMap (java.util.HashMap): 2, TermNature (org.ansj.domain.TermNature): 2, ToAnalysis (org.ansj.splitWord.analysis.ToAnalysis): 2, Analyzer (org.apache.lucene.analysis.Analyzer): 2