Search in sources :

Example 51 with IndexWriter

use of org.apache.lucene.index.IndexWriter in project elasticsearch by elastic.

the class BlendedTermQueryTests method testDismaxQuery.

/**
 * Compares ranking of blended dismax queries against plain {@link DisjunctionMaxQuery} clauses.
 *
 * <p>Three hand-written documents (ids 0..2) are indexed, followed by a random number of filler
 * documents whose username is always "foo fighters". The blended query for "foo", "fighters" and
 * "generator" is expected to rank document 0 first; the plain dismax query (which searches for the
 * spelling "fighers" that only appears in the song of document 1) is expected to rank document 1 first.
 *
 * @throws IOException if indexing or searching fails
 */
public void testDismaxQuery() throws IOException {
    Directory directory = newDirectory();
    IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random())));
    String[] usernames = { "foo fighters", "some cool fan", "cover band" };
    // Note: "fighers" in the second song is the exact term the second query block looks up.
    String[] songs = { "generator", "foo fighers - generator", "foo fighters generator" };
    final boolean omitNorms = random().nextBoolean();

    // Field type used by the three hand-written documents.
    FieldType seedType = new FieldType(TextField.TYPE_NOT_STORED);
    if (random().nextBoolean()) {
        seedType.setIndexOptions(IndexOptions.DOCS);
    } else {
        seedType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    }
    seedType.setOmitNorms(omitNorms);
    seedType.freeze();

    // Field type used by the filler documents; index options are drawn independently.
    FieldType fillerType = new FieldType(TextField.TYPE_NOT_STORED);
    if (random().nextBoolean()) {
        fillerType.setIndexOptions(IndexOptions.DOCS);
    } else {
        fillerType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    }
    fillerType.setOmitNorms(omitNorms);
    fillerType.freeze();

    for (int docId = 0; docId < usernames.length; docId++) {
        Document seedDoc = new Document();
        seedDoc.add(new TextField("id", String.valueOf(docId), Field.Store.YES));
        seedDoc.add(new Field("username", usernames[docId], seedType));
        seedDoc.add(new Field("song", songs[docId], seedType));
        writer.addDocument(seedDoc);
    }

    int fillerCount = scaledRandomIntBetween(25, 100);
    for (int n = 0; n < fillerCount; n++) {
        Document fillerDoc = new Document();
        fillerDoc.add(new TextField("id", String.valueOf(usernames.length + n), Field.Store.YES));
        fillerDoc.add(new Field("username", "foo fighters", fillerType));
        fillerDoc.add(new Field("song", "some bogus text to bump up IDF", fillerType));
        writer.addDocument(fillerDoc);
    }
    writer.commit();

    DirectoryReader indexReader = DirectoryReader.open(writer);
    IndexSearcher indexSearcher = setSimilarity(newSearcher(indexReader));
    {
        // Blended term queries across both fields: document 0 must come out on top.
        String[] searchFields = new String[] { "username", "song" };
        BooleanQuery.Builder blended = new BooleanQuery.Builder();
        blended.setDisableCoord(true);
        blended.add(BlendedTermQuery.dismaxBlendedQuery(toTerms(searchFields, "foo"), 0.1f), BooleanClause.Occur.SHOULD);
        blended.add(BlendedTermQuery.dismaxBlendedQuery(toTerms(searchFields, "fighters"), 0.1f), BooleanClause.Occur.SHOULD);
        blended.add(BlendedTermQuery.dismaxBlendedQuery(toTerms(searchFields, "generator"), 0.1f), BooleanClause.Occur.SHOULD);
        TopDocs topDocs = indexSearcher.search(blended.build(), 10);
        ScoreDoc[] hits = topDocs.scoreDocs;
        String topId = indexReader.document(hits[0].doc).getField("id").stringValue();
        assertEquals("0", topId);
    }
    {
        // Plain per-term dismax queries: document 1 must come out on top.
        BooleanQuery.Builder plain = new BooleanQuery.Builder();
        plain.setDisableCoord(true);
        DisjunctionMaxQuery fooClause = new DisjunctionMaxQuery(
            Arrays.asList(new TermQuery(new Term("username", "foo")), new TermQuery(new Term("song", "foo"))),
            0.0f);
        DisjunctionMaxQuery fighersClause = new DisjunctionMaxQuery(
            Arrays.asList(new TermQuery(new Term("username", "fighers")), new TermQuery(new Term("song", "fighers"))),
            0.0f);
        DisjunctionMaxQuery generatorClause = new DisjunctionMaxQuery(
            Arrays.asList(new TermQuery(new Term("username", "generator")), new TermQuery(new Term("song", "generator"))),
            0f);
        plain.add(fooClause, BooleanClause.Occur.SHOULD);
        plain.add(fighersClause, BooleanClause.Occur.SHOULD);
        plain.add(generatorClause, BooleanClause.Occur.SHOULD);
        TopDocs topDocs = indexSearcher.search(plain.build(), 4);
        ScoreDoc[] hits = topDocs.scoreDocs;
        String topId = indexReader.document(hits[0].doc).getField("id").stringValue();
        assertEquals("1", topId);
    }
    indexReader.close();
    writer.close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) DirectoryReader(org.apache.lucene.index.DirectoryReader) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) TextField(org.apache.lucene.document.TextField) Directory(org.apache.lucene.store.Directory)

Example 52 with IndexWriter

use of org.apache.lucene.index.IndexWriter in project ansj_seg by NLPchina.

the class HeightLightTest method index.

/**
 * Adds {@code content} to the index in the enclosing class's {@code directory} as one document
 * with a single stored "text" field, then commits.
 *
 * @param analysis analyzer handed to the {@link IndexWriterConfig}
 * @param content  text to store and index in the "text" field
 * @throws CorruptIndexException if the index is corrupt
 * @throws IOException           on any other low-level I/O failure
 */
private static void index(Analyzer analysis, String content) throws CorruptIndexException, IOException {
    Document doc = new Document();
    doc.add(new TextField("text", content, Field.Store.YES));
    // try-with-resources closes the writer even when addDocument/commit throws, so a failed
    // call does not leave an open writer (and its write lock) behind on the directory.
    try (IndexWriter iwriter = new IndexWriter(directory, new IndexWriterConfig(analysis))) {
        iwriter.addDocument(doc);
        iwriter.commit();
    }
}
Also used : IndexWriter(org.apache.lucene.index.IndexWriter) TextField(org.apache.lucene.document.TextField) Document(org.apache.lucene.document.Document) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 53 with IndexWriter

use of org.apache.lucene.index.IndexWriter in project ansj_seg by NLPchina.

the class IndexAndTest method test.

/**
 * Indexes one sentence into an in-memory index, then runs a quoted (phrase) search for every
 * term that {@code IndexAnalysis.parse} produces for that same sentence.
 *
 * @throws Exception if dictionary loading, indexing or searching fails
 */
@Test
public void test() throws Exception {
    DicLibrary.put(DicLibrary.DEFAULT, "../../library/default.dic");
    PerFieldAnalyzerWrapper indexAnalyzer = new PerFieldAnalyzerWrapper(new AnsjAnalyzer(TYPE.index_ansj));
    IndexWriterConfig writerConfig = new IndexWriterConfig(indexAnalyzer);
    final String sentence = "旅游和服务是最好的";
    System.out.println(IndexAnalysis.parse(sentence));

    // Create the in-memory index
    Directory ramIndex = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramIndex, writerConfig);
    addContent(indexWriter, sentence);
    indexWriter.commit();
    indexWriter.close();
    System.out.println("索引建立完毕");

    Analyzer searchAnalyzer = new AnsjAnalyzer(AnsjAnalyzer.TYPE.index_ansj);
    System.out.println("index ok to search!");
    for (Term term : IndexAnalysis.parse(sentence)) {
        String termName = term.getName();
        System.out.println(termName);
        // Wrap the term in double quotes before handing it to search(...)
        search(searchAnalyzer, ramIndex, "\"" + term.getName() + "\"");
    }
}
Also used : AnsjAnalyzer(org.ansj.lucene6.AnsjAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) Term(org.ansj.domain.Term) AnsjAnalyzer(org.ansj.lucene6.AnsjAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) RAMDirectory(org.apache.lucene.store.RAMDirectory) PerFieldAnalyzerWrapper(org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Example 54 with IndexWriter

use of org.apache.lucene.index.IndexWriter in project ansj_seg by NLPchina.

the class IndexTest method indexTest.

/**
 * Indexes three sentences into an in-memory index and then searches it for the quoted
 * phrase "和服" using a {@code dic_ansj} query analyzer.
 *
 * @throws CorruptIndexException     if the index is corrupt
 * @throws LockObtainFailedException if the index write lock cannot be obtained
 * @throws IOException               on low-level I/O failure
 * @throws ParseException            if the query string cannot be parsed
 */
@Test
public void indexTest() throws CorruptIndexException, LockObtainFailedException, IOException, ParseException {
    PerFieldAnalyzerWrapper indexAnalyzer = new PerFieldAnalyzerWrapper(new AnsjAnalyzer(TYPE.index_ansj));
    IndexWriterConfig writerConfig = new IndexWriterConfig(indexAnalyzer);

    // Create the in-memory index
    Directory ramIndex = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramIndex, writerConfig);
    addContent(indexWriter, "助推企业转型升级提供强有力的技术支持和服保障。中心的建成将使青岛的服务器承载能力突破10万台,达到世界一流水平。");
    addContent(indexWriter, "涉及民生的部分商品和服务成本监审政策");
    addContent(indexWriter, "我穿着和服");
    indexWriter.commit();
    indexWriter.close();
    System.out.println("索引建立完毕");

    Analyzer searchAnalyzer = new AnsjAnalyzer(AnsjAnalyzer.TYPE.dic_ansj);
    System.out.println("index ok to search!");
    search(searchAnalyzer, ramIndex, "\"和服\"");
}
Also used : AnsjAnalyzer(org.ansj.lucene6.AnsjAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) AnsjAnalyzer(org.ansj.lucene6.AnsjAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) RAMDirectory(org.apache.lucene.store.RAMDirectory) PerFieldAnalyzerWrapper(org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Example 55 with IndexWriter

use of org.apache.lucene.index.IndexWriter in project ansj_seg by NLPchina.

the class PhraseTest method main.

/**
 * Demo: registers two dictionary words, prints the tokens the index analyzer produces for a
 * sample sentence, indexes that sentence into an in-memory index, and prints the hit counts
 * for a term query and a parsed phrase query.
 *
 * @param args unused
 * @throws IOException    on analysis, indexing or search I/O failure
 * @throws ParseException if the phrase query cannot be parsed
 */
public static void main(String[] args) throws IOException, ParseException {
    DicLibrary.insert(DicLibrary.DEFAULT, "上网人");
    DicLibrary.insert(DicLibrary.DEFAULT, "网人");
    AnsjAnalyzer ansjAnalyzer = new AnsjAnalyzer(AnsjAnalyzer.TYPE.index_ansj);

    // Analyzer.tokenStream takes (fieldName, text): analyze the sample sentence for field "test",
    // matching the field/content pair that is indexed below.
    try (TokenStream tokenStream = ansjAnalyzer.tokenStream("test", "上网人员测试")) {
        // Lucene's consumer workflow: reset() -> incrementToken()* -> end() -> close().
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            System.out.println(tokenStream.getAttribute(CharTermAttribute.class));
        }
        tokenStream.end();
    }

    IndexWriterConfig config = new IndexWriterConfig(ansjAnalyzer);
    // Resources are closed in reverse order: reader, then writer, then directory.
    try (RAMDirectory directory = new RAMDirectory();
         IndexWriter writer = new IndexWriter(directory, config)) {
        Document doc = new Document();
        doc.add(new TextField("test", "上网人员测试", Field.Store.YES));
        writer.addDocument(doc);
        writer.commit();
        try (DirectoryReader reader = DirectoryReader.open(writer)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            System.out.println(searcher.count(new TermQuery(new Term("test", "网人"))));
            Query q = new QueryParser("test", new AnsjAnalyzer(AnsjAnalyzer.TYPE.index_ansj)).parse("\"上网人\"");
            System.out.println(q);
            System.out.println(searcher.count(q));
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) TokenStream(org.apache.lucene.analysis.TokenStream) Query(org.apache.lucene.search.Query) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) AnsjAnalyzer(org.ansj.lucene6.AnsjAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) TextField(org.apache.lucene.document.TextField) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

IndexWriter (org.apache.lucene.index.IndexWriter): 529; Document (org.apache.lucene.document.Document): 311; Directory (org.apache.lucene.store.Directory): 306; IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 293; IndexReader (org.apache.lucene.index.IndexReader): 144; MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 136; DirectoryReader (org.apache.lucene.index.DirectoryReader): 127; Term (org.apache.lucene.index.Term): 125; IndexSearcher (org.apache.lucene.search.IndexSearcher): 110; RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 107; TextField (org.apache.lucene.document.TextField): 104; RAMDirectory (org.apache.lucene.store.RAMDirectory): 88; IOException (java.io.IOException): 86; Field (org.apache.lucene.document.Field): 86; TermQuery (org.apache.lucene.search.TermQuery): 56; StringField (org.apache.lucene.document.StringField): 52; BytesRef (org.apache.lucene.util.BytesRef): 52; FieldType (org.apache.lucene.document.FieldType): 50; Test (org.junit.Test): 49; Query (org.apache.lucene.search.Query): 45