use of org.apache.lucene.index.IndexWriter in project elasticsearch by elastic.
the class BlendedTermQueryTests method testDismaxQuery.
/**
 * Indexes three hand-written documents plus a random number of filler documents, then checks
 * which document is ranked first by (a) a boolean query of blended dismax term queries and
 * (b) a boolean query of plain {@link DisjunctionMaxQuery} instances.
 *
 * <p>The calls that consume randomness are kept in a fixed order so a given seed reproduces.
 */
public void testDismaxQuery() throws IOException {
    Directory directory = newDirectory();
    IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random())));
    String[] usernames = { "foo fighters", "some cool fan", "cover band" };
    String[] songs = { "generator", "foo fighers - generator", "foo fighters generator" };
    final boolean omitNorms = random().nextBoolean();

    // Field type used by the three hand-written documents.
    FieldType seedType = new FieldType(TextField.TYPE_NOT_STORED);
    if (random().nextBoolean()) {
        seedType.setIndexOptions(IndexOptions.DOCS);
    } else {
        seedType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    }
    seedType.setOmitNorms(omitNorms);
    seedType.freeze();

    // Field type used by the filler documents; its index options are drawn independently.
    FieldType fillerType = new FieldType(TextField.TYPE_NOT_STORED);
    if (random().nextBoolean()) {
        fillerType.setIndexOptions(IndexOptions.DOCS);
    } else {
        fillerType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    }
    fillerType.setOmitNorms(omitNorms);
    fillerType.freeze();

    // Documents 0..2: one per username/song pair.
    for (int docId = 0; docId < usernames.length; docId++) {
        Document seedDoc = new Document();
        seedDoc.add(new TextField("id", Integer.toString(docId), Field.Store.YES));
        seedDoc.add(new Field("username", usernames[docId], seedType));
        seedDoc.add(new Field("song", songs[docId], seedType));
        writer.addDocument(seedDoc);
    }

    // Filler documents with identical content; ids continue after the seed documents.
    int fillerCount = scaledRandomIntBetween(25, 100);
    int lastIdExclusive = usernames.length + fillerCount;
    for (int docId = usernames.length; docId < lastIdExclusive; docId++) {
        Document fillerDoc = new Document();
        fillerDoc.add(new TextField("id", Integer.toString(docId), Field.Store.YES));
        fillerDoc.add(new Field("username", "foo fighters", fillerType));
        fillerDoc.add(new Field("song", "some bogus text to bump up IDF", fillerType));
        writer.addDocument(fillerDoc);
    }
    writer.commit();

    DirectoryReader reader = DirectoryReader.open(writer);
    IndexSearcher searcher = setSimilarity(newSearcher(reader));

    // Blended dismax over both fields: document 0 is expected on top.
    {
        String[] fields = new String[] { "username", "song" };
        BooleanQuery.Builder blended = new BooleanQuery.Builder();
        blended.setDisableCoord(true);
        for (String text : new String[] { "foo", "fighters", "generator" }) {
            blended.add(BlendedTermQuery.dismaxBlendedQuery(toTerms(fields, text), 0.1f), BooleanClause.Occur.SHOULD);
        }
        TopDocs hits = searcher.search(blended.build(), 10);
        ScoreDoc best = hits.scoreDocs[0];
        assertEquals("0", reader.document(best.doc).getField("id").stringValue());
    }

    // Plain dismax per term. The middle term is spelled "fighers", which among the indexed
    // texts appears only in the song of document 1; document 1 is expected on top.
    {
        BooleanQuery.Builder plain = new BooleanQuery.Builder();
        plain.setDisableCoord(true);
        for (String text : new String[] { "foo", "fighers", "generator" }) {
            plain.add(
                new DisjunctionMaxQuery(
                    Arrays.asList(new TermQuery(new Term("username", text)), new TermQuery(new Term("song", text))),
                    0.0f),
                BooleanClause.Occur.SHOULD);
        }
        TopDocs hits = searcher.search(plain.build(), 4);
        ScoreDoc best = hits.scoreDocs[0];
        assertEquals("1", reader.document(best.doc).getField("id").stringValue());
    }

    reader.close();
    writer.close();
    directory.close();
}
use of org.apache.lucene.index.IndexWriter in project ansj_seg by NLPchina.
the class HeightLightTest method index.
/**
 * Adds {@code content} as a single document with one stored {@code "text"} field to the
 * index held in the enclosing class's {@code directory}, then commits.
 *
 * @param analysis analyzer used to configure the index writer
 * @param content  text stored and indexed in the {@code "text"} field
 * @throws CorruptIndexException declared for callers; propagated from the index writer
 * @throws IOException           if opening, writing to or committing the index fails
 */
private static void index(Analyzer analysis, String content) throws CorruptIndexException, IOException {
    Document doc = new Document();
    doc.add(new TextField("text", content, Field.Store.YES));
    // try-with-resources closes the writer even when addDocument/commit throws, so a failed
    // call does not leave an open writer behind on the shared directory.
    try (IndexWriter iwriter = new IndexWriter(directory, new IndexWriterConfig(analysis))) {
        iwriter.addDocument(doc);
        iwriter.commit();
    }
}
use of org.apache.lucene.index.IndexWriter in project ansj_seg by NLPchina.
the class IndexAndTest method test.
/**
 * Indexes one sentence into an in-memory directory, then runs a quoted (phrase) search for
 * every term that {@code IndexAnalysis.parse} produces for that sentence.
 *
 * @throws Exception if dictionary loading, indexing or searching fails
 */
@Test
public void test() throws Exception {
    DicLibrary.put(DicLibrary.DEFAULT, "../../library/default.dic");
    PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new AnsjAnalyzer(TYPE.index_ansj));
    IndexWriterConfig ic = new IndexWriterConfig(analyzer);
    String text = "旅游和服务是最好的";
    System.out.println(IndexAnalysis.parse(text));
    // Build the in-memory index.
    Directory directory = new RAMDirectory();
    // try-with-resources closes the writer even when indexing or the commit throws.
    try (IndexWriter iwriter = new IndexWriter(directory, ic)) {
        addContent(iwriter, text);
        iwriter.commit();
    }
    System.out.println("索引建立完毕");
    Analyzer queryAnalyzer = new AnsjAnalyzer(AnsjAnalyzer.TYPE.index_ansj);
    System.out.println("index ok to search!");
    // Search for each analyzed term as a quoted phrase.
    for (Term t : IndexAnalysis.parse(text)) {
        System.out.println(t.getName());
        search(queryAnalyzer, directory, "\"" + t.getName() + "\"");
    }
}
use of org.apache.lucene.index.IndexWriter in project ansj_seg by NLPchina.
the class IndexTest method indexTest.
/**
 * Indexes three sentences into an in-memory directory and then searches it for the quoted
 * phrase "和服" using a {@code dic_ansj} query analyzer.
 *
 * @throws CorruptIndexException     declared for callers; propagated from the index writer
 * @throws LockObtainFailedException declared for callers; propagated from the index writer
 * @throws IOException               if indexing or searching fails
 * @throws ParseException            if the query string cannot be parsed by {@code search}
 */
@Test
public void indexTest() throws CorruptIndexException, LockObtainFailedException, IOException, ParseException {
    PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new AnsjAnalyzer(TYPE.index_ansj));
    IndexWriterConfig ic = new IndexWriterConfig(analyzer);
    // Build the in-memory index.
    Directory directory = new RAMDirectory();
    // try-with-resources closes the writer even when indexing or the commit throws.
    try (IndexWriter iwriter = new IndexWriter(directory, ic)) {
        addContent(iwriter, "助推企业转型升级提供强有力的技术支持和服保障。中心的建成将使青岛的服务器承载能力突破10万台,达到世界一流水平。");
        addContent(iwriter, "涉及民生的部分商品和服务成本监审政策");
        addContent(iwriter, "我穿着和服");
        iwriter.commit();
    }
    System.out.println("索引建立完毕");
    Analyzer queryAnalyzer = new AnsjAnalyzer(AnsjAnalyzer.TYPE.dic_ansj);
    System.out.println("index ok to search!");
    search(queryAnalyzer, directory, "\"和服\"");
}
use of org.apache.lucene.index.IndexWriter in project ansj_seg by NLPchina.
the class PhraseTest method main.
/**
 * Demo entry point: registers two dictionary words, prints the tokens the Ansj analyzer
 * produces for a sample sentence, indexes that sentence in memory, and prints hit counts
 * for a term query and for a parsed phrase query.
 *
 * @param args unused
 * @throws IOException    if analysis, indexing or searching fails
 * @throws ParseException if the phrase query cannot be parsed
 */
public static void main(String[] args) throws IOException, ParseException {
    DicLibrary.insert(DicLibrary.DEFAULT, "上网人");
    DicLibrary.insert(DicLibrary.DEFAULT, "网人");
    AnsjAnalyzer ansjAnalyzer = new AnsjAnalyzer(AnsjAnalyzer.TYPE.index_ansj);

    // Analyzer.tokenStream takes (fieldName, text). The arguments were previously swapped,
    // so the literal "test" was analyzed instead of the sample sentence.
    // The stream follows the Lucene consumer workflow: reset(), incrementToken() loop, end(),
    // close(). Closing it here matters because the same analyzer is reused by the writer below.
    try (TokenStream tokenStream = ansjAnalyzer.tokenStream("test", "上网人员测试")) {
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            System.out.println(tokenStream.getAttribute(CharTermAttribute.class));
        }
        tokenStream.end();
    }

    IndexWriterConfig config = new IndexWriterConfig(ansjAnalyzer);
    // Resources are closed in reverse order: reader, then writer, then directory.
    try (RAMDirectory directory = new RAMDirectory();
         IndexWriter writer = new IndexWriter(directory, config)) {
        Document doc = new Document();
        doc.add(new TextField("test", "上网人员测试", Field.Store.YES));
        writer.addDocument(doc);
        writer.commit();

        try (DirectoryReader reader = DirectoryReader.open(writer)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            System.out.println(searcher.count(new TermQuery(new Term("test", "网人"))));
            Query q = new QueryParser("test", new AnsjAnalyzer(AnsjAnalyzer.TYPE.index_ansj)).parse("\"上网人\"");
            System.out.println(q);
            System.out.println(searcher.count(q));
        }
    }
}
Aggregations