Search in sources :

Example 21 with TextField

use of org.apache.lucene.document.TextField in project elasticsearch by elastic.

the class TranslogTests method testTranslogOpSerialization.

/**
 * Serializes a {@link Translog.Index} and a {@link Translog.Delete} operation to a
 * {@link BytesStreamOutput}, reads each one back from the resulting {@link StreamInput},
 * and asserts that the deserialized operation equals the one that was written.
 */
public void testTranslogOpSerialization() throws Exception {
    final BytesReference source = new BytesArray(new byte[] { 1 });
    final SeqNoFieldMapper.SequenceID sequenceId = SeqNoFieldMapper.SequenceID.emptySeqID();
    assert Version.CURRENT.major <= 6 : "Using UNASSIGNED_SEQ_NO can be removed in 7.0, because 6.0+ nodes have actual sequence numbers";

    // Random draws stay in their original order: sequence number first, then primary term.
    final long seqNo = randomBoolean() ? SequenceNumbersService.UNASSIGNED_SEQ_NO : randomNonNegativeLong();
    final long primaryTerm = randomBoolean() ? 0 : randomNonNegativeLong();
    sequenceId.seqNo.setLongValue(seqNo);
    sequenceId.seqNoDocValue.setLongValue(seqNo);
    sequenceId.primaryTerm.setLongValue(primaryTerm);

    final Field uid = new Field("_uid", Uid.createUid("test", "1"), UidFieldMapper.Defaults.FIELD_TYPE);
    final Field version = new NumericDocValuesField("_version", 1);

    final Document luceneDoc = new Document();
    luceneDoc.add(new TextField("value", "test", Field.Store.YES));
    luceneDoc.add(uid);
    luceneDoc.add(version);
    luceneDoc.add(sequenceId.seqNo);
    luceneDoc.add(sequenceId.seqNoDocValue);
    luceneDoc.add(sequenceId.primaryTerm);
    final ParsedDocument parsedDoc = new ParsedDocument(version, sequenceId, "1", "type", null, Arrays.asList(luceneDoc), source, XContentType.JSON, null);

    // Index operation: write it out, read it back, compare.
    final Engine.Index engineIndex = new Engine.Index(newUid(parsedDoc), parsedDoc, seqNo, primaryTerm, 1, VersionType.INTERNAL, Origin.PRIMARY, 0, 0, false);
    final Engine.IndexResult engineIndexResult = new Engine.IndexResult(1, seqNo, true);
    final Translog.Index writtenIndex = new Translog.Index(engineIndex, engineIndexResult);
    final BytesStreamOutput indexOut = new BytesStreamOutput();
    writtenIndex.writeTo(indexOut);
    final StreamInput indexIn = indexOut.bytes().streamInput();
    final Translog.Index readIndex = new Translog.Index(indexIn);
    assertEquals(writtenIndex, readIndex);

    // Delete operation: same round trip through a fresh output stream.
    final Engine.Delete engineDelete = new Engine.Delete(parsedDoc.type(), parsedDoc.id(), newUid(parsedDoc), seqNo, primaryTerm, 2, VersionType.INTERNAL, Origin.PRIMARY, 0);
    final Engine.DeleteResult engineDeleteResult = new Engine.DeleteResult(2, seqNo, true);
    final Translog.Delete writtenDelete = new Translog.Delete(engineDelete, engineDeleteResult);
    final BytesStreamOutput deleteOut = new BytesStreamOutput();
    writtenDelete.writeTo(deleteOut);
    final StreamInput deleteIn = deleteOut.bytes().streamInput();
    final Translog.Delete readDelete = new Translog.Delete(deleteIn);
    assertEquals(writtenDelete, readDelete);
}
Also used : BytesReference(org.elasticsearch.common.bytes.BytesReference) SeqNoFieldMapper(org.elasticsearch.index.mapper.SeqNoFieldMapper) BytesArray(org.elasticsearch.common.bytes.BytesArray) Document(org.elasticsearch.index.mapper.ParseContext.Document) ParsedDocument(org.elasticsearch.index.mapper.ParsedDocument) BytesStreamOutput(org.elasticsearch.common.io.stream.BytesStreamOutput) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) ParsedDocument(org.elasticsearch.index.mapper.ParsedDocument) StreamInput(org.elasticsearch.common.io.stream.StreamInput) TextField(org.apache.lucene.document.TextField) Engine(org.elasticsearch.index.engine.Engine)

Example 22 with TextField

use of org.apache.lucene.document.TextField in project elasticsearch by elastic.

the class BlendedTermQueryTests method testDismaxQuery.

/**
 * Indexes three hand-written username/song documents plus 25-100 filler documents, then
 * checks which document ranks first for two query shapes: a boolean query of blended
 * dismax term queries must return id "0", while the equivalent boolean query of plain
 * {@link DisjunctionMaxQuery} clauses must return id "1".
 */
public void testDismaxQuery() throws IOException {
    Directory directory = newDirectory();
    IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random())));
    String[] usernames = new String[] { "foo fighters", "some cool fan", "cover band" };
    String[] songs = new String[] { "generator", "foo fighers - generator", "foo fighters generator" };

    // Random draws stay in their original order: omitNorms, then each field type's index options.
    final boolean omitNorms = random().nextBoolean();
    FieldType primaryType = new FieldType(TextField.TYPE_NOT_STORED);
    primaryType.setIndexOptions(random().nextBoolean() ? IndexOptions.DOCS : IndexOptions.DOCS_AND_FREQS);
    primaryType.setOmitNorms(omitNorms);
    primaryType.freeze();
    FieldType fillerType = new FieldType(TextField.TYPE_NOT_STORED);
    fillerType.setIndexOptions(random().nextBoolean() ? IndexOptions.DOCS : IndexOptions.DOCS_AND_FREQS);
    fillerType.setOmitNorms(omitNorms);
    fillerType.freeze();

    // The three documents the assertions below refer to by id.
    for (int id = 0; id < usernames.length; id++) {
        Document entry = new Document();
        entry.add(new TextField("id", Integer.toString(id), Field.Store.YES));
        entry.add(new Field("username", usernames[id], primaryType));
        entry.add(new Field("song", songs[id], primaryType));
        writer.addDocument(entry);
    }

    // Filler documents; their ids continue after the hand-written ones.
    final int fillerCount = scaledRandomIntBetween(25, 100);
    for (int id = usernames.length; id < usernames.length + fillerCount; id++) {
        Document filler = new Document();
        filler.add(new TextField("id", Integer.toString(id), Field.Store.YES));
        filler.add(new Field("username", "foo fighters", fillerType));
        filler.add(new Field("song", "some bogus text to bump up IDF", fillerType));
        writer.addDocument(filler);
    }
    writer.commit();

    DirectoryReader reader = DirectoryReader.open(writer);
    IndexSearcher searcher = setSimilarity(newSearcher(reader));
    {
        // Blended dismax term queries across both fields.
        String[] fields = new String[] { "username", "song" };
        BooleanQuery.Builder blended = new BooleanQuery.Builder();
        blended.setDisableCoord(true);
        for (String text : new String[] { "foo", "fighters", "generator" }) {
            blended.add(BlendedTermQuery.dismaxBlendedQuery(toTerms(fields, text), 0.1f), BooleanClause.Occur.SHOULD);
        }
        TopDocs hits = searcher.search(blended.build(), 10);
        ScoreDoc[] ranked = hits.scoreDocs;
        assertEquals("0", reader.document(ranked[0].doc).getField("id").stringValue());
    }
    {
        // Plain per-term DisjunctionMaxQuery clauses with a zero tie-breaker.
        BooleanQuery.Builder plain = new BooleanQuery.Builder();
        plain.setDisableCoord(true);
        for (String text : new String[] { "foo", "fighers", "generator" }) {
            plain.add(new DisjunctionMaxQuery(Arrays.asList(new TermQuery(new Term("username", text)), new TermQuery(new Term("song", text))), 0.0f), BooleanClause.Occur.SHOULD);
        }
        TopDocs hits = searcher.search(plain.build(), 4);
        ScoreDoc[] ranked = hits.scoreDocs;
        assertEquals("1", reader.document(ranked[0].doc).getField("id").stringValue());
    }
    reader.close();
    writer.close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) DirectoryReader(org.apache.lucene.index.DirectoryReader) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) TextField(org.apache.lucene.document.TextField) Directory(org.apache.lucene.store.Directory)

Example 23 with TextField

use of org.apache.lucene.document.TextField in project ansj_seg by NLPchina.

the class HeightLightTest method index.

/**
 * Adds a single document whose "text" field holds {@code content} to the shared
 * {@code directory}, analyzed with {@code analysis}, and commits it.
 *
 * @param analysis analyzer handed to the {@link IndexWriterConfig}
 * @param content  value stored in the document's "text" field
 * @throws CorruptIndexException propagated from the underlying {@link IndexWriter}
 * @throws IOException           propagated from the underlying {@link IndexWriter}
 */
private static void index(Analyzer analysis, String content) throws CorruptIndexException, IOException {
    Document doc = new Document();
    doc.add(new TextField("text", content, Field.Store.YES));
    // try-with-resources closes the writer even when addDocument/commit throws;
    // previously close() was only reached on the success path.
    try (IndexWriter iwriter = new IndexWriter(directory, new IndexWriterConfig(analysis))) {
        iwriter.addDocument(doc);
        iwriter.commit();
    }
}
Also used : IndexWriter(org.apache.lucene.index.IndexWriter) TextField(org.apache.lucene.document.TextField) Document(org.apache.lucene.document.Document) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 24 with TextField

use of org.apache.lucene.document.TextField in project ansj_seg by NLPchina.

the class PhraseTest method main.

/**
 * Demo entry point: registers two user-dictionary words, prints the tokens the index
 * analyzer produces for the sample sentence, indexes that sentence into an in-memory
 * directory, and prints the hit counts of a term query and a parsed phrase query.
 *
 * @param args unused
 * @throws IOException    if tokenizing, indexing or searching fails
 * @throws ParseException if the phrase query cannot be parsed
 */
public static void main(String[] args) throws IOException, ParseException {
    DicLibrary.insert(DicLibrary.DEFAULT, "上网人");
    DicLibrary.insert(DicLibrary.DEFAULT, "网人");
    AnsjAnalyzer ansjAnalyzer = new AnsjAnalyzer(AnsjAnalyzer.TYPE.index_ansj);

    // Analyzer.tokenStream takes (fieldName, text); the original call had the two swapped
    // and therefore tokenized the literal "test" instead of the sample sentence.
    try (TokenStream tokenStream = ansjAnalyzer.tokenStream("test", "上网人员测试")) {
        // Lucene's consumer workflow: reset() before the first incrementToken(),
        // end() after the last one, then close() (done by try-with-resources).
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            System.out.println(tokenStream.getAttribute(CharTermAttribute.class));
        }
        tokenStream.end();
    }

    // The directory, writer and reader were never closed before; close them in reverse order.
    try (RAMDirectory directory = new RAMDirectory();
         IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(ansjAnalyzer))) {
        Document doc = new Document();
        doc.add(new TextField("test", "上网人员测试", Field.Store.YES));
        writer.addDocument(doc);
        writer.commit();

        try (DirectoryReader reader = DirectoryReader.open(writer)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            System.out.println(searcher.count(new TermQuery(new Term("test", "网人"))));
            Query q = new QueryParser("test", new AnsjAnalyzer(AnsjAnalyzer.TYPE.index_ansj)).parse("\"上网人\"");
            System.out.println(q);
            System.out.println(searcher.count(q));
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) TokenStream(org.apache.lucene.analysis.TokenStream) Query(org.apache.lucene.search.Query) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) AnsjAnalyzer(org.ansj.lucene6.AnsjAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) TextField(org.apache.lucene.document.TextField) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 25 with TextField

use of org.apache.lucene.document.TextField in project ansj_seg by NLPchina.

the class IndexAndTest method addContent.

/**
 * Wraps {@code text} in a new single-field document (field name "text", created with
 * {@code Store.YES}) and adds it to {@code iwriter}. No commit is issued here.
 */
private void addContent(IndexWriter iwriter, String text) throws CorruptIndexException, IOException {
    final Document entry = new Document();
    entry.add(new TextField("text", text, Store.YES));
    iwriter.addDocument(entry);
}
Also used : IndexableField(org.apache.lucene.index.IndexableField) TextField(org.apache.lucene.document.TextField) Document(org.apache.lucene.document.Document)

Aggregations

TextField (org.apache.lucene.document.TextField): 192, Document (org.apache.lucene.document.Document): 171, Directory (org.apache.lucene.store.Directory): 99, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 61, Term (org.apache.lucene.index.Term): 61, IndexWriter (org.apache.lucene.index.IndexWriter): 58, IndexSearcher (org.apache.lucene.search.IndexSearcher): 55, IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 52, Field (org.apache.lucene.document.Field): 50, StringField (org.apache.lucene.document.StringField): 48, BytesRef (org.apache.lucene.util.BytesRef): 48, RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 44, IndexReader (org.apache.lucene.index.IndexReader): 43, TermQuery (org.apache.lucene.search.TermQuery): 41, NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField): 31, SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField): 30, TopDocs (org.apache.lucene.search.TopDocs): 29, RAMDirectory (org.apache.lucene.store.RAMDirectory): 29, FieldType (org.apache.lucene.document.FieldType): 23, Query (org.apache.lucene.search.Query): 23