use of org.apache.lucene.document.TextField in project elasticsearch by elastic.
the class TranslogTests method testTranslogOpSerialization.
/**
 * Serializes a {@link Translog.Index} and a {@link Translog.Delete} operation to a
 * {@link BytesStreamOutput}, reads each one back from the resulting bytes, and asserts
 * that the deserialized operation equals the one that was written.
 *
 * @throws Exception if building, writing or reading an operation fails
 */
public void testTranslogOpSerialization() throws Exception {
    final BytesReference sourceBytes = new BytesArray(new byte[] { 1 });
    final SeqNoFieldMapper.SequenceID sequenceId = SeqNoFieldMapper.SequenceID.emptySeqID();
    assert Version.CURRENT.major <= 6 : "Using UNASSIGNED_SEQ_NO can be removed in 7.0, because 6.0+ nodes have actual sequence numbers";

    // Randomly pick either the "unassigned" marker or a real non-negative sequence number.
    final long seqNo;
    if (randomBoolean()) {
        seqNo = SequenceNumbersService.UNASSIGNED_SEQ_NO;
    } else {
        seqNo = randomNonNegativeLong();
    }
    // Likewise, the primary term is either 0 or a random non-negative value.
    final long primaryTerm;
    if (randomBoolean()) {
        primaryTerm = 0;
    } else {
        primaryTerm = randomNonNegativeLong();
    }
    sequenceId.seqNo.setLongValue(seqNo);
    sequenceId.seqNoDocValue.setLongValue(seqNo);
    sequenceId.primaryTerm.setLongValue(primaryTerm);

    // Build the single Lucene document backing the parsed document.
    final Field uid = new Field("_uid", Uid.createUid("test", "1"), UidFieldMapper.Defaults.FIELD_TYPE);
    final Field version = new NumericDocValuesField("_version", 1);
    final Document luceneDoc = new Document();
    luceneDoc.add(new TextField("value", "test", Field.Store.YES));
    luceneDoc.add(uid);
    luceneDoc.add(version);
    luceneDoc.add(sequenceId.seqNo);
    luceneDoc.add(sequenceId.seqNoDocValue);
    luceneDoc.add(sequenceId.primaryTerm);
    final ParsedDocument parsed = new ParsedDocument(version, sequenceId, "1", "type", null, Arrays.asList(luceneDoc), sourceBytes, XContentType.JSON, null);

    // Index operation: write, read back, compare.
    final Engine.Index engineIndex = new Engine.Index(newUid(parsed), parsed, seqNo, primaryTerm, 1, VersionType.INTERNAL, Origin.PRIMARY, 0, 0, false);
    final Engine.IndexResult engineIndexResult = new Engine.IndexResult(1, seqNo, true);
    final Translog.Index originalIndex = new Translog.Index(engineIndex, engineIndexResult);
    final BytesStreamOutput indexOut = new BytesStreamOutput();
    originalIndex.writeTo(indexOut);
    final StreamInput indexIn = indexOut.bytes().streamInput();
    final Translog.Index roundTrippedIndex = new Translog.Index(indexIn);
    assertEquals(originalIndex, roundTrippedIndex);

    // Delete operation: write, read back, compare.
    final Engine.Delete engineDelete = new Engine.Delete(parsed.type(), parsed.id(), newUid(parsed), seqNo, primaryTerm, 2, VersionType.INTERNAL, Origin.PRIMARY, 0);
    final Engine.DeleteResult engineDeleteResult = new Engine.DeleteResult(2, seqNo, true);
    final Translog.Delete originalDelete = new Translog.Delete(engineDelete, engineDeleteResult);
    final BytesStreamOutput deleteOut = new BytesStreamOutput();
    originalDelete.writeTo(deleteOut);
    final StreamInput deleteIn = deleteOut.bytes().streamInput();
    final Translog.Delete roundTrippedDelete = new Translog.Delete(deleteIn);
    assertEquals(originalDelete, roundTrippedDelete);
}
use of org.apache.lucene.document.TextField in project elasticsearch by elastic.
the class BlendedTermQueryTests method testDismaxQuery.
/**
 * Compares the top hit of a boolean query built from
 * {@code BlendedTermQuery.dismaxBlendedQuery} clauses against the top hit of an
 * equivalent-looking boolean query built from plain {@link DisjunctionMaxQuery} clauses.
 *
 * <p>Three documents with distinct {@code username}/{@code song} values are indexed,
 * followed by a random number of extra documents that all share the same
 * {@code username} and {@code song} text. The blended query is expected to rank
 * document {@code "0"} first; the plain dismax query is expected to rank document
 * {@code "1"} first.
 *
 * <p>The directory, writer and reader are managed with try-with-resources so they are
 * released even when an assertion fails; they are closed in the order reader, writer,
 * directory.
 *
 * @throws IOException if indexing or searching fails
 */
public void testDismaxQuery() throws IOException {
    try (Directory dir = newDirectory();
         IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())))) {
        String[] username = new String[] { "foo fighters", "some cool fan", "cover band" };
        // NOTE: "fighers" (sic) in song[1] is intentional test data; the second query below searches for it.
        String[] song = new String[] { "generator", "foo fighers - generator", "foo fighters generator" };
        final boolean omitNorms = random().nextBoolean();

        // Field type for the three "interesting" documents.
        FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
        ft.setIndexOptions(random().nextBoolean() ? IndexOptions.DOCS : IndexOptions.DOCS_AND_FREQS);
        ft.setOmitNorms(omitNorms);
        ft.freeze();

        // Field type for the extra documents; index options are drawn independently.
        FieldType ft1 = new FieldType(TextField.TYPE_NOT_STORED);
        ft1.setIndexOptions(random().nextBoolean() ? IndexOptions.DOCS : IndexOptions.DOCS_AND_FREQS);
        ft1.setOmitNorms(omitNorms);
        ft1.freeze();

        for (int i = 0; i < username.length; i++) {
            Document d = new Document();
            d.add(new TextField("id", Integer.toString(i), Field.Store.YES));
            d.add(new Field("username", username[i], ft));
            d.add(new Field("song", song[i], ft));
            w.addDocument(d);
        }

        // Extra documents; ids continue after the three documents above.
        int iters = scaledRandomIntBetween(25, 100);
        for (int j = 0; j < iters; j++) {
            Document d = new Document();
            d.add(new TextField("id", Integer.toString(username.length + j), Field.Store.YES));
            d.add(new Field("username", "foo fighters", ft1));
            d.add(new Field("song", "some bogus text to bump up IDF", ft1));
            w.addDocument(d);
        }
        w.commit();

        try (DirectoryReader reader = DirectoryReader.open(w)) {
            IndexSearcher searcher = setSimilarity(newSearcher(reader));
            {
                // Blended dismax across both fields: document "0" must be the top hit.
                String[] fields = new String[] { "username", "song" };
                BooleanQuery.Builder query = new BooleanQuery.Builder();
                query.setDisableCoord(true);
                query.add(BlendedTermQuery.dismaxBlendedQuery(toTerms(fields, "foo"), 0.1f), BooleanClause.Occur.SHOULD);
                query.add(BlendedTermQuery.dismaxBlendedQuery(toTerms(fields, "fighters"), 0.1f), BooleanClause.Occur.SHOULD);
                query.add(BlendedTermQuery.dismaxBlendedQuery(toTerms(fields, "generator"), 0.1f), BooleanClause.Occur.SHOULD);
                TopDocs search = searcher.search(query.build(), 10);
                ScoreDoc[] scoreDocs = search.scoreDocs;
                assertEquals(Integer.toString(0), reader.document(scoreDocs[0].doc).getField("id").stringValue());
            }
            {
                // Plain per-term dismax without blending: document "1" must be the top hit.
                BooleanQuery.Builder query = new BooleanQuery.Builder();
                query.setDisableCoord(true);
                DisjunctionMaxQuery uname = new DisjunctionMaxQuery(Arrays.asList(new TermQuery(new Term("username", "foo")), new TermQuery(new Term("song", "foo"))), 0.0f);
                DisjunctionMaxQuery s = new DisjunctionMaxQuery(Arrays.asList(new TermQuery(new Term("username", "fighers")), new TermQuery(new Term("song", "fighers"))), 0.0f);
                DisjunctionMaxQuery gen = new DisjunctionMaxQuery(Arrays.asList(new TermQuery(new Term("username", "generator")), new TermQuery(new Term("song", "generator"))), 0f);
                query.add(uname, BooleanClause.Occur.SHOULD);
                query.add(s, BooleanClause.Occur.SHOULD);
                query.add(gen, BooleanClause.Occur.SHOULD);
                TopDocs search = searcher.search(query.build(), 4);
                ScoreDoc[] scoreDocs = search.scoreDocs;
                assertEquals(Integer.toString(1), reader.document(scoreDocs[0].doc).getField("id").stringValue());
            }
        }
    }
}
use of org.apache.lucene.document.TextField in project ansj_seg by NLPchina.
the class HeightLightTest method index.
/**
 * Indexes {@code content} as a single stored {@code "text"} field into the shared
 * {@code directory}, using the given analyzer, and commits the change.
 *
 * <p>The writer is opened in a try-with-resources block so that it is closed even when
 * adding the document or committing throws.
 *
 * @param analysis analyzer used to tokenize the content
 * @param content  text to index
 * @throws CorruptIndexException if the index is corrupt
 * @throws IOException           if writing to the directory fails
 */
private static void index(Analyzer analysis, String content) throws CorruptIndexException, IOException {
    Document doc = new Document();
    doc.add(new TextField("text", content, Field.Store.YES));
    try (IndexWriter iwriter = new IndexWriter(directory, new IndexWriterConfig(analysis))) {
        iwriter.addDocument(doc);
        iwriter.commit();
    }
}
use of org.apache.lucene.document.TextField in project ansj_seg by NLPchina.
the class PhraseTest method main.
/**
 * Small demo of phrase handling with {@code AnsjAnalyzer}: registers two dictionary
 * words, prints the tokens produced for a sample sentence, indexes that sentence into
 * an in-memory directory, and prints the hit counts for a term query and a parsed
 * phrase query.
 *
 * @param args ignored
 * @throws IOException    if analysis, indexing or searching fails
 * @throws ParseException if the phrase query cannot be parsed
 */
public static void main(String[] args) throws IOException, ParseException {
    DicLibrary.insert(DicLibrary.DEFAULT, "上网人");
    DicLibrary.insert(DicLibrary.DEFAULT, "网人");
    AnsjAnalyzer ansjAnalyzer = new AnsjAnalyzer(AnsjAnalyzer.TYPE.index_ansj);

    // Analyzer.tokenStream takes (fieldName, text): the field is "test" and the text is the
    // sample sentence. The stream must be reset() before the first incrementToken(), end()ed
    // after the last one, and closed so the analyzer can be reused by the IndexWriter below.
    try (TokenStream tokenStream = ansjAnalyzer.tokenStream("test", "上网人员测试")) {
        CharTermAttribute term = tokenStream.getAttribute(CharTermAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            System.out.println(term);
        }
        tokenStream.end();
    }

    IndexWriterConfig config = new IndexWriterConfig(ansjAnalyzer);
    try (RAMDirectory directory = new RAMDirectory();
         IndexWriter writer = new IndexWriter(directory, config)) {
        Document doc = new Document();
        doc.add(new TextField("test", "上网人员测试", Field.Store.YES));
        writer.addDocument(doc);
        writer.commit();

        try (DirectoryReader reader = DirectoryReader.open(writer)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            System.out.println(searcher.count(new TermQuery(new Term("test", "网人"))));
            Query q = new QueryParser("test", new AnsjAnalyzer(AnsjAnalyzer.TYPE.index_ansj)).parse("\"上网人\"");
            System.out.println(q);
            System.out.println(searcher.count(q));
        }
    }
}
use of org.apache.lucene.document.TextField in project ansj_seg by NLPchina.
the class IndexAndTest method addContent.
/**
 * Adds one document to the given writer, containing {@code text} as a stored
 * {@code "text"} field. The caller is responsible for committing and closing the writer.
 *
 * @param iwriter writer that receives the document
 * @param text    content of the {@code "text"} field
 * @throws CorruptIndexException if the index is corrupt
 * @throws IOException           if the writer fails to add the document
 */
private void addContent(IndexWriter iwriter, String text) throws CorruptIndexException, IOException {
    final Document document = new Document();
    document.add(new TextField("text", text, Store.YES));
    iwriter.addDocument(document);
}
Aggregations