use of org.apache.lucene.document.StringField in project che by eclipse.
the class LuceneSearcher method createDocument.
protected Document createDocument(VirtualFile virtualFile, Reader reader) throws ServerException {
final Document doc = new Document();
doc.add(new StringField(PATH_FIELD, virtualFile.getPath().toString(), Field.Store.YES));
doc.add(new TextField(NAME_FIELD, virtualFile.getName(), Field.Store.YES));
if (reader != null) {
doc.add(new TextField(TEXT_FIELD, reader));
}
return doc;
}
use of org.apache.lucene.document.StringField in project elasticsearch by elastic.
the class FreqTermsEnumTests method setUp.
@Before
@Override
public void setUp() throws Exception {
super.setUp();
referenceAll = new HashMap<>();
referenceNotDeleted = new HashMap<>();
referenceFilter = new HashMap<>();
Directory dir = newDirectory();
// use keyword analyzer we rely on the stored field holding the exact term.
IndexWriterConfig conf = newIndexWriterConfig(new KeywordAnalyzer());
if (frequently()) {
// we don't want to do any merges, so we won't expunge deletes
conf.setMergePolicy(NoMergePolicy.INSTANCE);
}
iw = new IndexWriter(dir, conf);
terms = new String[scaledRandomIntBetween(10, 300)];
for (int i = 0; i < terms.length; i++) {
terms[i] = randomAsciiOfLength(5);
}
int numberOfDocs = scaledRandomIntBetween(30, 300);
Document[] docs = new Document[numberOfDocs];
for (int i = 0; i < numberOfDocs; i++) {
Document doc = new Document();
doc.add(new StringField("id", Integer.toString(i), Field.Store.YES));
docs[i] = doc;
for (String term : terms) {
if (randomBoolean()) {
continue;
}
int freq = randomIntBetween(1, 3);
for (int j = 0; j < freq; j++) {
doc.add(new TextField("field", term, Field.Store.YES));
}
}
}
for (int i = 0; i < docs.length; i++) {
Document doc = docs[i];
iw.addDocument(doc);
if (rarely()) {
iw.commit();
}
}
Set<String> deletedIds = new HashSet<>();
for (int i = 0; i < docs.length; i++) {
Document doc = docs[i];
if (randomInt(5) == 2) {
Term idTerm = new Term("id", doc.getField("id").stringValue());
deletedIds.add(idTerm.text());
iw.deleteDocuments(idTerm);
}
}
for (String term : terms) {
referenceAll.put(term, new FreqHolder());
referenceFilter.put(term, new FreqHolder());
referenceNotDeleted.put(term, new FreqHolder());
}
// now go over each doc, build the relevant references and filter
reader = DirectoryReader.open(iw);
List<BytesRef> filterTerms = new ArrayList<>();
for (int docId = 0; docId < reader.maxDoc(); docId++) {
Document doc = reader.document(docId);
addFreqs(doc, referenceAll);
if (!deletedIds.contains(doc.getField("id").stringValue())) {
addFreqs(doc, referenceNotDeleted);
if (randomBoolean()) {
filterTerms.add(new BytesRef(doc.getField("id").stringValue()));
addFreqs(doc, referenceFilter);
}
}
}
filter = new TermInSetQuery("id", filterTerms);
}
use of org.apache.lucene.document.StringField in project elasticsearch by elastic.
the class AbstractStringFieldDataTestCase method testNestedSorting.
public void testNestedSorting(MultiValueMode sortMode) throws IOException {
final String[] values = new String[randomIntBetween(2, 20)];
for (int i = 0; i < values.length; ++i) {
values[i] = TestUtil.randomSimpleString(random());
}
final int numParents = scaledRandomIntBetween(10, 3072);
List<Document> docs = new ArrayList<>();
FixedBitSet parents = new FixedBitSet(64);
for (int i = 0; i < numParents; ++i) {
docs.clear();
final int numChildren = randomInt(4);
for (int j = 0; j < numChildren; ++j) {
final Document child = new Document();
final int numValues = randomInt(3);
for (int k = 0; k < numValues; ++k) {
final String value = RandomPicks.randomFrom(random(), values);
addField(child, "text", value);
}
docs.add(child);
}
final Document parent = new Document();
parent.add(new StringField("type", "parent", Store.YES));
final String value = RandomPicks.randomFrom(random(), values);
if (value != null) {
addField(parent, "text", value);
}
docs.add(parent);
int bit = parents.prevSetBit(parents.length() - 1) + docs.size();
parents = FixedBitSet.ensureCapacity(parents, bit);
parents.set(bit);
writer.addDocuments(docs);
if (randomInt(10) == 0) {
writer.commit();
}
}
DirectoryReader directoryReader = DirectoryReader.open(writer);
directoryReader = ElasticsearchDirectoryReader.wrap(directoryReader, new ShardId(indexService.index(), 0));
IndexSearcher searcher = new IndexSearcher(directoryReader);
IndexFieldData<?> fieldData = getForField("text");
final Object missingValue;
switch(randomInt(4)) {
case 0:
missingValue = "_first";
break;
case 1:
missingValue = "_last";
break;
case 2:
missingValue = new BytesRef(RandomPicks.randomFrom(random(), values));
break;
default:
missingValue = new BytesRef(TestUtil.randomSimpleString(random()));
break;
}
Query parentFilter = new TermQuery(new Term("type", "parent"));
Query childFilter = Queries.not(parentFilter);
Nested nested = createNested(searcher, parentFilter, childFilter);
BytesRefFieldComparatorSource nestedComparatorSource = new BytesRefFieldComparatorSource(fieldData, missingValue, sortMode, nested);
ToParentBlockJoinQuery query = new ToParentBlockJoinQuery(new ConstantScoreQuery(childFilter), new QueryBitSetProducer(parentFilter), ScoreMode.None);
Sort sort = new Sort(new SortField("text", nestedComparatorSource));
TopFieldDocs topDocs = searcher.search(query, randomIntBetween(1, numParents), sort);
assertTrue(topDocs.scoreDocs.length > 0);
BytesRef previous = null;
for (int i = 0; i < topDocs.scoreDocs.length; ++i) {
final int docID = topDocs.scoreDocs[i].doc;
assertTrue("expected " + docID + " to be a parent", parents.get(docID));
BytesRef cmpValue = null;
for (int child = parents.prevSetBit(docID - 1) + 1; child < docID; ++child) {
String[] sVals = searcher.doc(child).getValues("text");
final BytesRef[] vals;
if (sVals.length == 0) {
vals = new BytesRef[0];
} else {
vals = new BytesRef[sVals.length];
for (int j = 0; j < vals.length; ++j) {
vals[j] = new BytesRef(sVals[j]);
}
}
for (BytesRef value : vals) {
if (cmpValue == null) {
cmpValue = value;
} else if (sortMode == MultiValueMode.MIN && value.compareTo(cmpValue) < 0) {
cmpValue = value;
} else if (sortMode == MultiValueMode.MAX && value.compareTo(cmpValue) > 0) {
cmpValue = value;
}
}
}
if (cmpValue == null) {
if ("_first".equals(missingValue)) {
cmpValue = new BytesRef();
} else if ("_last".equals(missingValue) == false) {
cmpValue = (BytesRef) missingValue;
}
}
if (previous != null && cmpValue != null) {
assertTrue(previous.utf8ToString() + " / " + cmpValue.utf8ToString(), previous.compareTo(cmpValue) <= 0);
}
previous = cmpValue;
}
searcher.getIndexReader().close();
}
use of org.apache.lucene.document.StringField in project elasticsearch by elastic.
the class FieldDataCacheTests method testLoadGlobal_neverCacheIfFieldIsMissing.
public void testLoadGlobal_neverCacheIfFieldIsMissing() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(null);
iwc.setMergePolicy(NoMergePolicy.INSTANCE);
IndexWriter iw = new IndexWriter(dir, iwc);
long numDocs = scaledRandomIntBetween(32, 128);
for (int i = 1; i <= numDocs; i++) {
Document doc = new Document();
doc.add(new SortedSetDocValuesField("field1", new BytesRef(String.valueOf(i))));
doc.add(new StringField("field2", String.valueOf(i), Field.Store.NO));
iw.addDocument(doc);
if (i % 24 == 0) {
iw.commit();
}
}
iw.close();
DirectoryReader ir = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(dir), new ShardId("_index", "_na_", 0));
DummyAccountingFieldDataCache fieldDataCache = new DummyAccountingFieldDataCache();
// Testing SortedSetDVOrdinalsIndexFieldData:
SortedSetDVOrdinalsIndexFieldData sortedSetDVOrdinalsIndexFieldData = createSortedDV("field1", fieldDataCache);
sortedSetDVOrdinalsIndexFieldData.loadGlobal(ir);
assertThat(fieldDataCache.cachedGlobally, equalTo(1));
sortedSetDVOrdinalsIndexFieldData.loadGlobal(new FieldMaskingReader("field1", ir));
assertThat(fieldDataCache.cachedGlobally, equalTo(1));
// Testing PagedBytesIndexFieldData
PagedBytesIndexFieldData pagedBytesIndexFieldData = createPagedBytes("field2", fieldDataCache);
pagedBytesIndexFieldData.loadGlobal(ir);
assertThat(fieldDataCache.cachedGlobally, equalTo(2));
pagedBytesIndexFieldData.loadGlobal(new FieldMaskingReader("field2", ir));
assertThat(fieldDataCache.cachedGlobally, equalTo(2));
ir.close();
dir.close();
}
use of org.apache.lucene.document.StringField in project elasticsearch by elastic.
the class ParentChildFieldDataTests method setupData.
@Before
public void setupData() throws Exception {
mapperService.merge(childType, new CompressedXContent(PutMappingRequest.buildFromSimplifiedDef(childType, "_parent", "type=" + parentType).string()), MapperService.MergeReason.MAPPING_UPDATE, false);
mapperService.merge(grandChildType, new CompressedXContent(PutMappingRequest.buildFromSimplifiedDef(grandChildType, "_parent", "type=" + childType).string()), MapperService.MergeReason.MAPPING_UPDATE, false);
Document d = new Document();
d.add(new StringField(UidFieldMapper.NAME, Uid.createUid(parentType, "1"), Field.Store.NO));
d.add(createJoinField(parentType, "1"));
writer.addDocument(d);
d = new Document();
d.add(new StringField(UidFieldMapper.NAME, Uid.createUid(childType, "2"), Field.Store.NO));
d.add(new StringField(ParentFieldMapper.NAME, Uid.createUid(parentType, "1"), Field.Store.NO));
d.add(createJoinField(parentType, "1"));
d.add(createJoinField(childType, "2"));
writer.addDocument(d);
writer.commit();
d = new Document();
d.add(new StringField(UidFieldMapper.NAME, Uid.createUid(childType, "3"), Field.Store.NO));
d.add(new StringField(ParentFieldMapper.NAME, Uid.createUid(parentType, "1"), Field.Store.NO));
d.add(createJoinField(parentType, "1"));
d.add(createJoinField(childType, "3"));
writer.addDocument(d);
d = new Document();
d.add(new StringField(UidFieldMapper.NAME, Uid.createUid(parentType, "2"), Field.Store.NO));
d.add(createJoinField(parentType, "2"));
writer.addDocument(d);
d = new Document();
d.add(new StringField(UidFieldMapper.NAME, Uid.createUid(childType, "4"), Field.Store.NO));
d.add(new StringField(ParentFieldMapper.NAME, Uid.createUid(parentType, "2"), Field.Store.NO));
d.add(createJoinField(parentType, "2"));
d.add(createJoinField(childType, "4"));
writer.addDocument(d);
d = new Document();
d.add(new StringField(UidFieldMapper.NAME, Uid.createUid(childType, "5"), Field.Store.NO));
d.add(new StringField(ParentFieldMapper.NAME, Uid.createUid(parentType, "1"), Field.Store.NO));
d.add(createJoinField(parentType, "1"));
d.add(createJoinField(childType, "5"));
writer.addDocument(d);
writer.commit();
d = new Document();
d.add(new StringField(UidFieldMapper.NAME, Uid.createUid(grandChildType, "6"), Field.Store.NO));
d.add(new StringField(ParentFieldMapper.NAME, Uid.createUid(childType, "2"), Field.Store.NO));
d.add(createJoinField(childType, "2"));
writer.addDocument(d);
d = new Document();
d.add(new StringField(UidFieldMapper.NAME, Uid.createUid("other-type", "1"), Field.Store.NO));
writer.addDocument(d);
}
Aggregations