Examples with Document - org.apache.lucene.document.Document

Example 51 with Document

use of org.apache.lucene.document.Document in project elasticsearch by elastic.

The method testNestedSorting of the class AbstractStringFieldDataTestCase.

/**
 * Indexes random blocks of child documents followed by one parent document, sorts the parents by the
 * {@code "text"} values of their children through a nested {@link BytesRefFieldComparatorSource}, and checks
 * that the returned hits are parents whose recomputed sort keys are in non-decreasing order.
 * <p>
 * The reader is opened in a try-with-resources statement so it is closed even when an assertion fails.
 *
 * @param sortMode mode handed to the comparator source; the verification loop only recomputes keys for
 *                 {@link MultiValueMode#MIN} and {@link MultiValueMode#MAX}
 * @throws IOException if indexing, searching or closing the reader fails
 */
public void testNestedSorting(MultiValueMode sortMode) throws IOException {
    final String[] values = new String[randomIntBetween(2, 20)];
    for (int i = 0; i < values.length; ++i) {
        values[i] = TestUtil.randomSimpleString(random());
    }
    final int numParents = scaledRandomIntBetween(10, 3072);
    List<Document> docs = new ArrayList<>();
    // Records which doc IDs are parents so the verification below can locate each parent's children.
    FixedBitSet parents = new FixedBitSet(64);
    for (int i = 0; i < numParents; ++i) {
        docs.clear();
        final int numChildren = randomInt(4);
        for (int j = 0; j < numChildren; ++j) {
            final Document child = new Document();
            final int numValues = randomInt(3);
            for (int k = 0; k < numValues; ++k) {
                final String value = RandomPicks.randomFrom(random(), values);
                addField(child, "text", value);
            }
            docs.add(child);
        }
        final Document parent = new Document();
        parent.add(new StringField("type", "parent", Store.YES));
        final String value = RandomPicks.randomFrom(random(), values);
        // Every slot of values is assigned above, so this guard is expected to always pass; kept as a defensive check.
        if (value != null) {
            addField(parent, "text", value);
        }
        docs.add(parent);
        // The parent is the last document of its block: previous parent's position plus the block size.
        // NOTE(review): assumes doc IDs follow insertion order, which the parents.get(docID) assertion below relies on.
        int bit = parents.prevSetBit(parents.length() - 1) + docs.size();
        parents = FixedBitSet.ensureCapacity(parents, bit);
        parents.set(bit);
        writer.addDocuments(docs);
        if (randomInt(10) == 0) {
            writer.commit();
        }
    }
    // try-with-resources: the wrapped reader is released even if a search or an assertion below throws.
    try (DirectoryReader directoryReader = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(writer), new ShardId(indexService.index(), 0))) {
        IndexSearcher searcher = new IndexSearcher(directoryReader);
        IndexFieldData<?> fieldData = getForField("text");
        final Object missingValue;
        switch (randomInt(4)) {
            case 0:
                missingValue = "_first";
                break;
            case 1:
                missingValue = "_last";
                break;
            case 2:
                missingValue = new BytesRef(RandomPicks.randomFrom(random(), values));
                break;
            default:
                missingValue = new BytesRef(TestUtil.randomSimpleString(random()));
                break;
        }
        Query parentFilter = new TermQuery(new Term("type", "parent"));
        Query childFilter = Queries.not(parentFilter);
        Nested nested = createNested(searcher, parentFilter, childFilter);
        BytesRefFieldComparatorSource nestedComparatorSource = new BytesRefFieldComparatorSource(fieldData, missingValue, sortMode, nested);
        ToParentBlockJoinQuery query = new ToParentBlockJoinQuery(new ConstantScoreQuery(childFilter), new QueryBitSetProducer(parentFilter), ScoreMode.None);
        Sort sort = new Sort(new SortField("text", nestedComparatorSource));
        TopFieldDocs topDocs = searcher.search(query, randomIntBetween(1, numParents), sort);
        assertTrue(topDocs.scoreDocs.length > 0);
        BytesRef previous = null;
        for (int i = 0; i < topDocs.scoreDocs.length; ++i) {
            final int docID = topDocs.scoreDocs[i].doc;
            assertTrue("expected " + docID + " to be a parent", parents.get(docID));
            // Recompute the expected sort key from the stored values of the documents between the
            // previous parent and this one.
            BytesRef cmpValue = null;
            for (int child = parents.prevSetBit(docID - 1) + 1; child < docID; ++child) {
                for (String stored : searcher.doc(child).getValues("text")) {
                    final BytesRef value = new BytesRef(stored);
                    if (cmpValue == null) {
                        cmpValue = value;
                    } else if (sortMode == MultiValueMode.MIN && value.compareTo(cmpValue) < 0) {
                        cmpValue = value;
                    } else if (sortMode == MultiValueMode.MAX && value.compareTo(cmpValue) > 0) {
                        cmpValue = value;
                    }
                }
            }
            if (cmpValue == null) {
                // No child value found: substitute the configured missing value. For "_last" the key
                // stays null, which skips the ordering check for this hit and the next one.
                if ("_first".equals(missingValue)) {
                    cmpValue = new BytesRef();
                } else if ("_last".equals(missingValue) == false) {
                    cmpValue = (BytesRef) missingValue;
                }
            }
            if (previous != null && cmpValue != null) {
                assertTrue(previous.utf8ToString() + "   /   " + cmpValue.utf8ToString(), previous.compareTo(cmpValue) <= 0);
            }
            previous = cmpValue;
        }
    }
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) ToParentBlockJoinQuery(org.apache.lucene.search.join.ToParentBlockJoinQuery) ArrayList(java.util.ArrayList) Nested(org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) SortField(org.apache.lucene.search.SortField) Document(org.apache.lucene.document.Document) ShardId(org.elasticsearch.index.shard.ShardId) FixedBitSet(org.apache.lucene.util.FixedBitSet) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) QueryBitSetProducer(org.apache.lucene.search.join.QueryBitSetProducer) Sort(org.apache.lucene.search.Sort) BytesRef(org.apache.lucene.util.BytesRef) TermQuery(org.apache.lucene.search.TermQuery) ElasticsearchDirectoryReader(org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader) DirectoryReader(org.apache.lucene.index.DirectoryReader) BytesRefFieldComparatorSource(org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource) Term(org.apache.lucene.index.Term) ToParentBlockJoinQuery(org.apache.lucene.search.join.ToParentBlockJoinQuery) StringField(org.apache.lucene.document.StringField)

Example 52 with Document

use of org.apache.lucene.document.Document in project elasticsearch by elastic.

The method testSortMissing of the class AbstractStringFieldDataTestCase.

/**
 * Indexes random documents, some of them without a {@code "value"} field, sorts all of them with a
 * comparator configured to place missing values first or last, and checks both the position of the
 * documents without a value and the ordering of the documents with one.
 * <p>
 * The reader is opened in a try-with-resources statement so it is closed even when an assertion fails.
 *
 * @param first   {@code true} to sort documents without a value first ({@code "_first"}), {@code false}
 *                to sort them last ({@code "_last"})
 * @param reverse whether the sort field is created with the reverse flag set
 * @throws IOException if indexing, searching or closing the reader fails
 */
public void testSortMissing(boolean first, boolean reverse) throws IOException {
    final String[] values = new String[randomIntBetween(2, 10)];
    // Starts at 1 on purpose: values[0] stays null and stands for "document without a value".
    for (int i = 1; i < values.length; ++i) {
        values[i] = TestUtil.randomUnicodeString(random());
    }
    final int numDocs = scaledRandomIntBetween(10, 3072);
    for (int i = 0; i < numDocs; ++i) {
        final String value = RandomPicks.randomFrom(random(), values);
        if (value == null) {
            writer.addDocument(new Document());
        } else {
            Document d = new Document();
            addField(d, "value", value);
            writer.addDocument(d);
        }
        if (randomInt(10) == 0) {
            writer.commit();
        }
    }
    final IndexFieldData<?> indexFieldData = getForField("value");
    // try-with-resources: the reader is released even if the search or an assertion below throws.
    try (DirectoryReader reader = DirectoryReader.open(writer)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        XFieldComparatorSource comparator = indexFieldData.comparatorSource(first ? "_first" : "_last", MultiValueMode.MIN, null);
        TopFieldDocs topDocs = searcher.search(new MatchAllDocsQuery(), randomBoolean() ? numDocs : randomIntBetween(10, numDocs), new Sort(new SortField("value", comparator, reverse)));
        assertEquals(numDocs, topDocs.totalHits);
        // With missing-first the initial "previous" is null (a missing value); otherwise it is a sentinel
        // that compares correctly against the first real value for the chosen direction.
        BytesRef previousValue = first ? null : reverse ? UnicodeUtil.BIG_TERM : new BytesRef();
        for (int i = 0; i < topDocs.scoreDocs.length; ++i) {
            final String docValue = searcher.doc(topDocs.scoreDocs[i].doc).get("value");
            if (first && docValue == null) {
                // Missing-first: a document without a value may only follow other documents without a value.
                assertNull(previousValue);
            } else if (!first && docValue != null) {
                // Missing-last: a document with a value may never follow a document without one.
                assertNotNull(previousValue);
            }
            final BytesRef value = docValue == null ? null : new BytesRef(docValue);
            if (previousValue != null && value != null) {
                if (reverse) {
                    assertTrue(previousValue.compareTo(value) >= 0);
                } else {
                    assertTrue(previousValue.compareTo(value) <= 0);
                }
            }
            previousValue = value;
        }
    }
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) GlobalOrdinalsIndexFieldData(org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsIndexFieldData) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) Sort(org.apache.lucene.search.Sort) SortField(org.apache.lucene.search.SortField) XFieldComparatorSource(org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) BytesRef(org.apache.lucene.util.BytesRef)

Example 53 with Document

use of org.apache.lucene.document.Document in project elasticsearch by elastic.

The method add2SingleValuedDocumentsAndDeleteOneOfThem of the class AbstractStringFieldDataTestCase.

/**
 * Writes two documents, each with one {@code "_id"} and one {@code "value"} field, commits them,
 * and then issues a delete for the document whose {@code "_id"} is {@code "1"}.
 *
 * @throws Exception if adding, committing or deleting documents fails
 */
@Override
protected void add2SingleValuedDocumentsAndDeleteOneOfThem() throws Exception {
    // Document "1": the one targeted by the delete at the end.
    final Document toDelete = new Document();
    addField(toDelete, "_id", "1");
    addField(toDelete, "value", "2");
    writer.addDocument(toDelete);

    // Document "2": left untouched.
    final Document survivor = new Document();
    addField(survivor, "_id", "2");
    addField(survivor, "value", "4");
    writer.addDocument(survivor);

    // Commit both documents before issuing the delete; the delete itself is not committed here.
    writer.commit();
    final Term idOfDeleted = new Term("_id", "1");
    writer.deleteDocuments(idOfDeleted);
}

Also used : Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document)

Example 54 with Document

use of org.apache.lucene.document.Document in project elasticsearch by elastic.

The method testLoadGlobal_neverCacheIfFieldIsMissing of the class FieldDataCacheTests.

/**
 * Checks that {@code loadGlobal} increments the cache's {@code cachedGlobally} counter when the field is
 * present in the reader, and leaves the counter unchanged when the same field is hidden behind a
 * {@link FieldMaskingReader}. Exercised for both {@link SortedSetDVOrdinalsIndexFieldData} and
 * {@link PagedBytesIndexFieldData}.
 * <p>
 * The directory, writer and reader are managed by try-with-resources so that all of them are closed
 * (reader before directory) even when an assertion fails.
 *
 * @throws Exception if indexing, loading field data or closing a resource fails
 */
public void testLoadGlobal_neverCacheIfFieldIsMissing() throws Exception {
    try (Directory dir = newDirectory()) {
        IndexWriterConfig iwc = new IndexWriterConfig(null);
        iwc.setMergePolicy(NoMergePolicy.INSTANCE);
        try (IndexWriter iw = new IndexWriter(dir, iwc)) {
            final int numDocs = scaledRandomIntBetween(32, 128);
            for (int i = 1; i <= numDocs; i++) {
                Document doc = new Document();
                doc.add(new SortedSetDocValuesField("field1", new BytesRef(String.valueOf(i))));
                doc.add(new StringField("field2", String.valueOf(i), Field.Store.NO));
                iw.addDocument(doc);
                // Periodic commits with merging disabled; presumably so the index ends up with several segments.
                if (i % 24 == 0) {
                    iw.commit();
                }
            }
        }
        // The writer is closed at this point, as in the original flow, before the reader is opened.
        try (DirectoryReader ir = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(dir), new ShardId("_index", "_na_", 0))) {
            DummyAccountingFieldDataCache fieldDataCache = new DummyAccountingFieldDataCache();
            // Testing SortedSetDVOrdinalsIndexFieldData:
            SortedSetDVOrdinalsIndexFieldData sortedSetDVOrdinalsIndexFieldData = createSortedDV("field1", fieldDataCache);
            sortedSetDVOrdinalsIndexFieldData.loadGlobal(ir);
            assertThat(fieldDataCache.cachedGlobally, equalTo(1));
            // Same field, but masked: the counter must not move.
            sortedSetDVOrdinalsIndexFieldData.loadGlobal(new FieldMaskingReader("field1", ir));
            assertThat(fieldDataCache.cachedGlobally, equalTo(1));
            // Testing PagedBytesIndexFieldData
            PagedBytesIndexFieldData pagedBytesIndexFieldData = createPagedBytes("field2", fieldDataCache);
            pagedBytesIndexFieldData.loadGlobal(ir);
            assertThat(fieldDataCache.cachedGlobally, equalTo(2));
            // Same field, but masked: the counter must not move.
            pagedBytesIndexFieldData.loadGlobal(new FieldMaskingReader("field2", ir));
            assertThat(fieldDataCache.cachedGlobally, equalTo(2));
        }
    }
}

Also used : PagedBytesIndexFieldData(org.elasticsearch.index.fielddata.plain.PagedBytesIndexFieldData) ElasticsearchDirectoryReader(org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader) DirectoryReader(org.apache.lucene.index.DirectoryReader) FieldMaskingReader(org.elasticsearch.test.FieldMaskingReader) Document(org.apache.lucene.document.Document) ShardId(org.elasticsearch.index.shard.ShardId) SortedSetDVOrdinalsIndexFieldData(org.elasticsearch.index.fielddata.plain.SortedSetDVOrdinalsIndexFieldData) IndexWriter(org.apache.lucene.index.IndexWriter) StringField(org.apache.lucene.document.StringField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 55 with Document

use of org.apache.lucene.document.Document in project elasticsearch by elastic.

The method setupData of the class ParentChildFieldDataTests.

/**
 * Registers the parent/child mappings and indexes the fixture documents used by the tests.
 * <p>
 * Mappings: {@code childType} declares {@code parentType} as its {@code _parent}, and
 * {@code grandChildType} declares {@code childType} as its {@code _parent}.
 * <p>
 * Documents, in indexing order (with a commit after the second and after the sixth):
 * parent 1, child 2 (of parent 1), child 3 (of parent 1), parent 2, child 4 (of parent 2),
 * child 5 (of parent 1), grandchild 6 (of child 2), and one document of type {@code "other-type"}.
 * The last two documents are added without a following commit.
 *
 * @throws Exception if merging a mapping or writing a document fails
 */
@Before
public void setupData() throws Exception {
    final String childMapping = PutMappingRequest.buildFromSimplifiedDef(childType, "_parent", "type=" + parentType).string();
    mapperService.merge(childType, new CompressedXContent(childMapping), MapperService.MergeReason.MAPPING_UPDATE, false);
    final String grandChildMapping = PutMappingRequest.buildFromSimplifiedDef(grandChildType, "_parent", "type=" + childType).string();
    mapperService.merge(grandChildType, new CompressedXContent(grandChildMapping), MapperService.MergeReason.MAPPING_UPDATE, false);

    // Parent 1
    final Document parent1 = new Document();
    parent1.add(new StringField(UidFieldMapper.NAME, Uid.createUid(parentType, "1"), Field.Store.NO));
    parent1.add(createJoinField(parentType, "1"));
    writer.addDocument(parent1);

    // Child 2 of parent 1
    final Document child2 = new Document();
    child2.add(new StringField(UidFieldMapper.NAME, Uid.createUid(childType, "2"), Field.Store.NO));
    child2.add(new StringField(ParentFieldMapper.NAME, Uid.createUid(parentType, "1"), Field.Store.NO));
    child2.add(createJoinField(parentType, "1"));
    child2.add(createJoinField(childType, "2"));
    writer.addDocument(child2);

    writer.commit();

    // Child 3 of parent 1
    final Document child3 = new Document();
    child3.add(new StringField(UidFieldMapper.NAME, Uid.createUid(childType, "3"), Field.Store.NO));
    child3.add(new StringField(ParentFieldMapper.NAME, Uid.createUid(parentType, "1"), Field.Store.NO));
    child3.add(createJoinField(parentType, "1"));
    child3.add(createJoinField(childType, "3"));
    writer.addDocument(child3);

    // Parent 2
    final Document parent2 = new Document();
    parent2.add(new StringField(UidFieldMapper.NAME, Uid.createUid(parentType, "2"), Field.Store.NO));
    parent2.add(createJoinField(parentType, "2"));
    writer.addDocument(parent2);

    // Child 4 of parent 2
    final Document child4 = new Document();
    child4.add(new StringField(UidFieldMapper.NAME, Uid.createUid(childType, "4"), Field.Store.NO));
    child4.add(new StringField(ParentFieldMapper.NAME, Uid.createUid(parentType, "2"), Field.Store.NO));
    child4.add(createJoinField(parentType, "2"));
    child4.add(createJoinField(childType, "4"));
    writer.addDocument(child4);

    // Child 5 of parent 1
    final Document child5 = new Document();
    child5.add(new StringField(UidFieldMapper.NAME, Uid.createUid(childType, "5"), Field.Store.NO));
    child5.add(new StringField(ParentFieldMapper.NAME, Uid.createUid(parentType, "1"), Field.Store.NO));
    child5.add(createJoinField(parentType, "1"));
    child5.add(createJoinField(childType, "5"));
    writer.addDocument(child5);

    writer.commit();

    // Grandchild 6 of child 2
    final Document grandChild6 = new Document();
    grandChild6.add(new StringField(UidFieldMapper.NAME, Uid.createUid(grandChildType, "6"), Field.Store.NO));
    grandChild6.add(new StringField(ParentFieldMapper.NAME, Uid.createUid(childType, "2"), Field.Store.NO));
    grandChild6.add(createJoinField(childType, "2"));
    writer.addDocument(grandChild6);

    // A document of another type: it carries only a uid, no parent or join field.
    final Document unrelated = new Document();
    unrelated.add(new StringField(UidFieldMapper.NAME, Uid.createUid("other-type", "1"), Field.Store.NO));
    writer.addDocument(unrelated);
}

Also used : StringField(org.apache.lucene.document.StringField) CompressedXContent(org.elasticsearch.common.compress.CompressedXContent) Document(org.apache.lucene.document.Document) Before(org.junit.Before)

Aggregations

Document (org.apache.lucene.document.Document)2344 Directory (org.apache.lucene.store.Directory)1374 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)798 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)752 IndexReader (org.apache.lucene.index.IndexReader)598 Field (org.apache.lucene.document.Field)480 IndexSearcher (org.apache.lucene.search.IndexSearcher)470 Term (org.apache.lucene.index.Term)456 BytesRef (org.apache.lucene.util.BytesRef)415 StringField (org.apache.lucene.document.StringField)403 TextField (org.apache.lucene.document.TextField)389 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)325 IndexWriter (org.apache.lucene.index.IndexWriter)312 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)278 TopDocs (org.apache.lucene.search.TopDocs)270 TermQuery (org.apache.lucene.search.TermQuery)237 FieldType (org.apache.lucene.document.FieldType)231 DirectoryReader (org.apache.lucene.index.DirectoryReader)226 Test (org.junit.Test)222 RAMDirectory (org.apache.lucene.store.RAMDirectory)211