Search in sources :

Example 11 with StringField

use of org.apache.lucene.document.StringField in project che by eclipse.

the class LuceneSearcher method createDocument.

protected Document createDocument(VirtualFile virtualFile, Reader reader) throws ServerException {
    final Document doc = new Document();
    doc.add(new StringField(PATH_FIELD, virtualFile.getPath().toString(), Field.Store.YES));
    doc.add(new TextField(NAME_FIELD, virtualFile.getName(), Field.Store.YES));
    if (reader != null) {
        doc.add(new TextField(TEXT_FIELD, reader));
    }
    return doc;
}
Also used : StringField(org.apache.lucene.document.StringField) TextField(org.apache.lucene.document.TextField) Document(org.apache.lucene.document.Document)

Example 12 with StringField

use of org.apache.lucene.document.StringField in project elasticsearch by elastic.

the class FreqTermsEnumTests method setUp.

@Before
@Override
public void setUp() throws Exception {
    super.setUp();
    referenceAll = new HashMap<>();
    referenceNotDeleted = new HashMap<>();
    referenceFilter = new HashMap<>();
    Directory dir = newDirectory();
    // use keyword analyzer we rely on the stored field holding the exact term.
    IndexWriterConfig conf = newIndexWriterConfig(new KeywordAnalyzer());
    if (frequently()) {
        // we don't want to do any merges, so we won't expunge deletes
        conf.setMergePolicy(NoMergePolicy.INSTANCE);
    }
    iw = new IndexWriter(dir, conf);
    terms = new String[scaledRandomIntBetween(10, 300)];
    for (int i = 0; i < terms.length; i++) {
        terms[i] = randomAsciiOfLength(5);
    }
    int numberOfDocs = scaledRandomIntBetween(30, 300);
    Document[] docs = new Document[numberOfDocs];
    for (int i = 0; i < numberOfDocs; i++) {
        Document doc = new Document();
        doc.add(new StringField("id", Integer.toString(i), Field.Store.YES));
        docs[i] = doc;
        for (String term : terms) {
            if (randomBoolean()) {
                continue;
            }
            int freq = randomIntBetween(1, 3);
            for (int j = 0; j < freq; j++) {
                doc.add(new TextField("field", term, Field.Store.YES));
            }
        }
    }
    for (int i = 0; i < docs.length; i++) {
        Document doc = docs[i];
        iw.addDocument(doc);
        if (rarely()) {
            iw.commit();
        }
    }
    Set<String> deletedIds = new HashSet<>();
    for (int i = 0; i < docs.length; i++) {
        Document doc = docs[i];
        if (randomInt(5) == 2) {
            Term idTerm = new Term("id", doc.getField("id").stringValue());
            deletedIds.add(idTerm.text());
            iw.deleteDocuments(idTerm);
        }
    }
    for (String term : terms) {
        referenceAll.put(term, new FreqHolder());
        referenceFilter.put(term, new FreqHolder());
        referenceNotDeleted.put(term, new FreqHolder());
    }
    // now go over each doc, build the relevant references and filter
    reader = DirectoryReader.open(iw);
    List<BytesRef> filterTerms = new ArrayList<>();
    for (int docId = 0; docId < reader.maxDoc(); docId++) {
        Document doc = reader.document(docId);
        addFreqs(doc, referenceAll);
        if (!deletedIds.contains(doc.getField("id").stringValue())) {
            addFreqs(doc, referenceNotDeleted);
            if (randomBoolean()) {
                filterTerms.add(new BytesRef(doc.getField("id").stringValue()));
                addFreqs(doc, referenceFilter);
            }
        }
    }
    filter = new TermInSetQuery("id", filterTerms);
}
Also used : KeywordAnalyzer(org.apache.lucene.analysis.core.KeywordAnalyzer) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) IndexWriter(org.apache.lucene.index.IndexWriter) TermInSetQuery(org.apache.lucene.search.TermInSetQuery) StringField(org.apache.lucene.document.StringField) TextField(org.apache.lucene.document.TextField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) HashSet(java.util.HashSet) Before(org.junit.Before)

Example 13 with StringField

use of org.apache.lucene.document.StringField in project elasticsearch by elastic.

the class AbstractStringFieldDataTestCase method testNestedSorting.

public void testNestedSorting(MultiValueMode sortMode) throws IOException {
    final String[] values = new String[randomIntBetween(2, 20)];
    for (int i = 0; i < values.length; ++i) {
        values[i] = TestUtil.randomSimpleString(random());
    }
    final int numParents = scaledRandomIntBetween(10, 3072);
    List<Document> docs = new ArrayList<>();
    FixedBitSet parents = new FixedBitSet(64);
    for (int i = 0; i < numParents; ++i) {
        docs.clear();
        final int numChildren = randomInt(4);
        for (int j = 0; j < numChildren; ++j) {
            final Document child = new Document();
            final int numValues = randomInt(3);
            for (int k = 0; k < numValues; ++k) {
                final String value = RandomPicks.randomFrom(random(), values);
                addField(child, "text", value);
            }
            docs.add(child);
        }
        final Document parent = new Document();
        parent.add(new StringField("type", "parent", Store.YES));
        final String value = RandomPicks.randomFrom(random(), values);
        if (value != null) {
            addField(parent, "text", value);
        }
        docs.add(parent);
        int bit = parents.prevSetBit(parents.length() - 1) + docs.size();
        parents = FixedBitSet.ensureCapacity(parents, bit);
        parents.set(bit);
        writer.addDocuments(docs);
        if (randomInt(10) == 0) {
            writer.commit();
        }
    }
    DirectoryReader directoryReader = DirectoryReader.open(writer);
    directoryReader = ElasticsearchDirectoryReader.wrap(directoryReader, new ShardId(indexService.index(), 0));
    IndexSearcher searcher = new IndexSearcher(directoryReader);
    IndexFieldData<?> fieldData = getForField("text");
    final Object missingValue;
    switch(randomInt(4)) {
        case 0:
            missingValue = "_first";
            break;
        case 1:
            missingValue = "_last";
            break;
        case 2:
            missingValue = new BytesRef(RandomPicks.randomFrom(random(), values));
            break;
        default:
            missingValue = new BytesRef(TestUtil.randomSimpleString(random()));
            break;
    }
    Query parentFilter = new TermQuery(new Term("type", "parent"));
    Query childFilter = Queries.not(parentFilter);
    Nested nested = createNested(searcher, parentFilter, childFilter);
    BytesRefFieldComparatorSource nestedComparatorSource = new BytesRefFieldComparatorSource(fieldData, missingValue, sortMode, nested);
    ToParentBlockJoinQuery query = new ToParentBlockJoinQuery(new ConstantScoreQuery(childFilter), new QueryBitSetProducer(parentFilter), ScoreMode.None);
    Sort sort = new Sort(new SortField("text", nestedComparatorSource));
    TopFieldDocs topDocs = searcher.search(query, randomIntBetween(1, numParents), sort);
    assertTrue(topDocs.scoreDocs.length > 0);
    BytesRef previous = null;
    for (int i = 0; i < topDocs.scoreDocs.length; ++i) {
        final int docID = topDocs.scoreDocs[i].doc;
        assertTrue("expected " + docID + " to be a parent", parents.get(docID));
        BytesRef cmpValue = null;
        for (int child = parents.prevSetBit(docID - 1) + 1; child < docID; ++child) {
            String[] sVals = searcher.doc(child).getValues("text");
            final BytesRef[] vals;
            if (sVals.length == 0) {
                vals = new BytesRef[0];
            } else {
                vals = new BytesRef[sVals.length];
                for (int j = 0; j < vals.length; ++j) {
                    vals[j] = new BytesRef(sVals[j]);
                }
            }
            for (BytesRef value : vals) {
                if (cmpValue == null) {
                    cmpValue = value;
                } else if (sortMode == MultiValueMode.MIN && value.compareTo(cmpValue) < 0) {
                    cmpValue = value;
                } else if (sortMode == MultiValueMode.MAX && value.compareTo(cmpValue) > 0) {
                    cmpValue = value;
                }
            }
        }
        if (cmpValue == null) {
            if ("_first".equals(missingValue)) {
                cmpValue = new BytesRef();
            } else if ("_last".equals(missingValue) == false) {
                cmpValue = (BytesRef) missingValue;
            }
        }
        if (previous != null && cmpValue != null) {
            assertTrue(previous.utf8ToString() + "   /   " + cmpValue.utf8ToString(), previous.compareTo(cmpValue) <= 0);
        }
        previous = cmpValue;
    }
    searcher.getIndexReader().close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) ToParentBlockJoinQuery(org.apache.lucene.search.join.ToParentBlockJoinQuery) ArrayList(java.util.ArrayList) Nested(org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) SortField(org.apache.lucene.search.SortField) Document(org.apache.lucene.document.Document) ShardId(org.elasticsearch.index.shard.ShardId) FixedBitSet(org.apache.lucene.util.FixedBitSet) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) QueryBitSetProducer(org.apache.lucene.search.join.QueryBitSetProducer) Sort(org.apache.lucene.search.Sort) BytesRef(org.apache.lucene.util.BytesRef) TermQuery(org.apache.lucene.search.TermQuery) ElasticsearchDirectoryReader(org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader) DirectoryReader(org.apache.lucene.index.DirectoryReader) BytesRefFieldComparatorSource(org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource) Term(org.apache.lucene.index.Term) ToParentBlockJoinQuery(org.apache.lucene.search.join.ToParentBlockJoinQuery) StringField(org.apache.lucene.document.StringField)

Example 14 with StringField

use of org.apache.lucene.document.StringField in project elasticsearch by elastic.

the class FieldDataCacheTests method testLoadGlobal_neverCacheIfFieldIsMissing.

public void testLoadGlobal_neverCacheIfFieldIsMissing() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(null);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    IndexWriter iw = new IndexWriter(dir, iwc);
    long numDocs = scaledRandomIntBetween(32, 128);
    for (int i = 1; i <= numDocs; i++) {
        Document doc = new Document();
        doc.add(new SortedSetDocValuesField("field1", new BytesRef(String.valueOf(i))));
        doc.add(new StringField("field2", String.valueOf(i), Field.Store.NO));
        iw.addDocument(doc);
        if (i % 24 == 0) {
            iw.commit();
        }
    }
    iw.close();
    DirectoryReader ir = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(dir), new ShardId("_index", "_na_", 0));
    DummyAccountingFieldDataCache fieldDataCache = new DummyAccountingFieldDataCache();
    // Testing SortedSetDVOrdinalsIndexFieldData:
    SortedSetDVOrdinalsIndexFieldData sortedSetDVOrdinalsIndexFieldData = createSortedDV("field1", fieldDataCache);
    sortedSetDVOrdinalsIndexFieldData.loadGlobal(ir);
    assertThat(fieldDataCache.cachedGlobally, equalTo(1));
    sortedSetDVOrdinalsIndexFieldData.loadGlobal(new FieldMaskingReader("field1", ir));
    assertThat(fieldDataCache.cachedGlobally, equalTo(1));
    // Testing PagedBytesIndexFieldData
    PagedBytesIndexFieldData pagedBytesIndexFieldData = createPagedBytes("field2", fieldDataCache);
    pagedBytesIndexFieldData.loadGlobal(ir);
    assertThat(fieldDataCache.cachedGlobally, equalTo(2));
    pagedBytesIndexFieldData.loadGlobal(new FieldMaskingReader("field2", ir));
    assertThat(fieldDataCache.cachedGlobally, equalTo(2));
    ir.close();
    dir.close();
}
Also used : PagedBytesIndexFieldData(org.elasticsearch.index.fielddata.plain.PagedBytesIndexFieldData) ElasticsearchDirectoryReader(org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader) DirectoryReader(org.apache.lucene.index.DirectoryReader) FieldMaskingReader(org.elasticsearch.test.FieldMaskingReader) Document(org.apache.lucene.document.Document) ShardId(org.elasticsearch.index.shard.ShardId) SortedSetDVOrdinalsIndexFieldData(org.elasticsearch.index.fielddata.plain.SortedSetDVOrdinalsIndexFieldData) IndexWriter(org.apache.lucene.index.IndexWriter) StringField(org.apache.lucene.document.StringField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 15 with StringField

use of org.apache.lucene.document.StringField in project elasticsearch by elastic.

the class ParentChildFieldDataTests method setupData.

@Before
public void setupData() throws Exception {
    mapperService.merge(childType, new CompressedXContent(PutMappingRequest.buildFromSimplifiedDef(childType, "_parent", "type=" + parentType).string()), MapperService.MergeReason.MAPPING_UPDATE, false);
    mapperService.merge(grandChildType, new CompressedXContent(PutMappingRequest.buildFromSimplifiedDef(grandChildType, "_parent", "type=" + childType).string()), MapperService.MergeReason.MAPPING_UPDATE, false);
    Document d = new Document();
    d.add(new StringField(UidFieldMapper.NAME, Uid.createUid(parentType, "1"), Field.Store.NO));
    d.add(createJoinField(parentType, "1"));
    writer.addDocument(d);
    d = new Document();
    d.add(new StringField(UidFieldMapper.NAME, Uid.createUid(childType, "2"), Field.Store.NO));
    d.add(new StringField(ParentFieldMapper.NAME, Uid.createUid(parentType, "1"), Field.Store.NO));
    d.add(createJoinField(parentType, "1"));
    d.add(createJoinField(childType, "2"));
    writer.addDocument(d);
    writer.commit();
    d = new Document();
    d.add(new StringField(UidFieldMapper.NAME, Uid.createUid(childType, "3"), Field.Store.NO));
    d.add(new StringField(ParentFieldMapper.NAME, Uid.createUid(parentType, "1"), Field.Store.NO));
    d.add(createJoinField(parentType, "1"));
    d.add(createJoinField(childType, "3"));
    writer.addDocument(d);
    d = new Document();
    d.add(new StringField(UidFieldMapper.NAME, Uid.createUid(parentType, "2"), Field.Store.NO));
    d.add(createJoinField(parentType, "2"));
    writer.addDocument(d);
    d = new Document();
    d.add(new StringField(UidFieldMapper.NAME, Uid.createUid(childType, "4"), Field.Store.NO));
    d.add(new StringField(ParentFieldMapper.NAME, Uid.createUid(parentType, "2"), Field.Store.NO));
    d.add(createJoinField(parentType, "2"));
    d.add(createJoinField(childType, "4"));
    writer.addDocument(d);
    d = new Document();
    d.add(new StringField(UidFieldMapper.NAME, Uid.createUid(childType, "5"), Field.Store.NO));
    d.add(new StringField(ParentFieldMapper.NAME, Uid.createUid(parentType, "1"), Field.Store.NO));
    d.add(createJoinField(parentType, "1"));
    d.add(createJoinField(childType, "5"));
    writer.addDocument(d);
    writer.commit();
    d = new Document();
    d.add(new StringField(UidFieldMapper.NAME, Uid.createUid(grandChildType, "6"), Field.Store.NO));
    d.add(new StringField(ParentFieldMapper.NAME, Uid.createUid(childType, "2"), Field.Store.NO));
    d.add(createJoinField(childType, "2"));
    writer.addDocument(d);
    d = new Document();
    d.add(new StringField(UidFieldMapper.NAME, Uid.createUid("other-type", "1"), Field.Store.NO));
    writer.addDocument(d);
}
Also used : StringField(org.apache.lucene.document.StringField) CompressedXContent(org.elasticsearch.common.compress.CompressedXContent) Document(org.apache.lucene.document.Document) Before(org.junit.Before)

Aggregations

StringField (org.apache.lucene.document.StringField)323 Document (org.apache.lucene.document.Document)302 Directory (org.apache.lucene.store.Directory)227 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)129 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)94 Term (org.apache.lucene.index.Term)90 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)82 BytesRef (org.apache.lucene.util.BytesRef)73 IndexSearcher (org.apache.lucene.search.IndexSearcher)57 DirectoryReader (org.apache.lucene.index.DirectoryReader)56 BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField)55 ArrayList (java.util.ArrayList)54 TextField (org.apache.lucene.document.TextField)54 IndexReader (org.apache.lucene.index.IndexReader)51 Field (org.apache.lucene.document.Field)50 TermQuery (org.apache.lucene.search.TermQuery)50 IndexWriter (org.apache.lucene.index.IndexWriter)45 SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField)43 NRTCachingDirectory (org.apache.lucene.store.NRTCachingDirectory)43 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)40