Examples with BytesRef - org.apache.lucene.util.BytesRef

Example 61 with BytesRef

use of org.apache.lucene.util.BytesRef in project elasticsearch by elastic.

the class XContentBuilderTests method testReuseJsonGenerator.

public void testReuseJsonGenerator() throws Exception {
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    XContentGenerator generator = XContentFactory.xContent(XContentType.JSON).createGenerator(os);
    generator.writeStartObject();
    generator.writeStringField("test", "value");
    generator.writeEndObject();
    generator.flush();
    assertThat(new BytesRef(os.toByteArray()), equalTo(new BytesRef("{\"test\":\"value\"}")));
    // try again...
    os.reset();
    generator.writeStartObject();
    generator.writeStringField("test", "value");
    generator.writeEndObject();
    generator.flush();
    // we get a space at the start here since it thinks we are not in the root object (fine, we will ignore it in the real code we use)
    assertThat(new BytesRef(os.toByteArray()), equalTo(new BytesRef(" {\"test\":\"value\"}")));
}

Also used : ByteArrayOutputStream(java.io.ByteArrayOutputStream) XContentGenerator(org.elasticsearch.common.xcontent.XContentGenerator) BytesRef(org.apache.lucene.util.BytesRef)

Example 62 with BytesRef

use of org.apache.lucene.util.BytesRef in project elasticsearch by elastic.

the class CustomNormalizerTests method testCharFilters.

public void testCharFilters() throws IOException {
    Settings settings = Settings.builder().put("index.analysis.char_filter.my_mapping.type", "mapping").putArray("index.analysis.char_filter.my_mapping.mappings", "a => z").putArray("index.analysis.normalizer.my_normalizer.char_filter", "my_mapping").put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build();
    ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings);
    assertNull(analysis.indexAnalyzers.get("my_normalizer"));
    NamedAnalyzer normalizer = analysis.indexAnalyzers.getNormalizer("my_normalizer");
    assertNotNull(normalizer);
    assertEquals("my_normalizer", normalizer.name());
    assertTokenStreamContents(normalizer.tokenStream("foo", "abc"), new String[] { "zbc" });
    assertEquals(new BytesRef("zbc"), normalizer.normalize("foo", "abc"));
}

Also used : ESTestCase(org.elasticsearch.test.ESTestCase) Settings(org.elasticsearch.common.settings.Settings) BytesRef(org.apache.lucene.util.BytesRef)

Example 63 with BytesRef

use of org.apache.lucene.util.BytesRef in project elasticsearch by elastic.

the class AbstractStringFieldDataTestCase method testNestedSorting.

public void testNestedSorting(MultiValueMode sortMode) throws IOException {
    final String[] values = new String[randomIntBetween(2, 20)];
    for (int i = 0; i < values.length; ++i) {
        values[i] = TestUtil.randomSimpleString(random());
    }
    final int numParents = scaledRandomIntBetween(10, 3072);
    List<Document> docs = new ArrayList<>();
    FixedBitSet parents = new FixedBitSet(64);
    for (int i = 0; i < numParents; ++i) {
        docs.clear();
        final int numChildren = randomInt(4);
        for (int j = 0; j < numChildren; ++j) {
            final Document child = new Document();
            final int numValues = randomInt(3);
            for (int k = 0; k < numValues; ++k) {
                final String value = RandomPicks.randomFrom(random(), values);
                addField(child, "text", value);
            }
            docs.add(child);
        }
        final Document parent = new Document();
        parent.add(new StringField("type", "parent", Store.YES));
        final String value = RandomPicks.randomFrom(random(), values);
        if (value != null) {
            addField(parent, "text", value);
        }
        docs.add(parent);
        int bit = parents.prevSetBit(parents.length() - 1) + docs.size();
        parents = FixedBitSet.ensureCapacity(parents, bit);
        parents.set(bit);
        writer.addDocuments(docs);
        if (randomInt(10) == 0) {
            writer.commit();
        }
    }
    DirectoryReader directoryReader = DirectoryReader.open(writer);
    directoryReader = ElasticsearchDirectoryReader.wrap(directoryReader, new ShardId(indexService.index(), 0));
    IndexSearcher searcher = new IndexSearcher(directoryReader);
    IndexFieldData<?> fieldData = getForField("text");
    final Object missingValue;
    switch(randomInt(4)) {
        case 0:
            missingValue = "_first";
            break;
        case 1:
            missingValue = "_last";
            break;
        case 2:
            missingValue = new BytesRef(RandomPicks.randomFrom(random(), values));
            break;
        default:
            missingValue = new BytesRef(TestUtil.randomSimpleString(random()));
            break;
    }
    Query parentFilter = new TermQuery(new Term("type", "parent"));
    Query childFilter = Queries.not(parentFilter);
    Nested nested = createNested(searcher, parentFilter, childFilter);
    BytesRefFieldComparatorSource nestedComparatorSource = new BytesRefFieldComparatorSource(fieldData, missingValue, sortMode, nested);
    ToParentBlockJoinQuery query = new ToParentBlockJoinQuery(new ConstantScoreQuery(childFilter), new QueryBitSetProducer(parentFilter), ScoreMode.None);
    Sort sort = new Sort(new SortField("text", nestedComparatorSource));
    TopFieldDocs topDocs = searcher.search(query, randomIntBetween(1, numParents), sort);
    assertTrue(topDocs.scoreDocs.length > 0);
    BytesRef previous = null;
    for (int i = 0; i < topDocs.scoreDocs.length; ++i) {
        final int docID = topDocs.scoreDocs[i].doc;
        assertTrue("expected " + docID + " to be a parent", parents.get(docID));
        BytesRef cmpValue = null;
        for (int child = parents.prevSetBit(docID - 1) + 1; child < docID; ++child) {
            String[] sVals = searcher.doc(child).getValues("text");
            final BytesRef[] vals;
            if (sVals.length == 0) {
                vals = new BytesRef[0];
            } else {
                vals = new BytesRef[sVals.length];
                for (int j = 0; j < vals.length; ++j) {
                    vals[j] = new BytesRef(sVals[j]);
                }
            }
            for (BytesRef value : vals) {
                if (cmpValue == null) {
                    cmpValue = value;
                } else if (sortMode == MultiValueMode.MIN && value.compareTo(cmpValue) < 0) {
                    cmpValue = value;
                } else if (sortMode == MultiValueMode.MAX && value.compareTo(cmpValue) > 0) {
                    cmpValue = value;
                }
            }
        }
        if (cmpValue == null) {
            if ("_first".equals(missingValue)) {
                cmpValue = new BytesRef();
            } else if ("_last".equals(missingValue) == false) {
                cmpValue = (BytesRef) missingValue;
            }
        }
        if (previous != null && cmpValue != null) {
            assertTrue(previous.utf8ToString() + "   /   " + cmpValue.utf8ToString(), previous.compareTo(cmpValue) <= 0);
        }
        previous = cmpValue;
    }
    searcher.getIndexReader().close();
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) ToParentBlockJoinQuery(org.apache.lucene.search.join.ToParentBlockJoinQuery) ArrayList(java.util.ArrayList) Nested(org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) SortField(org.apache.lucene.search.SortField) Document(org.apache.lucene.document.Document) ShardId(org.elasticsearch.index.shard.ShardId) FixedBitSet(org.apache.lucene.util.FixedBitSet) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) QueryBitSetProducer(org.apache.lucene.search.join.QueryBitSetProducer) Sort(org.apache.lucene.search.Sort) BytesRef(org.apache.lucene.util.BytesRef) TermQuery(org.apache.lucene.search.TermQuery) ElasticsearchDirectoryReader(org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader) DirectoryReader(org.apache.lucene.index.DirectoryReader) BytesRefFieldComparatorSource(org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource) Term(org.apache.lucene.index.Term) ToParentBlockJoinQuery(org.apache.lucene.search.join.ToParentBlockJoinQuery) StringField(org.apache.lucene.document.StringField)

Example 64 with BytesRef

use of org.apache.lucene.util.BytesRef in project elasticsearch by elastic.

the class AbstractStringFieldDataTestCase method testSortMissing.

public void testSortMissing(boolean first, boolean reverse) throws IOException {
    final String[] values = new String[randomIntBetween(2, 10)];
    for (int i = 1; i < values.length; ++i) {
        values[i] = TestUtil.randomUnicodeString(random());
    }
    final int numDocs = scaledRandomIntBetween(10, 3072);
    for (int i = 0; i < numDocs; ++i) {
        final String value = RandomPicks.randomFrom(random(), values);
        if (value == null) {
            writer.addDocument(new Document());
        } else {
            Document d = new Document();
            addField(d, "value", value);
            writer.addDocument(d);
        }
        if (randomInt(10) == 0) {
            writer.commit();
        }
    }
    final IndexFieldData indexFieldData = getForField("value");
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(writer));
    XFieldComparatorSource comparator = indexFieldData.comparatorSource(first ? "_first" : "_last", MultiValueMode.MIN, null);
    TopFieldDocs topDocs = searcher.search(new MatchAllDocsQuery(), randomBoolean() ? numDocs : randomIntBetween(10, numDocs), new Sort(new SortField("value", comparator, reverse)));
    assertEquals(numDocs, topDocs.totalHits);
    BytesRef previousValue = first ? null : reverse ? UnicodeUtil.BIG_TERM : new BytesRef();
    for (int i = 0; i < topDocs.scoreDocs.length; ++i) {
        final String docValue = searcher.doc(topDocs.scoreDocs[i].doc).get("value");
        if (first && docValue == null) {
            assertNull(previousValue);
        } else if (!first && docValue != null) {
            assertNotNull(previousValue);
        }
        final BytesRef value = docValue == null ? null : new BytesRef(docValue);
        if (previousValue != null && value != null) {
            if (reverse) {
                assertTrue(previousValue.compareTo(value) >= 0);
            } else {
                assertTrue(previousValue.compareTo(value) <= 0);
            }
        }
        previousValue = value;
    }
    searcher.getIndexReader().close();
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) GlobalOrdinalsIndexFieldData(org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsIndexFieldData) TopFieldDocs(org.apache.lucene.search.TopFieldDocs) Sort(org.apache.lucene.search.Sort) SortField(org.apache.lucene.search.SortField) XFieldComparatorSource(org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) BytesRef(org.apache.lucene.util.BytesRef)

Example 65 with BytesRef

use of org.apache.lucene.util.BytesRef in project elasticsearch by elastic.

the class FieldDataCacheTests method testLoadGlobal_neverCacheIfFieldIsMissing.

public void testLoadGlobal_neverCacheIfFieldIsMissing() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(null);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    IndexWriter iw = new IndexWriter(dir, iwc);
    long numDocs = scaledRandomIntBetween(32, 128);
    for (int i = 1; i <= numDocs; i++) {
        Document doc = new Document();
        doc.add(new SortedSetDocValuesField("field1", new BytesRef(String.valueOf(i))));
        doc.add(new StringField("field2", String.valueOf(i), Field.Store.NO));
        iw.addDocument(doc);
        if (i % 24 == 0) {
            iw.commit();
        }
    }
    iw.close();
    DirectoryReader ir = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(dir), new ShardId("_index", "_na_", 0));
    DummyAccountingFieldDataCache fieldDataCache = new DummyAccountingFieldDataCache();
    // Testing SortedSetDVOrdinalsIndexFieldData:
    SortedSetDVOrdinalsIndexFieldData sortedSetDVOrdinalsIndexFieldData = createSortedDV("field1", fieldDataCache);
    sortedSetDVOrdinalsIndexFieldData.loadGlobal(ir);
    assertThat(fieldDataCache.cachedGlobally, equalTo(1));
    sortedSetDVOrdinalsIndexFieldData.loadGlobal(new FieldMaskingReader("field1", ir));
    assertThat(fieldDataCache.cachedGlobally, equalTo(1));
    // Testing PagedBytesIndexFieldData
    PagedBytesIndexFieldData pagedBytesIndexFieldData = createPagedBytes("field2", fieldDataCache);
    pagedBytesIndexFieldData.loadGlobal(ir);
    assertThat(fieldDataCache.cachedGlobally, equalTo(2));
    pagedBytesIndexFieldData.loadGlobal(new FieldMaskingReader("field2", ir));
    assertThat(fieldDataCache.cachedGlobally, equalTo(2));
    ir.close();
    dir.close();
}

Also used : PagedBytesIndexFieldData(org.elasticsearch.index.fielddata.plain.PagedBytesIndexFieldData) ElasticsearchDirectoryReader(org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader) DirectoryReader(org.apache.lucene.index.DirectoryReader) FieldMaskingReader(org.elasticsearch.test.FieldMaskingReader) Document(org.apache.lucene.document.Document) ShardId(org.elasticsearch.index.shard.ShardId) SortedSetDVOrdinalsIndexFieldData(org.elasticsearch.index.fielddata.plain.SortedSetDVOrdinalsIndexFieldData) IndexWriter(org.apache.lucene.index.IndexWriter) StringField(org.apache.lucene.document.StringField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

BytesRef (org.apache.lucene.util.BytesRef)1449 Document (org.apache.lucene.document.Document)410 Directory (org.apache.lucene.store.Directory)370 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)266 ArrayList (java.util.ArrayList)186 Test (org.junit.Test)182 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)164 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)152 Term (org.apache.lucene.index.Term)124 Analyzer (org.apache.lucene.analysis.Analyzer)121 IndexReader (org.apache.lucene.index.IndexReader)121 TermsEnum (org.apache.lucene.index.TermsEnum)116 SortedSetDocValuesField (org.apache.lucene.document.SortedSetDocValuesField)110 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)105 IOException (java.io.IOException)104 Field (org.apache.lucene.document.Field)101 StringField (org.apache.lucene.document.StringField)101 CrateUnitTest (io.crate.test.integration.CrateUnitTest)95 TextField (org.apache.lucene.document.TextField)95 BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder)87