Search in sources :

Example 96 with BytesRef

use of org.apache.lucene.util.BytesRef in project elasticsearch by elastic.

the class MultiValueModeTests method verify.

private void verify(SortedBinaryDocValues values, int maxDoc) {
    for (BytesRef missingValue : new BytesRef[] { new BytesRef(), new BytesRef(RandomStrings.randomAsciiOfLength(random(), 8)) }) {
        for (MultiValueMode mode : new MultiValueMode[] { MultiValueMode.MIN, MultiValueMode.MAX }) {
            final BinaryDocValues selected = mode.select(values, missingValue);
            for (int i = 0; i < maxDoc; ++i) {
                final BytesRef actual = selected.get(i);
                BytesRef expected = null;
                values.setDocument(i);
                int numValues = values.count();
                if (numValues == 0) {
                    expected = missingValue;
                } else {
                    for (int j = 0; j < numValues; ++j) {
                        if (expected == null) {
                            expected = BytesRef.deepCopyOf(values.valueAt(j));
                        } else {
                            if (mode == MultiValueMode.MIN) {
                                expected = expected.compareTo(values.valueAt(j)) <= 0 ? expected : BytesRef.deepCopyOf(values.valueAt(j));
                            } else if (mode == MultiValueMode.MAX) {
                                expected = expected.compareTo(values.valueAt(j)) > 0 ? expected : BytesRef.deepCopyOf(values.valueAt(j));
                            }
                        }
                    }
                    if (expected == null) {
                        expected = missingValue;
                    }
                }
                assertEquals(mode.toString() + " docId=" + i, expected, actual);
            }
        }
    }
}
Also used : BytesRef(org.apache.lucene.util.BytesRef) SortedBinaryDocValues(org.elasticsearch.index.fielddata.SortedBinaryDocValues) BinaryDocValues(org.apache.lucene.index.BinaryDocValues)

Example 97 with BytesRef

use of org.apache.lucene.util.BytesRef in project elasticsearch by elastic.

the class DocValueFormatTests method testRawParse.

public void testRawParse() {
    assertEquals(-1L, DocValueFormat.RAW.parseLong("-1", randomBoolean(), null));
    assertEquals(1L, DocValueFormat.RAW.parseLong("1", randomBoolean(), null));
    // not checking exception messages as they could depend on the JVM
    expectThrows(IllegalArgumentException.class, () -> DocValueFormat.RAW.parseLong("", randomBoolean(), null));
    expectThrows(IllegalArgumentException.class, () -> DocValueFormat.RAW.parseLong("abc", randomBoolean(), null));
    assertEquals(-1d, DocValueFormat.RAW.parseDouble("-1", randomBoolean(), null), 0d);
    assertEquals(1d, DocValueFormat.RAW.parseDouble("1", randomBoolean(), null), 0d);
    assertEquals(.5, DocValueFormat.RAW.parseDouble("0.5", randomBoolean(), null), 0d);
    // not checking exception messages as they could depend on the JVM
    expectThrows(IllegalArgumentException.class, () -> DocValueFormat.RAW.parseLong("", randomBoolean(), null));
    expectThrows(IllegalArgumentException.class, () -> DocValueFormat.RAW.parseLong("abc", randomBoolean(), null));
    assertEquals(new BytesRef("abc"), DocValueFormat.RAW.parseBytesRef("abc"));
}
Also used : BytesRef(org.apache.lucene.util.BytesRef)

Example 98 with BytesRef

use of org.apache.lucene.util.BytesRef in project elasticsearch by elastic.

the class MultiValueModeTests method testSingleValuedStrings.

public void testSingleValuedStrings() throws Exception {
    final int numDocs = scaledRandomIntBetween(1, 100);
    final BytesRef[] array = new BytesRef[numDocs];
    final FixedBitSet docsWithValue = randomBoolean() ? null : new FixedBitSet(numDocs);
    for (int i = 0; i < array.length; ++i) {
        if (randomBoolean()) {
            array[i] = new BytesRef(RandomStrings.randomAsciiOfLength(random(), 8));
            if (docsWithValue != null) {
                docsWithValue.set(i);
            }
        } else {
            array[i] = new BytesRef();
            if (docsWithValue != null && randomBoolean()) {
                docsWithValue.set(i);
            }
        }
    }
    final BinaryDocValues singleValues = new BinaryDocValues() {

        @Override
        public BytesRef get(int docID) {
            return BytesRef.deepCopyOf(array[docID]);
        }
    };
    final SortedBinaryDocValues multiValues = FieldData.singleton(singleValues, docsWithValue);
    verify(multiValues, numDocs);
    final FixedBitSet rootDocs = randomRootDocs(numDocs);
    final FixedBitSet innerDocs = randomInnerDocs(rootDocs);
    verify(multiValues, numDocs, rootDocs, innerDocs);
}
Also used : FixedBitSet(org.apache.lucene.util.FixedBitSet) BytesRef(org.apache.lucene.util.BytesRef) SortedBinaryDocValues(org.elasticsearch.index.fielddata.SortedBinaryDocValues) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) SortedBinaryDocValues(org.elasticsearch.index.fielddata.SortedBinaryDocValues)

Example 99 with BytesRef

use of org.apache.lucene.util.BytesRef in project elasticsearch by elastic.

the class MultiValueModeTests method verify.

private void verify(SortedBinaryDocValues values, int maxDoc, FixedBitSet rootDocs, FixedBitSet innerDocs) throws IOException {
    for (BytesRef missingValue : new BytesRef[] { new BytesRef(), new BytesRef(RandomStrings.randomAsciiOfLength(random(), 8)) }) {
        for (MultiValueMode mode : new MultiValueMode[] { MultiValueMode.MIN, MultiValueMode.MAX }) {
            final BinaryDocValues selected = mode.select(values, missingValue, rootDocs, new BitSetIterator(innerDocs, 0L), maxDoc);
            int prevRoot = -1;
            for (int root = rootDocs.nextSetBit(0); root != -1; root = root + 1 < maxDoc ? rootDocs.nextSetBit(root + 1) : -1) {
                final BytesRef actual = selected.get(root);
                BytesRef expected = null;
                for (int child = innerDocs.nextSetBit(prevRoot + 1); child != -1 && child < root; child = innerDocs.nextSetBit(child + 1)) {
                    values.setDocument(child);
                    for (int j = 0; j < values.count(); ++j) {
                        if (expected == null) {
                            expected = BytesRef.deepCopyOf(values.valueAt(j));
                        } else {
                            if (mode == MultiValueMode.MIN) {
                                expected = expected.compareTo(values.valueAt(j)) <= 0 ? expected : BytesRef.deepCopyOf(values.valueAt(j));
                            } else if (mode == MultiValueMode.MAX) {
                                expected = expected.compareTo(values.valueAt(j)) > 0 ? expected : BytesRef.deepCopyOf(values.valueAt(j));
                            }
                        }
                    }
                }
                if (expected == null) {
                    expected = missingValue;
                }
                assertEquals(mode.toString() + " docId=" + root, expected, actual);
                prevRoot = root;
            }
        }
    }
}
Also used : BitSetIterator(org.apache.lucene.util.BitSetIterator) BytesRef(org.apache.lucene.util.BytesRef) SortedBinaryDocValues(org.elasticsearch.index.fielddata.SortedBinaryDocValues) BinaryDocValues(org.apache.lucene.index.BinaryDocValues)

Example 100 with BytesRef

use of org.apache.lucene.util.BytesRef in project elasticsearch by elastic.

the class SignificantTermsTests method createTestAggregatorBuilder.

@Override
protected SignificantTermsAggregationBuilder createTestAggregatorBuilder() {
    String name = randomAsciiOfLengthBetween(3, 20);
    SignificantTermsAggregationBuilder factory = new SignificantTermsAggregationBuilder(name, null);
    String field = randomAsciiOfLengthBetween(3, 20);
    int randomFieldBranch = randomInt(2);
    switch(randomFieldBranch) {
        case 0:
            factory.field(field);
            break;
        case 1:
            factory.field(field);
            factory.script(new Script("_value + 1"));
            break;
        case 2:
            factory.script(new Script("doc[" + field + "] + 1"));
            break;
        default:
            fail();
    }
    if (randomBoolean()) {
        factory.missing("MISSING");
    }
    if (randomBoolean()) {
        factory.bucketCountThresholds().setRequiredSize(randomIntBetween(1, Integer.MAX_VALUE));
    }
    if (randomBoolean()) {
        factory.bucketCountThresholds().setShardSize(randomIntBetween(1, Integer.MAX_VALUE));
    }
    if (randomBoolean()) {
        int minDocCount = randomInt(4);
        switch(minDocCount) {
            case 0:
                break;
            case 1:
            case 2:
            case 3:
            case 4:
                minDocCount = randomIntBetween(0, Integer.MAX_VALUE);
                break;
        }
        factory.bucketCountThresholds().setMinDocCount(minDocCount);
    }
    if (randomBoolean()) {
        int shardMinDocCount = randomInt(4);
        switch(shardMinDocCount) {
            case 0:
                break;
            case 1:
            case 2:
            case 3:
            case 4:
                shardMinDocCount = randomIntBetween(0, Integer.MAX_VALUE);
                break;
            default:
                fail();
        }
        factory.bucketCountThresholds().setShardMinDocCount(shardMinDocCount);
    }
    if (randomBoolean()) {
        factory.executionHint(randomFrom(executionHints));
    }
    if (randomBoolean()) {
        factory.format("###.##");
    }
    if (randomBoolean()) {
        IncludeExclude incExc = null;
        switch(randomInt(5)) {
            case 0:
                incExc = new IncludeExclude(new RegExp("foobar"), null);
                break;
            case 1:
                incExc = new IncludeExclude(null, new RegExp("foobaz"));
                break;
            case 2:
                incExc = new IncludeExclude(new RegExp("foobar"), new RegExp("foobaz"));
                break;
            case 3:
                SortedSet<BytesRef> includeValues = new TreeSet<>();
                int numIncs = randomIntBetween(1, 20);
                for (int i = 0; i < numIncs; i++) {
                    includeValues.add(new BytesRef(randomAsciiOfLengthBetween(1, 30)));
                }
                SortedSet<BytesRef> excludeValues = null;
                incExc = new IncludeExclude(includeValues, excludeValues);
                break;
            case 4:
                SortedSet<BytesRef> includeValues2 = null;
                SortedSet<BytesRef> excludeValues2 = new TreeSet<>();
                int numExcs2 = randomIntBetween(1, 20);
                for (int i = 0; i < numExcs2; i++) {
                    excludeValues2.add(new BytesRef(randomAsciiOfLengthBetween(1, 30)));
                }
                incExc = new IncludeExclude(includeValues2, excludeValues2);
                break;
            case 5:
                SortedSet<BytesRef> includeValues3 = new TreeSet<>();
                int numIncs3 = randomIntBetween(1, 20);
                for (int i = 0; i < numIncs3; i++) {
                    includeValues3.add(new BytesRef(randomAsciiOfLengthBetween(1, 30)));
                }
                SortedSet<BytesRef> excludeValues3 = new TreeSet<>();
                int numExcs3 = randomIntBetween(1, 20);
                for (int i = 0; i < numExcs3; i++) {
                    excludeValues3.add(new BytesRef(randomAsciiOfLengthBetween(1, 30)));
                }
                incExc = new IncludeExclude(includeValues3, excludeValues3);
                break;
            default:
                fail();
        }
        factory.includeExclude(incExc);
    }
    if (randomBoolean()) {
        SignificanceHeuristic significanceHeuristic = null;
        switch(randomInt(5)) {
            case 0:
                significanceHeuristic = new PercentageScore();
                break;
            case 1:
                significanceHeuristic = new ChiSquare(randomBoolean(), randomBoolean());
                break;
            case 2:
                significanceHeuristic = new GND(randomBoolean());
                break;
            case 3:
                significanceHeuristic = new MutualInformation(randomBoolean(), randomBoolean());
                break;
            case 4:
                significanceHeuristic = new ScriptHeuristic(new Script("foo"));
                break;
            case 5:
                significanceHeuristic = new JLHScore();
                break;
            default:
                fail();
        }
        factory.significanceHeuristic(significanceHeuristic);
    }
    if (randomBoolean()) {
        factory.backgroundFilter(QueryBuilders.termsQuery("foo", "bar"));
    }
    return factory;
}
Also used : Script(org.elasticsearch.script.Script) JLHScore(org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore) ChiSquare(org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare) RegExp(org.apache.lucene.util.automaton.RegExp) IncludeExclude(org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude) PercentageScore(org.elasticsearch.search.aggregations.bucket.significant.heuristics.PercentageScore) TreeSet(java.util.TreeSet) ScriptHeuristic(org.elasticsearch.search.aggregations.bucket.significant.heuristics.ScriptHeuristic) SignificanceHeuristic(org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic) MutualInformation(org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation) SignificantTermsAggregationBuilder(org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregationBuilder) GND(org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

BytesRef (org.apache.lucene.util.BytesRef)1448 Document (org.apache.lucene.document.Document)409 Directory (org.apache.lucene.store.Directory)370 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)266 ArrayList (java.util.ArrayList)186 Test (org.junit.Test)182 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)164 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)152 Term (org.apache.lucene.index.Term)123 Analyzer (org.apache.lucene.analysis.Analyzer)121 IndexReader (org.apache.lucene.index.IndexReader)121 TermsEnum (org.apache.lucene.index.TermsEnum)116 SortedSetDocValuesField (org.apache.lucene.document.SortedSetDocValuesField)110 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)105 IOException (java.io.IOException)103 Field (org.apache.lucene.document.Field)101 StringField (org.apache.lucene.document.StringField)100 CrateUnitTest (io.crate.test.integration.CrateUnitTest)95 TextField (org.apache.lucene.document.TextField)94 BytesRefBuilder (org.apache.lucene.util.BytesRefBuilder)87