Search in sources :

Example 56 with SortedSetDocValuesField

use of org.apache.lucene.document.SortedSetDocValuesField in project elasticsearch by elastic.

the class KeywordFieldMapper method parseCreateField.

/**
 * Reads the value of this field from the parse context and appends the Lucene fields
 * derived from it to {@code fields}.
 *
 * <p>The value is taken from the context's external value when one is set, otherwise from
 * the current parser token (a JSON null is replaced by the field type's configured null
 * value). Values that are {@code null} or longer than {@code ignoreAbove} produce no fields.
 * When the field type has a normalizer, the value is replaced by the single token the
 * normalizer emits.
 *
 * @param context the parse context supplying the value and the all-entries collection
 * @param fields  the list that receives the created fields
 * @throws IOException if reading from the parser or the normalization token stream fails
 * @throws IllegalStateException if the normalizer produces zero tokens or more than one token
 */
@Override
protected void parseCreateField(ParseContext context, List<IndexableField> fields) throws IOException {
    // Resolve the raw keyword: an externally supplied value takes precedence over the parser.
    String keyword;
    if (context.externalValueSet() == false) {
        final XContentParser parser = context.parser();
        keyword = parser.currentToken() == XContentParser.Token.VALUE_NULL
            ? fieldType().nullValueAsString()
            : parser.textOrNull();
    } else {
        keyword = context.externalValue().toString();
    }

    // Nothing to add for a missing value ...
    if (keyword == null) {
        return;
    }
    // ... or for one whose length exceeds ignoreAbove (checked before normalization).
    if (keyword.length() > ignoreAbove) {
        return;
    }

    final NamedAnalyzer normalizer = fieldType().normalizer();
    if (normalizer != null) {
        try (TokenStream stream = normalizer.tokenStream(name(), keyword)) {
            final CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            if (stream.incrementToken() == false) {
                throw new IllegalStateException("The normalization token stream is " + "expected to produce exactly 1 token, but got 0 for analyzer " + normalizer + " and input \"" + keyword + "\"");
            }
            // Capture the term before advancing again; the second advance only checks for extra tokens.
            final String normalized = term.toString();
            if (stream.incrementToken()) {
                throw new IllegalStateException("The normalization token stream is " + "expected to produce exactly 1 token, but got 2+ for analyzer " + normalizer + " and input \"" + keyword + "\"");
            }
            stream.end();
            keyword = normalized;
        }
    }

    if (context.includeInAll(includeInAll, this)) {
        context.allEntries().addText(fieldType().name(), keyword, fieldType().boost());
    }

    // convert to utf8 only once before feeding postings/dv/stored fields
    final BytesRef utf8 = new BytesRef(keyword);

    final boolean indexed = fieldType().indexOptions() != IndexOptions.NONE;
    if (indexed || fieldType().stored()) {
        fields.add(new Field(fieldType().name(), utf8, fieldType()));
    }
    if (fieldType().hasDocValues()) {
        fields.add(new SortedSetDocValuesField(fieldType().name(), utf8));
    }
}
Also used : SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) IndexableField(org.apache.lucene.index.IndexableField) Field(org.apache.lucene.document.Field) TypeParsers.parseField(org.elasticsearch.index.mapper.TypeParsers.parseField) TokenStream(org.apache.lucene.analysis.TokenStream) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) NamedAnalyzer(org.elasticsearch.index.analysis.NamedAnalyzer) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) XContentParser(org.elasticsearch.common.xcontent.XContentParser) BytesRef(org.apache.lucene.util.BytesRef)

Example 57 with SortedSetDocValuesField

use of org.apache.lucene.document.SortedSetDocValuesField in project elasticsearch by elastic.

the class IpRangeAggregatorTests method testRanges.

/**
 * Builds a random set of IP ranges, indexes documents carrying random multi-valued IP
 * doc values, and checks that the IP range aggregation returns one bucket per requested
 * range with the expected bounds and document count.
 *
 * <p>Expected counts are computed locally with {@code isInRange}: a document counts once
 * for a range as soon as any of its values falls inside it.
 *
 * @throws Exception if indexing or searching fails
 */
public void testRanges() throws Exception {
    // Flag forwarded to randomIp for every generated address.
    boolean v4 = randomBoolean();
    IpRangeAggregationBuilder builder = new IpRangeAggregationBuilder("test_agg").field("field");
    int numRanges = randomIntBetween(1, 10);
    // Generic array creation is unavoidable; every element stored below is a fully typed Tuple.
    @SuppressWarnings({ "unchecked", "rawtypes" })
    Tuple<BytesRef, BytesRef>[] requestedRanges = new Tuple[numRanges];
    for (int i = 0; i < numRanges; i++) {
        @SuppressWarnings({ "unchecked", "rawtypes" })
        Tuple<InetAddress, BytesRef>[] arr = new Tuple[2];
        for (int j = 0; j < 2; j++) {
            InetAddress addr = randomIp(v4);
            arr[j] = new Tuple<>(addr, new BytesRef(InetAddressPoint.encode(addr)));
        }
        // Order the two endpoints by their encoded form so arr[0] is the lower bound.
        Arrays.sort(arr, (t1, t2) -> t1.v2().compareTo(t2.v2()));
        if (rarely()) {
            // Occasionally request a range that is open on one side (null bound).
            if (randomBoolean()) {
                builder.addRange(NetworkAddress.format(arr[0].v1()), null);
                requestedRanges[i] = new Tuple<>(arr[0].v2(), null);
            } else {
                builder.addRange(null, NetworkAddress.format(arr[1].v1()));
                requestedRanges[i] = new Tuple<>(null, arr[1].v2());
            }
        } else {
            builder.addRange(NetworkAddress.format(arr[0].v1()), NetworkAddress.format(arr[1].v1()));
            requestedRanges[i] = new Tuple<>(arr[0].v2(), arr[1].v2());
        }
    }
    // Presumably matches the order in which the aggregation returns its buckets,
    // since buckets are compared to requestedRanges by index below.
    Arrays.sort(requestedRanges, RANGE_COMPARATOR);
    int[] expectedCounts = new int[numRanges];
    try (Directory dir = newDirectory();
        RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
        int numDocs = randomIntBetween(10, 100);
        for (int i = 0; i < numDocs; i++) {
            Document doc = new Document();
            int numValues = randomIntBetween(1, 5);
            BytesRef[] values = new BytesRef[numValues];
            for (int j = 0; j < numValues; j++) {
                values[j] = new BytesRef(InetAddressPoint.encode(randomIp(v4)));
                doc.add(new SortedSetDocValuesField("field", values[j]));
            }
            Arrays.sort(values);
            for (int j = 0; j < numRanges; j++) {
                for (int k = 0; k < numValues; k++) {
                    if (isInRange(values[k], requestedRanges[j].v1(), requestedRanges[j].v2())) {
                        // Count the document once per range, however many of its values match.
                        expectedCounts[j]++;
                        break;
                    }
                }
            }
            w.addDocument(doc);
        }
        MappedFieldType fieldType = new IpFieldMapper.IpFieldType();
        fieldType.setName("field");
        try (IndexReader reader = w.getReader()) {
            IndexSearcher searcher = new IndexSearcher(reader);
            InternalBinaryRange range = search(searcher, new MatchAllDocsQuery(), builder, fieldType);
            assertEquals(numRanges, range.getBuckets().size());
            for (int i = 0; i < range.getBuckets().size(); i++) {
                Tuple<BytesRef, BytesRef> expected = requestedRanges[i];
                Range.Bucket bucket = range.getBuckets().get(i);
                // A null requested bound must surface as a null bucket bound.
                if (expected.v1() == null) {
                    assertNull(bucket.getFrom());
                } else {
                    assertEquals(DocValueFormat.IP.format(expected.v1()), bucket.getFrom());
                }
                if (expected.v2() == null) {
                    assertNull(bucket.getTo());
                } else {
                    assertEquals(DocValueFormat.IP.format(expected.v2()), bucket.getTo());
                }
                assertEquals(expectedCounts[i], bucket.getDocCount());
            }
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) IpRangeAggregationBuilder(org.elasticsearch.search.aggregations.bucket.range.ip.IpRangeAggregationBuilder) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) InetAddressPoint(org.apache.lucene.document.InetAddressPoint) MappedFieldType(org.elasticsearch.index.mapper.MappedFieldType) IndexReader(org.apache.lucene.index.IndexReader) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) InetAddress(java.net.InetAddress) Tuple(org.elasticsearch.common.collect.Tuple) BytesRef(org.apache.lucene.util.BytesRef) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 58 with SortedSetDocValuesField

use of org.apache.lucene.document.SortedSetDocValuesField in project elasticsearch by elastic.

the class TermsAggregatorTests method testTermsAggregator.

/**
 * Indexes three documents with two string doc values each and checks, for every
 * {@code TermsAggregatorFactory.ExecutionMode}, that a terms aggregation ordered by
 * ascending term returns the buckets a(2), b(2), c(1), d(1).
 *
 * <p>The directory, writer and reader are managed with try-with-resources so they are
 * released even when an assertion fails part-way through the test.
 *
 * @throws Exception if indexing or searching fails
 */
public void testTermsAggregator() throws Exception {
    try (Directory directory = newDirectory()) {
        // The writer is closed before the reader is opened, as in a plain sequential test.
        try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
            Document document = new Document();
            document.add(new SortedSetDocValuesField("string", new BytesRef("a")));
            document.add(new SortedSetDocValuesField("string", new BytesRef("b")));
            indexWriter.addDocument(document);
            document = new Document();
            document.add(new SortedSetDocValuesField("string", new BytesRef("c")));
            document.add(new SortedSetDocValuesField("string", new BytesRef("a")));
            indexWriter.addDocument(document);
            document = new Document();
            document.add(new SortedSetDocValuesField("string", new BytesRef("b")));
            document.add(new SortedSetDocValuesField("string", new BytesRef("d")));
            indexWriter.addDocument(document);
        }
        try (IndexReader indexReader = DirectoryReader.open(directory)) {
            // We do not use LuceneTestCase.newSearcher because we need a DirectoryReader
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            for (TermsAggregatorFactory.ExecutionMode executionMode : TermsAggregatorFactory.ExecutionMode.values()) {
                TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name", ValueType.STRING).executionHint(executionMode.toString()).field("string").order(Terms.Order.term(true));
                MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType();
                fieldType.setName("string");
                fieldType.setHasDocValues(true);
                try (TermsAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType)) {
                    aggregator.preCollection();
                    indexSearcher.search(new MatchAllDocsQuery(), aggregator);
                    aggregator.postCollection();
                    Terms result = (Terms) aggregator.buildAggregation(0L);
                    assertEquals(4, result.getBuckets().size());
                    assertEquals("a", result.getBuckets().get(0).getKeyAsString());
                    assertEquals(2L, result.getBuckets().get(0).getDocCount());
                    assertEquals("b", result.getBuckets().get(1).getKeyAsString());
                    assertEquals(2L, result.getBuckets().get(1).getDocCount());
                    assertEquals("c", result.getBuckets().get(2).getKeyAsString());
                    assertEquals(1L, result.getBuckets().get(2).getDocCount());
                    assertEquals("d", result.getBuckets().get(3).getKeyAsString());
                    assertEquals(1L, result.getBuckets().get(3).getDocCount());
                }
            }
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) IndexReader(org.apache.lucene.index.IndexReader) MappedFieldType(org.elasticsearch.index.mapper.MappedFieldType) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 59 with SortedSetDocValuesField

use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.

the class TestDocValuesQueries method doTestDuelPointRangeSortedRangeQuery.

/**
 * Duels {@code LongPoint} range queries on field "idx" against doc-values range queries
 * on field "dv" over the same randomly generated values, asserting that both match the
 * same documents.
 *
 * @param sortedSet       when true, values are indexed and queried through
 *                        {@code SortedSetDocValuesField}; otherwise through {@code SortedDocValuesField}
 * @param maxValuesPerDoc inclusive upper bound on the number of values added to each document
 * @throws IOException if indexing or searching fails
 */
private void doTestDuelPointRangeSortedRangeQuery(boolean sortedSet, int maxValuesPerDoc) throws IOException {
    final int rounds = atLeast(10);
    for (int round = 0; round < rounds; round++) {
        final Directory directory = newDirectory();
        final RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

        final int docCount = atLeast(100);
        for (int docIdx = 0; docIdx < docCount; docIdx++) {
            final Document document = new Document();
            final int valueCount = TestUtil.nextInt(random(), 0, maxValuesPerDoc);
            for (int valueIdx = 0; valueIdx < valueCount; valueIdx++) {
                final long pointValue = TestUtil.nextLong(random(), -100, 10000);
                // The doc value carries the same binary encoding LongPoint uses for the value.
                final byte[] encodedValue = new byte[Long.BYTES];
                LongPoint.encodeDimension(pointValue, encodedValue, 0);
                final BytesRef dvValue = new BytesRef(encodedValue);
                document.add(sortedSet ? new SortedSetDocValuesField("dv", dvValue) : new SortedDocValuesField("dv", dvValue));
                document.add(new LongPoint("idx", pointValue));
            }
            writer.addDocument(document);
        }

        // Sometimes delete a slice of the documents before opening the reader.
        if (random().nextBoolean()) {
            writer.deleteDocuments(LongPoint.newRangeQuery("idx", 0L, 10L));
        }

        final IndexReader reader = writer.getReader();
        final IndexSearcher searcher = newSearcher(reader, false);
        writer.close();

        for (int queryIdx = 0; queryIdx < 100; queryIdx++) {
            long min = random().nextBoolean() ? Long.MIN_VALUE : TestUtil.nextLong(random(), -100, 10000);
            long max = random().nextBoolean() ? Long.MAX_VALUE : TestUtil.nextLong(random(), -100, 10000);

            // Encode the bounds BEFORE they are adjusted below: an exclusive doc-values bound at
            // the original value corresponds to an inclusive point bound one step inwards.
            final byte[] encodedMin = new byte[Long.BYTES];
            final byte[] encodedMax = new byte[Long.BYTES];
            LongPoint.encodeDimension(min, encodedMin, 0);
            LongPoint.encodeDimension(max, encodedMax, 0);

            final boolean includeMin = random().nextBoolean() == false;
            if (includeMin == false) {
                min++;
            }
            final boolean includeMax = random().nextBoolean() == false;
            if (includeMax == false) {
                max--;
            }

            final Query pointQuery = LongPoint.newRangeQuery("idx", min, max);

            // A bound still sitting at the extreme long value is randomly passed as null instead.
            final BytesRef lowerBound = (min == Long.MIN_VALUE && random().nextBoolean()) ? null : new BytesRef(encodedMin);
            final BytesRef upperBound = (max == Long.MAX_VALUE && random().nextBoolean()) ? null : new BytesRef(encodedMax);

            final Query docValuesQuery;
            if (sortedSet) {
                docValuesQuery = SortedSetDocValuesField.newRangeQuery("dv", lowerBound, upperBound, includeMin, includeMax);
            } else {
                docValuesQuery = SortedDocValuesField.newRangeQuery("dv", lowerBound, upperBound, includeMin, includeMax);
            }
            assertSameMatches(searcher, pointQuery, docValuesQuery, false);
        }

        reader.close();
        directory.close();
    }
}
Also used : LongPoint(org.apache.lucene.document.LongPoint) Document(org.apache.lucene.document.Document) LongPoint(org.apache.lucene.document.LongPoint) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) IndexReader(org.apache.lucene.index.IndexReader) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 60 with SortedSetDocValuesField

use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.

the class TestMultiDocValues method testSortedSet.

/**
 * Compares the sorted-set doc values exposed by {@code MultiDocValues} over a reader taken
 * before {@code forceMerge(1)} with those of the single leaf obtained after the merge.
 *
 * <p>Checks the value count, every value by ordinal, the per-document ordinal lists, and
 * finally delegates to {@code testRandomAdvance} and {@code testRandomAdvanceExact}.
 *
 * @throws Exception if indexing or reading fails
 */
public void testSortedSet() throws Exception {
    final Directory directory = newDirectory();
    final IndexWriterConfig config = newIndexWriterConfig(random(), null);
    config.setMergePolicy(newLogMergePolicy());
    final RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

    final int docCount = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
    for (int docIdx = 0; docIdx < docCount; docIdx++) {
        final Document document = new Document();
        final int valueCount = random().nextInt(5);
        for (int valueIdx = 0; valueIdx < valueCount; valueIdx++) {
            final BytesRef randomValue = new BytesRef(TestUtil.randomUnicodeString(random()));
            document.add(new SortedSetDocValuesField("bytes", randomValue));
        }
        writer.addDocument(document);
        // Commit now and then while indexing.
        if (random().nextInt(17) == 0) {
            writer.commit();
        }
    }

    // First reader is taken before the merge, second one after merging down to one segment.
    final DirectoryReader unmergedReader = writer.getReader();
    writer.forceMerge(1);
    final DirectoryReader mergedReader = writer.getReader();
    final LeafReader merged = getOnlyLeafReader(mergedReader);
    writer.close();

    final SortedSetDocValues multi = MultiDocValues.getSortedSetValues(unmergedReader, "bytes");
    final SortedSetDocValues single = merged.getSortedSetDocValues("bytes");
    if (multi != null) {
        assertEquals(single.getValueCount(), multi.getValueCount());

        // check values
        for (long valueOrd = 0; valueOrd < single.getValueCount(); valueOrd++) {
            final BytesRef expected = BytesRef.deepCopyOf(single.lookupOrd(valueOrd));
            final BytesRef actual = multi.lookupOrd(valueOrd);
            assertEquals(expected, actual);
        }

        // check ord list
        int docID;
        do {
            docID = single.nextDoc();
            assertEquals(docID, multi.nextDoc());
            if (docID != NO_MORE_DOCS) {
                final ArrayList<Long> expectedOrds = new ArrayList<>();
                for (long ord = single.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = single.nextOrd()) {
                    expectedOrds.add(ord);
                }
                int seen = 0;
                for (long ord = multi.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = multi.nextOrd()) {
                    assertEquals(expectedOrds.get(seen).longValue(), ord);
                    seen++;
                }
                assertEquals(expectedOrds.size(), seen);
            }
        } while (docID != NO_MORE_DOCS);
    } else {
        assertNull(single);
    }

    // Fresh iterators are fetched for each helper call.
    testRandomAdvance(merged.getSortedSetDocValues("bytes"), MultiDocValues.getSortedSetValues(unmergedReader, "bytes"));
    testRandomAdvanceExact(merged.getSortedSetDocValues("bytes"), MultiDocValues.getSortedSetValues(unmergedReader, "bytes"), merged.maxDoc());

    unmergedReader.close();
    mergedReader.close();
    directory.close();
}
Also used : ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Aggregations

SortedSetDocValuesField (org.apache.lucene.document.SortedSetDocValuesField): 98, BytesRef (org.apache.lucene.util.BytesRef): 96, Document (org.apache.lucene.document.Document): 82, Directory (org.apache.lucene.store.Directory): 74, RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 38, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 36, SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField): 33, NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField): 27, IndexReader (org.apache.lucene.index.IndexReader): 27, StringField (org.apache.lucene.document.StringField): 23, BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField): 22, SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField): 20, ArrayList (java.util.ArrayList): 18, Analyzer (org.apache.lucene.analysis.Analyzer): 14, IndexableField (org.apache.lucene.index.IndexableField): 13, Field (org.apache.lucene.document.Field): 12, DirectoryReader (org.apache.lucene.index.DirectoryReader): 11, LeafReader (org.apache.lucene.index.LeafReader): 11, IntPoint (org.apache.lucene.document.IntPoint): 10, StoredField (org.apache.lucene.document.StoredField): 10