Use of org.apache.lucene.document.SortedSetDocValuesField in project elasticsearch by elastic.
Class KeywordFieldMapper, method parseCreateField:
@Override
protected void parseCreateField(ParseContext context, List<IndexableField> fields) throws IOException {
    String value;
    if (context.externalValueSet()) {
        value = context.externalValue().toString();
    } else {
        XContentParser parser = context.parser();
        if (parser.currentToken() == XContentParser.Token.VALUE_NULL) {
            value = fieldType().nullValueAsString();
        } else {
            value = parser.textOrNull();
        }
    }

    if (value == null || value.length() > ignoreAbove) {
        return;
    }

    final NamedAnalyzer normalizer = fieldType().normalizer();
    if (normalizer != null) {
        try (TokenStream ts = normalizer.tokenStream(name(), value)) {
            final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            if (ts.incrementToken() == false) {
                throw new IllegalStateException("The normalization token stream is "
                        + "expected to produce exactly 1 token, but got 0 for analyzer "
                        + normalizer + " and input \"" + value + "\"");
            }
            final String newValue = termAtt.toString();
            if (ts.incrementToken()) {
                throw new IllegalStateException("The normalization token stream is "
                        + "expected to produce exactly 1 token, but got 2+ for analyzer "
                        + normalizer + " and input \"" + value + "\"");
            }
            ts.end();
            value = newValue;
        }
    }

    if (context.includeInAll(includeInAll, this)) {
        context.allEntries().addText(fieldType().name(), value, fieldType().boost());
    }

    // convert to utf8 only once before feeding postings/dv/stored fields
    final BytesRef binaryValue = new BytesRef(value);
    if (fieldType().indexOptions() != IndexOptions.NONE || fieldType().stored()) {
        Field field = new Field(fieldType().name(), binaryValue, fieldType());
        fields.add(field);
    }
    if (fieldType().hasDocValues()) {
        fields.add(new SortedSetDocValuesField(fieldType().name(), binaryValue));
    }
}
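The method above converts the keyword to UTF-8 once and reuses the same BytesRef for both the inverted-index field and the SortedSetDocValuesField. Below is a minimal, self-contained sketch of that dual-indexing pattern in plain Lucene (not Elasticsearch code; the field name "city", the sample value, and ByteBuffersDirectory, which assumes Lucene 8+, are illustrative choices):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;

public class KeywordFieldSketch {
    public static void main(String[] args) throws Exception {
        try (Directory dir = new ByteBuffersDirectory();
                IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) {
            String value = "new york";
            Document doc = new Document();
            // index the value as a single un-analyzed token for search...
            doc.add(new StringField("city", value, Field.Store.NO));
            // ...and store the same bytes as doc values for sorting/aggregations
            doc.add(new SortedSetDocValuesField("city", new BytesRef(value)));
            writer.addDocument(doc);
            writer.commit();
            try (DirectoryReader reader = DirectoryReader.open(dir)) {
                SortedSetDocValues dv = reader.leaves().get(0).reader().getSortedSetDocValues("city");
                if (dv.advanceExact(0)) {
                    long ord = dv.nextOrd();
                    System.out.println(dv.lookupOrd(ord).utf8ToString()); // prints "new york"
                }
            }
        }
    }
}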
Use of org.apache.lucene.document.SortedSetDocValuesField in project elasticsearch by elastic.
Class IpRangeAggregatorTests, method testRanges:
public void testRanges() throws Exception {
    boolean v4 = randomBoolean();
    IpRangeAggregationBuilder builder = new IpRangeAggregationBuilder("test_agg").field("field");
    int numRanges = randomIntBetween(1, 10);
    Tuple<BytesRef, BytesRef>[] requestedRanges = new Tuple[numRanges];
    for (int i = 0; i < numRanges; i++) {
        Tuple<InetAddress, BytesRef>[] arr = new Tuple[2];
        for (int j = 0; j < 2; j++) {
            InetAddress addr = randomIp(v4);
            arr[j] = new Tuple<>(addr, new BytesRef(InetAddressPoint.encode(addr)));
        }
        Arrays.sort(arr, (t1, t2) -> t1.v2().compareTo(t2.v2()));
        if (rarely()) {
            if (randomBoolean()) {
                builder.addRange(NetworkAddress.format(arr[0].v1()), null);
                requestedRanges[i] = new Tuple<>(arr[0].v2(), null);
            } else {
                builder.addRange(null, NetworkAddress.format(arr[1].v1()));
                requestedRanges[i] = new Tuple<>(null, arr[1].v2());
            }
        } else {
            builder.addRange(NetworkAddress.format(arr[0].v1()), NetworkAddress.format(arr[1].v1()));
            requestedRanges[i] = new Tuple<>(arr[0].v2(), arr[1].v2());
        }
    }
    Arrays.sort(requestedRanges, RANGE_COMPARATOR);
    int[] expectedCounts = new int[numRanges];
    try (Directory dir = newDirectory();
            RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
        int numDocs = randomIntBetween(10, 100);
        for (int i = 0; i < numDocs; i++) {
            Document doc = new Document();
            int numValues = randomIntBetween(1, 5);
            BytesRef[] values = new BytesRef[numValues];
            for (int j = 0; j < numValues; j++) {
                values[j] = new BytesRef(InetAddressPoint.encode(randomIp(v4)));
                doc.add(new SortedSetDocValuesField("field", values[j]));
            }
            Arrays.sort(values);
            // a document counts at most once per range, even when several of
            // its values fall into the same range
            for (int j = 0; j < numRanges; j++) {
                for (int k = 0; k < numValues; k++) {
                    if (isInRange(values[k], requestedRanges[j].v1(), requestedRanges[j].v2())) {
                        expectedCounts[j]++;
                        break;
                    }
                }
            }
            w.addDocument(doc);
        }
        MappedFieldType fieldType = new IpFieldMapper.IpFieldType();
        fieldType.setName("field");
        try (IndexReader reader = w.getReader()) {
            IndexSearcher searcher = new IndexSearcher(reader);
            InternalBinaryRange range = search(searcher, new MatchAllDocsQuery(), builder, fieldType);
            assertEquals(numRanges, range.getBuckets().size());
            for (int i = 0; i < range.getBuckets().size(); i++) {
                Tuple<BytesRef, BytesRef> expected = requestedRanges[i];
                Range.Bucket bucket = range.getBuckets().get(i);
                if (expected.v1() == null) {
                    assertNull(bucket.getFrom());
                } else {
                    assertEquals(DocValueFormat.IP.format(expected.v1()), bucket.getFrom());
                }
                if (expected.v2() == null) {
                    assertNull(bucket.getTo());
                } else {
                    assertEquals(DocValueFormat.IP.format(expected.v2()), bucket.getTo());
                }
                assertEquals(expectedCounts[i], bucket.getDocCount());
            }
        }
    }
}
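The test works because InetAddressPoint.encode maps both IPv4 and IPv6 addresses to a fixed-width byte[] whose unsigned byte order matches address order, so encoded IPs can be compared and range-checked as plain BytesRef doc values. A minimal sketch of that encoding pattern outside the test harness (field name, addresses, and ByteBuffersDirectory are illustrative assumptions):

import java.net.InetAddress;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.InetAddressPoint;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;

public class IpDocValuesSketch {
    public static void main(String[] args) throws Exception {
        try (Directory dir = new ByteBuffersDirectory();
                IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) {
            InetAddress addr = InetAddress.getByName("10.0.0.42");
            Document doc = new Document();
            // encode() yields the same fixed-width bytes for v4 and v6, so the
            // doc values sort and compare consistently across address families
            doc.add(new SortedSetDocValuesField("ip", new BytesRef(InetAddressPoint.encode(addr))));
            // a point field under the same name supports fast range queries
            doc.add(new InetAddressPoint("ip", addr));
            writer.addDocument(doc);
            writer.commit();
            try (DirectoryReader reader = DirectoryReader.open(dir)) {
                IndexSearcher searcher = new IndexSearcher(reader);
                Query q = InetAddressPoint.newRangeQuery("ip",
                        InetAddress.getByName("10.0.0.0"), InetAddress.getByName("10.0.0.255"));
                System.out.println("hits: " + searcher.count(q)); // hits: 1
            }
        }
    }
}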
Use of org.apache.lucene.document.SortedSetDocValuesField in project elasticsearch by elastic.
Class TermsAggregatorTests, method testTermsAggregator:
public void testTermsAggregator() throws Exception {
    Directory directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
    Document document = new Document();
    document.add(new SortedSetDocValuesField("string", new BytesRef("a")));
    document.add(new SortedSetDocValuesField("string", new BytesRef("b")));
    indexWriter.addDocument(document);
    document = new Document();
    document.add(new SortedSetDocValuesField("string", new BytesRef("c")));
    document.add(new SortedSetDocValuesField("string", new BytesRef("a")));
    indexWriter.addDocument(document);
    document = new Document();
    document.add(new SortedSetDocValuesField("string", new BytesRef("b")));
    document.add(new SortedSetDocValuesField("string", new BytesRef("d")));
    indexWriter.addDocument(document);
    indexWriter.close();
    IndexReader indexReader = DirectoryReader.open(directory);
    // We do not use LuceneTestCase.newSearcher because we need a DirectoryReader
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    for (TermsAggregatorFactory.ExecutionMode executionMode : TermsAggregatorFactory.ExecutionMode.values()) {
        TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name", ValueType.STRING)
                .executionHint(executionMode.toString())
                .field("string")
                .order(Terms.Order.term(true));
        MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType();
        fieldType.setName("string");
        fieldType.setHasDocValues(true);
        try (TermsAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType)) {
            aggregator.preCollection();
            indexSearcher.search(new MatchAllDocsQuery(), aggregator);
            aggregator.postCollection();
            Terms result = (Terms) aggregator.buildAggregation(0L);
            assertEquals(4, result.getBuckets().size());
            assertEquals("a", result.getBuckets().get(0).getKeyAsString());
            assertEquals(2L, result.getBuckets().get(0).getDocCount());
            assertEquals("b", result.getBuckets().get(1).getKeyAsString());
            assertEquals(2L, result.getBuckets().get(1).getDocCount());
            assertEquals("c", result.getBuckets().get(2).getKeyAsString());
            assertEquals(1L, result.getBuckets().get(2).getDocCount());
            assertEquals("d", result.getBuckets().get(3).getKeyAsString());
            assertEquals(1L, result.getBuckets().get(3).getDocCount());
        }
    }
    indexReader.close();
    directory.close();
}
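For intuition, a hypothetical plain-Lucene tally over the same three documents reproduces the bucket counts asserted above (a=2, b=2, c=1, d=1): ordinals within one document are distinct, so visiting each document's ords counts every term at most once per document. The iteration style assumes Lucene 7/8, where SortedSetDocValues is a DocIdSetIterator and nextOrd() terminates with NO_MORE_ORDS:

import java.util.Map;
import java.util.TreeMap;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;

public class TermsCountSketch {
    public static void main(String[] args) throws Exception {
        try (Directory dir = new ByteBuffersDirectory()) {
            try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig())) {
                for (String[] values : new String[][] { {"a", "b"}, {"c", "a"}, {"b", "d"} }) {
                    Document doc = new Document();
                    for (String v : values) {
                        doc.add(new SortedSetDocValuesField("string", new BytesRef(v)));
                    }
                    w.addDocument(doc);
                }
            }
            Map<String, Long> counts = new TreeMap<>();
            try (DirectoryReader reader = DirectoryReader.open(dir)) {
                for (LeafReaderContext ctx : reader.leaves()) {
                    SortedSetDocValues dv = ctx.reader().getSortedSetDocValues("string");
                    if (dv == null) {
                        continue;
                    }
                    // each term is counted at most once per document, mirroring
                    // the per-bucket doc counts of the terms aggregation
                    while (dv.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                        long ord;
                        while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                            counts.merge(dv.lookupOrd(ord).utf8ToString(), 1L, Long::sum);
                        }
                    }
                }
            }
            System.out.println(counts); // {a=2, b=2, c=1, d=1}
        }
    }
}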
Use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.
Class TestDocValuesQueries, method doTestDuelPointRangeSortedRangeQuery:
private void doTestDuelPointRangeSortedRangeQuery(boolean sortedSet, int maxValuesPerDoc) throws IOException {
    final int iters = atLeast(10);
    for (int iter = 0; iter < iters; ++iter) {
        Directory dir = newDirectory();
        RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
        final int numDocs = atLeast(100);
        for (int i = 0; i < numDocs; ++i) {
            Document doc = new Document();
            final int numValues = TestUtil.nextInt(random(), 0, maxValuesPerDoc);
            for (int j = 0; j < numValues; ++j) {
                final long value = TestUtil.nextLong(random(), -100, 10000);
                byte[] encoded = new byte[Long.BYTES];
                LongPoint.encodeDimension(value, encoded, 0);
                if (sortedSet) {
                    doc.add(new SortedSetDocValuesField("dv", new BytesRef(encoded)));
                } else {
                    doc.add(new SortedDocValuesField("dv", new BytesRef(encoded)));
                }
                doc.add(new LongPoint("idx", value));
            }
            iw.addDocument(doc);
        }
        if (random().nextBoolean()) {
            iw.deleteDocuments(LongPoint.newRangeQuery("idx", 0L, 10L));
        }
        final IndexReader reader = iw.getReader();
        final IndexSearcher searcher = newSearcher(reader, false);
        iw.close();
        for (int i = 0; i < 100; ++i) {
            long min = random().nextBoolean() ? Long.MIN_VALUE : TestUtil.nextLong(random(), -100, 10000);
            long max = random().nextBoolean() ? Long.MAX_VALUE : TestUtil.nextLong(random(), -100, 10000);
            byte[] encodedMin = new byte[Long.BYTES];
            byte[] encodedMax = new byte[Long.BYTES];
            LongPoint.encodeDimension(min, encodedMin, 0);
            LongPoint.encodeDimension(max, encodedMax, 0);
            boolean includeMin = true;
            boolean includeMax = true;
            if (random().nextBoolean()) {
                includeMin = false;
                min++;
            }
            if (random().nextBoolean()) {
                includeMax = false;
                max--;
            }
            final Query q1 = LongPoint.newRangeQuery("idx", min, max);
            final Query q2;
            if (sortedSet) {
                q2 = SortedSetDocValuesField.newRangeQuery("dv",
                        min == Long.MIN_VALUE && random().nextBoolean() ? null : new BytesRef(encodedMin),
                        max == Long.MAX_VALUE && random().nextBoolean() ? null : new BytesRef(encodedMax),
                        includeMin, includeMax);
            } else {
                q2 = SortedDocValuesField.newRangeQuery("dv",
                        min == Long.MIN_VALUE && random().nextBoolean() ? null : new BytesRef(encodedMin),
                        max == Long.MAX_VALUE && random().nextBoolean() ? null : new BytesRef(encodedMax),
                        includeMin, includeMax);
            }
            assertSameMatches(searcher, q1, q2, false);
        }
        reader.close();
        dir.close();
    }
}
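The duel is fair because LongPoint.encodeDimension produces sortable bytes: the unsigned byte order of the encoded values matches the numeric order of the longs, so a BytesRef range over the doc values selects the same documents as the point range query. A minimal sketch of that pairing; note that the doc-values factory was renamed newSlowRangeQuery in Lucene 7, while the 6.x-era snippet above calls it newRangeQuery (directory choice and sample values are illustrative):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;

public class RangeDuelSketch {
    // encode a long into the sortable byte layout LongPoint uses, so the
    // unsigned byte order of the BytesRef matches the numeric order of the long
    static BytesRef encode(long value) {
        byte[] bytes = new byte[Long.BYTES];
        LongPoint.encodeDimension(value, bytes, 0);
        return new BytesRef(bytes);
    }

    public static void main(String[] args) throws Exception {
        try (Directory dir = new ByteBuffersDirectory()) {
            try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig())) {
                for (long value : new long[] { 3L, 42L, 7000L }) {
                    Document doc = new Document();
                    doc.add(new LongPoint("idx", value));
                    doc.add(new SortedSetDocValuesField("dv", encode(value)));
                    w.addDocument(doc);
                }
            }
            try (DirectoryReader reader = DirectoryReader.open(dir)) {
                IndexSearcher searcher = new IndexSearcher(reader);
                Query points = LongPoint.newRangeQuery("idx", 0L, 100L);
                Query docValues = SortedSetDocValuesField.newSlowRangeQuery(
                        "dv", encode(0L), encode(100L), true, true);
                // both queries match the same documents: the ones holding 3 and 42
                System.out.println(searcher.count(points) + " == " + searcher.count(docValues));
            }
        }
    }
}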
Use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.
Class TestMultiDocValues, method testSortedSet:
public void testSortedSet() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(random(), null);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
    int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
    for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        int numValues = random().nextInt(5);
        for (int j = 0; j < numValues; j++) {
            doc.add(new SortedSetDocValuesField("bytes", new BytesRef(TestUtil.randomUnicodeString(random()))));
        }
        iw.addDocument(doc);
        if (random().nextInt(17) == 0) {
            iw.commit();
        }
    }
    DirectoryReader ir = iw.getReader();
    iw.forceMerge(1);
    DirectoryReader ir2 = iw.getReader();
    LeafReader merged = getOnlyLeafReader(ir2);
    iw.close();
    SortedSetDocValues multi = MultiDocValues.getSortedSetValues(ir, "bytes");
    SortedSetDocValues single = merged.getSortedSetDocValues("bytes");
    if (multi == null) {
        assertNull(single);
    } else {
        assertEquals(single.getValueCount(), multi.getValueCount());
        // check values
        for (long i = 0; i < single.getValueCount(); i++) {
            final BytesRef expected = BytesRef.deepCopyOf(single.lookupOrd(i));
            final BytesRef actual = multi.lookupOrd(i);
            assertEquals(expected, actual);
        }
        // check ord list
        while (true) {
            int docID = single.nextDoc();
            assertEquals(docID, multi.nextDoc());
            if (docID == NO_MORE_DOCS) {
                break;
            }
            ArrayList<Long> expectedList = new ArrayList<>();
            long ord;
            while ((ord = single.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                expectedList.add(ord);
            }
            int upto = 0;
            while ((ord = multi.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                assertEquals(expectedList.get(upto).longValue(), ord);
                upto++;
            }
            assertEquals(expectedList.size(), upto);
        }
    }
    testRandomAdvance(merged.getSortedSetDocValues("bytes"), MultiDocValues.getSortedSetValues(ir, "bytes"));
    testRandomAdvanceExact(merged.getSortedSetDocValues("bytes"), MultiDocValues.getSortedSetValues(ir, "bytes"), merged.maxDoc());
    ir.close();
    ir2.close();
    dir.close();
}
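MultiDocValues.getSortedSetValues supplies the index-wide view the test checks: it stitches each segment's private ordinal space into one merged, globally sorted ordinal space, which is why its ords and values must line up with the single-segment reader obtained after forceMerge(1). A minimal sketch of requesting that merged view (field name "bytes" kept from the test; ByteBuffersDirectory assumes Lucene 8+):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;

public class MergedOrdinalsSketch {
    public static void main(String[] args) throws Exception {
        try (Directory dir = new ByteBuffersDirectory()) {
            try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig())) {
                for (String value : new String[] { "banana", "apple", "cherry" }) {
                    Document doc = new Document();
                    doc.add(new SortedSetDocValuesField("bytes", new BytesRef(value)));
                    w.addDocument(doc);
                    w.commit(); // one segment per document, so the multi view has to merge
                }
            }
            try (DirectoryReader reader = DirectoryReader.open(dir)) {
                // index-wide view over all segments with one global ordinal space
                SortedSetDocValues multi = MultiDocValues.getSortedSetValues(reader, "bytes");
                for (long ord = 0; ord < multi.getValueCount(); ord++) {
                    System.out.println(ord + " -> " + multi.lookupOrd(ord).utf8ToString());
                }
                // prints ordinals in term order: 0 -> apple, 1 -> banana, 2 -> cherry
            }
        }
    }
}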