Use of org.apache.lucene.index.RandomAccessOrds in project elasticsearch by elastic.
Source: class MultiOrdinalsTests, method testRandomValues.
/**
 * Feeds a random set of unique (ordinal, document id) pairs into an
 * {@code OrdinalsBuilder}, builds the multi-valued ordinals from it and checks
 * that every document that received a pair reports exactly the ordinals that
 * were added for it, in ascending order.
 *
 * <p>Documents whose ids lie strictly between two consecutive ids that
 * received pairs are asserted to have no ordinal in the single-valued
 * (MIN) view.
 *
 * @throws IOException declared by the original test signature
 */
public void testRandomValues() throws IOException {
    Random random = random();
    int numDocs = 100 + random.nextInt(1000);
    int numOrdinals = 1 + random.nextInt(200);
    int numValues = 100 + random.nextInt(100000);
    OrdinalsBuilder builder = new OrdinalsBuilder(numDocs);

    // A set de-duplicates the random pairs before they are handed to the builder.
    Set<OrdAndId> ordsAndIdSet = new HashSet<>();
    for (int i = 0; i < numValues; i++) {
        ordsAndIdSet.add(new OrdAndId(random.nextInt(numOrdinals), random.nextInt(numDocs)));
    }
    List<OrdAndId> ordsAndIds = new ArrayList<>(ordsAndIdSet);

    // Order by ordinal first, then by document id: the builder is advanced one
    // ordinal at a time and documents are added to the current ordinal.
    Collections.sort(ordsAndIds, new Comparator<OrdAndId>() {
        @Override
        public int compare(OrdAndId o1, OrdAndId o2) {
            int byOrd = Long.compare(o1.ord, o2.ord);
            return byOrd != 0 ? byOrd : Integer.compare(o1.id, o2.id);
        }
    });

    long lastOrd = -1;
    for (OrdAndId ordAndId : ordsAndIds) {
        if (lastOrd != ordAndId.ord) {
            lastOrd = ordAndId.ord;
            builder.nextOrdinal();
        }
        // Remap to the builder's ordinal: the random ordinals may have gaps,
        // while the builder only advances by one per distinct value.
        ordAndId.ord = builder.currentOrdinal();
        builder.addDoc(ordAndId.id);
    }

    // Re-order by document id, then by (remapped) ordinal, so the expected
    // ordinals of each document form one contiguous, ascending run.
    Collections.sort(ordsAndIds, new Comparator<OrdAndId>() {
        @Override
        public int compare(OrdAndId o1, OrdAndId o2) {
            int byId = Integer.compare(o1.id, o2.id);
            return byId != 0 ? byId : Long.compare(o1.ord, o2.ord);
        }
    });

    Ordinals ords = creationMultiOrdinals(builder);
    RandomAccessOrds docs = ords.ordinals();
    final SortedDocValues singleOrds = MultiValueMode.MIN.select(docs);

    int docId = ordsAndIds.get(0).id;
    List<Long> docOrds = new ArrayList<>();
    for (OrdAndId ordAndId : ordsAndIds) {
        if (docId == ordAndId.id) {
            docOrds.add(ordAndId.ord);
            continue;
        }
        // A new document id starts: verify the run collected for the previous one.
        assertDocOrdinals(docs, singleOrds, docId, docOrds);
        // No pair was added for the ids strictly between the two documents.
        for (int i = docId + 1; i < ordAndId.id; i++) {
            assertThat((long) singleOrds.getOrd(i), equalTo(RandomAccessOrds.NO_MORE_ORDS));
        }
        docId = ordAndId.id;
        docOrds.clear();
        docOrds.add(ordAndId.ord);
    }
    // The loop above only verifies a document when the next one is reached, so
    // the final document's run has to be verified explicitly.
    assertDocOrdinals(docs, singleOrds, docId, docOrds);
}

/**
 * Asserts that {@code docId} exposes exactly the ordinals in {@code docOrds}:
 * the single-valued view returns the first expected ordinal, the cardinality
 * matches, {@code nextOrd()} yields the expected sequence, and the
 * {@code assertIter} check passes for the same values. Does nothing when
 * {@code docOrds} is empty.
 */
private void assertDocOrdinals(RandomAccessOrds docs, SortedDocValues singleOrds, int docId, List<Long> docOrds)
        throws IOException {
    if (docOrds.isEmpty()) {
        return;
    }
    // docOrds is ascending, so its first entry is the expected minimum.
    assertThat((long) singleOrds.getOrd(docId), equalTo(docOrds.get(0)));
    docs.setDocument(docId);
    final int numOrds = docs.cardinality();
    assertThat(numOrds, equalTo(docOrds.size()));
    for (int i = 0; i < numOrds; i++) {
        assertThat(docs.nextOrd(), equalTo(docOrds.get(i)));
    }
    final long[] array = new long[docOrds.size()];
    for (int i = 0; i < array.length; i++) {
        array[i] = docOrds.get(i);
    }
    assertIter(docs, docId, array);
}
Use of org.apache.lucene.index.RandomAccessOrds in project elasticsearch by elastic.
Source: class FilterFieldDataTests, method testFilterByFrequency.
/**
 * Indexes 1000 documents and checks which terms remain in the loaded ordinals
 * field data for different {@code fielddataFrequencyFilter} settings.
 *
 * <p>Indexed values, as written by the loop below:
 * <ul>
 *   <li>every 100th document: "100" in high_freq, low_freq and med_freq</li>
 *   <li>every 10th document: "10" in high_freq and med_freq</li>
 *   <li>every 5th document: "5" in high_freq</li>
 * </ul>
 *
 * @throws Exception declared by the original test signature
 */
public void testFilterByFrequency() throws Exception {
    Random random = random();
    for (int i = 0; i < 1000; i++) {
        Document d = new Document();
        d.add(new StringField("id", "" + i, Field.Store.NO));
        if (i % 100 == 0) {
            d.add(new StringField("high_freq", "100", Field.Store.NO));
            d.add(new StringField("low_freq", "100", Field.Store.NO));
            d.add(new StringField("med_freq", "100", Field.Store.NO));
        }
        if (i % 10 == 0) {
            d.add(new StringField("high_freq", "10", Field.Store.NO));
            d.add(new StringField("med_freq", "10", Field.Store.NO));
        }
        if (i % 5 == 0) {
            d.add(new StringField("high_freq", "5", Field.Store.NO));
        }
        writer.addDocument(d);
    }
    writer.forceMerge(1, true);
    List<LeafReaderContext> contexts = refreshReader();
    final BuilderContext builderCtx = new BuilderContext(indexService.getIndexSettings().getSettings(), new ContentPath(1));

    // Each scenario clears the field data service first so the field is
    // loaded again with the filter under test.
    // NOTE(review): the filter arguments look like (min, max, minSegmentSize),
    // each bound given either as an absolute count or as a fraction - confirm
    // against TextFieldMapper.Builder.
    {
        ifdService.clear();
        MappedFieldType ft = new TextFieldMapper.Builder("high_freq")
                .fielddata(true)
                .fielddataFrequencyFilter(0, random.nextBoolean() ? 100 : 0.5d, 0)
                .build(builderCtx).fieldType();
        IndexOrdinalsFieldData fieldData = ifdService.getForField(ft);
        assertLoadedTerms(fieldData, contexts, "10", "100");
    }
    {
        ifdService.clear();
        MappedFieldType ft = new TextFieldMapper.Builder("high_freq")
                .fielddata(true)
                .fielddataFrequencyFilter(random.nextBoolean() ? 101 : 101d / 200.0d, 201, 100)
                .build(builderCtx).fieldType();
        IndexOrdinalsFieldData fieldData = ifdService.getForField(ft);
        assertLoadedTerms(fieldData, contexts, "5");
    }
    {
        // test # docs with value
        ifdService.clear();
        MappedFieldType ft = new TextFieldMapper.Builder("med_freq")
                .fielddata(true)
                .fielddataFrequencyFilter(random.nextBoolean() ? 101 : 101d / 200.0d, Integer.MAX_VALUE, 101)
                .build(builderCtx).fieldType();
        IndexOrdinalsFieldData fieldData = ifdService.getForField(ft);
        assertLoadedTerms(fieldData, contexts, "10", "100");
    }
    {
        // NOTE(review): this scenario repeats the previous one verbatim (same
        // field, same filter, same expectations); kept so the test still
        // exercises a second clear-and-reload cycle - confirm whether a
        // different filter was intended here.
        ifdService.clear();
        MappedFieldType ft = new TextFieldMapper.Builder("med_freq")
                .fielddata(true)
                .fielddataFrequencyFilter(random.nextBoolean() ? 101 : 101d / 200.0d, Integer.MAX_VALUE, 101)
                .build(builderCtx).fieldType();
        IndexOrdinalsFieldData fieldData = ifdService.getForField(ft);
        assertLoadedTerms(fieldData, contexts, "10", "100");
    }
}

/**
 * Loads {@code fieldData} directly for every leaf in {@code contexts} and
 * asserts that the loaded ordinals contain exactly {@code expectedTerms}, in
 * ordinal order starting at ordinal 0.
 *
 * <p>The actual value is passed first to {@code assertThat} so that a failure
 * message reports expected and actual the right way round.
 */
private void assertLoadedTerms(IndexOrdinalsFieldData fieldData, List<LeafReaderContext> contexts,
        String... expectedTerms) throws Exception {
    for (LeafReaderContext context : contexts) {
        AtomicOrdinalsFieldData loadDirect = fieldData.loadDirect(context);
        RandomAccessOrds bytesValues = loadDirect.getOrdinalsValues();
        assertThat(bytesValues.getValueCount(), equalTo((long) expectedTerms.length));
        for (int ord = 0; ord < expectedTerms.length; ord++) {
            assertThat(bytesValues.lookupOrd(ord).utf8ToString(), equalTo(expectedTerms[ord]));
        }
    }
}
Aggregations