Use of org.apache.lucene.util.BytesRefHash in project lucene-solr by apache.
Class BaseDocValuesFormatTestCase, method testRandomSortedBytes:
public void testRandomSortedBytes() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, cfg);
  int numDocs = atLeast(100);
  BytesRefHash hash = new BytesRefHash();
  Map<String, String> docToString = new HashMap<>();
  int maxLength = TestUtil.nextInt(random(), 1, 50);
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    doc.add(newTextField("id", "" + i, Field.Store.YES));
    String string = TestUtil.randomRealisticUnicodeString(random(), 1, maxLength);
    BytesRef br = new BytesRef(string);
    doc.add(new SortedDocValuesField("field", br));
    hash.add(br); // BytesRefHash deduplicates, so hash.size() tracks the distinct value count
    docToString.put("" + i, string);
    w.addDocument(doc);
  }
  if (rarely()) {
    w.commit();
  }
  // a batch of documents with no doc values for "field" at all
  int numDocsNoValue = atLeast(10);
  for (int i = 0; i < numDocsNoValue; i++) {
    Document doc = new Document();
    doc.add(newTextField("id", "noValue", Field.Store.YES));
    w.addDocument(doc);
  }
  if (rarely()) {
    w.commit();
  }
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    // string concatenation, not addition: ids like "0100" that cannot collide with the first batch
    String id = "" + i + numDocs;
    doc.add(newTextField("id", id, Field.Store.YES));
    String string = TestUtil.randomRealisticUnicodeString(random(), 1, maxLength);
    BytesRef br = new BytesRef(string);
    hash.add(br);
    docToString.put(id, string);
    doc.add(new SortedDocValuesField("field", br));
    w.addDocument(doc);
  }
  w.commit();
  IndexReader reader = w.getReader();
  SortedDocValues docValues = MultiDocValues.getSortedValues(reader, "field");
  int[] sort = hash.sort(); // ids in unsigned-byte order, the same order doc values use for ords
  BytesRef expected = new BytesRef();
  assertEquals(hash.size(), docValues.getValueCount());
  for (int i = 0; i < hash.size(); i++) {
    hash.get(sort[i], expected);
    final BytesRef actual = docValues.lookupOrd(i);
    assertEquals(expected.utf8ToString(), actual.utf8ToString());
    int ord = docValues.lookupTerm(expected);
    assertEquals(i, ord);
  }
  Set<Entry<String, String>> entrySet = docToString.entrySet();
  for (Entry<String, String> entry : entrySet) {
    // pk lookup
    PostingsEnum termPostingsEnum = TestUtil.docs(random(), reader, "id", new BytesRef(entry.getKey()), null, 0);
    int docId = termPostingsEnum.nextDoc();
    expected = new BytesRef(entry.getValue());
    // fetch a fresh iterator per lookup, since advance() only moves forward
    docValues = MultiDocValues.getSortedValues(reader, "field");
    assertEquals(docId, docValues.advance(docId));
    final BytesRef actual = docValues.binaryValue();
    assertEquals(expected, actual);
  }
  reader.close();
  w.close();
  dir.close();
}
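
The test above leans on three BytesRefHash properties: add() deduplicates, sort() returns the value ids ordered by unsigned byte comparison (matching sorted doc values ord order), and get() retrieves a value by id. Below is a minimal standalone sketch of just those operations; the class name and sample values are illustrative, not from the project:

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;

public class BytesRefHashSketch {
  public static void main(String[] args) {
    BytesRefHash hash = new BytesRefHash();
    // add() assigns a new id, or returns -(id + 1) when the value is already
    // present, so duplicates are stored only once; this is why the test can
    // compare hash.size() against SortedDocValues.getValueCount().
    hash.add(new BytesRef("banana"));
    hash.add(new BytesRef("apple"));
    hash.add(new BytesRef("banana")); // duplicate: negative return, size stays 2
    // sort() returns ids in unsigned-byte order. It is destructive: the hash
    // must be cleared before reuse, though get() and size() still work.
    int[] sortedIds = hash.sort();
    BytesRef scratch = new BytesRef();
    for (int i = 0; i < hash.size(); i++) {
      hash.get(sortedIds[i], scratch); // fills scratch with the i-th smallest value
      System.out.println(i + " -> " + scratch.utf8ToString());
    }
    // prints: 0 -> apple, 1 -> banana
  }
}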
Use of org.apache.lucene.util.BytesRefHash in project lucene-solr by apache.
Class MemoryIndex, method storeDocValues:
private void storeDocValues(Info info, DocValuesType docValuesType, Object docValuesValue) {
  String fieldName = info.fieldInfo.name;
  DocValuesType existingDocValuesType = info.fieldInfo.getDocValuesType();
  if (existingDocValuesType == DocValuesType.NONE) {
    // first time we add doc values for this field: rebuild the FieldInfo with the new type.
    // FieldInfo takes (name, number, storeTermVectors, omitNorms, storePayloads, ...);
    // the fourth argument is omitsNorms(), not a second hasPayloads().
    info.fieldInfo = new FieldInfo(info.fieldInfo.name, info.fieldInfo.number,
        info.fieldInfo.hasVectors(), info.fieldInfo.omitsNorms(), info.fieldInfo.hasPayloads(),
        info.fieldInfo.getIndexOptions(), docValuesType, -1, info.fieldInfo.attributes(),
        info.fieldInfo.getPointDimensionCount(), info.fieldInfo.getPointNumBytes());
  } else if (existingDocValuesType != docValuesType) {
    throw new IllegalArgumentException("Can't add [" + docValuesType + "] doc values field [" + fieldName + "], because [" + existingDocValuesType + "] doc values field already exists");
  }
  switch (docValuesType) {
    case NUMERIC:
      if (info.numericProducer.dvLongValues != null) {
        throw new IllegalArgumentException("Only one value per field allowed for [" + docValuesType + "] doc values field [" + fieldName + "]");
      }
      info.numericProducer.dvLongValues = new long[] { (long) docValuesValue };
      info.numericProducer.count++;
      break;
    case SORTED_NUMERIC:
      // multi-valued: grow the array as values arrive
      if (info.numericProducer.dvLongValues == null) {
        info.numericProducer.dvLongValues = new long[4];
      }
      info.numericProducer.dvLongValues = ArrayUtil.grow(info.numericProducer.dvLongValues, info.numericProducer.count + 1);
      info.numericProducer.dvLongValues[info.numericProducer.count++] = (long) docValuesValue;
      break;
    case BINARY:
      if (info.binaryProducer.dvBytesValuesSet != null) {
        throw new IllegalArgumentException("Only one value per field allowed for [" + docValuesType + "] doc values field [" + fieldName + "]");
      }
      info.binaryProducer.dvBytesValuesSet = new BytesRefHash(byteBlockPool);
      info.binaryProducer.dvBytesValuesSet.add((BytesRef) docValuesValue);
      break;
    case SORTED:
      if (info.binaryProducer.dvBytesValuesSet != null) {
        throw new IllegalArgumentException("Only one value per field allowed for [" + docValuesType + "] doc values field [" + fieldName + "]");
      }
      info.binaryProducer.dvBytesValuesSet = new BytesRefHash(byteBlockPool);
      info.binaryProducer.dvBytesValuesSet.add((BytesRef) docValuesValue);
      break;
    case SORTED_SET:
      // multi-valued: all values for the field share one BytesRefHash, which deduplicates them
      if (info.binaryProducer.dvBytesValuesSet == null) {
        info.binaryProducer.dvBytesValuesSet = new BytesRefHash(byteBlockPool);
      }
      info.binaryProducer.dvBytesValuesSet.add((BytesRef) docValuesValue);
      break;
    default:
      throw new UnsupportedOperationException("unknown doc values type [" + docValuesType + "]");
  }
}
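
For a multi-valued SORTED_SET field, the method above funnels every value into a single BytesRefHash, so duplicate values collapse onto one ord. A minimal sketch of how that behaves through the public MemoryIndex API follows; the field name "tags", the sample values, and the choice of StandardAnalyzer are illustrative assumptions, and the iterator-style doc values calls assume the same Lucene 7 API used elsewhere on this page:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.util.BytesRef;

public class MemoryIndexDocValuesSketch {
  public static void main(String[] args) throws Exception {
    MemoryIndex mi = new MemoryIndex();
    StandardAnalyzer analyzer = new StandardAnalyzer(); // arbitrary analyzer choice
    // Each addField call routes through storeDocValues(); for SORTED_SET the
    // values accumulate in one BytesRefHash, so the duplicate "search" below
    // is stored only once.
    mi.addField(new SortedSetDocValuesField("tags", new BytesRef("search")), analyzer);
    mi.addField(new SortedSetDocValuesField("tags", new BytesRef("lucene")), analyzer);
    mi.addField(new SortedSetDocValuesField("tags", new BytesRef("search")), analyzer);
    // A MemoryIndex holds exactly one document, doc 0.
    LeafReader reader = (LeafReader) mi.createSearcher().getIndexReader();
    SortedSetDocValues dv = reader.getSortedSetDocValues("tags");
    if (dv.advance(0) == 0) {
      long ord;
      while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
        System.out.println(dv.lookupOrd(ord).utf8ToString()); // prints: lucene, search
      }
    }
    reader.close();
  }
}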