Use of org.apache.lucene.util.BytesRefHash in project lucene-solr by apache.
Class BaseDocValuesFormatTestCase, method testRandomSortedBytes:
public void testRandomSortedBytes() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, cfg);
  int numDocs = atLeast(100);
  BytesRefHash hash = new BytesRefHash();
  Map<String, String> docToString = new HashMap<>();
  int maxLength = TestUtil.nextInt(random(), 1, 50);
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    doc.add(newTextField("id", "" + i, Field.Store.YES));
    String string = TestUtil.randomRealisticUnicodeString(random(), 1, maxLength);
    BytesRef br = new BytesRef(string);
    doc.add(new SortedDocValuesField("field", br));
    hash.add(br); // BytesRefHash deduplicates, so hash.size() tracks the distinct value count
    docToString.put("" + i, string);
    w.addDocument(doc);
  }
  if (rarely()) {
    w.commit();
  }
  // a batch of documents with no doc values for "field" at all
  int numDocsNoValue = atLeast(10);
  for (int i = 0; i < numDocsNoValue; i++) {
    Document doc = new Document();
    doc.add(newTextField("id", "noValue", Field.Store.YES));
    w.addDocument(doc);
  }
  if (rarely()) {
    w.commit();
  }
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    // string concatenation, not addition: ids like "0100" that cannot collide with the first batch
    String id = "" + i + numDocs;
    doc.add(newTextField("id", id, Field.Store.YES));
    String string = TestUtil.randomRealisticUnicodeString(random(), 1, maxLength);
    BytesRef br = new BytesRef(string);
    hash.add(br);
    docToString.put(id, string);
    doc.add(new SortedDocValuesField("field", br));
    w.addDocument(doc);
  }
  w.commit();
  IndexReader reader = w.getReader();
  SortedDocValues docValues = MultiDocValues.getSortedValues(reader, "field");
  int[] sort = hash.sort(); // ids in unsigned-byte order, the same order doc values use for ords
  BytesRef expected = new BytesRef();
  assertEquals(hash.size(), docValues.getValueCount());
  for (int i = 0; i < hash.size(); i++) {
    hash.get(sort[i], expected);
    final BytesRef actual = docValues.lookupOrd(i);
    assertEquals(expected.utf8ToString(), actual.utf8ToString());
    int ord = docValues.lookupTerm(expected);
    assertEquals(i, ord);
  }
  Set<Entry<String, String>> entrySet = docToString.entrySet();
  for (Entry<String, String> entry : entrySet) {
    // pk lookup
    PostingsEnum termPostingsEnum = TestUtil.docs(random(), reader, "id", new BytesRef(entry.getKey()), null, 0);
    int docId = termPostingsEnum.nextDoc();
    expected = new BytesRef(entry.getValue());
    // fetch a fresh iterator per lookup, since advance() only moves forward
    docValues = MultiDocValues.getSortedValues(reader, "field");
    assertEquals(docId, docValues.advance(docId));
    final BytesRef actual = docValues.binaryValue();
    assertEquals(expected, actual);
  }
  reader.close();
  w.close();
  dir.close();
}
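
The test above leans on three BytesRefHash properties: add() deduplicates, sort() returns the value ids ordered by unsigned byte comparison (matching sorted doc values ord order), and get() retrieves a value by id. Below is a minimal standalone sketch of just those operations; the class name and sample values are illustrative, not from the project:

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;

public class BytesRefHashSketch {
  public static void main(String[] args) {
    BytesRefHash hash = new BytesRefHash();
    // add() assigns a new id, or returns -(id + 1) when the value is already
    // present, so duplicates are stored only once; this is why the test can
    // compare hash.size() against SortedDocValues.getValueCount().
    hash.add(new BytesRef("banana"));
    hash.add(new BytesRef("apple"));
    hash.add(new BytesRef("banana")); // duplicate: negative return, size stays 2
    // sort() returns ids in unsigned-byte order. It is destructive: the hash
    // must be cleared before reuse, though get() and size() still work.
    int[] sortedIds = hash.sort();
    BytesRef scratch = new BytesRef();
    for (int i = 0; i < hash.size(); i++) {
      hash.get(sortedIds[i], scratch); // fills scratch with the i-th smallest value
      System.out.println(i + " -> " + scratch.utf8ToString());
    }
    // prints: 0 -> apple, 1 -> banana
  }
}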
Use of org.apache.lucene.util.BytesRefHash in project lucene-solr by apache.
Class MemoryIndex, method storeDocValues:
private void storeDocValues(Info info, DocValuesType docValuesType, Object docValuesValue) {
  String fieldName = info.fieldInfo.name;
  DocValuesType existingDocValuesType = info.fieldInfo.getDocValuesType();
  if (existingDocValuesType == DocValuesType.NONE) {
    // first time we add doc values for this field: rebuild the FieldInfo with the new type.
    // FieldInfo takes (name, number, storeTermVectors, omitNorms, storePayloads, ...);
    // the fourth argument is omitsNorms(), not a second hasPayloads().
    info.fieldInfo = new FieldInfo(info.fieldInfo.name, info.fieldInfo.number,
        info.fieldInfo.hasVectors(), info.fieldInfo.omitsNorms(), info.fieldInfo.hasPayloads(),
        info.fieldInfo.getIndexOptions(), docValuesType, -1, info.fieldInfo.attributes(),
        info.fieldInfo.getPointDimensionCount(), info.fieldInfo.getPointNumBytes());
  } else if (existingDocValuesType != docValuesType) {
    throw new IllegalArgumentException("Can't add [" + docValuesType + "] doc values field [" + fieldName + "], because [" + existingDocValuesType + "] doc values field already exists");
  }
  switch (docValuesType) {
    case NUMERIC:
      if (info.numericProducer.dvLongValues != null) {
        throw new IllegalArgumentException("Only one value per field allowed for [" + docValuesType + "] doc values field [" + fieldName + "]");
      }
      info.numericProducer.dvLongValues = new long[] { (long) docValuesValue };
      info.numericProducer.count++;
      break;
    case SORTED_NUMERIC:
      // multi-valued: grow the array as values arrive
      if (info.numericProducer.dvLongValues == null) {
        info.numericProducer.dvLongValues = new long[4];
      }
      info.numericProducer.dvLongValues = ArrayUtil.grow(info.numericProducer.dvLongValues, info.numericProducer.count + 1);
      info.numericProducer.dvLongValues[info.numericProducer.count++] = (long) docValuesValue;
      break;
    case BINARY:
      if (info.binaryProducer.dvBytesValuesSet != null) {
        throw new IllegalArgumentException("Only one value per field allowed for [" + docValuesType + "] doc values field [" + fieldName + "]");
      }
      info.binaryProducer.dvBytesValuesSet = new BytesRefHash(byteBlockPool);
      info.binaryProducer.dvBytesValuesSet.add((BytesRef) docValuesValue);
      break;
    case SORTED:
      if (info.binaryProducer.dvBytesValuesSet != null) {
        throw new IllegalArgumentException("Only one value per field allowed for [" + docValuesType + "] doc values field [" + fieldName + "]");
      }
      info.binaryProducer.dvBytesValuesSet = new BytesRefHash(byteBlockPool);
      info.binaryProducer.dvBytesValuesSet.add((BytesRef) docValuesValue);
      break;
    case SORTED_SET:
      // multi-valued: all values for the field share one BytesRefHash, which deduplicates them
      if (info.binaryProducer.dvBytesValuesSet == null) {
        info.binaryProducer.dvBytesValuesSet = new BytesRefHash(byteBlockPool);
      }
      info.binaryProducer.dvBytesValuesSet.add((BytesRef) docValuesValue);
      break;
    default:
      throw new UnsupportedOperationException("unknown doc values type [" + docValuesType + "]");
  }
}
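
For a multi-valued SORTED_SET field, the method above funnels every value into a single BytesRefHash, so duplicate values collapse onto one ord. A minimal sketch of how that behaves through the public MemoryIndex API follows; the field name "tags", the sample values, and the choice of StandardAnalyzer are illustrative assumptions, and the iterator-style doc values calls assume the same Lucene 7 API used elsewhere on this page:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.util.BytesRef;

public class MemoryIndexDocValuesSketch {
  public static void main(String[] args) throws Exception {
    MemoryIndex mi = new MemoryIndex();
    StandardAnalyzer analyzer = new StandardAnalyzer(); // arbitrary analyzer choice
    // Each addField call routes through storeDocValues(); for SORTED_SET the
    // values accumulate in one BytesRefHash, so the duplicate "search" below
    // is stored only once.
    mi.addField(new SortedSetDocValuesField("tags", new BytesRef("search")), analyzer);
    mi.addField(new SortedSetDocValuesField("tags", new BytesRef("lucene")), analyzer);
    mi.addField(new SortedSetDocValuesField("tags", new BytesRef("search")), analyzer);
    // A MemoryIndex holds exactly one document, doc 0.
    LeafReader reader = (LeafReader) mi.createSearcher().getIndexReader();
    SortedSetDocValues dv = reader.getSortedSetDocValues("tags");
    if (dv.advance(0) == 0) {
      long ord;
      while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
        System.out.println(dv.lookupOrd(ord).utf8ToString()); // prints: lucene, search
      }
    }
    reader.close();
  }
}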