Search in sources :

Example 56 with BinaryDocValuesField

use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by apache.

the class TestFieldCacheVsDocValues method testHugeBinaryValueLimit.

// TODO: get this out of here and into the deprecated codecs (4.0, 4.2)
/**
 * Indexes random binary doc values of at most {@code LARGE_BINARY_FIELD_LENGTH} bytes into
 * "field" and checks that each one reads back unchanged.
 *
 * <p>Skipped (via {@code assumeFalse}) when {@code codecAcceptsHugeBinaryValues("field")}
 * reports true.
 */
public void testHugeBinaryValueLimit() throws Exception {
    // Only DVFormats that impose a limit are interesting here
    assumeFalse("test requires codec with limits on max binary field length", codecAcceptsHugeBinaryValues("field"));
    Analyzer analyzer = new MockAnalyzer(random());
    // FSDirectory because SimpleText will consume gobs of
    // space when storing big binary values:
    Directory dir = newFSDirectory(createTempDir("hugeBinaryValues"));
    final boolean fixedSize = random().nextBoolean();
    // Sometimes make all values fixed length since some
    // codecs have different code paths for this:
    final int numDocs = fixedSize ? TestUtil.nextInt(random(), 10, 20) : TestUtil.nextInt(random(), 100, 200);
    final int fixedLength = fixedSize ? LARGE_BINARY_FIELD_LENGTH : 0;
    // we don't use RandomIndexWriter because it might add
    // more docvalues than we expect !!!!
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(analyzer));
    List<byte[]> expectedValues = new ArrayList<>();
    final long maxTotalBytes = 5 * 1024 * 1024;
    long totalBytes = 0;
    for (int docID = 0; docID < numDocs; docID++) {
        // The first variable-length doc always gets the maximum size; later ones get it
        // with probability 1/5, otherwise a random size up to the maximum.
        final int numBytes;
        if (fixedSize) {
            numBytes = fixedLength;
        } else if (docID == 0 || random().nextInt(5) == 3) {
            numBytes = LARGE_BINARY_FIELD_LENGTH;
        } else {
            numBytes = TestUtil.nextInt(random(), 1, LARGE_BINARY_FIELD_LENGTH);
        }
        totalBytes += numBytes;
        // Stop adding documents once the cumulative payload would exceed 5 MB
        if (totalBytes > maxTotalBytes) {
            break;
        }
        byte[] payload = new byte[numBytes];
        random().nextBytes(payload);
        expectedValues.add(payload);
        BytesRef value = new BytesRef(payload);
        value.length = payload.length;
        Document document = new Document();
        document.add(new BinaryDocValuesField("field", value));
        // The stored id lets the verification loop map a doc back to its expected bytes
        document.add(new StringField("id", Integer.toString(docID), Field.Store.YES));
        writer.addDocument(document);
    }
    DirectoryReader reader = DirectoryReader.open(writer);
    writer.close();
    LeafReader leaf = SlowCompositeReaderWrapper.wrap(reader);
    TestUtil.checkReader(leaf);
    BinaryDocValues values = FieldCache.DEFAULT.getTerms(leaf, "field");
    final int indexedCount = expectedValues.size();
    for (int docID = 0; docID < indexedCount; docID++) {
        assertEquals(docID, values.nextDoc());
        Document stored = leaf.document(docID);
        BytesRef actual = values.binaryValue();
        int originalId = Integer.parseInt(stored.get("id"));
        byte[] expected = expectedValues.get(originalId);
        assertEquals(expected.length, actual.length);
        assertEquals(new BytesRef(expected), actual);
    }
    leaf.close();
    dir.close();
}
Also used : LeafReader(org.apache.lucene.index.LeafReader) DirectoryReader(org.apache.lucene.index.DirectoryReader) ArrayList(java.util.ArrayList) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) StringField(org.apache.lucene.document.StringField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 57 with BinaryDocValuesField

use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by apache.

the class TestFieldCacheVsDocValues method testHugeBinaryValues.

// LUCENE-4853
/**
 * Indexes random binary doc values larger than 64 KB into "field" and then either verifies
 * that every value reads back unchanged, or accepts an {@link IllegalArgumentException}
 * whose message contains "is too large".
 *
 * <p>In the rejection case the test asserts that {@code codecAcceptsHugeBinaryValues("field")}
 * is false; in the success case it asserts that it is true. Any other
 * {@link IllegalArgumentException} (including one with a {@code null} message) is rethrown
 * unchanged.
 */
public void testHugeBinaryValues() throws Exception {
    Analyzer analyzer = new MockAnalyzer(random());
    // FSDirectory because SimpleText will consume gobs of
    // space when storing big binary values:
    Directory d = newFSDirectory(createTempDir("hugeBinaryValues"));
    boolean doFixed = random().nextBoolean();
    int numDocs;
    int fixedLength = 0;
    if (doFixed) {
        // Sometimes make all values fixed length since some
        // codecs have different code paths for this:
        numDocs = TestUtil.nextInt(random(), 10, 20);
        fixedLength = TestUtil.nextInt(random(), 65537, 256 * 1024);
    } else {
        numDocs = TestUtil.nextInt(random(), 100, 200);
    }
    IndexWriter w = new IndexWriter(d, newIndexWriterConfig(analyzer));
    List<byte[]> docBytes = new ArrayList<>();
    long totalBytes = 0;
    for (int docID = 0; docID < numDocs; docID++) {
        // we don't use RandomIndexWriter because it might add
        // more docvalues than we expect !!!!
        // Must be > 64KB in size to ensure more than 2 pages in
        // PagedBytes would be needed:
        int numBytes;
        if (doFixed) {
            numBytes = fixedLength;
        } else if (docID == 0 || random().nextInt(5) == 3) {
            numBytes = TestUtil.nextInt(random(), 65537, 3 * 1024 * 1024);
        } else {
            numBytes = TestUtil.nextInt(random(), 1, 1024 * 1024);
        }
        totalBytes += numBytes;
        // Stop adding documents once the cumulative payload would exceed 5 MB
        if (totalBytes > 5 * 1024 * 1024) {
            break;
        }
        byte[] bytes = new byte[numBytes];
        random().nextBytes(bytes);
        docBytes.add(bytes);
        Document doc = new Document();
        BytesRef b = new BytesRef(bytes);
        b.length = bytes.length;
        doc.add(new BinaryDocValuesField("field", b));
        // The stored id lets the verification loop map a doc back to its expected bytes
        doc.add(new StringField("id", "" + docID, Field.Store.YES));
        try {
            w.addDocument(doc);
        } catch (IllegalArgumentException iae) {
            // getMessage() may be null; never let that mask the original exception with an NPE
            String message = iae.getMessage();
            if (message == null || !message.contains("is too large")) {
                throw iae;
            }
            // OK: some codecs can't handle binary DV > 32K
            assertFalse(codecAcceptsHugeBinaryValues("field"));
            w.rollback();
            d.close();
            return;
        }
    }
    DirectoryReader r;
    try {
        r = DirectoryReader.open(w);
    } catch (IllegalArgumentException iae) {
        // Same null-safe handling as for addDocument above
        String message = iae.getMessage();
        if (message == null || !message.contains("is too large")) {
            throw iae;
        }
        assertFalse(codecAcceptsHugeBinaryValues("field"));
        // OK: some codecs can't handle binary DV > 32K
        w.rollback();
        d.close();
        return;
    }
    w.close();
    LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
    TestUtil.checkReader(ar);
    BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field");
    for (int docID = 0; docID < docBytes.size(); docID++) {
        Document doc = ar.document(docID);
        assertEquals(docID, s.nextDoc());
        BytesRef bytes = s.binaryValue();
        byte[] expected = docBytes.get(Integer.parseInt(doc.get("id")));
        assertEquals(expected.length, bytes.length);
        assertEquals(new BytesRef(expected), bytes);
    }
    // Reaching this point without a rejection means the codec must claim huge-value support
    assertTrue(codecAcceptsHugeBinaryValues("field"));
    ar.close();
    d.close();
}
Also used : LeafReader(org.apache.lucene.index.LeafReader) DirectoryReader(org.apache.lucene.index.DirectoryReader) ArrayList(java.util.ArrayList) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) StringField(org.apache.lucene.document.StringField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 58 with BinaryDocValuesField

use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by apache.

the class FacetsConfig method processFacetFields.

/**
 * For each index field in {@code byField}, adds to {@code doc} one drill-down
 * {@link StringField} per path prefix of every facet label, plus a single
 * {@link BinaryDocValuesField} holding the collected ordinals as encoded by
 * {@code dedupAndEncode}.
 *
 * @param taxoWriter taxonomy writer used to obtain category and parent ordinals
 * @param byField facet fields grouped by the index field name they are written under
 * @param doc document that receives the generated fields
 * @throws IllegalArgumentException if a facet field has more than one path component while
 *     its dimension is not configured as hierarchical
 */
private void processFacetFields(TaxonomyWriter taxoWriter, Map<String, List<FacetField>> byField, Document doc) throws IOException {
    for (Map.Entry<String, List<FacetField>> entry : byField.entrySet()) {
        final String fieldName = entry.getKey();
        final IntsRefBuilder ords = new IntsRefBuilder();
        for (FacetField ff : entry.getValue()) {
            FacetsConfig.DimConfig dimConfig = getDimConfig(ff.dim);
            if (ff.path.length > 1 && !dimConfig.hierarchical) {
                throw new IllegalArgumentException("dimension \"" + ff.dim + "\" is not hierarchical yet has " + ff.path.length + " components");
            }
            FacetLabel label = new FacetLabel(ff.dim, ff.path);
            checkTaxoWriter(taxoWriter);
            final int ord = taxoWriter.addCategory(label);
            ords.append(ord);
            final boolean addParents = dimConfig.multiValued && (dimConfig.hierarchical || dimConfig.requireDimCount);
            if (addParents) {
                // Walk up the taxonomy, recording every ancestor with a positive ordinal:
                for (int ancestor = taxoWriter.getParent(ord); ancestor > 0; ancestor = taxoWriter.getParent(ancestor)) {
                    ords.append(ancestor);
                }
                if (!dimConfig.requireDimCount) {
                    // Remove last (dimension) ord:
                    ords.setLength(ords.length() - 1);
                }
            }
            // Drill down: one term per path prefix of the label
            for (int prefixLength = 1; prefixLength <= label.length; prefixLength++) {
                String term = pathToString(label.components, prefixLength);
                doc.add(new StringField(fieldName, term, Field.Store.NO));
            }
        }
        // Facet counts:
        // DocValues are considered stored fields:
        doc.add(new BinaryDocValuesField(fieldName, dedupAndEncode(ords.get())));
    }
}
Also used : FacetLabel(org.apache.lucene.facet.taxonomy.FacetLabel) FloatAssociationFacetField(org.apache.lucene.facet.taxonomy.FloatAssociationFacetField) AssociationFacetField(org.apache.lucene.facet.taxonomy.AssociationFacetField) IntAssociationFacetField(org.apache.lucene.facet.taxonomy.IntAssociationFacetField) SortedSetDocValuesFacetField(org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) StringField(org.apache.lucene.document.StringField) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Example 59 with BinaryDocValuesField

use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by apache.

the class FacetsConfig method processAssocFacetFields.

/**
 * For each index field in {@code byField}, adds to {@code doc} one drill-down
 * {@link StringField} per path prefix of every association facet label, plus a single
 * {@link BinaryDocValuesField} whose bytes are the concatenation, per facet field, of the
 * category ordinal (4 bytes, big-endian) followed by that field's association bytes.
 *
 * @param taxoWriter taxonomy writer used to obtain category ordinals
 * @param byField association facet fields grouped by the index field name they are written under
 * @param doc document that receives the generated fields
 */
private void processAssocFacetFields(TaxonomyWriter taxoWriter, Map<String, List<AssociationFacetField>> byField, Document doc) throws IOException {
    for (Map.Entry<String, List<AssociationFacetField>> entry : byField.entrySet()) {
        byte[] buffer = new byte[16];
        int pos = 0;
        final String fieldName = entry.getKey();
        for (AssociationFacetField assocField : entry.getValue()) {
            // NOTE: we don't add parents for associations
            checkTaxoWriter(taxoWriter);
            FacetLabel facetLabel = new FacetLabel(assocField.dim, assocField.path);
            final int ord = taxoWriter.addCategory(facetLabel);
            // Make room for the 4-byte ordinal
            final int afterOrdinal = pos + 4;
            if (afterOrdinal > buffer.length) {
                buffer = ArrayUtil.grow(buffer, afterOrdinal);
            }
            // big-endian: most significant byte first
            for (int shift = 24; shift >= 0; shift -= 8) {
                buffer[pos++] = (byte) (ord >> shift);
            }
            // Make room for, then append, the association payload
            final int assocLength = assocField.assoc.length;
            if (pos + assocLength > buffer.length) {
                buffer = ArrayUtil.grow(buffer, pos + assocLength);
            }
            System.arraycopy(assocField.assoc.bytes, assocField.assoc.offset, buffer, pos, assocLength);
            pos += assocLength;
            // Drill down: one term per path prefix of the label
            for (int prefixLength = 1; prefixLength <= facetLabel.length; prefixLength++) {
                String term = pathToString(facetLabel.components, prefixLength);
                doc.add(new StringField(fieldName, term, Field.Store.NO));
            }
        }
        BytesRef encoded = new BytesRef(buffer, 0, pos);
        doc.add(new BinaryDocValuesField(fieldName, encoded));
    }
}
Also used : FacetLabel(org.apache.lucene.facet.taxonomy.FacetLabel) StringField(org.apache.lucene.document.StringField) ArrayList(java.util.ArrayList) List(java.util.List) FloatAssociationFacetField(org.apache.lucene.facet.taxonomy.FloatAssociationFacetField) AssociationFacetField(org.apache.lucene.facet.taxonomy.AssociationFacetField) IntAssociationFacetField(org.apache.lucene.facet.taxonomy.IntAssociationFacetField) HashMap(java.util.HashMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) BytesRef(org.apache.lucene.util.BytesRef)

Example 60 with BinaryDocValuesField

use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by apache.

the class TestBinaryDocValuesUpdates method testUpdateDocumentByMultipleTerms.

/**
 * Checks that when two binary doc-values updates issued through different terms hit the same
 * documents, the value from the later update (3) is the one read back for every document.
 */
public void testUpdateDocumentByMultipleTerms() throws Exception {
    // make sure the order of updates is respected, even when multiple terms affect same document
    Directory directory = newDirectory();
    IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random())));

    // One document matched by both terms, with an initial value of 5
    Document document = new Document();
    document.add(new StringField("k1", "v1", Store.NO));
    document.add(new StringField("k2", "v2", Store.NO));
    document.add(new BinaryDocValuesField("bdv", toBytes(5L)));

    // flushed document
    indexWriter.addDocument(document);
    indexWriter.commit();
    // in-memory document
    indexWriter.addDocument(document);

    // Two updates to the same field through different terms; the second should win
    Term firstTerm = new Term("k1", "v1");
    indexWriter.updateBinaryDocValue(firstTerm, "bdv", toBytes(17L));
    Term secondTerm = new Term("k2", "v2");
    indexWriter.updateBinaryDocValue(secondTerm, "bdv", toBytes(3L));
    indexWriter.close();

    final DirectoryReader directoryReader = DirectoryReader.open(directory);
    BinaryDocValues values = MultiDocValues.getBinaryValues(directoryReader, "bdv");
    final int maxDoc = directoryReader.maxDoc();
    for (int docID = 0; docID < maxDoc; docID++) {
        assertEquals(docID, values.nextDoc());
        assertEquals(3, getValue(values));
    }
    directoryReader.close();
    directory.close();
}
Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) StringField(org.apache.lucene.document.StringField) Document(org.apache.lucene.document.Document) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) Directory(org.apache.lucene.store.Directory) NRTCachingDirectory(org.apache.lucene.store.NRTCachingDirectory)

Aggregations

BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField)90 Document (org.apache.lucene.document.Document)84 Directory (org.apache.lucene.store.Directory)71 BytesRef (org.apache.lucene.util.BytesRef)65 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)57 StringField (org.apache.lucene.document.StringField)50 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)40 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)29 SortedSetDocValuesField (org.apache.lucene.document.SortedSetDocValuesField)24 SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField)23 NRTCachingDirectory (org.apache.lucene.store.NRTCachingDirectory)21 Field (org.apache.lucene.document.Field)16 Analyzer (org.apache.lucene.analysis.Analyzer)15 Random (java.util.Random)12 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)12 StoredField (org.apache.lucene.document.StoredField)11 TextField (org.apache.lucene.document.TextField)11 IOException (java.io.IOException)9 BinaryDocValues (org.apache.lucene.index.BinaryDocValues)9 LeafReader (org.apache.lucene.index.LeafReader)9