Search in sources :

Example 31 with SortedSetDocValuesField

use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.

the class FacetsConfig method processSSDVFacetFields.

/**
 * Adds the indexed representation of every sorted-set facet field to {@code doc}.
 *
 * <p>For each facet field, three fields are added under the index field name it is
 * grouped by: one {@link SortedSetDocValuesField} carrying the full path (dimension
 * plus label), and two un-stored {@link StringField}s, one for the full path and one
 * for the dimension alone.
 *
 * @param byField facet fields grouped by the index field name they are written to
 * @param doc the document that receives the generated fields
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
private void processSSDVFacetFields(Map<String, List<SortedSetDocValuesFacetField>> byField, Document doc) throws IOException {
    for (Map.Entry<String, List<SortedSetDocValuesFacetField>> group : byField.entrySet()) {
        final String targetField = group.getKey();
        final List<SortedSetDocValuesFacetField> facets = group.getValue();
        for (SortedSetDocValuesFacetField facet : facets) {
            final FacetLabel facetPath = new FacetLabel(facet.dim, facet.label);
            final String encodedPath = pathToString(facetPath.components, facetPath.length);
            // Doc values entry, used for facet counts.
            doc.add(new SortedSetDocValuesField(targetField, new BytesRef(encodedPath)));
            // Indexed terms, used for drill-down: first the full path, then the bare dimension.
            doc.add(new StringField(targetField, encodedPath, Field.Store.NO));
            doc.add(new StringField(targetField, facet.dim, Field.Store.NO));
        }
    }
}
Also used : FacetLabel(org.apache.lucene.facet.taxonomy.FacetLabel) StringField(org.apache.lucene.document.StringField) ArrayList(java.util.ArrayList) List(java.util.List) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) SortedSetDocValuesFacetField(org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField) HashMap(java.util.HashMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) BytesRef(org.apache.lucene.util.BytesRef)

Example 32 with SortedSetDocValuesField

use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.

the class TestDocValuesStatsCollector method testDocsWithSortedSetValues.

/**
 * Indexes a random number of documents, only some of which carry sorted-set doc values,
 * optionally deletes a random subset of the documents that do, and then checks that a
 * {@code DocValuesStatsCollector} reports the expected count, missing count, minimum and
 * maximum for the field.
 */
public void testDocsWithSortedSetValues() throws IOException {
    try (Directory directory = newDirectory();
        IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig())) {
        final String dvField = "sorted";
        final int docCount = TestUtil.nextInt(random(), 1, 100);
        // expected[d] stays null for documents that have no value (or were deleted)
        final BytesRef[][] expected = new BytesRef[docCount][];
        for (int docId = 0; docId < docCount; docId++) {
            Document document = new Document();
            boolean hasValues = random().nextBoolean();
            if (hasValues) {
                BytesRef[] terms = new BytesRef[TestUtil.nextInt(random(), 1, 5)];
                expected[docId] = terms;
                for (int slot = 0; slot < terms.length; slot++) {
                    BytesRef term = TestUtil.randomBinaryTerm(random());
                    document.add(new SortedSetDocValuesField(dvField, term));
                    terms[slot] = term;
                }
                // only documents with values get an id, so only those can be deleted below
                document.add(new StringField("id", "doc" + docId, Store.NO));
            }
            writer.addDocument(document);
        }
        // in 20% of the runs, delete some of the documents
        boolean deleteSome = random().nextDouble() < 0.2;
        if (deleteSome) {
            for (int docId = 0; docId < docCount; docId++) {
                if (random().nextBoolean()) {
                    writer.deleteDocuments(new Term("id", "doc" + docId));
                    expected[docId] = null;
                }
            }
        }
        try (DirectoryReader reader = DirectoryReader.open(writer)) {
            SortedSetDocValuesStats stats = new SortedSetDocValuesStats(dvField);
            TotalHitCountCollector hitCounter = new TotalHitCountCollector();
            IndexSearcher searcher = new IndexSearcher(reader);
            searcher.search(new MatchAllDocsQuery(), MultiCollector.wrap(hitCounter, new DocValuesStatsCollector(stats)));

            int docsWithField = (int) nonNull(expected).count();
            assertEquals(docsWithField, stats.count());

            int docsWithoutField = (int) isNull(expected).count();
            assertEquals(computeExpMissing(docsWithoutField, docCount, reader), stats.missing());

            // min/max are only checked when at least one document contributed a value
            if (stats.count() > 0) {
                BytesRef smallest = nonNull(expected).flatMap(Arrays::stream).min(BytesRef::compareTo).get();
                assertEquals(smallest, stats.min());
                BytesRef largest = nonNull(expected).flatMap(Arrays::stream).max(BytesRef::compareTo).get();
                assertEquals(largest, stats.max());
            }
        }
    }
}
Also used : DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) SortedSetDocValuesStats(org.apache.lucene.search.DocValuesStats.SortedSetDocValuesStats) IndexWriter(org.apache.lucene.index.IndexWriter) StringField(org.apache.lucene.document.StringField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 33 with SortedSetDocValuesField

use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.

the class BaseDocValuesFormatTestCase method testThreads2.

/**
 * Tests dv against stored fields with threads (all types + missing).
 *
 * <p>Every doc-values field written here is mirrored by a stored field holding the same
 * value. After indexing (with occasional commits and some deletions), several threads
 * concurrently walk every leaf of one shared reader and assert that the doc values of
 * each document match what its stored fields say. Documents may lack any of the fields.
 */
@Slow
public void testThreads2() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
    // These Field instances are created once and reused for every document;
    // only their values are replaced inside the loop below.
    Field idField = new StringField("id", "", Field.Store.NO);
    Field storedBinField = new StoredField("storedBin", new byte[0]);
    Field dvBinField = new BinaryDocValuesField("dvBin", new BytesRef());
    Field dvSortedField = new SortedDocValuesField("dvSorted", new BytesRef());
    Field storedNumericField = new StoredField("storedNum", "");
    Field dvNumericField = new NumericDocValuesField("dvNum", 0);
    // index some docs
    int numDocs = TestUtil.nextInt(random(), 1025, 2047);
    for (int i = 0; i < numDocs; i++) {
        idField.setStringValue(Integer.toString(i));
        // the same random bytes (0 to 8 of them) go into the stored, binary-dv and sorted-dv fields
        int length = TestUtil.nextInt(random(), 0, 8);
        byte[] buffer = new byte[length];
        random().nextBytes(buffer);
        storedBinField.setBytesValue(buffer);
        dvBinField.setBytesValue(buffer);
        dvSortedField.setBytesValue(buffer);
        // the numeric value is stored as its decimal string and indexed as a numeric dv
        long numericValue = random().nextLong();
        storedNumericField.setStringValue(Long.toString(numericValue));
        dvNumericField.setLongValue(numericValue);
        Document doc = new Document();
        doc.add(idField);
        // the binary trio is always added (or omitted) together, so "storedBin" present
        // implies "dvBin" and "dvSorted" are present too
        if (random().nextInt(4) > 0) {
            doc.add(storedBinField);
            doc.add(dvBinField);
            doc.add(dvSortedField);
        }
        // likewise the numeric pair is added or omitted as a unit
        if (random().nextInt(4) > 0) {
            doc.add(storedNumericField);
            doc.add(dvNumericField);
        }
        // 0 to 2 sorted-set values; the TreeSet de-duplicates them and iterates in sorted
        // order, so the stored copies are written in that order
        // (presumably to line up with the ord order checked in the reader threads)
        int numSortedSetFields = random().nextInt(3);
        Set<String> values = new TreeSet<>();
        for (int j = 0; j < numSortedSetFields; j++) {
            values.add(TestUtil.randomSimpleString(random()));
        }
        for (String v : values) {
            doc.add(new SortedSetDocValuesField("dvSortedSet", new BytesRef(v)));
            doc.add(new StoredField("storedSortedSet", v));
        }
        // 0 to 2 sorted-numeric values, de-duplicated and sorted the same way
        int numSortedNumericFields = random().nextInt(3);
        Set<Long> numValues = new TreeSet<>();
        for (int j = 0; j < numSortedNumericFields; j++) {
            numValues.add(TestUtil.nextLong(random(), Long.MIN_VALUE, Long.MAX_VALUE));
        }
        for (Long l : numValues) {
            doc.add(new SortedNumericDocValuesField("dvSortedNumeric", l));
            doc.add(new StoredField("storedSortedNumeric", Long.toString(l)));
        }
        writer.addDocument(doc);
        // commit at random points (presumably to end up with more than one segment)
        if (random().nextInt(31) == 0) {
            writer.commit();
        }
    }
    // delete some docs
    // (ids are drawn independently, so the same id may be picked more than once)
    int numDeletions = random().nextInt(numDocs / 10);
    for (int i = 0; i < numDeletions; i++) {
        int id = random().nextInt(numDocs);
        writer.deleteDocuments(new Term("id", Integer.toString(id)));
    }
    writer.close();
    // compare
    // a single reader is shared by all threads; each thread pulls its own dv iterators from it
    final DirectoryReader ir = DirectoryReader.open(dir);
    int numThreads = TestUtil.nextInt(random(), 2, 7);
    Thread[] threads = new Thread[numThreads];
    // all threads block on this latch so that they begin reading at the same time
    final CountDownLatch startingGun = new CountDownLatch(1);
    for (int i = 0; i < threads.length; i++) {
        threads[i] = new Thread() {

            @Override
            public void run() {
                try {
                    startingGun.await();
                    for (LeafReaderContext context : ir.leaves()) {
                        LeafReader r = context.reader();
                        BinaryDocValues binaries = r.getBinaryDocValues("dvBin");
                        SortedDocValues sorted = r.getSortedDocValues("dvSorted");
                        NumericDocValues numerics = r.getNumericDocValues("dvNum");
                        SortedSetDocValues sortedSet = r.getSortedSetDocValues("dvSortedSet");
                        SortedNumericDocValues sortedNumeric = r.getSortedNumericDocValues("dvSortedNumeric");
                        // visit every doc id in the leaf; the stored fields tell us which
                        // doc-values iterators are expected to have an entry for this doc
                        for (int j = 0; j < r.maxDoc(); j++) {
                            BytesRef binaryValue = r.document(j).getBinaryValue("storedBin");
                            if (binaryValue != null) {
                                // NOTE(review): only "binaries" is null-checked; "sorted" is
                                // dereferenced on the assumption that it is non-null whenever
                                // "binaries" is, which holds because the fields are indexed together
                                if (binaries != null) {
                                    assertEquals(j, binaries.nextDoc());
                                    BytesRef scratch = binaries.binaryValue();
                                    assertEquals(binaryValue, scratch);
                                    assertEquals(j, sorted.nextDoc());
                                    scratch = sorted.binaryValue();
                                    assertEquals(binaryValue, scratch);
                                }
                            }
                            String number = r.document(j).get("storedNum");
                            if (number != null) {
                                if (numerics != null) {
                                    // unlike the other iterators, this one is positioned with advance(j)
                                    assertEquals(j, numerics.advance(j));
                                    assertEquals(Long.parseLong(number), numerics.longValue());
                                }
                            }
                            String[] values = r.document(j).getValues("storedSortedSet");
                            if (values.length > 0) {
                                assertNotNull(sortedSet);
                                assertEquals(j, sortedSet.nextDoc());
                                // ords must resolve to the stored values, in the same order,
                                // with nothing left over afterwards
                                for (int k = 0; k < values.length; k++) {
                                    long ord = sortedSet.nextOrd();
                                    assertTrue(ord != SortedSetDocValues.NO_MORE_ORDS);
                                    BytesRef value = sortedSet.lookupOrd(ord);
                                    assertEquals(values[k], value.utf8ToString());
                                }
                                assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
                            }
                            String[] numValues = r.document(j).getValues("storedSortedNumeric");
                            if (numValues.length > 0) {
                                assertNotNull(sortedNumeric);
                                assertEquals(j, sortedNumeric.nextDoc());
                                assertEquals(numValues.length, sortedNumeric.docValueCount());
                                // compared as decimal strings, since that is how the values were stored
                                for (int k = 0; k < numValues.length; k++) {
                                    long v = sortedNumeric.nextValue();
                                    assertEquals(numValues[k], Long.toString(v));
                                }
                            }
                        }
                    }
                    TestUtil.checkReader(ir);
                } catch (Exception e) {
                    // run() cannot throw checked exceptions, so wrap and rethrow
                    throw new RuntimeException(e);
                }
            }
        };
        threads[i].start();
    }
    // release all worker threads at once, then wait for every one of them to finish
    startingGun.countDown();
    for (Thread t : threads) {
        t.join();
    }
    ir.close();
    dir.close();
}
Also used : Document(org.apache.lucene.document.Document) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) StoredField(org.apache.lucene.document.StoredField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) StringField(org.apache.lucene.document.StringField) FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) StoredField(org.apache.lucene.document.StoredField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) TreeSet(java.util.TreeSet) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) CountDownLatch(java.util.concurrent.CountDownLatch) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) IOException(java.io.IOException) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) StringField(org.apache.lucene.document.StringField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField)

Example 34 with SortedSetDocValuesField

use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.

the class BaseDocValuesFormatTestCase method testSortedSetTwoValuesUnordered.

/**
 * Adds two sorted-set values to a single document in non-sorted order ("world" before
 * "hello") and verifies that the document exposes ords 0 and 1, with ord 0 resolving
 * to "hello" and ord 1 to "world".
 */
public void testSortedSetTwoValuesUnordered() throws IOException {
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);

    // deliberately added out of order
    Document twoValueDoc = new Document();
    for (String term : new String[] { "world", "hello" }) {
        twoValueDoc.add(new SortedSetDocValuesField("field", new BytesRef(term)));
    }
    writer.addDocument(twoValueDoc);

    DirectoryReader reader = writer.getReader();
    writer.close();

    SortedSetDocValues ords = getOnlyLeafReader(reader).getSortedSetDocValues("field");
    assertEquals(0, ords.nextDoc());
    // the single document yields exactly ords 0 and 1, then the end marker
    for (long expectedOrd = 0; expectedOrd < 2; expectedOrd++) {
        assertEquals(expectedOrd, ords.nextOrd());
    }
    assertEquals(NO_MORE_ORDS, ords.nextOrd());

    BytesRef resolved = ords.lookupOrd(0);
    assertEquals(new BytesRef("hello"), resolved);
    resolved = ords.lookupOrd(1);
    assertEquals(new BytesRef("world"), resolved);

    reader.close();
    dir.close();
}
Also used : SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) Document(org.apache.lucene.document.Document) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 35 with SortedSetDocValuesField

use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.

the class BaseDocValuesFormatTestCase method testThreads3.

/**
 * Indexes documents carrying a random number (0 to 20 each) of sorted-set, binary and
 * sorted-numeric doc-values fields, then, in ten rounds, opens a fresh reader and has
 * several threads run {@code CheckIndex.testDocValues} over every segment at once.
 * Any error reported by the check is rethrown from the worker thread.
 */
@Slow
public void testThreads3() throws Exception {
    Directory directory = newFSDirectory(createTempDir());
    IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random()));
    RandomIndexWriter indexer = new RandomIndexWriter(random(), directory, config);

    final int sortedSetFieldCount = random().nextInt(21);
    final int binaryFieldCount = random().nextInt(21);
    final int sortedNumericFieldCount = random().nextInt(21);
    final int docCount = TestUtil.nextInt(random(), 2025, 2047);

    for (int docIndex = 0; docIndex < docCount; docIndex++) {
        Document document = new Document();
        // two values per sorted-set field
        for (int f = 0; f < sortedSetFieldCount; f++) {
            for (int copy = 0; copy < 2; copy++) {
                document.add(new SortedSetDocValuesField("ss" + f, new BytesRef(TestUtil.randomSimpleString(random()))));
            }
        }
        // one value per binary field
        for (int f = 0; f < binaryFieldCount; f++) {
            document.add(new BinaryDocValuesField("b" + f, new BytesRef(TestUtil.randomSimpleString(random()))));
        }
        // two values per sorted-numeric field
        for (int f = 0; f < sortedNumericFieldCount; f++) {
            for (int copy = 0; copy < 2; copy++) {
                document.add(new SortedNumericDocValuesField("sn" + f, TestUtil.nextLong(random(), Long.MIN_VALUE, Long.MAX_VALUE)));
            }
        }
        indexer.addDocument(document);
    }
    indexer.close();

    // now check with threads
    for (int round = 0; round < 10; round++) {
        final DirectoryReader reader = DirectoryReader.open(directory);
        final CountDownLatch startSignal = new CountDownLatch(1);
        Thread[] checkers = new Thread[TestUtil.nextInt(random(), 4, 10)];
        for (int slot = 0; slot < checkers.length; slot++) {
            checkers[slot] = new Thread() {

                @Override
                public void run() {
                    try {
                        // CheckIndex output is captured in memory and never inspected
                        ByteArrayOutputStream sink = new ByteArrayOutputStream(1024);
                        PrintStream infoStream = new PrintStream(sink, false, IOUtils.UTF_8);
                        startSignal.await();
                        for (LeafReaderContext ctx : reader.leaves()) {
                            SegmentReader segment = (SegmentReader) ctx.reader();
                            DocValuesStatus result = CheckIndex.testDocValues(segment, infoStream, true);
                            if (result.error != null) {
                                throw result.error;
                            }
                        }
                    } catch (Throwable failure) {
                        throw new RuntimeException(failure);
                    }
                }
            };
        }
        // start everyone, release them together, then wait for completion
        for (Thread checker : checkers) {
            checker.start();
        }
        startSignal.countDown();
        for (Thread checker : checkers) {
            checker.join();
        }
        reader.close();
    }
    directory.close();
}
Also used : PrintStream(java.io.PrintStream) DocValuesStatus(org.apache.lucene.index.CheckIndex.Status.DocValuesStatus) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Document(org.apache.lucene.document.Document) CountDownLatch(java.util.concurrent.CountDownLatch) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Aggregations

SortedSetDocValuesField (org.apache.lucene.document.SortedSetDocValuesField) 98, BytesRef (org.apache.lucene.util.BytesRef) 96, Document (org.apache.lucene.document.Document) 82, Directory (org.apache.lucene.store.Directory) 74, RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter) 38, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer) 36, SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField) 33, NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField) 27, IndexReader (org.apache.lucene.index.IndexReader) 27, StringField (org.apache.lucene.document.StringField) 23, BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField) 22, SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField) 20, ArrayList (java.util.ArrayList) 18, Analyzer (org.apache.lucene.analysis.Analyzer) 14, IndexableField (org.apache.lucene.index.IndexableField) 13, Field (org.apache.lucene.document.Field) 12, DirectoryReader (org.apache.lucene.index.DirectoryReader) 11, LeafReader (org.apache.lucene.index.LeafReader) 11, IntPoint (org.apache.lucene.document.IntPoint) 10, StoredField (org.apache.lucene.document.StoredField) 10