Use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.
From the class FacetsConfig, method processSSDVFacetFields.
private void processSSDVFacetFields(Map<String, List<SortedSetDocValuesFacetField>> byField, Document doc) throws IOException {
  //System.out.println("process SSDV: " + byField);
  for (Map.Entry<String, List<SortedSetDocValuesFacetField>> ent : byField.entrySet()) {
    String indexFieldName = ent.getKey();
    for (SortedSetDocValuesFacetField facetField : ent.getValue()) {
      FacetLabel cp = new FacetLabel(facetField.dim, facetField.label);
      String fullPath = pathToString(cp.components, cp.length);
      //System.out.println("add " + fullPath);
      // For facet counts:
      doc.add(new SortedSetDocValuesField(indexFieldName, new BytesRef(fullPath)));
      // For drill-down:
      doc.add(new StringField(indexFieldName, fullPath, Field.Store.NO));
      doc.add(new StringField(indexFieldName, facetField.dim, Field.Store.NO));
    }
  }
}
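Applications normally do not call processSSDVFacetFields directly; it runs when FacetsConfig.build(Document) translates each SortedSetDocValuesFacetField into the SortedSetDocValuesField and StringFields shown above. The following is a minimal, hedged sketch of that round trip, counting facets with SortedSetDocValuesFacetCounts; the dimension "Author", the value "Lisa", and the method name countAuthors are illustrative, not taken from the snippet.

import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.facet.*;
import org.apache.lucene.facet.sortedset.*;
import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;

// Hypothetical helper method (drop into any class): index one faceted document and count it.
FacetResult countAuthors(Directory dir) throws IOException {
  FacetsConfig config = new FacetsConfig();
  try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
    Document doc = new Document();
    doc.add(new SortedSetDocValuesFacetField("Author", "Lisa"));
    // build() invokes processSSDVFacetFields and adds the doc values + drill-down fields
    writer.addDocument(config.build(doc));
  }
  try (DirectoryReader reader = DirectoryReader.open(dir)) {
    IndexSearcher searcher = new IndexSearcher(reader);
    SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(reader);
    FacetsCollector fc = new FacetsCollector();
    FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
    Facets facets = new SortedSetDocValuesFacetCounts(state, fc);
    return facets.getTopChildren(10, "Author");
  }
}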
Use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.
From the class TestDocValuesStatsCollector, method testDocsWithSortedSetValues.
public void testDocsWithSortedSetValues() throws IOException {
  try (Directory dir = newDirectory();
      IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig())) {
    String field = "sorted";
    int numDocs = TestUtil.nextInt(random(), 1, 100);
    BytesRef[][] docValues = new BytesRef[numDocs][];
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      if (random().nextBoolean()) {
        // not all documents have a value
        int numValues = TestUtil.nextInt(random(), 1, 5);
        docValues[i] = new BytesRef[numValues];
        for (int j = 0; j < numValues; j++) {
          BytesRef val = TestUtil.randomBinaryTerm(random());
          doc.add(new SortedSetDocValuesField(field, val));
          docValues[i][j] = val;
        }
        doc.add(new StringField("id", "doc" + i, Store.NO));
      }
      indexWriter.addDocument(doc);
    }
    // 20% of cases delete some docs
    if (random().nextDouble() < 0.2) {
      for (int i = 0; i < numDocs; i++) {
        if (random().nextBoolean()) {
          indexWriter.deleteDocuments(new Term("id", "doc" + i));
          docValues[i] = null;
        }
      }
    }
    try (DirectoryReader reader = DirectoryReader.open(indexWriter)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      SortedSetDocValuesStats stats = new SortedSetDocValuesStats(field);
      TotalHitCountCollector totalHitCount = new TotalHitCountCollector();
      searcher.search(new MatchAllDocsQuery(), MultiCollector.wrap(totalHitCount, new DocValuesStatsCollector(stats)));
      int expCount = (int) nonNull(docValues).count();
      assertEquals(expCount, stats.count());
      int numDocsWithoutField = (int) isNull(docValues).count();
      assertEquals(computeExpMissing(numDocsWithoutField, numDocs, reader), stats.missing());
      if (stats.count() > 0) {
        assertEquals(nonNull(docValues).flatMap(Arrays::stream).min(BytesRef::compareTo).get(), stats.min());
        assertEquals(nonNull(docValues).flatMap(Arrays::stream).max(BytesRef::compareTo).get(), stats.max());
      }
    }
  }
}
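The test consumes the values through the DocValuesStats framework, but the same per-document values can be read directly with the iterator-style API this codebase uses (advanceExact/nextOrd). A brief sketch under assumed names: a LeafReader leaf and a segment-local docId are taken as given, and "sorted" matches the field name in the test; none of this is part of the test itself.

import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;

// leaf and docId are assumed to exist in the surrounding code.
SortedSetDocValues dv = DocValues.getSortedSet(leaf, "sorted");
if (dv.advanceExact(docId)) {
  // the document has at least one value; ords come back in increasing (sorted) order
  long ord;
  while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
    BytesRef value = dv.lookupOrd(ord);
    System.out.println(value.utf8ToString());
  }
}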
Use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.
From the class BaseDocValuesFormatTestCase, method testThreads2.
/** Tests dv against stored fields with threads (all types + missing) */
@Slow
public void testThreads2() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
  Field idField = new StringField("id", "", Field.Store.NO);
  Field storedBinField = new StoredField("storedBin", new byte[0]);
  Field dvBinField = new BinaryDocValuesField("dvBin", new BytesRef());
  Field dvSortedField = new SortedDocValuesField("dvSorted", new BytesRef());
  Field storedNumericField = new StoredField("storedNum", "");
  Field dvNumericField = new NumericDocValuesField("dvNum", 0);
  // index some docs
  int numDocs = TestUtil.nextInt(random(), 1025, 2047);
  for (int i = 0; i < numDocs; i++) {
    idField.setStringValue(Integer.toString(i));
    int length = TestUtil.nextInt(random(), 0, 8);
    byte[] buffer = new byte[length];
    random().nextBytes(buffer);
    storedBinField.setBytesValue(buffer);
    dvBinField.setBytesValue(buffer);
    dvSortedField.setBytesValue(buffer);
    long numericValue = random().nextLong();
    storedNumericField.setStringValue(Long.toString(numericValue));
    dvNumericField.setLongValue(numericValue);
    Document doc = new Document();
    doc.add(idField);
    if (random().nextInt(4) > 0) {
      doc.add(storedBinField);
      doc.add(dvBinField);
      doc.add(dvSortedField);
    }
    if (random().nextInt(4) > 0) {
      doc.add(storedNumericField);
      doc.add(dvNumericField);
    }
    int numSortedSetFields = random().nextInt(3);
    Set<String> values = new TreeSet<>();
    for (int j = 0; j < numSortedSetFields; j++) {
      values.add(TestUtil.randomSimpleString(random()));
    }
    for (String v : values) {
      doc.add(new SortedSetDocValuesField("dvSortedSet", new BytesRef(v)));
      doc.add(new StoredField("storedSortedSet", v));
    }
    int numSortedNumericFields = random().nextInt(3);
    Set<Long> numValues = new TreeSet<>();
    for (int j = 0; j < numSortedNumericFields; j++) {
      numValues.add(TestUtil.nextLong(random(), Long.MIN_VALUE, Long.MAX_VALUE));
    }
    for (Long l : numValues) {
      doc.add(new SortedNumericDocValuesField("dvSortedNumeric", l));
      doc.add(new StoredField("storedSortedNumeric", Long.toString(l)));
    }
    writer.addDocument(doc);
    if (random().nextInt(31) == 0) {
      writer.commit();
    }
  }
  // delete some docs
  int numDeletions = random().nextInt(numDocs / 10);
  for (int i = 0; i < numDeletions; i++) {
    int id = random().nextInt(numDocs);
    writer.deleteDocuments(new Term("id", Integer.toString(id)));
  }
  writer.close();
  // compare
  final DirectoryReader ir = DirectoryReader.open(dir);
  int numThreads = TestUtil.nextInt(random(), 2, 7);
  Thread[] threads = new Thread[numThreads];
  final CountDownLatch startingGun = new CountDownLatch(1);
  for (int i = 0; i < threads.length; i++) {
    threads[i] = new Thread() {
      @Override
      public void run() {
        try {
          startingGun.await();
          for (LeafReaderContext context : ir.leaves()) {
            LeafReader r = context.reader();
            BinaryDocValues binaries = r.getBinaryDocValues("dvBin");
            SortedDocValues sorted = r.getSortedDocValues("dvSorted");
            NumericDocValues numerics = r.getNumericDocValues("dvNum");
            SortedSetDocValues sortedSet = r.getSortedSetDocValues("dvSortedSet");
            SortedNumericDocValues sortedNumeric = r.getSortedNumericDocValues("dvSortedNumeric");
            for (int j = 0; j < r.maxDoc(); j++) {
              BytesRef binaryValue = r.document(j).getBinaryValue("storedBin");
              if (binaryValue != null) {
                if (binaries != null) {
                  assertEquals(j, binaries.nextDoc());
                  BytesRef scratch = binaries.binaryValue();
                  assertEquals(binaryValue, scratch);
                  assertEquals(j, sorted.nextDoc());
                  scratch = sorted.binaryValue();
                  assertEquals(binaryValue, scratch);
                }
              }
              String number = r.document(j).get("storedNum");
              if (number != null) {
                if (numerics != null) {
                  assertEquals(j, numerics.advance(j));
                  assertEquals(Long.parseLong(number), numerics.longValue());
                }
              }
              String[] values = r.document(j).getValues("storedSortedSet");
              if (values.length > 0) {
                assertNotNull(sortedSet);
                assertEquals(j, sortedSet.nextDoc());
                for (int k = 0; k < values.length; k++) {
                  long ord = sortedSet.nextOrd();
                  assertTrue(ord != SortedSetDocValues.NO_MORE_ORDS);
                  BytesRef value = sortedSet.lookupOrd(ord);
                  assertEquals(values[k], value.utf8ToString());
                }
                assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
              }
              String[] numValues = r.document(j).getValues("storedSortedNumeric");
              if (numValues.length > 0) {
                assertNotNull(sortedNumeric);
                assertEquals(j, sortedNumeric.nextDoc());
                assertEquals(numValues.length, sortedNumeric.docValueCount());
                for (int k = 0; k < numValues.length; k++) {
                  long v = sortedNumeric.nextValue();
                  assertEquals(numValues[k], Long.toString(v));
                }
              }
            }
          }
          TestUtil.checkReader(ir);
        } catch (Exception e) {
          throw new RuntimeException(e);
        }
      }
    };
    threads[i].start();
  }
  startingGun.countDown();
  for (Thread t : threads) {
    t.join();
  }
  ir.close();
  dir.close();
}
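Beyond the stored-field comparison above, a SortedSetDocValuesField such as dvSortedSet can also drive sorting. A small, hedged sketch follows, assuming an IndexSearcher named searcher over the index built by the test; SortedSetSortField picks one ord per document (the minimum by default) as the sort key, and missing-value placement can be adjusted with setMissingValue if needed.

import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.search.TopDocs;

// searcher is assumed to wrap a reader over the index built above; not part of the test.
Sort sort = new Sort(new SortedSetSortField("dvSortedSet", /* reverse= */ false));
TopDocs hits = searcher.search(new MatchAllDocsQuery(), 10, sort);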
Use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.
From the class BaseDocValuesFormatTestCase, method testSortedSetTwoValuesUnordered.
public void testSortedSetTwoValuesUnordered() throws IOException {
  Directory directory = newDirectory();
  RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);
  Document doc = new Document();
  doc.add(new SortedSetDocValuesField("field", new BytesRef("world")));
  doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
  iwriter.addDocument(doc);
  DirectoryReader ireader = iwriter.getReader();
  iwriter.close();
  SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
  assertEquals(0, dv.nextDoc());
  assertEquals(0, dv.nextOrd());
  assertEquals(1, dv.nextOrd());
  assertEquals(NO_MORE_ORDS, dv.nextOrd());
  BytesRef bytes = dv.lookupOrd(0);
  assertEquals(new BytesRef("hello"), bytes);
  bytes = dv.lookupOrd(1);
  assertEquals(new BytesRef("world"), bytes);
  ireader.close();
  directory.close();
}
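The assertions show that insertion order does not matter: ords reflect the sorted order of the values, so "hello" gets ord 0 even though "world" was added first. As an illustrative continuation (not part of the test, and it would have to run before ireader.close()), lookupTerm maps a value back to its ord and returns a negative number for a value that is absent from the dictionary.

// Hypothetical continuation using the dv handle from the test above.
assertEquals(0, dv.lookupTerm(new BytesRef("hello")));  // "hello" sorts before "world"
assertEquals(1, dv.lookupTerm(new BytesRef("world")));
assertTrue(dv.lookupTerm(new BytesRef("zzz")) < 0);     // absent: -(insertionPoint) - 1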
Use of org.apache.lucene.document.SortedSetDocValuesField in project lucene-solr by apache.
From the class BaseDocValuesFormatTestCase, method testThreads3.
@Slow
public void testThreads3() throws Exception {
  Directory dir = newFSDirectory(createTempDir());
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
  int numSortedSets = random().nextInt(21);
  int numBinaries = random().nextInt(21);
  int numSortedNums = random().nextInt(21);
  int numDocs = TestUtil.nextInt(random(), 2025, 2047);
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    for (int j = 0; j < numSortedSets; j++) {
      doc.add(new SortedSetDocValuesField("ss" + j, new BytesRef(TestUtil.randomSimpleString(random()))));
      doc.add(new SortedSetDocValuesField("ss" + j, new BytesRef(TestUtil.randomSimpleString(random()))));
    }
    for (int j = 0; j < numBinaries; j++) {
      doc.add(new BinaryDocValuesField("b" + j, new BytesRef(TestUtil.randomSimpleString(random()))));
    }
    for (int j = 0; j < numSortedNums; j++) {
      doc.add(new SortedNumericDocValuesField("sn" + j, TestUtil.nextLong(random(), Long.MIN_VALUE, Long.MAX_VALUE)));
      doc.add(new SortedNumericDocValuesField("sn" + j, TestUtil.nextLong(random(), Long.MIN_VALUE, Long.MAX_VALUE)));
    }
    writer.addDocument(doc);
  }
  writer.close();
  // now check with threads
  for (int i = 0; i < 10; i++) {
    final DirectoryReader r = DirectoryReader.open(dir);
    final CountDownLatch startingGun = new CountDownLatch(1);
    Thread[] threads = new Thread[TestUtil.nextInt(random(), 4, 10)];
    for (int tid = 0; tid < threads.length; tid++) {
      threads[tid] = new Thread() {
        @Override
        public void run() {
          try {
            ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
            PrintStream infoStream = new PrintStream(bos, false, IOUtils.UTF_8);
            startingGun.await();
            for (LeafReaderContext leaf : r.leaves()) {
              DocValuesStatus status = CheckIndex.testDocValues((SegmentReader) leaf.reader(), infoStream, true);
              if (status.error != null) {
                throw status.error;
              }
            }
          } catch (Throwable e) {
            throw new RuntimeException(e);
          }
        }
      };
    }
    for (int tid = 0; tid < threads.length; tid++) {
      threads[tid].start();
    }
    startingGun.countDown();
    for (int tid = 0; tid < threads.length; tid++) {
      threads[tid].join();
    }
    r.close();
  }
  dir.close();
}
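CheckIndex.testDocValues validates the doc values structures of every segment from several threads at once. To inspect one of the ss fields by hand you could instead enumerate its per-segment value dictionary. A minimal sketch, assuming it runs while the directory is still open (before dir.close()) and that at least one sorted-set field, here "ss0", was created; nothing below is part of the test.

try (DirectoryReader reader = DirectoryReader.open(dir)) {
  for (LeafReaderContext leaf : reader.leaves()) {
    SortedSetDocValues dv = leaf.reader().getSortedSetDocValues("ss0");
    if (dv == null) {
      continue; // no document in this segment has the field
    }
    // getValueCount() is the size of this segment's sorted value dictionary
    for (long ord = 0; ord < dv.getValueCount(); ord++) {
      System.out.println(dv.lookupOrd(ord).utf8ToString());
    }
  }
}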