Use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by Apache.
Class TestIndexSorting, method testRandom2.
/**
 * Indexes one set of documents twice and expects the two resulting readers to be equal: the first
 * writer receives the documents in id order (with a merge policy chosen to preserve docIDs), the
 * second receives them shuffled but is configured with an index sort on the "numeric" field and
 * is force-merged before its reader is opened.
 */
public void testRandom2() throws Exception {
  final int docCount = atLeast(100);

  FieldType positionsType = new FieldType(TextField.TYPE_NOT_STORED);
  positionsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  positionsType.freeze();

  FieldType termVectorsType = new FieldType(TextField.TYPE_NOT_STORED);
  termVectorsType.setStoreTermVectors(true);
  termVectorsType.freeze();

  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer source = new MockTokenizer();
      return new TokenStreamComponents(source, source);
    }
  };

  // Build every document once; both writers below index these same instances.
  List<Document> documents = new ArrayList<>();
  for (int ordinal = 0; ordinal < docCount; ordinal++) {
    final int id = ordinal * 10;
    final String idText = Integer.toString(id);

    Document document = new Document();
    document.add(new StringField("id", idText, Store.YES));
    document.add(new StringField("docs", "#all#", Store.NO));

    PositionsTokenStream positionsStream = new PositionsTokenStream();
    positionsStream.setId(id);
    document.add(new Field("positions", positionsStream, positionsType));

    document.add(new NumericDocValuesField("numeric", id));

    // The "norms" text is the id repeated id times, separated by single spaces.
    String normsText = String.join(" ", Collections.nCopies(id, idText));
    document.add(new TextField("norms", normsText, Store.NO));

    document.add(new BinaryDocValuesField("binary", new BytesRef(idText)));
    document.add(new SortedDocValuesField("sorted", new BytesRef(idText)));
    document.add(new SortedSetDocValuesField("multi_valued_string", new BytesRef(idText)));
    document.add(new SortedSetDocValuesField("multi_valued_string", new BytesRef(Integer.toString(id + 1))));
    document.add(new SortedNumericDocValuesField("multi_valued_numeric", id));
    document.add(new SortedNumericDocValuesField("multi_valued_numeric", id + 1));
    document.add(new Field("term_vectors", idText, termVectorsType));

    byte[] encodedId = new byte[Integer.BYTES];
    NumericUtils.intToSortableBytes(id, encodedId, 0);
    document.add(new BinaryPoint("points", encodedId));

    documents.add(document);
  }

  // Both RandomIndexWriters are driven by Randoms built from this one seed so that they behave
  // identically.
  final long sharedSeed = random().nextLong();

  // First writer: documents are added already in id order.
  Directory presortedDir = newFSDirectory(createTempDir());
  Random presortedRandom = new Random(sharedSeed);
  IndexWriterConfig presortedConfig = newIndexWriterConfig(presortedRandom, analyzer);
  // Wrap the similarity so the "norms" field is exercised.
  presortedConfig.setSimilarity(new NormsSimilarity(presortedConfig.getSimilarity()));
  // Preserve docIDs.
  presortedConfig.setMergePolicy(newLogMergePolicy());
  if (VERBOSE) {
    System.out.println("TEST: now index pre-sorted");
  }
  RandomIndexWriter presortedWriter = new RandomIndexWriter(presortedRandom, presortedDir, presortedConfig);
  for (Document document : documents) {
    // Re-arm the shared token stream with this document's id before it is consumed.
    Field positionsField = (Field) document.getField("positions");
    PositionsTokenStream positionsStream = (PositionsTokenStream) positionsField.tokenStreamValue();
    positionsStream.setId(Integer.parseInt(document.get("id")));
    presortedWriter.addDocument(document);
  }

  // Second writer: documents are shuffled, and the writer is given an index sort instead.
  Directory sortedDir = newFSDirectory(createTempDir());
  Random sortedRandom = new Random(sharedSeed);
  IndexWriterConfig sortedConfig = newIndexWriterConfig(sortedRandom, analyzer);
  // Wrap the similarity so the "norms" field is exercised.
  sortedConfig.setSimilarity(new NormsSimilarity(sortedConfig.getSimilarity()));
  SortField numericSortField = new SortField("numeric", SortField.Type.INT);
  Sort indexSort = new Sort(numericSortField);
  sortedConfig.setIndexSort(indexSort);
  Collections.shuffle(documents, random());
  if (VERBOSE) {
    System.out.println("TEST: now index with index-time sorting");
  }
  RandomIndexWriter sortedWriter = new RandomIndexWriter(sortedRandom, sortedDir, sortedConfig);
  final int commitAtCount = TestUtil.nextInt(random(), 1, docCount - 1);
  for (int upto = 0; upto < documents.size(); upto++) {
    Document document = documents.get(upto);
    Field positionsField = (Field) document.getField("positions");
    PositionsTokenStream positionsStream = (PositionsTokenStream) positionsField.tokenStreamValue();
    positionsStream.setId(Integer.parseInt(document.get("id")));
    if (upto == commitAtCount) {
      // Ensure forceMerge really does merge
      sortedWriter.commit();
    }
    sortedWriter.addDocument(document);
  }
  if (VERBOSE) {
    System.out.println("TEST: now force merge");
  }
  sortedWriter.forceMerge(1);

  DirectoryReader presortedReader = presortedWriter.getReader();
  DirectoryReader sortedReader = sortedWriter.getReader();
  if (VERBOSE) {
    System.out.println("TEST: now compare r1=" + presortedReader + " r2=" + sortedReader);
  }
  assertEquals(indexSort, getOnlyLeafReader(sortedReader).getMetaData().getSort());
  assertReaderEquals("left: sorted by hand; right: sorted by Lucene", presortedReader, sortedReader);
  IOUtils.close(presortedWriter, sortedWriter, presortedReader, sortedReader, presortedDir, sortedDir);
}
Use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by Apache.
Class TestIndexSorting, method testIndexSortWithSparseField.
/**
 * Checks that an index sorted in reverse on the dense "dense_int" field still returns the right
 * values for fields that only some documents carry: sparse numeric doc values, sparse binary doc
 * values and the norms of a sparse text field.
 *
 * <p>Documents are added with dense_int = 0..numDocs-1 and only the first numSparseDocs of them
 * get the sparse fields. The assertions expect docID d to hold dense_int = numDocs-1-d after the
 * force merge, so the sparse fields are expected on the last numSparseDocs docIDs.
 */
public void testIndexSortWithSparseField() throws Exception {
  final int numDocs = 128;
  // Only documents added with an index below this bound receive the sparse fields.
  final int numSparseDocs = 64;

  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  SortField sortField = new SortField("dense_int", SortField.Type.INT, true);
  Sort indexSort = new Sort(sortField);
  iwc.setIndexSort(indexSort);
  IndexWriter w = new IndexWriter(dir, iwc);

  // A single Field instance is shared by every sparse document; its value never changes, so it
  // is set once here rather than on each loop iteration.
  Field textField = newTextField("sparse_text", "", Field.Store.NO);
  textField.setStringValue("foo");
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    doc.add(new NumericDocValuesField("dense_int", i));
    if (i < numSparseDocs) {
      doc.add(new NumericDocValuesField("sparse_int", i));
      doc.add(new BinaryDocValuesField("sparse_binary", new BytesRef(Integer.toString(i))));
      doc.add(textField);
    }
    w.addDocument(doc);
  }
  w.commit();
  w.forceMerge(1);

  DirectoryReader r = DirectoryReader.open(w);
  assertEquals(1, r.leaves().size());
  LeafReader leafReader = r.leaves().get(0).reader();
  NumericDocValues denseValues = leafReader.getNumericDocValues("dense_int");
  NumericDocValues sparseValues = leafReader.getNumericDocValues("sparse_int");
  BinaryDocValues sparseBinaryValues = leafReader.getBinaryDocValues("sparse_binary");
  NumericDocValues normsValues = leafReader.getNormValues("sparse_text");

  // First docID expected to carry the sparse fields.
  final int firstSparseDocID = numDocs - numSparseDocs;
  for (int docID = 0; docID < numDocs; docID++) {
    // Value this docID was indexed with, given the reversed sort order.
    final int expectedValue = numDocs - 1 - docID;
    assertTrue(denseValues.advanceExact(docID));
    assertEquals(expectedValue, (int) denseValues.longValue());
    if (docID >= firstSparseDocID) {
      assertTrue(sparseValues.advanceExact(docID));
      assertTrue(sparseBinaryValues.advanceExact(docID));
      assertTrue(normsValues.advanceExact(docID));
      assertEquals(1, normsValues.longValue());
      assertEquals(expectedValue, (int) sparseValues.longValue());
      assertEquals(new BytesRef(Integer.toString(expectedValue)), sparseBinaryValues.binaryValue());
    } else {
      assertFalse(sparseBinaryValues.advanceExact(docID));
      assertFalse(sparseValues.advanceExact(docID));
      assertFalse(normsValues.advanceExact(docID));
    }
  }
  IOUtils.close(r, w, dir);
}
Use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by Apache.
Class TestFieldCache, method testDocValuesIntegration.
/**
 * Indexes a single document carrying one field per doc-values flavour (binary, sorted, numeric,
 * sorted-set) and then checks, field by field, which FieldCache accessors return the indexed
 * value and which ones reject the field with an IllegalStateException.
 */
public void testDocValuesIntegration() throws Exception {
  Directory directory = newDirectory();
  IndexWriterConfig config = newIndexWriterConfig(null);
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, config);

  Document document = new Document();
  document.add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
  document.add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
  document.add(new NumericDocValuesField("numeric", 42));
  document.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
  document.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
  indexWriter.addDocument(document);

  DirectoryReader reader = indexWriter.getReader();
  indexWriter.close();
  LeafReader leaf = getOnlyLeafReader(reader);
  final FieldCache cache = FieldCache.DEFAULT;

  // ---- "binary": readable through getTerms() only ----
  expectThrows(IllegalStateException.class,
      () -> cache.getNumerics(leaf, "binary", FieldCache.INT_POINT_PARSER));

  BinaryDocValues binaryTerms = cache.getTerms(leaf, "binary");
  assertEquals(0, binaryTerms.nextDoc());
  assertEquals("binary value", binaryTerms.binaryValue().utf8ToString());

  expectThrows(IllegalStateException.class, () -> cache.getTermsIndex(leaf, "binary"));
  expectThrows(IllegalStateException.class, () -> cache.getDocTermOrds(leaf, "binary", null));
  expectThrows(IllegalStateException.class, () -> new DocTermOrds(leaf, null, "binary"));

  assertTrue(cache.getDocsWithField(leaf, "binary", null).get(0));

  // ---- "sorted": readable through getTerms(), getTermsIndex() and getDocTermOrds() ----
  expectThrows(IllegalStateException.class,
      () -> cache.getNumerics(leaf, "sorted", FieldCache.INT_POINT_PARSER));
  expectThrows(IllegalStateException.class, () -> new DocTermOrds(leaf, null, "sorted"));

  BinaryDocValues sortedTerms = cache.getTerms(leaf, "sorted");
  assertEquals(0, sortedTerms.nextDoc());
  assertEquals("sorted value", sortedTerms.binaryValue().utf8ToString());

  SortedDocValues sortedIndex = cache.getTermsIndex(leaf, "sorted");
  assertEquals(0, sortedIndex.nextDoc());
  assertEquals(0, sortedIndex.ordValue());
  assertEquals(1, sortedIndex.getValueCount());
  assertEquals("sorted value", sortedIndex.binaryValue().utf8ToString());

  SortedSetDocValues sortedAsSet = cache.getDocTermOrds(leaf, "sorted", null);
  assertEquals(0, sortedAsSet.nextDoc());
  assertEquals(0, sortedAsSet.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedAsSet.nextOrd());
  assertEquals(1, sortedAsSet.getValueCount());

  assertTrue(cache.getDocsWithField(leaf, "sorted", null).get(0));

  // ---- "numeric": readable through getNumerics() only ----
  NumericDocValues numericValues = cache.getNumerics(leaf, "numeric", FieldCache.INT_POINT_PARSER);
  assertEquals(0, numericValues.nextDoc());
  assertEquals(42, numericValues.longValue());

  expectThrows(IllegalStateException.class, () -> cache.getTerms(leaf, "numeric"));
  expectThrows(IllegalStateException.class, () -> cache.getTermsIndex(leaf, "numeric"));
  expectThrows(IllegalStateException.class, () -> cache.getDocTermOrds(leaf, "numeric", null));
  expectThrows(IllegalStateException.class, () -> new DocTermOrds(leaf, null, "numeric"));

  assertTrue(cache.getDocsWithField(leaf, "numeric", null).get(0));

  // ---- "sortedset": readable through getDocTermOrds() only ----
  expectThrows(IllegalStateException.class,
      () -> cache.getNumerics(leaf, "sortedset", FieldCache.INT_POINT_PARSER));
  expectThrows(IllegalStateException.class, () -> cache.getTerms(leaf, "sortedset"));
  expectThrows(IllegalStateException.class, () -> cache.getTermsIndex(leaf, "sortedset"));
  expectThrows(IllegalStateException.class, () -> new DocTermOrds(leaf, null, "sortedset"));

  SortedSetDocValues multiValued = cache.getDocTermOrds(leaf, "sortedset", null);
  assertEquals(0, multiValued.nextDoc());
  assertEquals(0, multiValued.nextOrd());
  assertEquals(1, multiValued.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, multiValued.nextOrd());
  assertEquals(2, multiValued.getValueCount());

  assertTrue(cache.getDocsWithField(leaf, "sortedset", null).get(0));

  reader.close();
  directory.close();
}
Use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by Apache.
Class TestLegacyFieldCache, method testDocValuesIntegration.
/**
 * Indexes a single document carrying one field per doc-values flavour and checks that
 * FieldCache.getNumerics() with the legacy int parser returns the value of the numeric field
 * and throws IllegalStateException for the binary, sorted and sorted-set fields.
 */
public void testDocValuesIntegration() throws Exception {
  Directory directory = newDirectory();
  IndexWriterConfig config = newIndexWriterConfig(null);
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, config);

  Document document = new Document();
  document.add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
  document.add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
  document.add(new NumericDocValuesField("numeric", 42));
  document.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
  document.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
  indexWriter.addDocument(document);

  DirectoryReader reader = indexWriter.getReader();
  indexWriter.close();
  LeafReader leaf = getOnlyLeafReader(reader);
  final FieldCache cache = FieldCache.DEFAULT;

  // Binary doc values are not numeric.
  expectThrows(IllegalStateException.class,
      () -> cache.getNumerics(leaf, "binary", FieldCache.LEGACY_INT_PARSER));

  // Sorted doc values are not numeric.
  expectThrows(IllegalStateException.class,
      () -> cache.getNumerics(leaf, "sorted", FieldCache.LEGACY_INT_PARSER));

  // Numeric doc values are returned as indexed.
  NumericDocValues numericValues = cache.getNumerics(leaf, "numeric", FieldCache.LEGACY_INT_PARSER);
  assertEquals(0, numericValues.nextDoc());
  assertEquals(42, numericValues.longValue());

  // Sorted-set doc values are not numeric.
  expectThrows(IllegalStateException.class,
      () -> cache.getNumerics(leaf, "sortedset", FieldCache.LEGACY_INT_PARSER));

  reader.close();
  directory.close();
}
Use of org.apache.lucene.document.BinaryDocValuesField in project lucene-solr by Apache.
Class BaseDocValuesFormatTestCase, method testThreads2.
/**
 * Tests doc values against stored fields from several threads at once, covering binary, sorted,
 * numeric, sorted-set and sorted-numeric doc values, with some documents missing some fields.
 *
 * <p>Each document stores every value twice: once as doc values and once as a stored field. The
 * reader threads then walk every leaf and compare the two copies document by document.
 */
@Slow
public void testThreads2() throws Exception {
  Directory directory = newDirectory();
  IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random()));
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, config);

  // Field instances reused for every document; only their values change.
  Field id = new StringField("id", "", Field.Store.NO);
  Field storedBinary = new StoredField("storedBin", new byte[0]);
  Field binaryDv = new BinaryDocValuesField("dvBin", new BytesRef());
  Field sortedDv = new SortedDocValuesField("dvSorted", new BytesRef());
  Field storedNumber = new StoredField("storedNum", "");
  Field numericDv = new NumericDocValuesField("dvNum", 0);

  // index some docs
  final int docCount = TestUtil.nextInt(random(), 1025, 2047);
  for (int docIndex = 0; docIndex < docCount; docIndex++) {
    id.setStringValue(Integer.toString(docIndex));

    byte[] payload = new byte[TestUtil.nextInt(random(), 0, 8)];
    random().nextBytes(payload);
    storedBinary.setBytesValue(payload);
    binaryDv.setBytesValue(payload);
    sortedDv.setBytesValue(payload);

    long number = random().nextLong();
    storedNumber.setStringValue(Long.toString(number));
    numericDv.setLongValue(number);

    Document document = new Document();
    document.add(id);
    // The binary/sorted trio is left out when the draw is 0.
    if (random().nextInt(4) != 0) {
      document.add(storedBinary);
      document.add(binaryDv);
      document.add(sortedDv);
    }
    // Likewise for the numeric pair.
    if (random().nextInt(4) != 0) {
      document.add(storedNumber);
      document.add(numericDv);
    }

    // TreeSet: duplicates collapse and iteration follows natural ordering.
    int sortedSetDraws = random().nextInt(3);
    Set<String> sortedSetTerms = new TreeSet<>();
    for (int draw = 0; draw < sortedSetDraws; draw++) {
      sortedSetTerms.add(TestUtil.randomSimpleString(random()));
    }
    for (String term : sortedSetTerms) {
      document.add(new SortedSetDocValuesField("dvSortedSet", new BytesRef(term)));
      document.add(new StoredField("storedSortedSet", term));
    }

    int sortedNumericDraws = random().nextInt(3);
    Set<Long> sortedNumbers = new TreeSet<>();
    for (int draw = 0; draw < sortedNumericDraws; draw++) {
      sortedNumbers.add(TestUtil.nextLong(random(), Long.MIN_VALUE, Long.MAX_VALUE));
    }
    for (Long value : sortedNumbers) {
      document.add(new SortedNumericDocValuesField("dvSortedNumeric", value));
      document.add(new StoredField("storedSortedNumeric", Long.toString(value)));
    }

    indexWriter.addDocument(document);
    if (random().nextInt(31) == 0) {
      indexWriter.commit();
    }
  }

  // delete some docs
  final int deletionCount = random().nextInt(docCount / 10);
  for (int deletion = 0; deletion < deletionCount; deletion++) {
    int victim = random().nextInt(docCount);
    indexWriter.deleteDocuments(new Term("id", Integer.toString(victim)));
  }
  indexWriter.close();

  // compare
  final DirectoryReader reader = DirectoryReader.open(directory);
  Thread[] workers = new Thread[TestUtil.nextInt(random(), 2, 7)];
  // Workers block on this latch until the main thread releases them after the start loop.
  final CountDownLatch startSignal = new CountDownLatch(1);
  for (int slot = 0; slot < workers.length; slot++) {
    workers[slot] = new Thread() {
      @Override
      public void run() {
        try {
          startSignal.await();
          for (LeafReaderContext leafContext : reader.leaves()) {
            LeafReader leaf = leafContext.reader();
            BinaryDocValues binaryValues = leaf.getBinaryDocValues("dvBin");
            SortedDocValues sortedValues = leaf.getSortedDocValues("dvSorted");
            NumericDocValues numericValues = leaf.getNumericDocValues("dvNum");
            SortedSetDocValues sortedSetValues = leaf.getSortedSetDocValues("dvSortedSet");
            SortedNumericDocValues sortedNumericValues = leaf.getSortedNumericDocValues("dvSortedNumeric");

            for (int docId = 0; docId < leaf.maxDoc(); docId++) {
              // Binary + sorted: checked only when the stored copy and the dv field both exist.
              BytesRef expectedBytes = leaf.document(docId).getBinaryValue("storedBin");
              if (expectedBytes != null && binaryValues != null) {
                assertEquals(docId, binaryValues.nextDoc());
                assertEquals(expectedBytes, binaryValues.binaryValue());
                assertEquals(docId, sortedValues.nextDoc());
                assertEquals(expectedBytes, sortedValues.binaryValue());
              }

              // Numeric.
              String expectedNumber = leaf.document(docId).get("storedNum");
              if (expectedNumber != null && numericValues != null) {
                assertEquals(docId, numericValues.advance(docId));
                assertEquals(Long.parseLong(expectedNumber), numericValues.longValue());
              }

              // Sorted set: stored terms must come back in the same order, then no more ords.
              String[] expectedTerms = leaf.document(docId).getValues("storedSortedSet");
              if (expectedTerms.length > 0) {
                assertNotNull(sortedSetValues);
                assertEquals(docId, sortedSetValues.nextDoc());
                for (String expectedTerm : expectedTerms) {
                  long ord = sortedSetValues.nextOrd();
                  assertTrue(ord != SortedSetDocValues.NO_MORE_ORDS);
                  BytesRef actualTerm = sortedSetValues.lookupOrd(ord);
                  assertEquals(expectedTerm, actualTerm.utf8ToString());
                }
                assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSetValues.nextOrd());
              }

              // Sorted numeric: count and order must match the stored copies.
              String[] expectedNumbers = leaf.document(docId).getValues("storedSortedNumeric");
              if (expectedNumbers.length > 0) {
                assertNotNull(sortedNumericValues);
                assertEquals(docId, sortedNumericValues.nextDoc());
                assertEquals(expectedNumbers.length, sortedNumericValues.docValueCount());
                for (String expected : expectedNumbers) {
                  long actual = sortedNumericValues.nextValue();
                  assertEquals(expected, Long.toString(actual));
                }
              }
            }
          }
          TestUtil.checkReader(reader);
        } catch (Exception e) {
          throw new RuntimeException(e);
        }
      }
    };
    workers[slot].start();
  }

  startSignal.countDown();
  for (Thread worker : workers) {
    worker.join();
  }
  reader.close();
  directory.close();
}
Aggregations