Use of org.apache.lucene.document.StoredField in project lucene-solr by apache: class BaseDocValuesFormatTestCase, method doTestRandomAdvance.
private void doTestRandomAdvance(FieldCreator fieldCreator) throws IOException {
  Analyzer analyzer = new MockAnalyzer(random());
  Directory directory = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(analyzer);
  conf.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter w = new RandomIndexWriter(random(), directory, conf);
  int numChunks = atLeast(10);
  int id = 0;
  Set<Integer> missingSet = new HashSet<>();
  for (int i = 0; i < numChunks; i++) {
    // change sparseness for each chunk
    double sparseChance = random().nextDouble();
    int docCount = atLeast(1000);
    for (int j = 0; j < docCount; j++) {
      Document doc = new Document();
      // stored-only id, used below to recover which docs lack the field
      doc.add(new StoredField("id", id));
      if (random().nextDouble() > sparseChance) {
        doc.add(fieldCreator.next());
      } else {
        missingSet.add(id);
      }
      id++;
      w.addDocument(doc);
    }
  }
  if (random().nextBoolean()) {
    w.forceMerge(1);
  }
  // Now search the index:
  IndexReader r = w.getReader();
  BitSet missing = new FixedBitSet(r.maxDoc());
  for (int docID = 0; docID < r.maxDoc(); docID++) {
    Document doc = r.document(docID);
    if (missingSet.contains(doc.getField("id").numericValue())) {
      missing.set(docID);
    }
  }
  for (int iter = 0; iter < 100; iter++) {
    DocIdSetIterator values = fieldCreator.iterator(r);
    // a fresh iterator is positioned before the first document
    assertEquals(-1, values.docID());
    while (true) {
      int docID;
      if (random().nextBoolean()) {
        docID = values.nextDoc();
      } else {
        int range;
        if (random().nextInt(10) == 7) {
          // big jump
          range = r.maxDoc() - values.docID();
        } else {
          // small jump
          range = 25;
        }
        int inc = TestUtil.nextInt(random(), 1, range);
        docID = values.advance(values.docID() + inc);
      }
      if (docID == NO_MORE_DOCS) {
        break;
      }
      // however we got here, the iterator must never land on a doc without a value
      assertFalse(missing.get(docID));
    }
  }
  IOUtils.close(r, w, directory);
}
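The test is parameterized by a FieldCreator, which supplies one doc-values field per document and the matching iterator over the finished index. The interface itself is not shown above; a plausible shape, inferred purely from the two calls the test makes (fieldCreator.next() and fieldCreator.iterator(r)), would be:

// Hypothetical sketch, inferred from usage above; the actual interface in
// BaseDocValuesFormatTestCase may differ in naming or modifiers.
private interface FieldCreator {
  // a fresh doc-values field to add to the next document
  Field next();
  // an iterator over exactly the documents that received the field
  DocIdSetIterator iterator(IndexReader r) throws IOException;
}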
Use of org.apache.lucene.document.StoredField in project lucene-solr by apache: class BaseDocValuesFormatTestCase, method doTestBinaryVsStoredFields.
private void doTestBinaryVsStoredFields(double density, Supplier<byte[]> bytes) throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
  Document doc = new Document();
  Field idField = new StringField("id", "", Field.Store.NO);
  Field storedField = new StoredField("stored", new byte[0]);
  Field dvField = new BinaryDocValuesField("dv", new BytesRef());
  doc.add(idField);
  doc.add(storedField);
  doc.add(dvField);
  // index some docs
  int numDocs = atLeast(300);
  for (int i = 0; i < numDocs; i++) {
    if (random().nextDouble() > density) {
      // leave a gap: this doc gets no id, stored, or dv field
      writer.addDocument(new Document());
      continue;
    }
    idField.setStringValue(Integer.toString(i));
    byte[] buffer = bytes.get();
    storedField.setBytesValue(buffer);
    dvField.setBytesValue(buffer);
    writer.addDocument(doc);
    if (random().nextInt(31) == 0) {
      writer.commit();
    }
  }
  // delete some docs
  int numDeletions = random().nextInt(numDocs / 10);
  for (int i = 0; i < numDeletions; i++) {
    int id = random().nextInt(numDocs);
    writer.deleteDocuments(new Term("id", Integer.toString(id)));
  }
  // compare
  DirectoryReader ir = writer.getReader();
  TestUtil.checkReader(ir);
  for (LeafReaderContext context : ir.leaves()) {
    LeafReader r = context.reader();
    BinaryDocValues docValues = DocValues.getBinary(r, "dv");
    // position on the first document that has a value
    docValues.nextDoc();
    for (int i = 0; i < r.maxDoc(); i++) {
      BytesRef binaryValue = r.document(i).getBinaryValue("stored");
      if (binaryValue == null) {
        assertTrue(docValues.docID() > i);
      } else {
        assertEquals(i, docValues.docID());
        assertEquals(binaryValue, docValues.binaryValue());
        docValues.nextDoc();
      }
    }
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docValues.docID());
  }
  ir.close();
  // compare again, after merging down to a single segment
  writer.forceMerge(1);
  ir = writer.getReader();
  TestUtil.checkReader(ir);
  for (LeafReaderContext context : ir.leaves()) {
    LeafReader r = context.reader();
    BinaryDocValues docValues = DocValues.getBinary(r, "dv");
    docValues.nextDoc();
    for (int i = 0; i < r.maxDoc(); i++) {
      BytesRef binaryValue = r.document(i).getBinaryValue("stored");
      if (binaryValue == null) {
        assertTrue(docValues.docID() > i);
      } else {
        assertEquals(i, docValues.docID());
        assertEquals(binaryValue, docValues.binaryValue());
        docValues.nextDoc();
      }
    }
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docValues.docID());
  }
  ir.close();
  writer.close();
  dir.close();
}
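A caller drives this helper with a density and a byte-array supplier. As a hedged sketch of such a caller (the test name and payload lengths here are illustrative, not taken from the original source):

// Illustrative caller, not from the original: exercise the helper with a
// sparse index (roughly 10% of docs carry the field) and short random payloads.
public void testSparseShortBinaryVsStoredFields() throws Exception {
  doTestBinaryVsStoredFields(0.1, () -> {
    byte[] buffer = new byte[TestUtil.nextInt(random(), 0, 10)];
    random().nextBytes(buffer);
    return buffer;
  });
}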
Use of org.apache.lucene.document.StoredField in project lucene-solr by apache: class FastVectorHighlighterTest, method testBooleanPhraseWithSynonym.
public void testBooleanPhraseWithSynonym() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  FieldType type = new FieldType(TextField.TYPE_NOT_STORED);
  type.setStoreTermVectorOffsets(true);
  type.setStoreTermVectorPositions(true);
  type.setStoreTermVectors(true);
  type.freeze();
  Token syn = new Token("httpwwwfacebookcom", 6, 29);
  syn.setPositionIncrement(0);
  CannedTokenStream ts = new CannedTokenStream(
      new Token("test", 0, 4),
      new Token("http", 6, 10),
      syn,
      new Token("www", 13, 16),
      new Token("facebook", 17, 25),
      new Token("com", 26, 29));
  Field field = new Field("field", ts, type);
  doc.add(field);
  doc.add(new StoredField("field", "Test: http://www.facebook.com"));
  writer.addDocument(doc);
  FastVectorHighlighter highlighter = new FastVectorHighlighter();
  IndexReader reader = DirectoryReader.open(writer);
  int docId = 0;
  // query1: match
  PhraseQuery pq = new PhraseQuery("field", "test", "http", "www", "facebook", "com");
  FieldQuery fieldQuery = highlighter.getFieldQuery(pq, reader);
  String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
  assertEquals("<b>Test: http://www.facebook.com</b>", bestFragments[0]);
  // query2: match
  PhraseQuery pq2 = new PhraseQuery("field", "test", "httpwwwfacebookcom", "www", "facebook", "com");
  fieldQuery = highlighter.getFieldQuery(pq2, reader);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
  assertEquals("<b>Test: http://www.facebook.com</b>", bestFragments[0]);
  // query3: OR query1 and query2 together
  BooleanQuery.Builder bq = new BooleanQuery.Builder();
  bq.add(pq, BooleanClause.Occur.SHOULD);
  bq.add(pq2, BooleanClause.Occur.SHOULD);
  fieldQuery = highlighter.getFieldQuery(bq.build(), reader);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
  assertEquals("<b>Test: http://www.facebook.com</b>", bestFragments[0]);
  reader.close();
  writer.close();
  dir.close();
}
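Both phrase queries match the same document because the synonym token "httpwwwfacebookcom" is stacked on the same position as "http": a position increment of 0 means the token occupies the position of the token before it. A minimal sketch of that stacking pattern in isolation, using only the Token and CannedTokenStream classes already seen above:

// Minimal sketch of synonym stacking: a token with positionIncrement 0 sits
// at the same position as the preceding token, so a phrase query can match
// through either term at that position.
Token http = new Token("http", 6, 10);                 // position n
Token synonym = new Token("httpwwwfacebookcom", 6, 29);
synonym.setPositionIncrement(0);                       // also position n
TokenStream stacked = new CannedTokenStream(http, synonym);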
Use of org.apache.lucene.document.StoredField in project lucene-solr by apache: class StrategyTestCase, method getDocuments.
protected List<Document> getDocuments(Iterator<SpatialTestData> sampleData) {
  List<Document> documents = new ArrayList<>();
  while (sampleData.hasNext()) {
    SpatialTestData data = sampleData.next();
    Document document = new Document();
    document.add(new StringField("id", data.id, Field.Store.YES));
    document.add(new StringField("name", data.name, Field.Store.YES));
    Shape shape = data.shape;
    shape = convertShapeFromGetDocuments(shape);
    if (shape != null) {
      for (Field f : strategy.createIndexableFields(shape)) {
        document.add(f);
      }
      if (storeShape) {
        // just for diagnostics
        document.add(new StoredField(strategy.getFieldName(), shape.toString()));
      }
    }
    documents.add(document);
  }
  return documents;
}
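The returned documents are plain Lucene documents, so consuming them requires nothing beyond a standard IndexWriter. A hedged sketch of such a consumer (the directory, analyzer, and sampleData names are assumptions; StrategyTestCase may use its own indexing helpers instead):

// Illustrative consumer, not from the original source: index the documents
// produced by getDocuments with an ordinary IndexWriter.
IndexWriter w = new IndexWriter(directory, new IndexWriterConfig(new MockAnalyzer(random())));
for (Document d : getDocuments(sampleData)) {
  w.addDocument(d);
}
w.commit();
w.close();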
Use of org.apache.lucene.document.StoredField in project lucene-solr by apache: class TestMemoryIndex, method testMissingPoints.
public void testMissingPoints() throws IOException {
  Document doc = new Document();
  doc.add(new StoredField("field", 42));
  MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer);
  IndexSearcher indexSearcher = mi.createSearcher();
  // field that exists but does not have points
  assertNull(indexSearcher.getIndexReader().leaves().get(0).reader().getPointValues("field"));
  // field that does not exist
  assertNull(indexSearcher.getIndexReader().leaves().get(0).reader().getPointValues("some_missing_field"));
}
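The assertions hold because StoredField only records a value for retrieval at search time; it writes nothing into the points index that getPointValues reads. To make the numeric value searchable as a point, the document would also need a point field, as in this minimal contrast sketch (using the standard org.apache.lucene.document.IntPoint class):

// Minimal contrast sketch: StoredField is retrieve-only, IntPoint is what
// populates the points structure that getPointValues exposes.
Document doc = new Document();
doc.add(new StoredField("field", 42)); // stored value, no point data
doc.add(new IntPoint("field", 42));    // indexed point data for the same field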