Search in sources :

Example 16 with StoredField

Use of org.apache.lucene.document.StoredField in project lucene-solr by apache.

In class BaseDocValuesFormatTestCase, method doTestRandomAdvance.

/**
 * Indexes several chunks of documents, each chunk with its own randomly chosen sparseness, and
 * then repeatedly walks the iterator supplied by {@code fieldCreator} using a random mix of
 * {@code nextDoc()} and {@code advance()} calls. Every document the iterator lands on must be
 * one that was indexed with the field.
 *
 * @param fieldCreator supplies the field added to each non-missing document and the iterator
 *     under test for the resulting reader
 * @throws IOException propagated from indexing or reading
 */
private void doTestRandomAdvance(FieldCreator fieldCreator) throws IOException {
    Analyzer analyzer = new MockAnalyzer(random());
    Directory directory = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(analyzer);
    conf.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter w = new RandomIndexWriter(random(), directory, conf);

    // Ids of the documents that were indexed without the field under test.
    Set<Integer> idsWithoutField = new HashSet<>();
    int chunkCount = atLeast(10);
    int nextId = 0;
    for (int chunk = 0; chunk < chunkCount; chunk++) {
        // Each chunk gets its own sparseness.
        double sparseChance = random().nextDouble();
        int docsInChunk = atLeast(1000);
        for (int n = 0; n < docsInChunk; n++, nextId++) {
            Document doc = new Document();
            doc.add(new StoredField("id", nextId));
            if (random().nextDouble() > sparseChance) {
                doc.add(fieldCreator.next());
            } else {
                idsWithoutField.add(nextId);
            }
            w.addDocument(doc);
        }
    }
    if (random().nextBoolean()) {
        w.forceMerge(1);
    }

    // Translate the stored ids of the field-less documents into doc IDs of the reader.
    IndexReader r = w.getReader();
    int maxDoc = r.maxDoc();
    BitSet missing = new FixedBitSet(maxDoc);
    for (int docID = 0; docID < maxDoc; docID++) {
        Number storedId = r.document(docID).getField("id").numericValue();
        if (idsWithoutField.contains(storedId)) {
            missing.set(docID);
        }
    }

    for (int iter = 0; iter < 100; iter++) {
        DocIdSetIterator values = fieldCreator.iterator(r);
        assertEquals(-1, values.docID());
        boolean exhausted = false;
        while (!exhausted) {
            int docID;
            if (random().nextBoolean()) {
                docID = values.nextDoc();
            } else {
                // One time in ten allow a jump anywhere up to maxDoc; otherwise stay within 25 docs.
                int range = random().nextInt(10) == 7 ? maxDoc - values.docID() : 25;
                int inc = TestUtil.nextInt(random(), 1, range);
                docID = values.advance(values.docID() + inc);
            }
            exhausted = docID == NO_MORE_DOCS;
            if (!exhausted) {
                assertFalse(missing.get(docID));
            }
        }
    }
    IOUtils.close(r, w, directory);
}
Also used : BitSet(org.apache.lucene.util.BitSet) FixedBitSet(org.apache.lucene.util.FixedBitSet) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) Document(org.apache.lucene.document.Document) StoredField(org.apache.lucene.document.StoredField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) FixedBitSet(org.apache.lucene.util.FixedBitSet) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet)

Example 17 with StoredField

Use of org.apache.lucene.document.StoredField in project lucene-solr by apache.

In class BaseDocValuesFormatTestCase, method doTestBinaryVsStoredFields.

/**
 * Indexes the same byte arrays both as a stored field ("stored") and as a binary doc-values
 * field ("dv"), deletes some documents, and then checks leaf by leaf that the doc-values
 * iterator visits exactly the documents that have a stored value, with equal bytes. The check
 * runs twice: once on the index as written and once after {@code forceMerge(1)}.
 *
 * @param density a document gets the fields unless a random double exceeds this value;
 *     otherwise an empty document is indexed
 * @param bytes supplies the byte array used for both fields of a document
 * @throws Exception propagated from indexing or reading
 */
private void doTestBinaryVsStoredFields(double density, Supplier<byte[]> bytes) throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);

    // A single document instance is reused; only the field values change between adds.
    Field idField = new StringField("id", "", Field.Store.NO);
    Field storedField = new StoredField("stored", new byte[0]);
    Field dvField = new BinaryDocValuesField("dv", new BytesRef());
    Document doc = new Document();
    doc.add(idField);
    doc.add(storedField);
    doc.add(dvField);

    // index some docs
    int numDocs = atLeast(300);
    for (int i = 0; i < numDocs; i++) {
        if (random().nextDouble() > density) {
            writer.addDocument(new Document());
        } else {
            idField.setStringValue(Integer.toString(i));
            byte[] value = bytes.get();
            storedField.setBytesValue(value);
            dvField.setBytesValue(value);
            writer.addDocument(doc);
            if (random().nextInt(31) == 0) {
                writer.commit();
            }
        }
    }

    // delete some docs
    int numDeletions = random().nextInt(numDocs / 10);
    for (int i = 0; i < numDeletions; i++) {
        int id = random().nextInt(numDocs);
        writer.deleteDocuments(new Term("id", Integer.toString(id)));
    }

    // compare twice: first on the index as written, then again after merging to one segment
    for (int pass = 0; pass < 2; pass++) {
        if (pass == 1) {
            writer.forceMerge(1);
        }
        DirectoryReader ir = writer.getReader();
        TestUtil.checkReader(ir);
        for (LeafReaderContext context : ir.leaves()) {
            LeafReader leaf = context.reader();
            BinaryDocValues docValues = DocValues.getBinary(leaf, "dv");
            docValues.nextDoc();
            for (int docID = 0; docID < leaf.maxDoc(); docID++) {
                BytesRef stored = leaf.document(docID).getBinaryValue("stored");
                if (stored != null) {
                    // The iterator must be positioned on this doc and carry the same bytes.
                    assertEquals(docID, docValues.docID());
                    assertEquals(stored, docValues.binaryValue());
                    docValues.nextDoc();
                } else {
                    // No stored value: the iterator must already be past this doc.
                    assertTrue(docValues.docID() > docID);
                }
            }
            assertEquals(DocIdSetIterator.NO_MORE_DOCS, docValues.docID());
        }
        ir.close();
    }

    writer.close();
    dir.close();
}
Also used : Document(org.apache.lucene.document.Document) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) StoredField(org.apache.lucene.document.StoredField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) StringField(org.apache.lucene.document.StringField) FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) StoredField(org.apache.lucene.document.StoredField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) StringField(org.apache.lucene.document.StringField) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory)

Example 18 with StoredField

Use of org.apache.lucene.document.StoredField in project lucene-solr by apache.

In class FastVectorHighlighterTest, method testBooleanPhraseWithSynonym.

/**
 * Indexes one document whose token stream contains "httpwwwfacebookcom" at the same position
 * as "http" (position increment 0), then checks that the highlighter returns the whole stored
 * text as a single highlighted fragment for a phrase through "http", for a phrase through the
 * synonym, and for a boolean OR of both phrases.
 */
public void testBooleanPhraseWithSynonym() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));

    // Term vectors with positions and offsets; the text itself comes from a separate stored field.
    FieldType type = new FieldType(TextField.TYPE_NOT_STORED);
    type.setStoreTermVectorOffsets(true);
    type.setStoreTermVectorPositions(true);
    type.setStoreTermVectors(true);
    type.freeze();

    Token syn = new Token("httpwwwfacebookcom", 6, 29);
    syn.setPositionIncrement(0);
    CannedTokenStream ts = new CannedTokenStream(
        new Token("test", 0, 4),
        new Token("http", 6, 10),
        syn,
        new Token("www", 13, 16),
        new Token("facebook", 17, 25),
        new Token("com", 26, 29));

    Document doc = new Document();
    doc.add(new Field("field", ts, type));
    doc.add(new StoredField("field", "Test: http://www.facebook.com"));
    writer.addDocument(doc);

    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    IndexReader reader = DirectoryReader.open(writer);

    final String expected = "<b>Test: http://www.facebook.com</b>";
    final int docId = 0;
    final int fragCharSize = 54;
    final int maxNumFragments = 1;

    PhraseQuery pq = new PhraseQuery("field", "test", "http", "www", "facebook", "com");
    PhraseQuery pq2 = new PhraseQuery("field", "test", "httpwwwfacebookcom", "www", "facebook", "com");
    BooleanQuery.Builder bq = new BooleanQuery.Builder();
    bq.add(pq, BooleanClause.Occur.SHOULD);
    bq.add(pq2, BooleanClause.Occur.SHOULD);

    // query1: match
    FieldQuery fieldQuery = highlighter.getFieldQuery(pq, reader);
    String[] bestFragments =
        highlighter.getBestFragments(fieldQuery, reader, docId, "field", fragCharSize, maxNumFragments);
    assertEquals(expected, bestFragments[0]);

    // query2: match
    fieldQuery = highlighter.getFieldQuery(pq2, reader);
    bestFragments =
        highlighter.getBestFragments(fieldQuery, reader, docId, "field", fragCharSize, maxNumFragments);
    assertEquals(expected, bestFragments[0]);

    // query3: OR query1 and query2 together
    fieldQuery = highlighter.getFieldQuery(bq.build(), reader);
    bestFragments =
        highlighter.getBestFragments(fieldQuery, reader, docId, "field", fragCharSize, maxNumFragments);
    assertEquals(expected, bestFragments[0]);

    reader.close();
    writer.close();
    dir.close();
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) Token(org.apache.lucene.analysis.Token) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) StoredField(org.apache.lucene.document.StoredField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) StoredField(org.apache.lucene.document.StoredField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) Directory(org.apache.lucene.store.Directory)

Example 19 with StoredField

Use of org.apache.lucene.document.StoredField in project lucene-solr by apache.

In class StrategyTestCase, method getDocuments.

/**
 * Builds one document per sample: stored "id" and "name" string fields, plus, when the converted
 * shape is non-null, the fields the strategy creates for that shape. If {@code storeShape} is
 * set, the shape's string form is also stored under the strategy's field name.
 *
 * @param sampleData the samples to convert; consumed until exhausted
 * @return the documents, in iteration order
 */
protected List<Document> getDocuments(Iterator<SpatialTestData> sampleData) {
    List<Document> documents = new ArrayList<>();
    for (; sampleData.hasNext(); ) {
        SpatialTestData data = sampleData.next();

        Document document = new Document();
        document.add(new StringField("id", data.id, Field.Store.YES));
        document.add(new StringField("name", data.name, Field.Store.YES));

        Shape shape = convertShapeFromGetDocuments(data.shape);
        if (shape != null) {
            for (Field shapeField : strategy.createIndexableFields(shape)) {
                document.add(shapeField);
            }
            //just for diagnostics
            if (storeShape) {
                document.add(new StoredField(strategy.getFieldName(), shape.toString()));
            }
        }

        documents.add(document);
    }
    return documents;
}
Also used : StringField(org.apache.lucene.document.StringField) StoredField(org.apache.lucene.document.StoredField) Field(org.apache.lucene.document.Field) StoredField(org.apache.lucene.document.StoredField) Shape(org.locationtech.spatial4j.shape.Shape) StringField(org.apache.lucene.document.StringField) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document)

Example 20 with StoredField

Use of org.apache.lucene.document.StoredField in project lucene-solr by apache.

In class TestMemoryIndex, method testMissingPoints.

/**
 * Checks that asking the first leaf reader of a memory index for point values yields
 * {@code null}, both for a field that was added only as a stored field and for a field
 * that was never added.
 */
public void testMissingPoints() throws IOException {
    Document document = new Document();
    document.add(new StoredField("field", 42));
    IndexSearcher searcher = MemoryIndex.fromDocument(document, analyzer).createSearcher();

    // field that exists but does not have points
    assertNull(
        searcher.getIndexReader().leaves().get(0).reader().getPointValues("field"));

    // field that does not exist
    assertNull(
        searcher.getIndexReader().leaves().get(0).reader().getPointValues("some_missing_field"));
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) StoredField(org.apache.lucene.document.StoredField) Document(org.apache.lucene.document.Document)

Aggregations

StoredField (org.apache.lucene.document.StoredField)140 Document (org.apache.lucene.document.Document)118 Directory (org.apache.lucene.store.Directory)77 StringField (org.apache.lucene.document.StringField)61 Field (org.apache.lucene.document.Field)52 BytesRef (org.apache.lucene.util.BytesRef)50 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)41 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)39 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)36 TextField (org.apache.lucene.document.TextField)34 IntPoint (org.apache.lucene.document.IntPoint)31 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)31 IndexReader (org.apache.lucene.index.IndexReader)29 IndexSearcher (org.apache.lucene.search.IndexSearcher)26 TopDocs (org.apache.lucene.search.TopDocs)24 SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField)22 SortField (org.apache.lucene.search.SortField)22 FieldType (org.apache.lucene.document.FieldType)21 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)21 Sort (org.apache.lucene.search.Sort)21