Search in sources :

Example 21 with NumericDocValuesField

use of org.apache.lucene.document.NumericDocValuesField in project lucene-solr by apache.

the class TestBackwardsCompatibility method testCreateSortedIndex.

// ant test -Dtestcase=TestBackwardsCompatibility -Dtestmethod=testCreateSortedIndex -Dtests.codec=default -Dtests.useSecurityManager=false -Dtests.bwcdir=/tmp/sorted
public void testCreateSortedIndex() throws Exception {
    Path indexDir = getIndexDir().resolve("sorted");
    Files.deleteIfExists(indexDir);
    Directory dir = newFSDirectory(indexDir);
    LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
    mp.setNoCFSRatio(1.0);
    mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    // TODO: remove randomness
    IndexWriterConfig conf = new IndexWriterConfig(analyzer);
    conf.setMergePolicy(mp);
    conf.setUseCompoundFile(false);
    conf.setIndexSort(new Sort(new SortField("dateDV", SortField.Type.LONG, true)));
    IndexWriter writer = new IndexWriter(dir, conf);
    LineFileDocs docs = new LineFileDocs(random());
    SimpleDateFormat parser = new SimpleDateFormat("yyyy-MM-dd", Locale.ROOT);
    parser.setTimeZone(TimeZone.getTimeZone("UTC"));
    ParsePosition position = new ParsePosition(0);
    Field dateDVField = null;
    for (int i = 0; i < 50; i++) {
        Document doc = docs.nextDoc();
        String dateString = doc.get("date");
        position.setIndex(0);
        Date date = parser.parse(dateString, position);
        if (position.getErrorIndex() != -1) {
            throw new AssertionError("failed to parse \"" + dateString + "\" as date");
        }
        if (position.getIndex() != dateString.length()) {
            throw new AssertionError("failed to parse \"" + dateString + "\" as date");
        }
        if (dateDVField == null) {
            dateDVField = new NumericDocValuesField("dateDV", 0l);
            doc.add(dateDVField);
        }
        dateDVField.setLongValue(date.getTime());
        if (i == 250) {
            writer.commit();
        }
        writer.addDocument(doc);
    }
    writer.forceMerge(1);
    writer.close();
    dir.close();
}
Also used : Path(java.nio.file.Path) SortField(org.apache.lucene.search.SortField) Document(org.apache.lucene.document.Document) BinaryPoint(org.apache.lucene.document.BinaryPoint) DoublePoint(org.apache.lucene.document.DoublePoint) LongPoint(org.apache.lucene.document.LongPoint) IntPoint(org.apache.lucene.document.IntPoint) FloatPoint(org.apache.lucene.document.FloatPoint) Date(java.util.Date) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) SortField(org.apache.lucene.search.SortField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) StringField(org.apache.lucene.document.StringField) DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField) FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) Sort(org.apache.lucene.search.Sort) SimpleDateFormat(java.text.SimpleDateFormat) Directory(org.apache.lucene.store.Directory) RAMDirectory(org.apache.lucene.store.RAMDirectory) FSDirectory(org.apache.lucene.store.FSDirectory) SimpleFSDirectory(org.apache.lucene.store.SimpleFSDirectory) NIOFSDirectory(org.apache.lucene.store.NIOFSDirectory) LineFileDocs(org.apache.lucene.util.LineFileDocs) ParsePosition(java.text.ParsePosition)

Example 22 with NumericDocValuesField

use of org.apache.lucene.document.NumericDocValuesField in project lucene-solr by apache.

the class TestLucene70DocValuesFormat method doTestSparseDocValuesVsStoredFields.

private void doTestSparseDocValuesVsStoredFields() throws Exception {
    final long[] values = new long[TestUtil.nextInt(random(), 1, 500)];
    for (int i = 0; i < values.length; ++i) {
        values[i] = random().nextLong();
    }
    Directory dir = newFSDirectory(createTempDir());
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
    conf.setMergeScheduler(new SerialMergeScheduler());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
    // sparse compression is only enabled if less than 1% of docs have a value
    final int avgGap = 100;
    final int numDocs = atLeast(200);
    for (int i = random().nextInt(avgGap * 2); i >= 0; --i) {
        writer.addDocument(new Document());
    }
    final int maxNumValuesPerDoc = random().nextBoolean() ? 1 : TestUtil.nextInt(random(), 2, 5);
    for (int i = 0; i < numDocs; ++i) {
        Document doc = new Document();
        // single-valued
        long docValue = values[random().nextInt(values.length)];
        doc.add(new NumericDocValuesField("numeric", docValue));
        doc.add(new SortedDocValuesField("sorted", new BytesRef(Long.toString(docValue))));
        doc.add(new BinaryDocValuesField("binary", new BytesRef(Long.toString(docValue))));
        doc.add(new StoredField("value", docValue));
        // multi-valued
        final int numValues = TestUtil.nextInt(random(), 1, maxNumValuesPerDoc);
        for (int j = 0; j < numValues; ++j) {
            docValue = values[random().nextInt(values.length)];
            doc.add(new SortedNumericDocValuesField("sorted_numeric", docValue));
            doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef(Long.toString(docValue))));
            doc.add(new StoredField("values", docValue));
        }
        writer.addDocument(doc);
        // add a gap
        for (int j = TestUtil.nextInt(random(), 0, avgGap * 2); j >= 0; --j) {
            writer.addDocument(new Document());
        }
    }
    if (random().nextBoolean()) {
        writer.forceMerge(1);
    }
    final IndexReader indexReader = writer.getReader();
    writer.close();
    for (LeafReaderContext context : indexReader.leaves()) {
        final LeafReader reader = context.reader();
        final NumericDocValues numeric = DocValues.getNumeric(reader, "numeric");
        final SortedDocValues sorted = DocValues.getSorted(reader, "sorted");
        final BinaryDocValues binary = DocValues.getBinary(reader, "binary");
        final SortedNumericDocValues sortedNumeric = DocValues.getSortedNumeric(reader, "sorted_numeric");
        final SortedSetDocValues sortedSet = DocValues.getSortedSet(reader, "sorted_set");
        for (int i = 0; i < reader.maxDoc(); ++i) {
            final Document doc = reader.document(i);
            final IndexableField valueField = doc.getField("value");
            final Long value = valueField == null ? null : valueField.numericValue().longValue();
            if (value == null) {
                assertTrue(numeric.docID() + " vs " + i, numeric.docID() < i);
            } else {
                assertEquals(i, numeric.nextDoc());
                assertEquals(i, binary.nextDoc());
                assertEquals(i, sorted.nextDoc());
                assertEquals(value.longValue(), numeric.longValue());
                assertTrue(sorted.ordValue() >= 0);
                assertEquals(new BytesRef(Long.toString(value)), sorted.lookupOrd(sorted.ordValue()));
                assertEquals(new BytesRef(Long.toString(value)), binary.binaryValue());
            }
            final IndexableField[] valuesFields = doc.getFields("values");
            if (valuesFields.length == 0) {
                assertTrue(sortedNumeric.docID() + " vs " + i, sortedNumeric.docID() < i);
            } else {
                final Set<Long> valueSet = new HashSet<>();
                for (IndexableField sf : valuesFields) {
                    valueSet.add(sf.numericValue().longValue());
                }
                assertEquals(i, sortedNumeric.nextDoc());
                assertEquals(valuesFields.length, sortedNumeric.docValueCount());
                for (int j = 0; j < sortedNumeric.docValueCount(); ++j) {
                    assertTrue(valueSet.contains(sortedNumeric.nextValue()));
                }
                assertEquals(i, sortedSet.nextDoc());
                int sortedSetCount = 0;
                while (true) {
                    long ord = sortedSet.nextOrd();
                    if (ord == SortedSetDocValues.NO_MORE_ORDS) {
                        break;
                    }
                    assertTrue(valueSet.contains(Long.parseLong(sortedSet.lookupOrd(ord).utf8ToString())));
                    sortedSetCount++;
                }
                assertEquals(valueSet.size(), sortedSetCount);
            }
        }
    }
    indexReader.close();
    dir.close();
}
Also used : SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) NumericDocValues(org.apache.lucene.index.NumericDocValues) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) Document(org.apache.lucene.document.Document) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) SerialMergeScheduler(org.apache.lucene.index.SerialMergeScheduler) StoredField(org.apache.lucene.document.StoredField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet) LeafReader(org.apache.lucene.index.LeafReader) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) SortedDocValues(org.apache.lucene.index.SortedDocValues) IndexableField(org.apache.lucene.index.IndexableField) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) IndexReader(org.apache.lucene.index.IndexReader) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 23 with NumericDocValuesField

use of org.apache.lucene.document.NumericDocValuesField in project lucene-solr by apache.

the class TestPerFieldDocValuesFormat method testMergeCalledOnTwoFormats.

public void testMergeCalledOnTwoFormats() throws IOException {
    MergeRecordingDocValueFormatWrapper dvf1 = new MergeRecordingDocValueFormatWrapper(TestUtil.getDefaultDocValuesFormat());
    MergeRecordingDocValueFormatWrapper dvf2 = new MergeRecordingDocValueFormatWrapper(TestUtil.getDefaultDocValuesFormat());
    IndexWriterConfig iwc = new IndexWriterConfig();
    iwc.setCodec(new AssertingCodec() {

        @Override
        public DocValuesFormat getDocValuesFormatForField(String field) {
            switch(field) {
                case "dv1":
                case "dv2":
                    return dvf1;
                case "dv3":
                    return dvf2;
                default:
                    return super.getDocValuesFormatForField(field);
            }
        }
    });
    Directory directory = newDirectory();
    IndexWriter iwriter = new IndexWriter(directory, iwc);
    Document doc = new Document();
    doc.add(new NumericDocValuesField("dv1", 5));
    doc.add(new NumericDocValuesField("dv2", 42));
    doc.add(new BinaryDocValuesField("dv3", new BytesRef("hello world")));
    iwriter.addDocument(doc);
    iwriter.commit();
    doc = new Document();
    doc.add(new NumericDocValuesField("dv1", 8));
    doc.add(new NumericDocValuesField("dv2", 45));
    doc.add(new BinaryDocValuesField("dv3", new BytesRef("goodbye world")));
    iwriter.addDocument(doc);
    iwriter.commit();
    iwriter.forceMerge(1, true);
    iwriter.close();
    assertEquals(1, dvf1.nbMergeCalls);
    assertEquals(new HashSet<>(Arrays.asList("dv1", "dv2")), new HashSet<>(dvf1.fieldNames));
    assertEquals(1, dvf2.nbMergeCalls);
    assertEquals(Collections.singletonList("dv3"), dvf2.fieldNames);
    directory.close();
}
Also used : AssertingCodec(org.apache.lucene.codecs.asserting.AssertingCodec) Document(org.apache.lucene.document.Document) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) DocValuesFormat(org.apache.lucene.codecs.DocValuesFormat) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) IndexWriter(org.apache.lucene.index.IndexWriter) BytesRef(org.apache.lucene.util.BytesRef) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Directory(org.apache.lucene.store.Directory)

Example 24 with NumericDocValuesField

use of org.apache.lucene.document.NumericDocValuesField in project lucene-solr by apache.

the class TestIndexWriterDelete method updateDoc.

private void updateDoc(IndexWriter modifier, int id, int value) throws IOException {
    Document doc = new Document();
    doc.add(newTextField("content", "aaa", Field.Store.NO));
    doc.add(newStringField("id", String.valueOf(id), Field.Store.YES));
    doc.add(newStringField("value", String.valueOf(value), Field.Store.NO));
    doc.add(new NumericDocValuesField("dv", value));
    modifier.updateDocument(new Term("id", String.valueOf(id)), doc);
}
Also used : NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) Document(org.apache.lucene.document.Document)

Example 25 with NumericDocValuesField

use of org.apache.lucene.document.NumericDocValuesField in project lucene-solr by apache.

the class TestIndexWriterDelete method addDoc.

private void addDoc(IndexWriter modifier, int id, int value) throws IOException {
    Document doc = new Document();
    doc.add(newTextField("content", "aaa", Field.Store.NO));
    doc.add(newStringField("id", String.valueOf(id), Field.Store.YES));
    doc.add(newStringField("value", String.valueOf(value), Field.Store.NO));
    doc.add(new NumericDocValuesField("dv", value));
    modifier.addDocument(doc);
}
Also used : NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) Document(org.apache.lucene.document.Document)

Aggregations

NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)296 Document (org.apache.lucene.document.Document)268 Directory (org.apache.lucene.store.Directory)206 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)132 SortedNumericDocValuesField (org.apache.lucene.document.SortedNumericDocValuesField)103 StringField (org.apache.lucene.document.StringField)92 BytesRef (org.apache.lucene.util.BytesRef)85 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)75 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)69 IndexReader (org.apache.lucene.index.IndexReader)54 Field (org.apache.lucene.document.Field)51 IndexSearcher (org.apache.lucene.search.IndexSearcher)49 BinaryDocValuesField (org.apache.lucene.document.BinaryDocValuesField)44 SortField (org.apache.lucene.search.SortField)42 TextField (org.apache.lucene.document.TextField)41 Sort (org.apache.lucene.search.Sort)41 Term (org.apache.lucene.index.Term)38 IntPoint (org.apache.lucene.document.IntPoint)36 SortedNumericSortField (org.apache.lucene.search.SortedNumericSortField)36 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)34