Search in sources :

Example 96 with IndexWriterConfig

use of org.apache.lucene.index.IndexWriterConfig in project lucene-solr by apache.

the class TestLatLonPointDistanceSort method doRandomTest.

/**
 * Randomized check of distance sorting on a {@code LatLonDocValuesField}.
 *
 * <p>Indexes {@code numDocs} documents, some of which carry a lat/lon point and some of which
 * do not, then runs {@code numQueries} distance sorts from random origins. For each query the
 * expected order is computed by brute force from the stored fields and compared against both
 * the first page of results and a second page fetched with {@code searchAfter}.
 *
 * @param numDocs number of documents to index
 * @param numQueries number of random sort origins to verify
 * @throws IOException if indexing or searching fails
 */
private void doRandomTest(int numDocs, int numQueries) throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig();
    // use a serial merge scheduler, otherwise a failing seed may not reproduce
    iwc.setMergeScheduler(new SerialMergeScheduler());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);

    for (int id = 0; id < numDocs; id++) {
        Document doc = new Document();
        doc.add(new StoredField("id", id));
        doc.add(new NumericDocValuesField("id", id));
        // only a draw of 8 or 9 gets a point; every other document is left "missing"
        if (random().nextInt(10) >= 8) {
            // round-trip through the encoding up front so the stored values are already
            // quantized and the comparisons below can be exact
            double lat = decodeLatitude(encodeLatitude(GeoTestUtil.nextLatitude()));
            double lon = decodeLongitude(encodeLongitude(GeoTestUtil.nextLongitude()));
            doc.add(new LatLonDocValuesField("field", lat, lon));
            doc.add(new StoredField("lat", lat));
            doc.add(new StoredField("lon", lon));
        }
        writer.addDocument(doc);
    }

    IndexReader reader = writer.getReader();
    IndexSearcher searcher = newSearcher(reader);
    final int maxDoc = reader.maxDoc();
    // documents without a point sort as infinitely far away
    final double missingValue = Double.POSITIVE_INFINITY;

    for (int query = 0; query < numQueries; query++) {
        double lat = GeoTestUtil.nextLatitude();
        double lon = GeoTestUtil.nextLongitude();

        // brute-force the expected ordering from the stored fields
        Result[] expected = new Result[maxDoc];
        for (int docID = 0; docID < maxDoc; docID++) {
            Document stored = reader.document(docID);
            final double distance;
            if (stored.getField("lat") != null) {
                double docLat = stored.getField("lat").numericValue().doubleValue();
                double docLon = stored.getField("lon").numericValue().doubleValue();
                distance = SloppyMath.haversinMeters(lat, lon, docLat, docLon);
            } else {
                distance = missingValue;
            }
            expected[docID] = new Result(stored.getField("id").numericValue().intValue(), distance);
        }
        Arrays.sort(expected);

        // pick a random first-page size
        int topN = TestUtil.nextInt(random(), 1, maxDoc);

        // primary sort key is the distance, ties are broken by id
        SortField distanceSort = LatLonDocValuesField.newDistanceSort("field", lat, lon);
        distanceSort.setMissingValue(missingValue);
        Sort sort = new Sort(distanceSort, new SortField("id", SortField.Type.INT));

        TopDocs firstPage = searcher.search(new MatchAllDocsQuery(), topN, sort);
        for (int rank = 0; rank < topN; rank++) {
            FieldDoc hit = (FieldDoc) firstPage.scoreDocs[rank];
            // fields[0] is the distance sort value, fields[1] the id sort value
            Result actual = new Result((Integer) hit.fields[1], (Double) hit.fields[0]);
            assertEquals(expected[rank], actual);
        }

        if (topN >= maxDoc) {
            // the first page already covered every document; nothing left to page through
            continue;
        }

        // fetch a second page with searchAfter(), continuing from the last hit of the first page
        int secondPageSize = TestUtil.nextInt(random(), 1, maxDoc - topN);
        TopDocs secondPage = searcher.searchAfter(firstPage.scoreDocs[topN - 1], new MatchAllDocsQuery(), secondPageSize, sort);
        for (int rank = 0; rank < secondPageSize; rank++) {
            FieldDoc hit = (FieldDoc) secondPage.scoreDocs[rank];
            Result actual = new Result((Integer) hit.fields[1], (Double) hit.fields[0]);
            assertEquals(expected[topN + rank], actual);
        }
    }

    reader.close();
    writer.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) FieldDoc(org.apache.lucene.search.FieldDoc) SortField(org.apache.lucene.search.SortField) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) SerialMergeScheduler(org.apache.lucene.index.SerialMergeScheduler) TopDocs(org.apache.lucene.search.TopDocs) IndexReader(org.apache.lucene.index.IndexReader) Sort(org.apache.lucene.search.Sort) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 97 with IndexWriterConfig

use of org.apache.lucene.index.IndexWriterConfig in project lucene-solr by apache.

the class TestIDVersionPostingsFormat method testInvalidVersions2.

/**
 * Indexes an "id" field whose 8-byte payload is {@code 0x7fffffffffffffff} (Long.MAX_VALUE)
 * through {@code IDVersionPostingsFormat} and expects the add/commit to be rejected with an
 * {@code IllegalArgumentException}, after which a further add must fail with
 * {@code AlreadyClosedException}.
 */
public void testInvalidVersions2() throws IOException {
    Directory directory = newDirectory();
    IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random()));
    config.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, config);

    // big-endian bytes of Long.MAX_VALUE
    byte[] versionBytes = new byte[] {
        (byte) 0x7f, (byte) 0xff, (byte) 0xff, (byte) 0xff,
        (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff
    };
    Document document = new Document();
    document.add(new StringAndPayloadField("id", "id", new BytesRef(versionBytes)));

    expectThrows(IllegalArgumentException.class, () -> {
        indexWriter.addDocument(document);
        indexWriter.commit();
    });

    // the writer is expected to refuse further use after the failure above,
    // so it is not closed explicitly here
    expectThrows(AlreadyClosedException.class, () -> {
        indexWriter.addDocument(document);
    });

    directory.close();
}
Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 98 with IndexWriterConfig

use of org.apache.lucene.index.IndexWriterConfig in project lucene-solr by apache.

the class TestIDVersionPostingsFormat method testMissingPositions.

/**
 * Adds a plain string "id" field (created with {@code newStringField}, not stored) to an index
 * that uses {@code IDVersionPostingsFormat} and expects the add/commit to be rejected with an
 * {@code IllegalArgumentException}.
 */
public void testMissingPositions() throws Exception {
    Directory directory = newDirectory();
    IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random()));
    config.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, config);

    Document document = new Document();
    document.add(newStringField("id", "id", Field.Store.NO));

    // the failure may surface on the add or only once the commit flushes, so both are wrapped
    expectThrows(IllegalArgumentException.class, () -> {
        indexWriter.addDocument(document);
        indexWriter.commit();
    });

    indexWriter.close();
    directory.close();
}
Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 99 with IndexWriterConfig

use of org.apache.lucene.index.IndexWriterConfig in project lucene-solr by apache.

the class DocumentDictionaryTest method testMultiValuedField.

/**
 * Indexes documents via {@code indexMultiValuedDocuments} and verifies that a
 * {@code DocumentDictionary} built over term, weight, payload and context fields returns
 * exactly the expected suggestions, in order.
 *
 * <p>Assertions pass the expected value first and the observed value second, so failure
 * messages report the two the right way round.
 *
 * @throws IOException if indexing or reading the index fails
 */
@Test
public void testMultiValuedField() throws IOException {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwc = newIndexWriterConfig(random(), analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    List<Suggestion> suggestions = indexMultiValuedDocuments(atLeast(1000), writer);
    writer.commit();
    writer.close();
    IndexReader ir = DirectoryReader.open(dir);
    Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME, CONTEXT_FIELD_NAME);
    InputIterator inputIterator = dictionary.getEntryIterator();
    BytesRef f;
    Iterator<Suggestion> suggestionsIter = suggestions.iterator();
    while ((f = inputIterator.next()) != null) {
        // fail with a readable message rather than NoSuchElementException if the
        // dictionary yields more entries than were indexed
        assertTrue("dictionary returned more entries than expected", suggestionsIter.hasNext());
        Suggestion nextSuggestion = suggestionsIter.next();
        assertEquals(nextSuggestion.term, f);
        long weight = nextSuggestion.weight;
        // an expected weight of -1 must be reported by the iterator as 0
        assertEquals((weight != -1) ? weight : 0, inputIterator.weight());
        assertEquals(nextSuggestion.payload, inputIterator.payload());
        assertEquals(nextSuggestion.contexts, inputIterator.contexts());
    }
    // every expected suggestion must have been consumed
    assertFalse(suggestionsIter.hasNext());
    IOUtils.close(ir, analyzer, dir);
}
Also used : Dictionary(org.apache.lucene.search.spell.Dictionary) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Example 100 with IndexWriterConfig

use of org.apache.lucene.index.IndexWriterConfig in project lucene-solr by apache.

the class DocumentDictionaryTest method testWithOptionalPayload.

/**
 * Indexes a single document that has a term field and a weight field but no payload field,
 * and verifies that a {@code DocumentDictionary} configured with a payload field still
 * returns the document, with an empty payload.
 *
 * <p>Assertions pass the expected value first and the observed value second, and use
 * {@code assertEquals} so that a missing entry ({@code null}) or a mismatch is reported with
 * both values instead of a bare failure or a {@code NullPointerException}.
 *
 * @throws IOException if indexing or reading the index fails
 */
@Test
public void testWithOptionalPayload() throws IOException {
    Directory dir = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    // Create a document that is missing the payload field
    Document doc = new Document();
    Field field = new TextField(FIELD_NAME, "some field", Field.Store.YES);
    doc.add(field);
    // do not store the payload or the contexts
    Field weight = new NumericDocValuesField(WEIGHT_FIELD_NAME, 100);
    doc.add(weight);
    writer.addDocument(doc);
    writer.commit();
    writer.close();
    IndexReader ir = DirectoryReader.open(dir);
    // Even though the payload field is missing, the dictionary iterator should not skip the document
    // because the payload field is optional.
    Dictionary dictionaryOptionalPayload = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME);
    InputIterator inputIterator = dictionaryOptionalPayload.getEntryIterator();
    BytesRef f = inputIterator.next();
    // assertEquals handles a null entry (document wrongly skipped) as a normal test failure
    assertEquals(new BytesRef(doc.get(FIELD_NAME)), f);
    IndexableField weightField = doc.getField(WEIGHT_FIELD_NAME);
    assertEquals(weightField.numericValue().longValue(), inputIterator.weight());
    // sanity check: the source document really has no payload field
    IndexableField payloadField = doc.getField(PAYLOAD_FIELD_NAME);
    assertNull(payloadField);
    // the iterator must report an empty payload for the missing field
    assertEquals(0, inputIterator.payload().length);
    IOUtils.close(ir, analyzer, dir);
}
Also used : Dictionary(org.apache.lucene.search.spell.Dictionary) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) IndexableField(org.apache.lucene.index.IndexableField) IndexableField(org.apache.lucene.index.IndexableField) StoredField(org.apache.lucene.document.StoredField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) IndexReader(org.apache.lucene.index.IndexReader) TextField(org.apache.lucene.document.TextField) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Aggregations

- IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 513
- IndexWriter (org.apache.lucene.index.IndexWriter): 362
- Document (org.apache.lucene.document.Document): 311
- Directory (org.apache.lucene.store.Directory): 289
- RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 162
- MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 143
- IndexReader (org.apache.lucene.index.IndexReader): 140
- Term (org.apache.lucene.index.Term): 116
- IndexSearcher (org.apache.lucene.search.IndexSearcher): 106
- TextField (org.apache.lucene.document.TextField): 93
- DirectoryReader (org.apache.lucene.index.DirectoryReader): 92
- RAMDirectory (org.apache.lucene.store.RAMDirectory): 89
- IOException (java.io.IOException): 88
- BytesRef (org.apache.lucene.util.BytesRef): 80
- Field (org.apache.lucene.document.Field): 78
- Analyzer (org.apache.lucene.analysis.Analyzer): 74
- StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer): 61
- Test (org.junit.Test): 61
- StringField (org.apache.lucene.document.StringField): 59
- NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField): 49