Examples with IndexWriterConfig - org.apache.lucene.index.IndexWriterConfig

Example 61 with IndexWriterConfig

use of org.apache.lucene.index.IndexWriterConfig in project lucene-solr by apache.

the class TestIDVersionPostingsFormat method testRandom.

// TODO make a similar test for BT, w/ varied IDs:
/**
 * Randomized round-trip test of versioned primary-key lookups against an index written with
 * {@code IDVersionPostingsFormat}.
 *
 * <p>The test indexes {@code numDocs} distinct IDs, each with a version, and after roughly one
 * in seven additions updates or deletes a randomly chosen earlier ID. The expected live state
 * is mirrored in an in-memory map, which is then used to verify {@code PerThreadVersionPKLookup}
 * for existing IDs (exact version is found, version + 1 is not) and for IDs that are not in
 * the map (lookup returns -1).
 *
 * <p>The directory, writer and reader are managed by try-with-resources so that they are closed
 * (reader, then writer, then directory) even when an assertion fails part-way through.
 *
 * @throws Exception if indexing or lookup fails
 */
public void testRandom() throws Exception {
    try (Directory dir = newDirectory()) {
        IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
        int minItemsInBlock = TestUtil.nextInt(random(), 2, 50);
        // Derived from the minimum so the pair stays consistent for the postings format.
        int maxItemsInBlock = 2 * (minItemsInBlock - 1) + random().nextInt(50);
        iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat(minItemsInBlock, maxItemsInBlock)));
        try (RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc)) {
            int numDocs = atLeast(1000);
            // Expected live state: id -> most recently indexed version; deleted ids are removed.
            Map<String, Long> idValues = new HashMap<>();
            int docUpto = 0;
            if (VERBOSE) {
                System.out.println("TEST: numDocs=" + numDocs);
            }
            IDSource ids = getRandomIDs();
            String idPrefix;
            if (random().nextBoolean()) {
                idPrefix = "";
            } else {
                idPrefix = TestUtil.randomSimpleString(random());
                if (VERBOSE) {
                    System.out.println("TEST: use id prefix: " + idPrefix);
                }
            }
            boolean useMonotonicVersion = random().nextBoolean();
            if (VERBOSE) {
                System.out.println("TEST: useMonotonicVersion=" + useMonotonicVersion);
            }
            // Every id ever added, including ids that were deleted afterwards.
            List<String> idsList = new ArrayList<>();
            long version = 0;
            while (docUpto < numDocs) {
                String idValue = idPrefix + ids.next();
                if (idValues.containsKey(idValue)) {
                    // Only ids that are not currently live are added; draw another one.
                    continue;
                }
                if (useMonotonicVersion) {
                    version += TestUtil.nextInt(random(), 1, 10);
                } else {
                    // The mask clears the two highest bits, keeping the value in [0, 2^62 - 1].
                    version = random().nextLong() & 0x3fffffffffffffffL;
                }
                idValues.put(idValue, version);
                if (VERBOSE) {
                    System.out.println("  " + idValue + " -> " + version);
                }
                Document doc = new Document();
                doc.add(makeIDField(idValue, version));
                w.addDocument(doc);
                idsList.add(idValue);
                // idsList is never empty here because an id was just appended above.
                if (random().nextInt(7) == 5) {
                    // Randomly delete or update a previous ID
                    idValue = idsList.get(random().nextInt(idsList.size()));
                    if (random().nextBoolean()) {
                        if (useMonotonicVersion) {
                            version += TestUtil.nextInt(random(), 1, 10);
                        } else {
                            version = random().nextLong() & 0x3fffffffffffffffL;
                        }
                        doc = new Document();
                        doc.add(makeIDField(idValue, version));
                        if (VERBOSE) {
                            System.out.println("  update " + idValue + " -> " + version);
                        }
                        // An update of a previously deleted id makes it live again.
                        w.updateDocument(new Term("id", idValue), doc);
                        idValues.put(idValue, version);
                    } else {
                        if (VERBOSE) {
                            System.out.println("  delete " + idValue);
                        }
                        w.deleteDocuments(new Term("id", idValue));
                        idValues.remove(idValue);
                    }
                }
                docUpto++;
            }
            try (IndexReader r = w.getReader()) {
                PerThreadVersionPKLookup lookup = new PerThreadVersionPKLookup(r, "id");
                List<Map.Entry<String, Long>> idValuesList = new ArrayList<>(idValues.entrySet());
                int iters = numDocs * 5;
                for (int iter = 0; iter < iters; iter++) {
                    String idValue;
                    if (random().nextBoolean()) {
                        // A live id taken from the expected state.
                        idValue = idValuesList.get(random().nextInt(idValuesList.size())).getKey();
                    } else if (random().nextBoolean()) {
                        // NOTE(review): unlike the indexed ids this one is not prefixed with
                        // idPrefix; presumably intentional, to probe ids that are mostly absent.
                        idValue = ids.next();
                    } else {
                        idValue = idPrefix + TestUtil.randomSimpleString(random());
                    }
                    BytesRef idValueBytes = new BytesRef(idValue);
                    // null means the id is not live, so the lookup must miss.
                    Long expectedVersion = idValues.get(idValue);
                    if (VERBOSE) {
                        System.out.println("\nTEST: iter=" + iter + " id=" + idValue + " expectedVersion=" + expectedVersion);
                    }
                    if (expectedVersion == null) {
                        assertEquals("term should not have been found (doesn't exist)", -1, lookup.lookup(idValueBytes));
                    } else {
                        if (random().nextBoolean()) {
                            if (VERBOSE) {
                                System.out.println("  lookup exact version (should be found)");
                            }
                            assertTrue("term should have been found (version too old)", lookup.lookup(idValueBytes, expectedVersion.longValue()) != -1);
                            assertEquals(expectedVersion.longValue(), lookup.getVersion());
                        } else {
                            if (VERBOSE) {
                                System.out.println("  lookup version+1 (should not be found)");
                            }
                            assertEquals("term should not have been found (version newer)", -1, lookup.lookup(idValueBytes, expectedVersion.longValue() + 1));
                        }
                    }
                }
            }
        }
    }
}

Also used : HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) AtomicLong(java.util.concurrent.atomic.AtomicLong) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 62 with IndexWriterConfig

use of org.apache.lucene.index.IndexWriterConfig in project lucene-solr by apache.

the class TestIDVersionPostingsFormat method testInvalidPayload.

/**
 * Checks that adding and committing a document whose "id" field carries the payload bytes of
 * {@code "foo"} is rejected with an {@link IllegalArgumentException} when the index is written
 * with {@code IDVersionPostingsFormat}.
 *
 * <p>The writer and directory are managed by try-with-resources so they are closed (writer
 * first) even if the expected exception is not thrown and the assertion fails.
 *
 * @throws Exception if setting up or closing the index fails
 */
public void testInvalidPayload() throws Exception {
    try (Directory dir = newDirectory()) {
        IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
        iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
        try (RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc)) {
            Document doc = new Document();
            // NOTE(review): per the test name this payload is not a valid version payload.
            doc.add(new StringAndPayloadField("id", "id", new BytesRef("foo")));
            // The commit is inside the lambda, so the exception may come from either call.
            expectThrows(IllegalArgumentException.class, () -> {
                w.addDocument(doc);
                w.commit();
            });
        }
    }
}

Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) BytesRef(org.apache.lucene.util.BytesRef) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 63 with IndexWriterConfig

use of org.apache.lucene.index.IndexWriterConfig in project lucene-solr by apache.

the class TestIDVersionPostingsFormat method testMoreThanOnceInSingleDoc.

/**
 * Checks that a single document containing the same ID field twice (id "id", version 17) is
 * rejected with an {@link IllegalArgumentException} when it is added and committed to an index
 * written with {@code IDVersionPostingsFormat}.
 *
 * <p>The writer and directory are managed by try-with-resources so they are closed (writer
 * first) even if the expected exception is not thrown and the assertion fails.
 *
 * @throws IOException if setting up or closing the index fails
 */
public void testMoreThanOnceInSingleDoc() throws IOException {
    try (Directory dir = newDirectory()) {
        IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
        iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
        try (RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc)) {
            Document doc = new Document();
            // The same id/version pair is added twice on purpose.
            doc.add(makeIDField("id", 17));
            doc.add(makeIDField("id", 17));
            // The commit is inside the lambda, so the exception may come from either call.
            expectThrows(IllegalArgumentException.class, () -> {
                w.addDocument(doc);
                w.commit();
            });
        }
    }
}

Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 64 with IndexWriterConfig

use of org.apache.lucene.index.IndexWriterConfig in project lucene-solr by apache.

the class BaseGeoPointTestCase method testMultiValued.

/**
 * Indexes {@code numPoints} documents that each carry two random (quantized) points, then runs
 * random rectangle queries and checks that a document is a hit exactly when
 * {@code rectContainsPoint} accepts at least one of its two points.
 *
 * <p>The check indexes the expected values by docID, so the writer uses a log merge policy and
 * a serial merge scheduler (see the inline comments). Mismatches are printed for every
 * offending document of an iteration before the test fails.
 *
 * <p>The directory, writer and reader are managed by try-with-resources so they are closed even
 * when the test fails. As before, the writer is closed before any search is run.
 *
 * @throws Exception if indexing or searching fails
 */
public void testMultiValued() throws Exception {
    int numPoints = atLeast(10000);
    // Every doc has 2 points:
    double[] lats = new double[2 * numPoints];
    double[] lons = new double[2 * numPoints];
    try (Directory dir = newDirectory()) {
        IndexWriterConfig iwc = newIndexWriterConfig();
        // We rely on docID order:
        iwc.setMergePolicy(newLogMergePolicy());
        // and on seeds being able to reproduce:
        iwc.setMergeScheduler(new SerialMergeScheduler());
        final IndexReader openedReader;
        try (RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc)) {
            for (int id = 0; id < numPoints; id++) {
                Document doc = new Document();
                // Slots 2*id and 2*id+1 hold the two points of document `id`.
                lats[2 * id] = quantizeLat(nextLatitude());
                lons[2 * id] = quantizeLon(nextLongitude());
                doc.add(newStringField("id", "" + id, Field.Store.YES));
                addPointToDoc(FIELD_NAME, doc, lats[2 * id], lons[2 * id]);
                lats[2 * id + 1] = quantizeLat(nextLatitude());
                lons[2 * id + 1] = quantizeLon(nextLongitude());
                addPointToDoc(FIELD_NAME, doc, lats[2 * id + 1], lons[2 * id + 1]);
                if (VERBOSE) {
                    System.out.println("id=" + id);
                    System.out.println("  lat=" + lats[2 * id] + " lon=" + lons[2 * id]);
                    System.out.println("  lat=" + lats[2 * id + 1] + " lon=" + lons[2 * id + 1]);
                }
                w.addDocument(doc);
            }
            // TODO: share w/ verify; just need parallel array of the expected ids
            if (random().nextBoolean()) {
                w.forceMerge(1);
            }
            openedReader = w.getReader();
        }
        try (IndexReader r = openedReader) {
            IndexSearcher s = newSearcher(r);
            int iters = atLeast(25);
            for (int iter = 0; iter < iters; iter++) {
                Rectangle rect = nextBox();
                if (VERBOSE) {
                    System.out.println("\nTEST: iter=" + iter + " rect=" + rect);
                }
                Query query = newRectQuery(FIELD_NAME, rect.minLat, rect.maxLat, rect.minLon, rect.maxLon);
                // One bit per top-level docID; set for every document the query matched.
                final FixedBitSet hits = new FixedBitSet(r.maxDoc());
                s.search(query, new SimpleCollector() {

                    private int docBase;

                    @Override
                    public boolean needsScores() {
                        return false;
                    }

                    @Override
                    protected void doSetNextReader(LeafReaderContext context) throws IOException {
                        docBase = context.docBase;
                    }

                    @Override
                    public void collect(int doc) {
                        // Translate the per-leaf doc number into a top-level docID.
                        hits.set(docBase + doc);
                    }
                });
                // Collect all mismatches of this iteration before failing, so every one is printed.
                boolean anyWrong = false;
                for (int docID = 0; docID < numPoints; docID++) {
                    double latDoc1 = lats[2 * docID];
                    double lonDoc1 = lons[2 * docID];
                    double latDoc2 = lats[2 * docID + 1];
                    double lonDoc2 = lons[2 * docID + 1];
                    boolean result1 = rectContainsPoint(rect, latDoc1, lonDoc1);
                    boolean result2 = rectContainsPoint(rect, latDoc2, lonDoc2);
                    boolean expected = result1 || result2;
                    if (hits.get(docID) != expected) {
                        String id = s.doc(docID).get("id");
                        if (expected) {
                            System.out.println("TEST: id=" + id + " docID=" + docID + " should match but did not");
                        } else {
                            System.out.println("TEST: id=" + id + " docID=" + docID + " should not match but did");
                        }
                        System.out.println("  rect=" + rect);
                        System.out.println("  lat=" + latDoc1 + " lon=" + lonDoc1 + "\n  lat=" + latDoc2 + " lon=" + lonDoc2);
                        System.out.println("  result1=" + result1 + " result2=" + result2);
                        anyWrong = true;
                    }
                }
                if (anyWrong) {
                    fail("some hits were wrong");
                }
            }
        }
    }
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) SerialMergeScheduler(org.apache.lucene.index.SerialMergeScheduler) SimpleCollector(org.apache.lucene.search.SimpleCollector) FixedBitSet(org.apache.lucene.util.FixedBitSet) IndexReader(org.apache.lucene.index.IndexReader) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 65 with IndexWriterConfig

use of org.apache.lucene.index.IndexWriterConfig in project lucene-solr by apache.

the class BaseGeoPointTestCase method searchSmallSet.

/**
 * Returns the top docs for {@code query} over a small, fixed set of points in field "point".
 *
 * <p>A fresh index is built on every call. It holds one single-valued document per fixed
 * point, one two-valued document per consecutive pair of fixed points, and a random number of
 * documents (fewer than 10) that carry only a "string" field. The index is closed before the
 * method returns, including when the search throws.
 *
 * @param query the query to run against the temporary index
 * @param size the maximum number of hits requested from the searcher
 * @return the result of {@code searcher.search(query, size)}
 * @throws Exception if indexing or searching fails
 */
private TopDocs searchSmallSet(Query query, int size) throws Exception {
    // this is a simple systematic test, indexing these points
    // TODO: fragile: does not understand quantization in any way yet uses extremely high precision!
    double[][] pts = new double[][] {
        { 32.763420, -96.774 },
        { 32.7559529921407, -96.7759895324707 },
        { 32.77866942010977, -96.77701950073242 },
        { 32.7756745755423, -96.7706036567688 },
        { 27.703618681345585, -139.73458170890808 },
        { 32.94823588839368, -96.4538113027811 },
        { 33.06047141970814, -96.65084838867188 },
        { 32.778650, -96.7772 },
        { -88.56029371730983, -177.23537676036358 },
        { 33.541429799076354, -26.779373834241003 },
        { 26.774024500421728, -77.35379276106497 },
        { -90.0, -14.796283808944777 },
        { 32.94823588839368, -178.8538113027811 },
        { 32.94823588839368, 178.8538113027811 },
        { 40.720611, -73.998776 },
        { -44.5, -179.5 }
    };
    try (Directory directory = newDirectory()) {
        // TODO: must these simple tests really rely on docid order?
        IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
        iwc.setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000));
        iwc.setMergePolicy(newLogMergePolicy());
        // Else seeds may not reproduce:
        iwc.setMergeScheduler(new SerialMergeScheduler());
        final IndexReader openedReader;
        try (RandomIndexWriter writer = new RandomIndexWriter(random(), directory, iwc)) {
            for (double[] p : pts) {
                Document doc = new Document();
                addPointToDoc("point", doc, p[0], p[1]);
                writer.addDocument(doc);
            }
            // add explicit multi-valued docs (pts has an even length, so i + 1 is always valid)
            for (int i = 0; i < pts.length; i += 2) {
                Document doc = new Document();
                addPointToDoc("point", doc, pts[i][0], pts[i][1]);
                addPointToDoc("point", doc, pts[i + 1][0], pts[i + 1][1]);
                writer.addDocument(doc);
            }
            // index random string documents; the count is drawn once rather than re-rolled in
            // the loop condition on every iteration
            int numStringDocs = random().nextInt(10);
            for (int i = 0; i < numStringDocs; ++i) {
                Document doc = new Document();
                doc.add(new StringField("string", Integer.toString(i), Field.Store.NO));
                writer.addDocument(doc);
            }
            openedReader = writer.getReader();
        }
        // The writer is closed before searching; reader and directory are closed after it.
        try (IndexReader reader = openedReader) {
            IndexSearcher searcher = newSearcher(reader);
            return searcher.search(query, size);
        }
    }
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Document(org.apache.lucene.document.Document) SerialMergeScheduler(org.apache.lucene.index.SerialMergeScheduler) TopDocs(org.apache.lucene.search.TopDocs) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) StringField(org.apache.lucene.document.StringField) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)513 IndexWriter (org.apache.lucene.index.IndexWriter)362 Document (org.apache.lucene.document.Document)311 Directory (org.apache.lucene.store.Directory)289 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)162 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)143 IndexReader (org.apache.lucene.index.IndexReader)140 Term (org.apache.lucene.index.Term)116 IndexSearcher (org.apache.lucene.search.IndexSearcher)106 TextField (org.apache.lucene.document.TextField)93 DirectoryReader (org.apache.lucene.index.DirectoryReader)92 RAMDirectory (org.apache.lucene.store.RAMDirectory)89 IOException (java.io.IOException)88 BytesRef (org.apache.lucene.util.BytesRef)80 Field (org.apache.lucene.document.Field)78 Analyzer (org.apache.lucene.analysis.Analyzer)74 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)61 Test (org.junit.Test)61 StringField (org.apache.lucene.document.StringField)59 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)49