Search in sources :

Example 6 with SegmentInfos

use of org.apache.lucene.index.SegmentInfos in project lucene-solr by apache.

the class TestSimilarityBase method testLengthEncodingBackwardCompatibility.

/**
 * Checks that small field lengths are reported exactly by a similarity picked at random
 * from {@code sims}, for an index whose created-version major is
 * {@code Version.LUCENE_6_0_0.major} and for one whose major is {@code Version.LATEST.major}.
 *
 * <p>For every version/length combination a single document is indexed whose {@code foo}
 * field is the string {@code "b"} repeated {@code length} times, separated by spaces. The
 * value returned by the scorer's {@code getLengthValue(0)} must then equal {@code length}.
 *
 * <p>The directory, writer and reader are managed with try-with-resources so they are
 * closed even when an assertion fails or an exception is thrown part-way through.
 *
 * @throws IOException if creating, writing to, or reading from the index fails
 */
public void testLengthEncodingBackwardCompatibility() throws IOException {
    Similarity similarity = RandomPicks.randomFrom(random(), sims);
    for (int indexCreatedVersionMajor : new int[] { Version.LUCENE_6_0_0.major, Version.LATEST.major }) {
        for (int length : new int[] { 1, 2, 4 }) {
            // these length values are encoded accurately in both cases
            try (Directory dir = newDirectory()) {
                // set the version on the directory; this must happen before the writer is opened
                new SegmentInfos(indexCreatedVersionMajor).commit(dir);
                try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setSimilarity(similarity))) {
                    Document doc = new Document();
                    String value = IntStream.range(0, length).mapToObj(i -> "b").collect(Collectors.joining(" "));
                    doc.add(new TextField("foo", value, Store.NO));
                    w.addDocument(doc);
                    try (IndexReader reader = DirectoryReader.open(w)) {
                        IndexSearcher searcher = newSearcher(reader);
                        searcher.setSimilarity(similarity);
                        Term term = new Term("foo", "b");
                        TermContext context = TermContext.build(reader.getContext(), term);
                        SimWeight simWeight = similarity.computeWeight(1f, searcher.collectionStatistics("foo"), searcher.termStatistics(term, context));
                        SimilarityBase.BasicSimScorer simScorer = (SimilarityBase.BasicSimScorer) similarity.simScorer(simWeight, reader.leaves().get(0));
                        // doc 0 is the only document that was indexed above
                        float docLength = simScorer.getLengthValue(0);
                        assertEquals(length, (int) docLength);
                    }
                }
            }
        }
    }
}
Also used : IntStream(java.util.stream.IntStream) Query(org.apache.lucene.search.Query) RandomPicks(com.carrotsearch.randomizedtesting.generators.RandomPicks) FieldType(org.apache.lucene.document.FieldType) Term(org.apache.lucene.index.Term) SimWeight(org.apache.lucene.search.similarities.Similarity.SimWeight) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) Directory(org.apache.lucene.store.Directory) Store(org.apache.lucene.document.Field.Store) TermStatistics(org.apache.lucene.search.TermStatistics) TopDocs(org.apache.lucene.search.TopDocs) Explanation(org.apache.lucene.search.Explanation) BytesRef(org.apache.lucene.util.BytesRef) DirectoryReader(org.apache.lucene.index.DirectoryReader) IOException(java.io.IOException) TermContext(org.apache.lucene.index.TermContext) Collectors(java.util.stream.Collectors) Version(org.apache.lucene.util.Version) SegmentInfos(org.apache.lucene.index.SegmentInfos) List(java.util.List) FieldInvertState(org.apache.lucene.index.FieldInvertState) IndexWriter(org.apache.lucene.index.IndexWriter) CollectionStatistics(org.apache.lucene.search.CollectionStatistics) TermQuery(org.apache.lucene.search.TermQuery) Field(org.apache.lucene.document.Field) LuceneTestCase(org.apache.lucene.util.LuceneTestCase) TextField(org.apache.lucene.document.TextField) IndexOptions(org.apache.lucene.index.IndexOptions) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IndexReader(org.apache.lucene.index.IndexReader) IndexSearcher(org.apache.lucene.search.IndexSearcher) IndexSearcher(org.apache.lucene.search.IndexSearcher) SegmentInfos(org.apache.lucene.index.SegmentInfos) SimWeight(org.apache.lucene.search.similarities.Similarity.SimWeight) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) TermContext(org.apache.lucene.index.TermContext) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) 
IndexReader(org.apache.lucene.index.IndexReader) TextField(org.apache.lucene.document.TextField) Directory(org.apache.lucene.store.Directory)

Example 7 with SegmentInfos

use of org.apache.lucene.index.SegmentInfos in project lucene-solr by apache.

the class TestBM25Similarity method testLengthEncodingBackwardCompatibility.

/**
 * Checks that {@code BM25Similarity} reports small field lengths exactly, for an index whose
 * created-version major is {@code Version.LUCENE_6_0_0.major} and for one whose major is
 * {@code Version.LATEST.major}.
 *
 * <p>For every version/length combination a single document is indexed whose {@code foo}
 * field is the string {@code "b"} repeated {@code length} times, separated by spaces. The
 * score explanation for a term query on {@code foo:b} is then searched (via
 * {@code findExplanation}) for the {@code "fieldLength"} entry, whose value must equal
 * {@code length}.
 *
 * <p>The directory, writer and reader are managed with try-with-resources so they are
 * closed even when an assertion fails or an exception is thrown part-way through.
 *
 * @throws IOException if creating, writing to, or reading from the index fails
 */
public void testLengthEncodingBackwardCompatibility() throws IOException {
    Similarity similarity = new BM25Similarity();
    for (int indexCreatedVersionMajor : new int[] { Version.LUCENE_6_0_0.major, Version.LATEST.major }) {
        for (int length : new int[] { 1, 2, 4 }) {
            // these length values are encoded accurately in both cases
            try (Directory dir = newDirectory()) {
                // set the version on the directory; this must happen before the writer is opened
                new SegmentInfos(indexCreatedVersionMajor).commit(dir);
                try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setSimilarity(similarity))) {
                    Document doc = new Document();
                    String value = IntStream.range(0, length).mapToObj(i -> "b").collect(Collectors.joining(" "));
                    doc.add(new TextField("foo", value, Store.NO));
                    w.addDocument(doc);
                    try (IndexReader reader = DirectoryReader.open(w)) {
                        IndexSearcher searcher = newSearcher(reader);
                        searcher.setSimilarity(similarity);
                        // explain the score of doc 0, the only document that was indexed above
                        Explanation expl = searcher.explain(new TermQuery(new Term("foo", "b")), 0);
                        Explanation docLen = findExplanation(expl, "fieldLength");
                        assertNotNull(docLen);
                        assertEquals(docLen.toString(), length, (int) docLen.getValue());
                    }
                }
            }
        }
    }
}
Also used : IntStream(java.util.stream.IntStream) Explanation(org.apache.lucene.search.Explanation) DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) Version(org.apache.lucene.util.Version) SegmentInfos(org.apache.lucene.index.SegmentInfos) Document(org.apache.lucene.document.Document) IndexWriter(org.apache.lucene.index.IndexWriter) TermQuery(org.apache.lucene.search.TermQuery) Directory(org.apache.lucene.store.Directory) Store(org.apache.lucene.document.Field.Store) LuceneTestCase(org.apache.lucene.util.LuceneTestCase) TextField(org.apache.lucene.document.TextField) IndexReader(org.apache.lucene.index.IndexReader) IndexSearcher(org.apache.lucene.search.IndexSearcher) IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) SegmentInfos(org.apache.lucene.index.SegmentInfos) Explanation(org.apache.lucene.search.Explanation) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) TextField(org.apache.lucene.document.TextField) Directory(org.apache.lucene.store.Directory)

Example 8 with SegmentInfos

use of org.apache.lucene.index.SegmentInfos in project lucene-solr by apache.

the class TestClassicSimilarity method testNormEncodingBackwardCompatibility.

/**
 * Checks that {@code ClassicSimilarity} reports the field norm exactly for a few small
 * field lengths, for an index whose created-version major is
 * {@code Version.LUCENE_6_0_0.major} and for one whose major is {@code Version.LATEST.major}.
 *
 * <p>For every version/length combination a single document is indexed whose {@code foo}
 * field is the string {@code "b"} repeated {@code length} times, separated by spaces. The
 * score explanation for a term query on {@code foo:b} is then searched (via
 * {@code findExplanation}) for the {@code "fieldNorm"} entry, whose value must equal
 * {@code 1 / sqrt(length)} with zero tolerance.
 *
 * <p>The directory, writer and reader are managed with try-with-resources so they are
 * closed even when an assertion fails or an exception is thrown part-way through.
 *
 * @throws IOException if creating, writing to, or reading from the index fails
 */
public void testNormEncodingBackwardCompatibility() throws IOException {
    Similarity similarity = new ClassicSimilarity();
    for (int indexCreatedVersionMajor : new int[] { Version.LUCENE_6_0_0.major, Version.LATEST.major }) {
        for (int length : new int[] { 1, 4, 16 }) {
            // these length values are encoded accurately in both cases
            try (Directory dir = newDirectory()) {
                // set the version on the directory; this must happen before the writer is opened
                new SegmentInfos(indexCreatedVersionMajor).commit(dir);
                try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setSimilarity(similarity))) {
                    Document doc = new Document();
                    String value = IntStream.range(0, length).mapToObj(i -> "b").collect(Collectors.joining(" "));
                    doc.add(new TextField("foo", value, Store.NO));
                    w.addDocument(doc);
                    try (IndexReader reader = DirectoryReader.open(w)) {
                        IndexSearcher searcher = newSearcher(reader);
                        searcher.setSimilarity(similarity);
                        // explain the score of doc 0, the only document that was indexed above
                        Explanation expl = searcher.explain(new TermQuery(new Term("foo", "b")), 0);
                        Explanation fieldNorm = findExplanation(expl, "fieldNorm");
                        assertNotNull(fieldNorm);
                        // the chosen lengths are perfect squares, so an exact (delta 0) comparison is used
                        assertEquals(fieldNorm.toString(), 1 / Math.sqrt(length), fieldNorm.getValue(), 0f);
                    }
                }
            }
        }
    }
}
Also used : IntStream(java.util.stream.IntStream) MultiReader(org.apache.lucene.index.MultiReader) Query(org.apache.lucene.search.Query) Arrays(java.util.Arrays) StringField(org.apache.lucene.document.StringField) Term(org.apache.lucene.index.Term) TestUtil(org.apache.lucene.util.TestUtil) Document(org.apache.lucene.document.Document) Directory(org.apache.lucene.store.Directory) Store(org.apache.lucene.document.Field.Store) TopDocs(org.apache.lucene.search.TopDocs) Explanation(org.apache.lucene.search.Explanation) Occur(org.apache.lucene.search.BooleanClause.Occur) DirectoryReader(org.apache.lucene.index.DirectoryReader) IOUtils(org.apache.lucene.util.IOUtils) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) Version(org.apache.lucene.util.Version) SegmentInfos(org.apache.lucene.index.SegmentInfos) FieldInvertState(org.apache.lucene.index.FieldInvertState) IndexWriter(org.apache.lucene.index.IndexWriter) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) LuceneTestCase(org.apache.lucene.util.LuceneTestCase) TextField(org.apache.lucene.document.TextField) IDFStats(org.apache.lucene.search.similarities.TFIDFSimilarity.IDFStats) IndexReader(org.apache.lucene.index.IndexReader) IndexSearcher(org.apache.lucene.search.IndexSearcher) IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) SegmentInfos(org.apache.lucene.index.SegmentInfos) Explanation(org.apache.lucene.search.Explanation) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) TextField(org.apache.lucene.document.TextField) Directory(org.apache.lucene.store.Directory)

Example 9 with SegmentInfos

use of org.apache.lucene.index.SegmentInfos in project lucene-solr by apache.

the class SegmentsInfoRequestHandler method getSegmentsInfo.

/**
 * Builds one entry per segment of the latest commit in the directory behind the request's
 * searcher.
 *
 * <p>Each entry is the map produced by {@code getSegmentInfo} for that segment, stored under
 * the value it holds for {@code NAME}. Segments whose name appears in the list returned by
 * {@code getMergeCandidatesNames} additionally get {@code "mergeCandidate" = true}.
 *
 * @param req the request whose searcher supplies the index directory
 * @param rsp the response; not read or written by this method
 * @return the per-segment maps, added in the iteration order of the commit's segments
 * @throws Exception if reading the commit or describing a segment fails
 */
private SimpleOrderedMap<Object> getSegmentsInfo(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    final SolrIndexSearcher indexSearcher = req.getSearcher();
    final SegmentInfos latestCommit = SegmentInfos.readLatestCommit(indexSearcher.getIndexReader().directory());
    final List<String> candidateNames = getMergeCandidatesNames(req, latestCommit);

    final SimpleOrderedMap<Object> result = new SimpleOrderedMap<>();
    for (SegmentCommitInfo commitInfo : latestCommit) {
        final SimpleOrderedMap<Object> details = getSegmentInfo(commitInfo);
        final boolean isMergeCandidate = candidateNames.contains(commitInfo.info.name);
        if (isMergeCandidate) {
            details.add("mergeCandidate", true);
        }
        // key each segment's details by the name recorded inside the details map itself
        final String key = (String) details.get(NAME);
        result.add(key, details);
    }
    return result;
}
Also used : SegmentInfos(org.apache.lucene.index.SegmentInfos) SegmentCommitInfo(org.apache.lucene.index.SegmentCommitInfo) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap)

Example 10 with SegmentInfos

use of org.apache.lucene.index.SegmentInfos in project lucene-solr by apache.

the class ReplicaNode method finishNRTCopy.

/**
 * Completes an NRT copy job: updates replica state, and, if the job succeeded, switches the
 * searcher manager to the copied {@code SegmentInfos} and adjusts file reference counts.
 *
 * <p>While holding this object's monitor the method:
 * <ol>
 *   <li>resets {@code state} from {@code "syncing"} to {@code "idle"};</li>
 *   <li>clears {@code curNRTCopy} if it is this job;</li>
 *   <li>returns immediately if the job failed;</li>
 *   <li>calls {@code job.finish()}, decodes {@code SegmentInfos} from the copy state's bytes
 *       and passes them to the searcher manager when {@code mgr} is non-null;</li>
 *   <li>incRefs the new files, decRefs the previous NRT files, and replaces
 *       {@code lastNRTFiles} with the new set;</li>
 *   <li>removes completed merge files from {@code pendingMergeFiles} and asks the deleter to
 *       delete each of them if it has no references;</li>
 *   <li>records the copy state's file metadata in {@code lastFileMetaData}.</li>
 * </ol>
 * After releasing the monitor it counts documents matching {@code marker:marker} and logs a
 * summary line.
 *
 * @param job the copy job being finished
 * @param startNS a {@code System.nanoTime()} reading from which the elapsed time in the
 *     summary message is computed
 * @throws IOException if finishing the job, decoding the infos, or searching fails
 */
void finishNRTCopy(CopyJob job, long startNS) throws IOException {
    CopyState copyState = job.getCopyState();
    message("top: finishNRTCopy: version=" + copyState.version + (job.getFailed() ? " FAILED" : "") + " job=" + job);
    synchronized (this) {
        if ("syncing".equals(state)) {
            state = "idle";
        }
        if (curNRTCopy == job) {
            message("top: now clear curNRTCopy; job=" + job);
            curNRTCopy = null;
        } else {
            // a job that is no longer the current one is expected to have been marked failed
            assert job.getFailed();
            message("top: skip clear curNRTCopy: we were cancelled; job=" + job);
        }
        if (job.getFailed()) {
            return;
        }
        // Does final file renames:
        job.finish();
        // Turn byte[] back to SegmentInfos:
        SegmentInfos infos = SegmentInfos.readCommit(dir, new BufferedChecksumIndexInput(new ByteArrayIndexInput("SegmentInfos", copyState.infosBytes)), copyState.gen);
        assert infos.getVersion() == copyState.version;
        message("  version=" + infos.getVersion() + " segments=" + infos.toString());
        // Cutover to new searcher:
        if (mgr != null) {
            ((SegmentInfosSearcherManager) mgr).setCurrentInfos(infos);
        }
        // Must first incRef new NRT files, then decRef old ones, to make sure we don't remove an NRT file that's in common to both:
        Collection<String> newFiles = copyState.files.keySet();
        message("top: incRef newNRTFiles=" + newFiles);
        deleter.incRef(newFiles);
        // If any of our new files were previously copied merges, we clear them now, so we don't try to later delete a non-existent file:
        pendingMergeFiles.removeAll(newFiles);
        message("top: after remove from pending merges pendingMergeFiles=" + pendingMergeFiles);
        message("top: decRef lastNRTFiles=" + lastNRTFiles);
        deleter.decRef(lastNRTFiles);
        lastNRTFiles.clear();
        lastNRTFiles.addAll(newFiles);
        message("top: set lastNRTFiles=" + lastNRTFiles);
        // Stop tracking completed merge files that are still pending, and delete each one
        // if nothing references it:
        if (copyState.completedMergeFiles.isEmpty() == false) {
            message("now remove-if-not-ref'd completed merge files: " + copyState.completedMergeFiles);
            for (String fileName : copyState.completedMergeFiles) {
                if (pendingMergeFiles.contains(fileName)) {
                    pendingMergeFiles.remove(fileName);
                    deleter.deleteIfNoRef(fileName);
                }
            }
        }
        lastFileMetaData = copyState.files;
    }
    int markerCount;
    // NOTE(review): mgr is null-checked inside the synchronized block above but dereferenced
    // unconditionally here; confirm mgr can never be null at this point.
    IndexSearcher s = mgr.acquire();
    try {
        markerCount = s.count(new TermQuery(new Term("marker", "marker")));
    } finally {
        mgr.release(s);
    }
    message(String.format(Locale.ROOT, "top: done sync: took %.3fs for %s, opened NRT reader version=%d markerCount=%d", (System.nanoTime() - startNS) / 1000000000.0, bytesToString(job.getTotalBytesCopied()), copyState.version, markerCount));
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) SegmentInfos(org.apache.lucene.index.SegmentInfos) BufferedChecksumIndexInput(org.apache.lucene.store.BufferedChecksumIndexInput) ByteArrayIndexInput(org.apache.lucene.store.ByteArrayIndexInput) Term(org.apache.lucene.index.Term)

Aggregations

SegmentInfos (org.apache.lucene.index.SegmentInfos)23 IOException (java.io.IOException)9 IndexWriter (org.apache.lucene.index.IndexWriter)9 Term (org.apache.lucene.index.Term)8 IndexSearcher (org.apache.lucene.search.IndexSearcher)8 TermQuery (org.apache.lucene.search.TermQuery)7 Document (org.apache.lucene.document.Document)6 TextField (org.apache.lucene.document.TextField)6 DirectoryReader (org.apache.lucene.index.DirectoryReader)6 Directory (org.apache.lucene.store.Directory)6 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)5 SegmentCommitInfo (org.apache.lucene.index.SegmentCommitInfo)5 ArrayList (java.util.ArrayList)4 HashMap (java.util.HashMap)4 IndexReader (org.apache.lucene.index.IndexReader)4 Collectors (java.util.stream.Collectors)3 IntStream (java.util.stream.IntStream)3 Store (org.apache.lucene.document.Field.Store)3 StringField (org.apache.lucene.document.StringField)3 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)3