Example 31 with Codec

Use of org.apache.lucene.codecs.Codec in the apache lucene-solr project.

Class TestDocTermOrds, method testRandom:

public void testRandom() throws Exception {
    Directory dir = newDirectory();
    final int NUM_TERMS = atLeast(20);
    final Set<BytesRef> terms = new HashSet<>();
    while (terms.size() < NUM_TERMS) {
        final String s = TestUtil.randomRealisticUnicodeString(random());
        //final String s = _TestUtil.randomSimpleString(random);
        if (s.length() > 0) {
            terms.add(new BytesRef(s));
        }
    }
    final BytesRef[] termsArray = terms.toArray(new BytesRef[terms.size()]);
    Arrays.sort(termsArray);
    final int NUM_DOCS = atLeast(100);
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
    // Sometimes swap in codec that impls ord():
    if (random().nextInt(10) == 7) {
        // Make sure terms index has ords:
        Codec codec = TestUtil.alwaysPostingsFormat(TestUtil.getPostingsFormatWithOrds(random()));
        conf.setCodec(codec);
    }
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir, conf);
    final int[][] idToOrds = new int[NUM_DOCS][];
    final Set<Integer> ordsForDocSet = new HashSet<>();
    for (int id = 0; id < NUM_DOCS; id++) {
        Document doc = new Document();
        doc.add(new LegacyIntField("id", id, Field.Store.YES));
        final int termCount = TestUtil.nextInt(random(), 0, 20 * RANDOM_MULTIPLIER);
        while (ordsForDocSet.size() < termCount) {
            ordsForDocSet.add(random().nextInt(termsArray.length));
        }
        final int[] ordsForDoc = new int[termCount];
        int upto = 0;
        if (VERBOSE) {
            System.out.println("TEST: doc id=" + id);
        }
        for (int ord : ordsForDocSet) {
            ordsForDoc[upto++] = ord;
            Field field = newStringField("field", termsArray[ord].utf8ToString(), Field.Store.NO);
            if (VERBOSE) {
                System.out.println("  f=" + termsArray[ord].utf8ToString());
            }
            doc.add(field);
        }
        ordsForDocSet.clear();
        Arrays.sort(ordsForDoc);
        idToOrds[id] = ordsForDoc;
        w.addDocument(doc);
    }
    final DirectoryReader r = w.getReader();
    w.close();
    if (VERBOSE) {
        System.out.println("TEST: reader=" + r);
    }
    for (LeafReaderContext ctx : r.leaves()) {
        if (VERBOSE) {
            System.out.println("\nTEST: sub=" + ctx.reader());
        }
        verify(ctx.reader(), idToOrds, termsArray, null);
    }
    // Also test top-level reader: its enum does not support ord, so this forces the OrdWrapper to run:
    if (VERBOSE) {
        System.out.println("TEST: top reader");
    }
    LeafReader slowR = SlowCompositeReaderWrapper.wrap(r);
    TestUtil.checkReader(slowR);
    verify(slowR, idToOrds, termsArray, null);
    FieldCache.DEFAULT.purgeByCacheKey(slowR.getCoreCacheHelper().getKey());
    r.close();
    dir.close();
}
Also used: LeafReader(org.apache.lucene.index.LeafReader) DirectoryReader(org.apache.lucene.index.DirectoryReader) Document(org.apache.lucene.document.Document) StringField(org.apache.lucene.document.StringField) LegacyLongField(org.apache.solr.legacy.LegacyLongField) LegacyIntField(org.apache.solr.legacy.LegacyIntField) Field(org.apache.lucene.document.Field) Codec(org.apache.lucene.codecs.Codec) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) BytesRef(org.apache.lucene.util.BytesRef) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)
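
Outside the randomized test framework used above, a codec is pinned explicitly on the IndexWriterConfig. A minimal sketch, not taken from the lucene-solr sources: the path, analyzer, and codec name are illustrative, and Codec.forName("Lucene70") assumes a Lucene 7.x classpath (additional imports needed: IndexWriter, FSDirectory, StandardAnalyzer, java.nio.file.Path, IOException).

static IndexWriter openWriterWithExplicitCodec(Path indexPath) throws IOException {
    Directory dir = FSDirectory.open(indexPath);
    IndexWriterConfig cfg = new IndexWriterConfig(new StandardAnalyzer());
    // Pin a concrete codec instead of relying on Codec.getDefault():
    cfg.setCodec(Codec.forName("Lucene70"));
    return new IndexWriter(dir, cfg);
}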

Example 32 with Codec

Use of org.apache.lucene.codecs.Codec in the apache lucene-solr project.

Class TestSegmentInfos, method testVersionsOneSegment:

// LUCENE-5954
public void testVersionsOneSegment() throws IOException {
    BaseDirectoryWrapper dir = newDirectory();
    dir.setCheckIndexOnClose(false);
    byte[] id = StringHelper.randomId();
    Codec codec = Codec.getDefault();
    SegmentInfos sis = new SegmentInfos(Version.LATEST.major);
    SegmentInfo info = new SegmentInfo(dir, Version.LUCENE_7_0_0, Version.LUCENE_7_0_0, "_0", 1, false, Codec.getDefault(), Collections.<String, String>emptyMap(), id, Collections.<String, String>emptyMap(), null);
    info.setFiles(Collections.<String>emptySet());
    codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
    SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, -1, -1, -1);
    sis.add(commitInfo);
    sis.commit(dir);
    sis = SegmentInfos.readLatestCommit(dir);
    assertEquals(Version.LUCENE_7_0_0, sis.getMinSegmentLuceneVersion());
    assertEquals(Version.LATEST, sis.getCommitLuceneVersion());
    dir.close();
}
Also used: Codec(org.apache.lucene.codecs.Codec) BaseDirectoryWrapper(org.apache.lucene.store.BaseDirectoryWrapper)
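
For completeness, the SegmentInfo written by codec.segmentInfoFormat().write(...) above can be read back with the matching read call. A short sketch reusing codec, dir, and id from the test; the segment name "_0" matches the one written above:

    SegmentInfo readBack = codec.segmentInfoFormat().read(dir, "_0", id, IOContext.READONCE);
    assertEquals(Version.LUCENE_7_0_0, readBack.getVersion());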

Example 33 with Codec

Use of org.apache.lucene.codecs.Codec in the apache lucene-solr project.

Class CheckIndex, method checkIndex:

/** Returns a {@link Status} instance detailing
   *  the state of the index.
   * 
   *  @param onlySegments list of specific segment names to check
   *
   *  <p>As this method checks every byte in the specified
   *  segments, on a large index it can take quite a long
   *  time to run. */
public Status checkIndex(List<String> onlySegments) throws IOException {
    ensureOpen();
    long startNS = System.nanoTime();
    NumberFormat nf = NumberFormat.getInstance(Locale.ROOT);
    SegmentInfos sis = null;
    Status result = new Status();
    result.dir = dir;
    String[] files = dir.listAll();
    String lastSegmentsFile = SegmentInfos.getLastCommitSegmentsFileName(files);
    if (lastSegmentsFile == null) {
        throw new IndexNotFoundException("no segments* file found in " + dir + ": files: " + Arrays.toString(files));
    }
    try {
        // Do not use SegmentInfos.read(Directory) since the spooky
        // retrying it does is not necessary here (we hold the write lock):
        sis = SegmentInfos.readCommit(dir, lastSegmentsFile);
    } catch (Throwable t) {
        if (failFast) {
            throw IOUtils.rethrowAlways(t);
        }
        msg(infoStream, "ERROR: could not read any segments file in directory");
        result.missingSegments = true;
        if (infoStream != null)
            t.printStackTrace(infoStream);
        return result;
    }
    // find the oldest and newest segment versions
    Version oldest = null;
    Version newest = null;
    String oldSegs = null;
    for (SegmentCommitInfo si : sis) {
        Version version = si.info.getVersion();
        if (version == null) {
            // pre-3.1 segment
            oldSegs = "pre-3.1";
        } else {
            if (oldest == null || version.onOrAfter(oldest) == false) {
                oldest = version;
            }
            if (newest == null || version.onOrAfter(newest)) {
                newest = version;
            }
        }
    }
    final int numSegments = sis.size();
    final String segmentsFileName = sis.getSegmentsFileName();
    // note: we only read the format byte (required preamble) here!
    IndexInput input = null;
    try {
        input = dir.openInput(segmentsFileName, IOContext.READONCE);
    } catch (Throwable t) {
        if (failFast) {
            throw IOUtils.rethrowAlways(t);
        }
        msg(infoStream, "ERROR: could not open segments file in directory");
        if (infoStream != null) {
            t.printStackTrace(infoStream);
        }
        result.cantOpenSegments = true;
        return result;
    }
    try {
        /*int format =*/
        input.readInt();
    } catch (Throwable t) {
        if (failFast) {
            throw IOUtils.rethrowAlways(t);
        }
        msg(infoStream, "ERROR: could not read segment file version in directory");
        if (infoStream != null) {
            t.printStackTrace(infoStream);
        }
        result.missingSegmentVersion = true;
        return result;
    } finally {
        if (input != null)
            input.close();
    }
    result.segmentsFileName = segmentsFileName;
    result.numSegments = numSegments;
    result.userData = sis.getUserData();
    String userDataString;
    if (sis.getUserData().size() > 0) {
        userDataString = " userData=" + sis.getUserData();
    } else {
        userDataString = "";
    }
    String versionString = "";
    if (oldSegs != null) {
        if (newest != null) {
            versionString = "versions=[" + oldSegs + " .. " + newest + "]";
        } else {
            versionString = "version=" + oldSegs;
        }
    } else if (newest != null) {
        // implies oldest != null
        versionString = oldest.equals(newest) ? ("version=" + oldest) : ("versions=[" + oldest + " .. " + newest + "]");
    }
    msg(infoStream, "Segments file=" + segmentsFileName + " numSegments=" + numSegments + " " + versionString + " id=" + StringHelper.idToString(sis.getId()) + userDataString);
    if (onlySegments != null) {
        result.partial = true;
        if (infoStream != null) {
            infoStream.print("\nChecking only these segments:");
            for (String s : onlySegments) {
                infoStream.print(" " + s);
            }
        }
        result.segmentsChecked.addAll(onlySegments);
        msg(infoStream, ":");
    }
    result.newSegments = sis.clone();
    result.newSegments.clear();
    result.maxSegmentName = -1;
    for (int i = 0; i < numSegments; i++) {
        final SegmentCommitInfo info = sis.info(i);
        int segmentName = Integer.parseInt(info.info.name.substring(1), Character.MAX_RADIX);
        if (segmentName > result.maxSegmentName) {
            result.maxSegmentName = segmentName;
        }
        if (onlySegments != null && !onlySegments.contains(info.info.name)) {
            continue;
        }
        Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
        result.segmentInfos.add(segInfoStat);
        msg(infoStream, "  " + (1 + i) + " of " + numSegments + ": name=" + info.info.name + " maxDoc=" + info.info.maxDoc());
        segInfoStat.name = info.info.name;
        segInfoStat.maxDoc = info.info.maxDoc();
        final Version version = info.info.getVersion();
        if (info.info.maxDoc() <= 0) {
            throw new RuntimeException("illegal number of documents: maxDoc=" + info.info.maxDoc());
        }
        int toLoseDocCount = info.info.maxDoc();
        SegmentReader reader = null;
        Sort previousIndexSort = null;
        try {
            msg(infoStream, "    version=" + (version == null ? "3.0" : version));
            msg(infoStream, "    id=" + StringHelper.idToString(info.info.getId()));
            final Codec codec = info.info.getCodec();
            msg(infoStream, "    codec=" + codec);
            segInfoStat.codec = codec;
            msg(infoStream, "    compound=" + info.info.getUseCompoundFile());
            segInfoStat.compound = info.info.getUseCompoundFile();
            msg(infoStream, "    numFiles=" + info.files().size());
            Sort indexSort = info.info.getIndexSort();
            if (indexSort != null) {
                msg(infoStream, "    sort=" + indexSort);
                if (previousIndexSort != null) {
                    if (previousIndexSort.equals(indexSort) == false) {
                        throw new RuntimeException("index sort changed from " + previousIndexSort + " to " + indexSort);
                    }
                } else {
                    previousIndexSort = indexSort;
                }
            }
            segInfoStat.numFiles = info.files().size();
            segInfoStat.sizeMB = info.sizeInBytes() / (1024. * 1024.);
            msg(infoStream, "    size (MB)=" + nf.format(segInfoStat.sizeMB));
            Map<String, String> diagnostics = info.info.getDiagnostics();
            segInfoStat.diagnostics = diagnostics;
            if (diagnostics.size() > 0) {
                msg(infoStream, "    diagnostics = " + diagnostics);
            }
            if (!info.hasDeletions()) {
                msg(infoStream, "    no deletions");
                segInfoStat.hasDeletions = false;
            } else {
                msg(infoStream, "    has deletions [delGen=" + info.getDelGen() + "]");
                segInfoStat.hasDeletions = true;
                segInfoStat.deletionsGen = info.getDelGen();
            }
            long startOpenReaderNS = System.nanoTime();
            if (infoStream != null)
                infoStream.print("    test: open reader.........");
            reader = new SegmentReader(info, sis.getIndexCreatedVersionMajor(), IOContext.DEFAULT);
            msg(infoStream, String.format(Locale.ROOT, "OK [took %.3f sec]", nsToSec(System.nanoTime() - startOpenReaderNS)));
            segInfoStat.openReaderPassed = true;
            long startIntegrityNS = System.nanoTime();
            if (infoStream != null)
                infoStream.print("    test: check integrity.....");
            reader.checkIntegrity();
            msg(infoStream, String.format(Locale.ROOT, "OK [took %.3f sec]", nsToSec(System.nanoTime() - startIntegrityNS)));
            if (reader.maxDoc() != info.info.maxDoc()) {
                throw new RuntimeException("SegmentReader.maxDoc() " + reader.maxDoc() + " != SegmentInfo.maxDoc " + info.info.maxDoc());
            }
            final int numDocs = reader.numDocs();
            toLoseDocCount = numDocs;
            if (reader.hasDeletions()) {
                if (reader.numDocs() != info.info.maxDoc() - info.getDelCount()) {
                    throw new RuntimeException("delete count mismatch: info=" + (info.info.maxDoc() - info.getDelCount()) + " vs reader=" + reader.numDocs());
                }
                if ((info.info.maxDoc() - reader.numDocs()) > reader.maxDoc()) {
                    throw new RuntimeException("too many deleted docs: maxDoc()=" + reader.maxDoc() + " vs del count=" + (info.info.maxDoc() - reader.numDocs()));
                }
                if (info.info.maxDoc() - reader.numDocs() != info.getDelCount()) {
                    throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.info.maxDoc() - reader.numDocs()));
                }
            } else {
                if (info.getDelCount() != 0) {
                    throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.info.maxDoc() - reader.numDocs()));
                }
            }
            if (checksumsOnly == false) {
                // Test Livedocs
                segInfoStat.liveDocStatus = testLiveDocs(reader, infoStream, failFast);
                // Test Fieldinfos
                segInfoStat.fieldInfoStatus = testFieldInfos(reader, infoStream, failFast);
                // Test Field Norms
                segInfoStat.fieldNormStatus = testFieldNorms(reader, infoStream, failFast);
                // Test the Term Index
                segInfoStat.termIndexStatus = testPostings(reader, infoStream, verbose, failFast, version);
                // Test Stored Fields
                segInfoStat.storedFieldStatus = testStoredFields(reader, infoStream, failFast);
                // Test Term Vectors
                segInfoStat.termVectorStatus = testTermVectors(reader, infoStream, verbose, crossCheckTermVectors, failFast, version);
                // Test Docvalues
                segInfoStat.docValuesStatus = testDocValues(reader, infoStream, failFast);
                // Test PointValues
                segInfoStat.pointsStatus = testPoints(reader, infoStream, failFast);
                // Test index sort
                segInfoStat.indexSortStatus = testSort(reader, indexSort, infoStream, failFast);
                //  This will cause stats for failed segments to be incremented properly
                if (segInfoStat.liveDocStatus.error != null) {
                    throw new RuntimeException("Live docs test failed");
                } else if (segInfoStat.fieldInfoStatus.error != null) {
                    throw new RuntimeException("Field Info test failed");
                } else if (segInfoStat.fieldNormStatus.error != null) {
                    throw new RuntimeException("Field Norm test failed");
                } else if (segInfoStat.termIndexStatus.error != null) {
                    throw new RuntimeException("Term Index test failed");
                } else if (segInfoStat.storedFieldStatus.error != null) {
                    throw new RuntimeException("Stored Field test failed");
                } else if (segInfoStat.termVectorStatus.error != null) {
                    throw new RuntimeException("Term Vector test failed");
                } else if (segInfoStat.docValuesStatus.error != null) {
                    throw new RuntimeException("DocValues test failed");
                } else if (segInfoStat.pointsStatus.error != null) {
                    throw new RuntimeException("Points test failed");
                }
            }
            msg(infoStream, "");
            if (verbose) {
                msg(infoStream, "detailed segment RAM usage: ");
                msg(infoStream, Accountables.toString(reader));
            }
        } catch (Throwable t) {
            if (failFast) {
                throw IOUtils.rethrowAlways(t);
            }
            msg(infoStream, "FAILED");
            String comment;
            comment = "exorciseIndex() would remove reference to this segment";
            msg(infoStream, "    WARNING: " + comment + "; full exception:");
            if (infoStream != null)
                t.printStackTrace(infoStream);
            msg(infoStream, "");
            result.totLoseDocCount += toLoseDocCount;
            result.numBadSegments++;
            continue;
        } finally {
            if (reader != null)
                reader.close();
        }
        // Keeper
        result.newSegments.add(info.clone());
    }
    if (0 == result.numBadSegments) {
        result.clean = true;
    } else
        msg(infoStream, "WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
    if (!(result.validCounter = (result.maxSegmentName < sis.counter))) {
        result.clean = false;
        result.newSegments.counter = result.maxSegmentName + 1;
        msg(infoStream, "ERROR: Next segment name counter " + sis.counter + " is not greater than max segment name " + result.maxSegmentName);
    }
    if (result.clean) {
        msg(infoStream, "No problems were detected with this index.\n");
    }
    msg(infoStream, String.format(Locale.ROOT, "Took %.3f sec total.", nsToSec(System.nanoTime() - startNS)));
    return result;
}
Also used: DocValuesStatus(org.apache.lucene.index.CheckIndex.Status.DocValuesStatus) Codec(org.apache.lucene.codecs.Codec) Version(org.apache.lucene.util.Version) IndexInput(org.apache.lucene.store.IndexInput) Sort(org.apache.lucene.search.Sort) NumberFormat(java.text.NumberFormat)
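
A typical caller constructs a CheckIndex over the directory, optionally sets an info stream, and passes null for onlySegments to check every segment. A minimal usage sketch under that assumption; the index path is illustrative:

try (Directory dir = FSDirectory.open(Paths.get("/path/to/index"));
     // Constructing CheckIndex acquires the index write lock; it fails if an IndexWriter holds it.
     CheckIndex checker = new CheckIndex(dir)) {
    checker.setInfoStream(System.out);
    CheckIndex.Status status = checker.checkIndex(null); // null = check all segments
    if (!status.clean) {
        System.out.println(status.numBadSegments + " broken segment(s); "
            + status.totLoseDocCount + " documents would be lost by exorciseIndex()");
    }
}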

Example 34 with Codec

Use of org.apache.lucene.codecs.Codec in the apache lucene-solr project.

Class IndexWriter, method readFieldInfos:

// reads latest field infos for the commit
// this is used on IW init and addIndexes(Dir) to create/update the global field map.
// TODO: fix tests abusing this method!
static FieldInfos readFieldInfos(SegmentCommitInfo si) throws IOException {
    Codec codec = si.info.getCodec();
    FieldInfosFormat reader = codec.fieldInfosFormat();
    if (si.hasFieldUpdates()) {
        // there are updates, we read latest (always outside of CFS)
        final String segmentSuffix = Long.toString(si.getFieldInfosGen(), Character.MAX_RADIX);
        return reader.read(si.info.dir, si.info, segmentSuffix, IOContext.READONCE);
    } else if (si.info.getUseCompoundFile()) {
        // cfs
        try (Directory cfs = codec.compoundFormat().getCompoundReader(si.info.dir, si.info, IOContext.DEFAULT)) {
            return reader.read(cfs, si.info, "", IOContext.READONCE);
        }
    } else {
        // no cfs
        return reader.read(si.info.dir, si.info, "", IOContext.READONCE);
    }
}
Also used: Codec(org.apache.lucene.codecs.Codec) FieldInfosFormat(org.apache.lucene.codecs.FieldInfosFormat) MMapDirectory(org.apache.lucene.store.MMapDirectory) Directory(org.apache.lucene.store.Directory) FSDirectory(org.apache.lucene.store.FSDirectory)
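
The last branch (plain segment, no compound file, no field updates) can be exercised directly from a commit point. A rough sketch under those assumptions; compound-file segments and field-info generations would need the other two branches shown above:

SegmentInfos sis = SegmentInfos.readLatestCommit(dir);
for (SegmentCommitInfo si : sis) {
    // Ask each segment's own codec for its FieldInfosFormat and read the field metadata.
    FieldInfos fis = si.info.getCodec().fieldInfosFormat()
            .read(si.info.dir, si.info, "", IOContext.READONCE);
    System.out.println(si.info.name + ": " + fis.size() + " fields");
}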

Example 35 with Codec

Use of org.apache.lucene.codecs.Codec in the apache lucene-solr project.

Class SolrCore, method initCodec:

private Codec initCodec(SolrConfig solrConfig, final IndexSchema schema) {
    final PluginInfo info = solrConfig.getPluginInfo(CodecFactory.class.getName());
    final CodecFactory factory;
    if (info != null) {
        factory = schema.getResourceLoader().newInstance(info.className, CodecFactory.class);
        factory.init(info.initArgs);
    } else {
        factory = new CodecFactory() {

            @Override
            public Codec getCodec() {
                return Codec.getDefault();
            }
        };
    }
    if (factory instanceof SolrCoreAware) {
        // CodecFactory needs SolrCore before inform() is called on all registered
        // SolrCoreAware listeners, at the end of the SolrCore constructor
        ((SolrCoreAware) factory).inform(this);
    } else {
        for (FieldType ft : schema.getFieldTypes().values()) {
            if (null != ft.getPostingsFormat()) {
                String msg = "FieldType '" + ft.getTypeName() + "' is configured with a postings format, but the codec does not support it: " + factory.getClass();
                log.error(msg);
                throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, msg);
            }
            if (null != ft.getDocValuesFormat()) {
                String msg = "FieldType '" + ft.getTypeName() + "' is configured with a docValues format, but the codec does not support it: " + factory.getClass();
                log.error(msg);
                throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, msg);
            }
        }
    }
    return factory.getCodec();
}
Also used: Codec(org.apache.lucene.codecs.Codec) SolrCoreAware(org.apache.solr.util.plugin.SolrCoreAware) SolrException(org.apache.solr.common.SolrException) FieldType(org.apache.solr.schema.FieldType)
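
When no <codecFactory> is configured in solrconfig.xml, initCodec falls back to the anonymous factory above, which returns Codec.getDefault() and therefore cannot honor per-field postingsFormat or docValuesFormat settings (hence the two error checks). A custom factory only needs to implement getCodec(); a minimal sketch with a hypothetical class name (Solr's SchemaCodecFactory is the stock implementation that does support per-field formats):

// Hypothetical example, not part of the lucene-solr sources.
public class DefaultOnlyCodecFactory extends CodecFactory {
    @Override
    public Codec getCodec() {
        // Always the default codec; per-field postings/docValues formats on
        // field types would be rejected by SolrCore.initCodec for this factory.
        return Codec.getDefault();
    }
}

It would then be registered as <codecFactory class="com.example.DefaultOnlyCodecFactory"/> in solrconfig.xml (class name hypothetical).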

Aggregations

Codec (org.apache.lucene.codecs.Codec): 56
Directory (org.apache.lucene.store.Directory): 23
MockDirectoryWrapper (org.apache.lucene.store.MockDirectoryWrapper): 10
MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 8
Document (org.apache.lucene.document.Document): 8
Failure (org.apache.lucene.store.MockDirectoryWrapper.Failure): 8
FakeIOException (org.apache.lucene.store.MockDirectoryWrapper.FakeIOException): 8
BytesRef (org.apache.lucene.util.BytesRef): 8
HashSet (java.util.HashSet): 7
HashMap (java.util.HashMap): 6
Field (org.apache.lucene.document.Field): 6
TrackingDirectoryWrapper (org.apache.lucene.store.TrackingDirectoryWrapper): 5
Version (org.apache.lucene.util.Version): 5
AssertingCodec (org.apache.lucene.codecs.asserting.AssertingCodec): 4
FieldType (org.apache.lucene.document.FieldType): 4
IOContext (org.apache.lucene.store.IOContext): 4
IOException (java.io.IOException): 3
Set (java.util.Set): 3
IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 3
RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 3