Use of org.apache.lucene.codecs.Codec in project lucene-solr by apache.
The class TestDocTermOrds, method testRandom.
public void testRandom() throws Exception {
  Directory dir = newDirectory();

  final int NUM_TERMS = atLeast(20);
  final Set<BytesRef> terms = new HashSet<>();
  while (terms.size() < NUM_TERMS) {
    final String s = TestUtil.randomRealisticUnicodeString(random());
    //final String s = _TestUtil.randomSimpleString(random);
    if (s.length() > 0) {
      terms.add(new BytesRef(s));
    }
  }
  final BytesRef[] termsArray = terms.toArray(new BytesRef[terms.size()]);
  Arrays.sort(termsArray);

  final int NUM_DOCS = atLeast(100);

  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));

  // Sometimes swap in codec that impls ord():
  if (random().nextInt(10) == 7) {
    // Make sure terms index has ords:
    Codec codec = TestUtil.alwaysPostingsFormat(TestUtil.getPostingsFormatWithOrds(random()));
    conf.setCodec(codec);
  }

  final RandomIndexWriter w = new RandomIndexWriter(random(), dir, conf);

  final int[][] idToOrds = new int[NUM_DOCS][];
  final Set<Integer> ordsForDocSet = new HashSet<>();

  for (int id = 0; id < NUM_DOCS; id++) {
    Document doc = new Document();

    doc.add(new LegacyIntField("id", id, Field.Store.YES));

    final int termCount = TestUtil.nextInt(random(), 0, 20 * RANDOM_MULTIPLIER);
    while (ordsForDocSet.size() < termCount) {
      ordsForDocSet.add(random().nextInt(termsArray.length));
    }
    final int[] ordsForDoc = new int[termCount];
    int upto = 0;
    if (VERBOSE) {
      System.out.println("TEST: doc id=" + id);
    }
    for (int ord : ordsForDocSet) {
      ordsForDoc[upto++] = ord;
      Field field = newStringField("field", termsArray[ord].utf8ToString(), Field.Store.NO);
      if (VERBOSE) {
        System.out.println(" f=" + termsArray[ord].utf8ToString());
      }
      doc.add(field);
    }
    ordsForDocSet.clear();
    Arrays.sort(ordsForDoc);
    idToOrds[id] = ordsForDoc;
    w.addDocument(doc);
  }

  final DirectoryReader r = w.getReader();
  w.close();

  if (VERBOSE) {
    System.out.println("TEST: reader=" + r);
  }

  for (LeafReaderContext ctx : r.leaves()) {
    if (VERBOSE) {
      System.out.println("\nTEST: sub=" + ctx.reader());
    }
    verify(ctx.reader(), idToOrds, termsArray, null);
  }
  // Also test top-level reader: its enum does not support
  // ord, so this forces the OrdWrapper to run:
  if (VERBOSE) {
    System.out.println("TEST: top reader");
  }
  LeafReader slowR = SlowCompositeReaderWrapper.wrap(r);
  TestUtil.checkReader(slowR);
  verify(slowR, idToOrds, termsArray, null);

  FieldCache.DEFAULT.purgeByCacheKey(slowR.getCoreCacheHelper().getKey());

  r.close();
  dir.close();
}
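For comparison, here is a minimal sketch (not taken from lucene-solr) of how application code could install a codec that forces an ord-capable postings format for every field, using the public FilterCodec hook instead of the test-framework helpers; the "Memory" format name is an assumption (it lives in the lucene-codecs module) and any PostingsFormat whose TermsEnum implements ord() would do:

static IndexWriterConfig configWithOrdPostings() {
  // Wrap the default codec so every field uses one postings format, similar
  // in spirit to TestUtil.alwaysPostingsFormat(...) above.
  IndexWriterConfig conf = new IndexWriterConfig();
  conf.setCodec(new FilterCodec("OrdCodec", Codec.getDefault()) {
    @Override
    public PostingsFormat postingsFormat() {
      // MemoryPostingsFormat supports TermsEnum.ord(); assumed on the classpath.
      return PostingsFormat.forName("Memory");
    }
  });
  return conf;
}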
Use of org.apache.lucene.codecs.Codec in project lucene-solr by apache.
The class TestSegmentInfos, method testVersionsOneSegment.
// LUCENE-5954
public void testVersionsOneSegment() throws IOException {
  BaseDirectoryWrapper dir = newDirectory();
  dir.setCheckIndexOnClose(false);
  byte[] id = StringHelper.randomId();
  Codec codec = Codec.getDefault();

  SegmentInfos sis = new SegmentInfos(Version.LATEST.major);
  SegmentInfo info = new SegmentInfo(dir, Version.LUCENE_7_0_0, Version.LUCENE_7_0_0, "_0", 1, false,
      Codec.getDefault(), Collections.<String, String>emptyMap(), id, Collections.<String, String>emptyMap(), null);
  info.setFiles(Collections.<String>emptySet());
  codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);

  SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, -1, -1, -1);
  sis.add(commitInfo);
  sis.commit(dir);

  sis = SegmentInfos.readLatestCommit(dir);
  assertEquals(Version.LUCENE_7_0_0, sis.getMinSegmentLuceneVersion());
  assertEquals(Version.LATEST, sis.getCommitLuceneVersion());
  dir.close();
}
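The same accessors that the test asserts can be used to inspect any existing index; a short sketch, assuming dir points at an index directory:

SegmentInfos sis = SegmentInfos.readLatestCommit(dir);
System.out.println("commit written by: " + sis.getCommitLuceneVersion());
System.out.println("oldest segment:    " + sis.getMinSegmentLuceneVersion());
for (SegmentCommitInfo sci : sis) {
  // per-segment version of the Lucene release that wrote the segment
  System.out.println(sci.info.name + " -> " + sci.info.getVersion());
}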
Use of org.apache.lucene.codecs.Codec in project lucene-solr by apache.
The class CheckIndex, method checkIndex.
/** Returns a {@link Status} instance detailing
 *  the state of the index.
 *
 *  @param onlySegments list of specific segment names to check
 *
 *  <p>As this method checks every byte in the specified
 *  segments, on a large index it can take quite a long
 *  time to run. */
public Status checkIndex(List<String> onlySegments) throws IOException {
  ensureOpen();
  long startNS = System.nanoTime();
  NumberFormat nf = NumberFormat.getInstance(Locale.ROOT);
  SegmentInfos sis = null;
  Status result = new Status();
  result.dir = dir;
  String[] files = dir.listAll();
  String lastSegmentsFile = SegmentInfos.getLastCommitSegmentsFileName(files);
  if (lastSegmentsFile == null) {
    throw new IndexNotFoundException("no segments* file found in " + dir + ": files: " + Arrays.toString(files));
  }

  try {
    // Do not use SegmentInfos.read(Directory) since the spooky
    // retrying it does is not necessary here (we hold the write lock):
    sis = SegmentInfos.readCommit(dir, lastSegmentsFile);
  } catch (Throwable t) {
    if (failFast) {
      throw IOUtils.rethrowAlways(t);
    }
    msg(infoStream, "ERROR: could not read any segments file in directory");
    result.missingSegments = true;
    if (infoStream != null) {
      t.printStackTrace(infoStream);
    }
    return result;
  }

  // find the oldest and newest segment versions
  Version oldest = null;
  Version newest = null;
  String oldSegs = null;
  for (SegmentCommitInfo si : sis) {
    Version version = si.info.getVersion();
    if (version == null) {
      // pre-3.1 segment
      oldSegs = "pre-3.1";
    } else {
      if (oldest == null || version.onOrAfter(oldest) == false) {
        oldest = version;
      }
      if (newest == null || version.onOrAfter(newest)) {
        newest = version;
      }
    }
  }

  final int numSegments = sis.size();
  final String segmentsFileName = sis.getSegmentsFileName();

  // note: we only read the format byte (required preamble) here!
  IndexInput input = null;
  try {
    input = dir.openInput(segmentsFileName, IOContext.READONCE);
  } catch (Throwable t) {
    if (failFast) {
      throw IOUtils.rethrowAlways(t);
    }
    msg(infoStream, "ERROR: could not open segments file in directory");
    if (infoStream != null) {
      t.printStackTrace(infoStream);
    }
    result.cantOpenSegments = true;
    return result;
  }

  try {
    /*int format =*/ input.readInt();
  } catch (Throwable t) {
    if (failFast) {
      throw IOUtils.rethrowAlways(t);
    }
    msg(infoStream, "ERROR: could not read segment file version in directory");
    if (infoStream != null) {
      t.printStackTrace(infoStream);
    }
    result.missingSegmentVersion = true;
    return result;
  } finally {
    if (input != null) {
      input.close();
    }
  }

  result.segmentsFileName = segmentsFileName;
  result.numSegments = numSegments;
  result.userData = sis.getUserData();
  String userDataString;
  if (sis.getUserData().size() > 0) {
    userDataString = " userData=" + sis.getUserData();
  } else {
    userDataString = "";
  }

  String versionString = "";
  if (oldSegs != null) {
    if (newest != null) {
      versionString = "versions=[" + oldSegs + " .. " + newest + "]";
    } else {
      versionString = "version=" + oldSegs;
    }
  } else if (newest != null) {
    // implies oldest != null
    versionString = oldest.equals(newest) ? ("version=" + oldest) : ("versions=[" + oldest + " .. " + newest + "]");
  }

  msg(infoStream, "Segments file=" + segmentsFileName + " numSegments=" + numSegments + " " + versionString + " id=" + StringHelper.idToString(sis.getId()) + userDataString);

  if (onlySegments != null) {
    result.partial = true;
    if (infoStream != null) {
      infoStream.print("\nChecking only these segments:");
      for (String s : onlySegments) {
        infoStream.print(" " + s);
      }
    }
    result.segmentsChecked.addAll(onlySegments);
    msg(infoStream, ":");
  }

  result.newSegments = sis.clone();
  result.newSegments.clear();
  result.maxSegmentName = -1;

  for (int i = 0; i < numSegments; i++) {
    final SegmentCommitInfo info = sis.info(i);
    int segmentName = Integer.parseInt(info.info.name.substring(1), Character.MAX_RADIX);
    if (segmentName > result.maxSegmentName) {
      result.maxSegmentName = segmentName;
    }
    if (onlySegments != null && !onlySegments.contains(info.info.name)) {
      continue;
    }
    Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
    result.segmentInfos.add(segInfoStat);
    msg(infoStream, " " + (1 + i) + " of " + numSegments + ": name=" + info.info.name + " maxDoc=" + info.info.maxDoc());
    segInfoStat.name = info.info.name;
    segInfoStat.maxDoc = info.info.maxDoc();

    final Version version = info.info.getVersion();
    if (info.info.maxDoc() <= 0) {
      throw new RuntimeException("illegal number of documents: maxDoc=" + info.info.maxDoc());
    }

    int toLoseDocCount = info.info.maxDoc();

    SegmentReader reader = null;
    Sort previousIndexSort = null;

    try {
      msg(infoStream, " version=" + (version == null ? "3.0" : version));
      msg(infoStream, " id=" + StringHelper.idToString(info.info.getId()));
      final Codec codec = info.info.getCodec();
      msg(infoStream, " codec=" + codec);
      segInfoStat.codec = codec;
      msg(infoStream, " compound=" + info.info.getUseCompoundFile());
      segInfoStat.compound = info.info.getUseCompoundFile();
      msg(infoStream, " numFiles=" + info.files().size());
      Sort indexSort = info.info.getIndexSort();
      if (indexSort != null) {
        msg(infoStream, " sort=" + indexSort);
        if (previousIndexSort != null) {
          if (previousIndexSort.equals(indexSort) == false) {
            throw new RuntimeException("index sort changed from " + previousIndexSort + " to " + indexSort);
          }
        } else {
          previousIndexSort = indexSort;
        }
      }
      segInfoStat.numFiles = info.files().size();
      segInfoStat.sizeMB = info.sizeInBytes() / (1024. * 1024.);
      msg(infoStream, " size (MB)=" + nf.format(segInfoStat.sizeMB));
      Map<String, String> diagnostics = info.info.getDiagnostics();
      segInfoStat.diagnostics = diagnostics;
      if (diagnostics.size() > 0) {
        msg(infoStream, " diagnostics = " + diagnostics);
      }

      if (!info.hasDeletions()) {
        msg(infoStream, " no deletions");
        segInfoStat.hasDeletions = false;
      } else {
        msg(infoStream, " has deletions [delGen=" + info.getDelGen() + "]");
        segInfoStat.hasDeletions = true;
        segInfoStat.deletionsGen = info.getDelGen();
      }

      long startOpenReaderNS = System.nanoTime();
      if (infoStream != null) {
        infoStream.print(" test: open reader.........");
      }
      reader = new SegmentReader(info, sis.getIndexCreatedVersionMajor(), IOContext.DEFAULT);
      msg(infoStream, String.format(Locale.ROOT, "OK [took %.3f sec]", nsToSec(System.nanoTime() - startOpenReaderNS)));

      segInfoStat.openReaderPassed = true;

      long startIntegrityNS = System.nanoTime();
      if (infoStream != null) {
        infoStream.print(" test: check integrity.....");
      }
      reader.checkIntegrity();
      msg(infoStream, String.format(Locale.ROOT, "OK [took %.3f sec]", nsToSec(System.nanoTime() - startIntegrityNS)));

      if (reader.maxDoc() != info.info.maxDoc()) {
        throw new RuntimeException("SegmentReader.maxDoc() " + reader.maxDoc() + " != SegmentInfo.maxDoc " + info.info.maxDoc());
      }

      final int numDocs = reader.numDocs();
      toLoseDocCount = numDocs;

      if (reader.hasDeletions()) {
        if (reader.numDocs() != info.info.maxDoc() - info.getDelCount()) {
          throw new RuntimeException("delete count mismatch: info=" + (info.info.maxDoc() - info.getDelCount()) + " vs reader=" + reader.numDocs());
        }
        if ((info.info.maxDoc() - reader.numDocs()) > reader.maxDoc()) {
          throw new RuntimeException("too many deleted docs: maxDoc()=" + reader.maxDoc() + " vs del count=" + (info.info.maxDoc() - reader.numDocs()));
        }
        if (info.info.maxDoc() - reader.numDocs() != info.getDelCount()) {
          throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.info.maxDoc() - reader.numDocs()));
        }
      } else {
        if (info.getDelCount() != 0) {
          throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.info.maxDoc() - reader.numDocs()));
        }
      }

      if (checksumsOnly == false) {
        // Test Livedocs
        segInfoStat.liveDocStatus = testLiveDocs(reader, infoStream, failFast);
        // Test Fieldinfos
        segInfoStat.fieldInfoStatus = testFieldInfos(reader, infoStream, failFast);
        // Test Field Norms
        segInfoStat.fieldNormStatus = testFieldNorms(reader, infoStream, failFast);
        // Test the Term Index
        segInfoStat.termIndexStatus = testPostings(reader, infoStream, verbose, failFast, version);
        // Test Stored Fields
        segInfoStat.storedFieldStatus = testStoredFields(reader, infoStream, failFast);
        // Test Term Vectors
        segInfoStat.termVectorStatus = testTermVectors(reader, infoStream, verbose, crossCheckTermVectors, failFast, version);
        // Test Docvalues
        segInfoStat.docValuesStatus = testDocValues(reader, infoStream, failFast);
        // Test PointValues
        segInfoStat.pointsStatus = testPoints(reader, infoStream, failFast);
        // Test index sort
        segInfoStat.indexSortStatus = testSort(reader, indexSort, infoStream, failFast);

        // This will cause stats for failed segments to be incremented properly
        if (segInfoStat.liveDocStatus.error != null) {
          throw new RuntimeException("Live docs test failed");
        } else if (segInfoStat.fieldInfoStatus.error != null) {
          throw new RuntimeException("Field Info test failed");
        } else if (segInfoStat.fieldNormStatus.error != null) {
          throw new RuntimeException("Field Norm test failed");
        } else if (segInfoStat.termIndexStatus.error != null) {
          throw new RuntimeException("Term Index test failed");
        } else if (segInfoStat.storedFieldStatus.error != null) {
          throw new RuntimeException("Stored Field test failed");
        } else if (segInfoStat.termVectorStatus.error != null) {
          throw new RuntimeException("Term Vector test failed");
        } else if (segInfoStat.docValuesStatus.error != null) {
          throw new RuntimeException("DocValues test failed");
        } else if (segInfoStat.pointsStatus.error != null) {
          throw new RuntimeException("Points test failed");
        }
      }

      msg(infoStream, "");

      if (verbose) {
        msg(infoStream, "detailed segment RAM usage: ");
        msg(infoStream, Accountables.toString(reader));
      }
    } catch (Throwable t) {
      if (failFast) {
        throw IOUtils.rethrowAlways(t);
      }
      msg(infoStream, "FAILED");
      String comment = "exorciseIndex() would remove reference to this segment";
      msg(infoStream, " WARNING: " + comment + "; full exception:");
      if (infoStream != null) {
        t.printStackTrace(infoStream);
      }
      msg(infoStream, "");
      result.totLoseDocCount += toLoseDocCount;
      result.numBadSegments++;
      continue;
    } finally {
      if (reader != null) {
        reader.close();
      }
    }

    // Keeper
    result.newSegments.add(info.clone());
  }

  if (0 == result.numBadSegments) {
    result.clean = true;
  } else {
    msg(infoStream, "WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
  }

  if (!(result.validCounter = (result.maxSegmentName < sis.counter))) {
    result.clean = false;
    result.newSegments.counter = result.maxSegmentName + 1;
    msg(infoStream, "ERROR: Next segment name counter " + sis.counter + " is not greater than max segment name " + result.maxSegmentName);
  }

  if (result.clean) {
    msg(infoStream, "No problems were detected with this index.\n");
  }

  msg(infoStream, String.format(Locale.ROOT, "Took %.3f sec total.", nsToSec(System.nanoTime() - startNS)));

  return result;
}
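A short driver sketch (not part of CheckIndex itself) showing how this method is typically invoked programmatically; the index path is a placeholder, and passing null for onlySegments checks every segment:

try (Directory dir = FSDirectory.open(Paths.get("/path/to/index"));
     CheckIndex checker = new CheckIndex(dir)) {
  checker.setInfoStream(System.out);
  CheckIndex.Status status = checker.checkIndex(null);
  if (!status.clean) {
    // checker.exorciseIndex(status) would drop the broken segments -- destructive!
    System.out.println(status.numBadSegments + " broken segment(s), "
        + status.totLoseDocCount + " doc(s) would be lost");
  }
}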
Use of org.apache.lucene.codecs.Codec in project lucene-solr by apache.
The class IndexWriter, method readFieldInfos.
// reads latest field infos for the commit
// this is used on IW init and addIndexes(Dir) to create/update the global field map.
// TODO: fix tests abusing this method!
static FieldInfos readFieldInfos(SegmentCommitInfo si) throws IOException {
  Codec codec = si.info.getCodec();
  FieldInfosFormat reader = codec.fieldInfosFormat();

  if (si.hasFieldUpdates()) {
    // there are updates, we read latest (always outside of CFS)
    final String segmentSuffix = Long.toString(si.getFieldInfosGen(), Character.MAX_RADIX);
    return reader.read(si.info.dir, si.info, segmentSuffix, IOContext.READONCE);
  } else if (si.info.getUseCompoundFile()) {
    // cfs
    try (Directory cfs = codec.compoundFormat().getCompoundReader(si.info.dir, si.info, IOContext.DEFAULT)) {
      return reader.read(cfs, si.info, "", IOContext.READONCE);
    }
  } else {
    // no cfs
    return reader.read(si.info.dir, si.info, "", IOContext.READONCE);
  }
}
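Since readFieldInfos has package-private access, code outside org.apache.lucene.index normally reaches the same information through a reader; a brief sketch, assuming dir points at an existing index:

try (DirectoryReader reader = DirectoryReader.open(dir)) {
  for (LeafReaderContext ctx : reader.leaves()) {
    // per-segment field infos, resolved through the codec as above
    FieldInfos fis = ctx.reader().getFieldInfos();
    for (FieldInfo fi : fis) {
      System.out.println("segment " + ctx.ord + ": field " + fi.name);
    }
  }
}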
Use of org.apache.lucene.codecs.Codec in project lucene-solr by apache.
The class SolrCore, method initCodec.
private Codec initCodec(SolrConfig solrConfig, final IndexSchema schema) {
  final PluginInfo info = solrConfig.getPluginInfo(CodecFactory.class.getName());
  final CodecFactory factory;
  if (info != null) {
    factory = schema.getResourceLoader().newInstance(info.className, CodecFactory.class);
    factory.init(info.initArgs);
  } else {
    factory = new CodecFactory() {
      @Override
      public Codec getCodec() {
        return Codec.getDefault();
      }
    };
  }
  if (factory instanceof SolrCoreAware) {
    // CodecFactory needs SolrCore before inform() is called on all registered
    // SolrCoreAware listeners, at the end of the SolrCore constructor
    ((SolrCoreAware) factory).inform(this);
  } else {
    for (FieldType ft : schema.getFieldTypes().values()) {
      if (null != ft.getPostingsFormat()) {
        String msg = "FieldType '" + ft.getTypeName() + "' is configured with a postings format, but the codec does not support it: " + factory.getClass();
        log.error(msg);
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, msg);
      }
      if (null != ft.getDocValuesFormat()) {
        String msg = "FieldType '" + ft.getTypeName() + "' is configured with a docValues format, but the codec does not support it: " + factory.getClass();
        log.error(msg);
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, msg);
      }
    }
  }
  return factory.getCodec();
}
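For reference, a hedged sketch of the plugin side that the info != null branch above would load when solrconfig.xml declares <codecFactory class="com.example.MyCodecFactory"/>; the class name is hypothetical and "Lucene70" is assumed to be the default codec name for this Lucene version:

public class MyCodecFactory extends CodecFactory {
  @Override
  public Codec getCodec() {
    // Look up a concrete codec by name instead of Codec.getDefault().
    return Codec.forName("Lucene70");
  }
}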