Use of org.apache.lucene.codecs.Codec in project lucene-solr by apache.
From the class TestCodecs, method testFixedPostings:
public void testFixedPostings() throws Throwable {
  final int NUM_TERMS = 100;
  final TermData[] terms = new TermData[NUM_TERMS];
  for (int i = 0; i < NUM_TERMS; i++) {
    final int[] docs = new int[] { i };
    final String text = Integer.toString(i, Character.MAX_RADIX);
    terms[i] = new TermData(text, docs, null);
  }
  final FieldInfos.Builder builder = new FieldInfos.Builder();
  final FieldData field = new FieldData("field", builder, terms, true, false);
  final FieldData[] fields = new FieldData[] { field };
  final FieldInfos fieldInfos = builder.finish();
  final Directory dir = newDirectory();
  Codec codec = Codec.getDefault();
  final SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, SEGMENT, 10000, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
  this.write(si, fieldInfos, dir, fields);
  final FieldsProducer reader = codec.postingsFormat().fieldsProducer(new SegmentReadState(dir, si, fieldInfos, newIOContext(random())));
  final Iterator<String> fieldsEnum = reader.iterator();
  String fieldName = fieldsEnum.next();
  assertNotNull(fieldName);
  final Terms terms2 = reader.terms(fieldName);
  assertNotNull(terms2);
  final TermsEnum termsEnum = terms2.iterator();
  PostingsEnum postingsEnum = null;
  for (int i = 0; i < NUM_TERMS; i++) {
    final BytesRef term = termsEnum.next();
    assertNotNull(term);
    assertEquals(terms[i].text2, term.utf8ToString());
    // do this twice to exercise postings-enum reuse and any internal state:
    for (int iter = 0; iter < 2; iter++) {
      postingsEnum = TestUtil.docs(random(), termsEnum, postingsEnum, PostingsEnum.NONE);
      assertEquals(terms[i].docs[0], postingsEnum.nextDoc());
      assertEquals(DocIdSetIterator.NO_MORE_DOCS, postingsEnum.nextDoc());
    }
  }
  assertNull(termsEnum.next());
  for (int i = 0; i < NUM_TERMS; i++) {
    assertEquals(termsEnum.seekCeil(new BytesRef(terms[i].text2)), TermsEnum.SeekStatus.FOUND);
  }
  assertFalse(fieldsEnum.hasNext());
  reader.close();
  dir.close();
}
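The test above relies on Codec.getDefault() to pick the codec that writes and reads the postings. As a minimal, self-contained sketch (not part of the test, and assuming only lucene-core on the classpath), this is how the default codec and the SPI-registered codecs can be inspected:

import org.apache.lucene.codecs.Codec;

public class CodecLookupExample {
  public static void main(String[] args) {
    // The codec used when none is set explicitly (what Codec.getDefault() returns above).
    Codec defaultCodec = Codec.getDefault();
    System.out.println("default codec: " + defaultCodec.getName());

    // Codecs are registered via SPI and can also be resolved by name.
    for (String name : Codec.availableCodecs()) {
      Codec codec = Codec.forName(name);
      System.out.println(name + " -> postings format: " + codec.postingsFormat().getName());
    }
  }
}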
Use of org.apache.lucene.codecs.Codec in project lucene-solr by apache.
From the class TestCodecs, method testRandomPostings:
public void testRandomPostings() throws Throwable {
  final FieldInfos.Builder builder = new FieldInfos.Builder();
  final FieldData[] fields = new FieldData[NUM_FIELDS];
  for (int i = 0; i < NUM_FIELDS; i++) {
    final boolean omitTF = 0 == (i % 3);
    final boolean storePayloads = 1 == (i % 3);
    fields[i] = new FieldData(fieldNames[i], builder, this.makeRandomTerms(omitTF, storePayloads), omitTF, storePayloads);
  }
  final Directory dir = newDirectory();
  final FieldInfos fieldInfos = builder.finish();
  if (VERBOSE) {
    System.out.println("TEST: now write postings");
  }
  Codec codec = Codec.getDefault();
  final SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, SEGMENT, 10000, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
  this.write(si, fieldInfos, dir, fields);
  if (VERBOSE) {
    System.out.println("TEST: now read postings");
  }
  final FieldsProducer terms = codec.postingsFormat().fieldsProducer(new SegmentReadState(dir, si, fieldInfos, newIOContext(random())));
  final Verify[] threads = new Verify[NUM_TEST_THREADS - 1];
  for (int i = 0; i < NUM_TEST_THREADS - 1; i++) {
    threads[i] = new Verify(si, fields, terms);
    threads[i].setDaemon(true);
    threads[i].start();
  }
  new Verify(si, fields, terms).run();
  for (int i = 0; i < NUM_TEST_THREADS - 1; i++) {
    threads[i].join();
    assert !threads[i].failed;
  }
  terms.close();
  dir.close();
}
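Both tests write segments directly through codec.postingsFormat(). In application code the codec is normally supplied through IndexWriterConfig instead; the following is a hedged sketch under that assumption (the class name and temp-directory choice are illustrative, and an analyzer such as StandardAnalyzer is assumed to be on the classpath):

import java.nio.file.Files;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class ExplicitCodecExample {
  public static void main(String[] args) throws Exception {
    try (Directory dir = FSDirectory.open(Files.createTempDirectory("codec-demo"))) {
      IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
      // Any registered Codec works here; the tests above use the default one.
      iwc.setCodec(Codec.getDefault());
      try (IndexWriter writer = new IndexWriter(dir, iwc)) {
        Document doc = new Document();
        doc.add(new StringField("id", "1", Field.Store.NO));
        writer.addDocument(doc);
        writer.commit();
      }
    }
  }
}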
Use of org.apache.lucene.codecs.Codec in project lucene-solr by apache.
From the class TestDoc, method merge:
private SegmentCommitInfo merge(Directory dir, SegmentCommitInfo si1, SegmentCommitInfo si2, String merged, boolean useCompoundFile) throws Exception {
  IOContext context = newIOContext(random(), new IOContext(new MergeInfo(-1, -1, false, -1)));
  SegmentReader r1 = new SegmentReader(si1, Version.LATEST.major, context);
  SegmentReader r2 = new SegmentReader(si2, Version.LATEST.major, context);
  final Codec codec = Codec.getDefault();
  TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.info.dir);
  final SegmentInfo si = new SegmentInfo(si1.info.dir, Version.LATEST, null, merged, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
  SegmentMerger merger = new SegmentMerger(Arrays.<CodecReader>asList(r1, r2), si, InfoStream.getDefault(), trackingDir, new FieldInfos.FieldNumbers(), context);
  MergeState mergeState = merger.merge();
  r1.close();
  r2.close();
  si.setFiles(new HashSet<>(trackingDir.getCreatedFiles()));
  if (useCompoundFile) {
    Collection<String> filesToDelete = si.files();
    codec.compoundFormat().write(dir, si, context);
    si.setUseCompoundFile(true);
    for (String name : filesToDelete) {
      si1.info.dir.deleteFile(name);
    }
  }
  return new SegmentCommitInfo(si, 0, -1L, -1L, -1L);
}
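The merge above touches only a couple of the codec's parts directly (compoundFormat, plus the formats SegmentMerger drives internally). As a hedged illustration separate from TestDoc, these are the per-part formats a Codec exposes:

import org.apache.lucene.codecs.Codec;

public class CodecFormatsExample {
  public static void main(String[] args) {
    Codec codec = Codec.getDefault();
    // Named formats expose getName(); the others are identified by class here.
    System.out.println("postings:      " + codec.postingsFormat().getName());
    System.out.println("doc values:    " + codec.docValuesFormat().getName());
    System.out.println("stored fields: " + codec.storedFieldsFormat().getClass().getSimpleName());
    System.out.println("term vectors:  " + codec.termVectorsFormat().getClass().getSimpleName());
    System.out.println("field infos:   " + codec.fieldInfosFormat().getClass().getSimpleName());
    System.out.println("segment info:  " + codec.segmentInfoFormat().getClass().getSimpleName());
    System.out.println("live docs:     " + codec.liveDocsFormat().getClass().getSimpleName());
    System.out.println("compound:      " + codec.compoundFormat().getClass().getSimpleName());
    System.out.println("points:        " + codec.pointsFormat().getClass().getSimpleName());
  }
}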
Use of org.apache.lucene.codecs.Codec in project lucene-solr by apache.
From the class IndexWriter, method mergeMiddle:
/** Does the actual (time-consuming) work of the merge,
* but without holding synchronized lock on IndexWriter
* instance */
private int mergeMiddle(MergePolicy.OneMerge merge, MergePolicy mergePolicy) throws IOException {
merge.checkAborted();
Directory mergeDirectory = config.getMergeScheduler().wrapForMerge(merge, directory);
List<SegmentCommitInfo> sourceSegments = merge.segments;
IOContext context = new IOContext(merge.getStoreMergeInfo());
final TrackingDirectoryWrapper dirWrapper = new TrackingDirectoryWrapper(mergeDirectory);
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "merging " + segString(merge.segments));
}
merge.readers = new ArrayList<>(sourceSegments.size());
// This is try/finally to make sure merger's readers are
// closed:
boolean success = false;
try {
int segUpto = 0;
while (segUpto < sourceSegments.size()) {
final SegmentCommitInfo info = sourceSegments.get(segUpto);
// Hold onto the "live" reader; we will use this to
// commit merged deletes
final ReadersAndUpdates rld = readerPool.get(info, true);
// Carefully pull the most recent live docs and reader
SegmentReader reader;
final Bits liveDocs;
final int delCount;
synchronized (this) {
// Must sync to ensure BufferedDeletesStream cannot change liveDocs,
// pendingDeleteCount and field updates while we pull a copy:
reader = rld.getReaderForMerge(context);
liveDocs = rld.getReadOnlyLiveDocs();
delCount = rld.getPendingDeleteCount() + info.getDelCount();
assert reader != null;
assert rld.verifyDocCounts();
if (infoStream.isEnabled("IW")) {
if (rld.getPendingDeleteCount() != 0) {
infoStream.message("IW", "seg=" + segString(info) + " delCount=" + info.getDelCount() + " pendingDelCount=" + rld.getPendingDeleteCount());
} else if (info.getDelCount() != 0) {
infoStream.message("IW", "seg=" + segString(info) + " delCount=" + info.getDelCount());
} else {
infoStream.message("IW", "seg=" + segString(info) + " no deletes");
}
}
}
// make a new reader with updated live docs and del count.
if (reader.numDeletedDocs() != delCount) {
// beware of zombies
assert delCount > reader.numDeletedDocs();
SegmentReader newReader;
synchronized (this) {
// We must also sync on IW here, because another thread could be writing
// new DV updates / remove old gen field infos files causing FNFE:
newReader = new SegmentReader(info, reader, liveDocs, info.info.maxDoc() - delCount);
}
boolean released = false;
try {
rld.release(reader);
released = true;
} finally {
if (!released) {
newReader.decRef();
}
}
reader = newReader;
}
merge.readers.add(reader);
assert delCount <= info.info.maxDoc() : "delCount=" + delCount + " info.maxDoc=" + info.info.maxDoc() + " rld.pendingDeleteCount=" + rld.getPendingDeleteCount() + " info.getDelCount()=" + info.getDelCount();
segUpto++;
}
// System.out.println("[" + Thread.currentThread().getName() + "] IW.mergeMiddle: merging " + merge.getMergeReaders());
// Let the merge wrap readers
List<CodecReader> mergeReaders = new ArrayList<>();
for (SegmentReader reader : merge.readers) {
CodecReader wrappedReader = merge.wrapForMerge(reader);
validateMergeReader(wrappedReader);
mergeReaders.add(wrappedReader);
}
final SegmentMerger merger = new SegmentMerger(mergeReaders, merge.info.info, infoStream, dirWrapper, globalFieldNumberMap, context);
merge.checkAborted();
merge.mergeStartNS = System.nanoTime();
// This is where all the work happens:
if (merger.shouldMerge()) {
merger.merge();
}
MergeState mergeState = merger.mergeState;
assert mergeState.segmentInfo == merge.info.info;
merge.info.info.setFiles(new HashSet<>(dirWrapper.getCreatedFiles()));
if (infoStream.isEnabled("IW")) {
if (merger.shouldMerge()) {
String pauseInfo = merge.getMergeProgress().getPauseTimes().entrySet().stream().filter((e) -> e.getValue() > 0).map((e) -> String.format(Locale.ROOT, "%.1f sec %s", e.getValue() / 1000000000., e.getKey().name().toLowerCase(Locale.ROOT))).collect(Collectors.joining(", "));
if (!pauseInfo.isEmpty()) {
pauseInfo = " (" + pauseInfo + ")";
}
long t1 = System.nanoTime();
double sec = (t1 - merge.mergeStartNS) / 1000000000.;
double segmentMB = (merge.info.sizeInBytes() / 1024. / 1024.);
infoStream.message("IW", "merge codec=" + codec + " maxDoc=" + merge.info.info.maxDoc() + "; merged segment has " + (mergeState.mergeFieldInfos.hasVectors() ? "vectors" : "no vectors") + "; " + (mergeState.mergeFieldInfos.hasNorms() ? "norms" : "no norms") + "; " + (mergeState.mergeFieldInfos.hasDocValues() ? "docValues" : "no docValues") + "; " + (mergeState.mergeFieldInfos.hasProx() ? "prox" : "no prox") + "; " + (mergeState.mergeFieldInfos.hasProx() ? "freqs" : "no freqs") + "; " + (mergeState.mergeFieldInfos.hasPointValues() ? "points" : "no points") + "; " + String.format(Locale.ROOT, "%.1f sec%s to merge segment [%.2f MB, %.2f MB/sec]", sec, pauseInfo, segmentMB, segmentMB / sec));
} else {
infoStream.message("IW", "skip merging fully deleted segments");
}
}
if (merger.shouldMerge() == false) {
// Merge would produce a 0-doc segment, so we do nothing except commit the merge to remove all the 0-doc segments that we "merged":
assert merge.info.info.maxDoc() == 0;
commitMerge(merge, mergeState);
return 0;
}
assert merge.info.info.maxDoc() > 0;
// Very important to do this before opening the reader
// because codec must know if prox was written for
// this segment:
//System.out.println("merger set hasProx=" + merger.hasProx() + " seg=" + merge.info.name);
boolean useCompoundFile;
synchronized (this) {
// Guard segmentInfos
useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, merge.info, this);
}
if (useCompoundFile) {
success = false;
Collection<String> filesToRemove = merge.info.files();
TrackingDirectoryWrapper trackingCFSDir = new TrackingDirectoryWrapper(mergeDirectory);
try {
createCompoundFile(infoStream, trackingCFSDir, merge.info.info, context);
success = true;
} catch (Throwable t) {
synchronized (this) {
if (merge.isAborted()) {
// This can happen if rollback is called while we were building
// our CFS; fall through to the logic below to remove the non-CFS
// merged files:
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "hit merge abort exception creating compound file during merge");
}
return 0;
} else {
handleMergeException(t, merge);
}
}
} finally {
if (success == false) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "hit exception creating compound file during merge");
}
// Safe: these files must exist
deleteNewFiles(merge.info.files());
}
}
// So that, if we hit exc in deleteNewFiles (next)
// or in commitMerge (later), we close the
// per-segment readers in the finally clause below:
success = false;
synchronized (this) {
// delete new non cfs files directly: they were never
// registered with IFD
deleteNewFiles(filesToRemove);
if (merge.isAborted()) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "abort merge after building CFS");
}
// Safe: these files must exist
deleteNewFiles(merge.info.files());
return 0;
}
}
merge.info.info.setUseCompoundFile(true);
} else {
// So that, if we hit exc in commitMerge (later),
// we close the per-segment readers in the finally
// clause below:
success = false;
}
// Have codec write SegmentInfo. Must do this after
// creating CFS so that 1) .si isn't slurped into CFS,
// and 2) .si reflects useCompoundFile=true change
// above:
boolean success2 = false;
try {
codec.segmentInfoFormat().write(directory, merge.info.info, context);
success2 = true;
} finally {
if (!success2) {
// Safe: these files must exist
deleteNewFiles(merge.info.files());
}
}
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", String.format(Locale.ROOT, "merged segment size=%.3f MB vs estimate=%.3f MB", merge.info.sizeInBytes() / 1024. / 1024., merge.estimatedMergeBytes / 1024 / 1024.));
}
final IndexReaderWarmer mergedSegmentWarmer = config.getMergedSegmentWarmer();
if (poolReaders && mergedSegmentWarmer != null) {
final ReadersAndUpdates rld = readerPool.get(merge.info, true);
final SegmentReader sr = rld.getReader(IOContext.READ);
try {
mergedSegmentWarmer.warm(sr);
} finally {
synchronized (this) {
rld.release(sr);
readerPool.release(rld);
}
}
}
if (!commitMerge(merge, mergeState)) {
// aborted
return 0;
}
success = true;
} finally {
// Readers are already closed in commitMerge if we didn't hit an exc:
if (success == false) {
closeMergeReaders(merge, true);
}
}
return merge.info.info.maxDoc();
}
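mergeMiddle writes the new segment's .si file through codec.segmentInfoFormat(). As a hedged sketch, independent of IndexWriter internals, this is one way to check after the fact which codec each committed segment was written with (the index path argument is a placeholder):

import java.nio.file.Paths;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class SegmentCodecInspector {
  public static void main(String[] args) throws Exception {
    try (Directory dir = FSDirectory.open(Paths.get(args[0]))) {
      SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
      for (SegmentCommitInfo sci : infos) {
        System.out.println(sci.info.name
            + " codec=" + sci.info.getCodec().getName()
            + " maxDoc=" + sci.info.maxDoc()
            + " cfs=" + sci.info.getUseCompoundFile());
      }
    }
  }
}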
Use of org.apache.lucene.codecs.Codec in project lucene-solr by apache.
From the class ReadersAndUpdates, method writeFieldUpdates:
// Writes field updates (new _X_N updates files) to the directory
public synchronized void writeFieldUpdates(Directory dir, DocValuesFieldUpdates.Container dvUpdates) throws IOException {
assert Thread.holdsLock(writer);
assert dvUpdates.any();
// Do this so we can delete any created files on
// exception; this saves all codecs from having to do
// it:
TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);
final Map<Integer, Set<String>> newDVFiles = new HashMap<>();
Set<String> fieldInfosFiles = null;
FieldInfos fieldInfos = null;
boolean success = false;
try {
final Codec codec = info.info.getCodec();
// reader could be null e.g. for a just merged segment (from
// IndexWriter.commitMergedDeletes).
final SegmentReader reader = this.reader == null ? new SegmentReader(info, writer.segmentInfos.getIndexCreatedVersionMajor(), IOContext.READONCE) : this.reader;
try {
// clone FieldInfos so that we can update their dvGen separately from
// the reader's infos and write them to a new fieldInfos_gen file
FieldInfos.Builder builder = new FieldInfos.Builder(writer.globalFieldNumberMap);
// clone FI.attributes as well as FI.dvGen
for (FieldInfo fi : reader.getFieldInfos()) {
FieldInfo clone = builder.add(fi);
// copy the stuff FieldInfos.Builder doesn't copy
for (Entry<String, String> e : fi.attributes().entrySet()) {
clone.putAttribute(e.getKey(), e.getValue());
}
clone.setDocValuesGen(fi.getDocValuesGen());
}
// create new fields or update existing ones to have NumericDV type
for (String f : dvUpdates.numericDVUpdates.keySet()) {
FieldInfo fieldInfo = builder.getOrAdd(f);
fieldInfo.setDocValuesType(DocValuesType.NUMERIC);
}
// create new fields or update existing ones to have BinaryDV type
for (String f : dvUpdates.binaryDVUpdates.keySet()) {
FieldInfo fieldInfo = builder.getOrAdd(f);
fieldInfo.setDocValuesType(DocValuesType.BINARY);
}
fieldInfos = builder.finish();
final DocValuesFormat docValuesFormat = codec.docValuesFormat();
// System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeFieldUpdates: applying numeric updates; seg=" + info + " updates=" + numericFieldUpdates);
handleNumericDVUpdates(fieldInfos, dvUpdates.numericDVUpdates, trackingDir, docValuesFormat, reader, newDVFiles);
// System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " updates=" + dvUpdates.binaryDVUpdates);
handleBinaryDVUpdates(fieldInfos, dvUpdates.binaryDVUpdates, trackingDir, docValuesFormat, reader, newDVFiles);
// System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: write fieldInfos; seg=" + info);
fieldInfosFiles = writeFieldInfosGen(fieldInfos, trackingDir, docValuesFormat, codec.fieldInfosFormat());
} finally {
if (reader != this.reader) {
// System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeLiveDocs: closeReader " + reader);
reader.close();
}
}
success = true;
} finally {
if (!success) {
// Advance only the nextWriteFieldInfosGen and nextWriteDocValuesGen, so
// that a 2nd attempt to write will write to a new file
info.advanceNextWriteFieldInfosGen();
info.advanceNextWriteDocValuesGen();
// Delete any partially created file(s):
for (String fileName : trackingDir.getCreatedFiles()) {
IOUtils.deleteFilesIgnoringExceptions(dir, fileName);
}
}
}
// copy all the updates to mergingUpdates, so they can later be applied to the merged segment
if (isMerging) {
for (Entry<String, NumericDocValuesFieldUpdates> e : dvUpdates.numericDVUpdates.entrySet()) {
DocValuesFieldUpdates updates = mergingDVUpdates.get(e.getKey());
if (updates == null) {
mergingDVUpdates.put(e.getKey(), e.getValue());
} else {
updates.merge(e.getValue());
}
}
for (Entry<String, BinaryDocValuesFieldUpdates> e : dvUpdates.binaryDVUpdates.entrySet()) {
DocValuesFieldUpdates updates = mergingDVUpdates.get(e.getKey());
if (updates == null) {
mergingDVUpdates.put(e.getKey(), e.getValue());
} else {
updates.merge(e.getValue());
}
}
}
// writing field updates succeeded
assert fieldInfosFiles != null;
info.setFieldInfosFiles(fieldInfosFiles);
// carry over doc-values files for fields that were not updated in this
// session; newDVFiles already maps the fields that were updated now.
assert !newDVFiles.isEmpty();
for (Entry<Integer, Set<String>> e : info.getDocValuesUpdatesFiles().entrySet()) {
if (!newDVFiles.containsKey(e.getKey())) {
newDVFiles.put(e.getKey(), e.getValue());
}
}
info.setDocValuesUpdatesFiles(newDVFiles);
// wrote new files, should checkpoint()
writer.checkpoint();
// if there is a reader open, reopen it to reflect the updates
if (reader != null) {
SegmentReader newReader = new SegmentReader(info, reader, liveDocs, info.info.maxDoc() - info.getDelCount() - pendingDeleteCount);
boolean reopened = false;
try {
reader.decRef();
reader = newReader;
reopened = true;
} finally {
if (!reopened) {
newReader.decRef();
}
}
}
}
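writeFieldUpdates is the internal half of the doc-values update path; the user-facing half is IndexWriter.updateNumericDocValue / updateBinaryDocValue. The following is a hedged sketch of that entry point (the field names and temp directory are illustrative, and an analyzer module is again assumed):

import java.nio.file.Files;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class DocValuesUpdateExample {
  public static void main(String[] args) throws Exception {
    try (Directory dir = FSDirectory.open(Files.createTempDirectory("dv-update-demo"));
         IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      Document doc = new Document();
      doc.add(new StringField("id", "1", Field.Store.NO));
      doc.add(new NumericDocValuesField("price", 10L));
      writer.addDocument(doc);
      writer.commit();
      // In-place numeric doc-values update; persisting it as a new dv-gen file
      // is what code like ReadersAndUpdates.writeFieldUpdates above handles.
      writer.updateNumericDocValue(new Term("id", "1"), "price", 42L);
      writer.commit();
    }
  }
}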