Use of org.apache.lucene.store.IOContext in project jackrabbit-oak by apache.
Class IndexCopierTest, method cowReadDoneFromLocalIfFileExist.
@Test
public void cowReadDoneFromLocalIfFileExist() throws Exception {
    final Set<String> readLocal = newHashSet();
    Directory baseDir = new CloseSafeDir() {
        @Override
        public IndexInput openInput(String name, IOContext context) throws IOException {
            readLocal.add(name);
            return super.openInput(name, context);
        }
    };
    IndexDefinition defn = new IndexDefinition(root, builder.getNodeState(), "/foo");
    IndexCopier copier = new RAMIndexCopier(baseDir, sameThreadExecutor(), getWorkDir());
    final Set<String> readRemotes = newHashSet();
    Directory remote = new RAMDirectory() {
        @Override
        public IndexInput openInput(String name, IOContext context) throws IOException {
            readRemotes.add(name);
            return super.openInput(name, context);
        }
    };
    byte[] t1 = writeFile(remote, "t1");
    Directory local = copier.wrapForWrite(defn, remote, false, INDEX_DATA_CHILD_NAME);
    //Read should be served from remote
    readRemotes.clear();
    readLocal.clear();
    readAndAssert(local, "t1", t1);
    assertEquals(newHashSet("t1"), readRemotes);
    assertEquals(newHashSet(), readLocal);
    //Now pull in the file t1 via CopyOnRead in baseDir
    Directory localForRead = copier.wrapForRead("/foo", defn, remote, INDEX_DATA_CHILD_NAME);
    readAndAssert(localForRead, "t1", t1);
    //Read should be served from local
    readRemotes.clear();
    readLocal.clear();
    readAndAssert(local, "t1", t1);
    assertEquals(newHashSet(), readRemotes);
    assertEquals(newHashSet("t1"), readLocal);
    local.close();
}
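The test above instruments Directory subclasses inline to observe which file names each openInput(name, IOContext) call touches. The same idea can be shown standalone with Lucene's FilterDirectory; the sketch below is illustrative only (the class name ReadTrackingDirectory is made up, and it does not use Oak's CloseSafeDir or RAMIndexCopier test helpers).

// A minimal, self-contained sketch of the tracking idea used in the test:
// wrap a Directory and record every file name passed to openInput(name, context).
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FilterDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;

class ReadTrackingDirectory extends FilterDirectory {
    final Set<String> openedFiles = new HashSet<>();

    ReadTrackingDirectory(Directory delegate) {
        super(delegate);
    }

    @Override
    public IndexInput openInput(String name, IOContext context) throws IOException {
        openedFiles.add(name);   // remember which file was read
        return super.openInput(name, context);
    }

    public static void main(String[] args) throws IOException {
        ReadTrackingDirectory dir = new ReadTrackingDirectory(new RAMDirectory());
        try (IndexOutput out = dir.createOutput("t1", IOContext.DEFAULT)) {
            out.writeByte((byte) 42);
        }
        dir.openInput("t1", IOContext.READ).close();
        System.out.println(dir.openedFiles);   // prints [t1]
        dir.close();
    }
}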
Use of org.apache.lucene.store.IOContext in project lucene-solr by apache.
Class DocumentsWriterPerThread, method sealFlushedSegment.
/**
 * Seals the {@link SegmentInfo} for the new flushed segment and persists
 * the deleted documents {@link MutableBits}.
 */
void sealFlushedSegment(FlushedSegment flushedSegment, Sorter.DocMap sortMap) throws IOException {
    assert flushedSegment != null;
    SegmentCommitInfo newSegment = flushedSegment.segmentInfo;
    IndexWriter.setDiagnostics(newSegment.info, IndexWriter.SOURCE_FLUSH);
    IOContext context = new IOContext(new FlushInfo(newSegment.info.maxDoc(), newSegment.sizeInBytes()));
    boolean success = false;
    try {
        if (indexWriterConfig.getUseCompoundFile()) {
            Set<String> originalFiles = newSegment.info.files();
            // TODO: like addIndexes, we are relying on createCompoundFile to successfully cleanup...
            indexWriter.createCompoundFile(infoStream, new TrackingDirectoryWrapper(directory), newSegment.info, context);
            filesToDelete.addAll(originalFiles);
            newSegment.info.setUseCompoundFile(true);
        }
        // Have codec write SegmentInfo. Must do this after
        // creating CFS so that 1) .si isn't slurped into CFS,
        // and 2) .si reflects useCompoundFile=true change
        // above:
        codec.segmentInfoFormat().write(directory, newSegment.info, context);
        // Must write deleted docs after the CFS so we don't
        // slurp the del file into CFS:
        if (flushedSegment.liveDocs != null) {
            final int delCount = flushedSegment.delCount;
            assert delCount > 0;
            if (infoStream.isEnabled("DWPT")) {
                infoStream.message("DWPT", "flush: write " + delCount + " deletes gen=" + flushedSegment.segmentInfo.getDelGen());
            }
            // TODO: we should prune the segment if it's 100%
            // deleted... but merge will also catch it.
            // TODO: in the NRT case it'd be better to hand
            // this del vector over to the
            // shortly-to-be-opened SegmentReader and let it
            // carry the changes; there's no reason to use
            // filesystem as intermediary here.
            SegmentCommitInfo info = flushedSegment.segmentInfo;
            Codec codec = info.info.getCodec();
            final MutableBits bits;
            if (sortMap == null) {
                bits = flushedSegment.liveDocs;
            } else {
                bits = sortLiveDocs(flushedSegment.liveDocs, sortMap);
            }
            codec.liveDocsFormat().writeLiveDocs(bits, directory, info, delCount, context);
            newSegment.setDelCount(delCount);
            newSegment.advanceDelGen();
        }
        success = true;
    } finally {
        if (!success) {
            if (infoStream.isEnabled("DWPT")) {
                infoStream.message("DWPT", "hit exception creating compound file for newly flushed segment " + newSegment.info.name);
            }
        }
    }
}
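For context, the IOContext built at the top of sealFlushedSegment wraps a FlushInfo describing the expected size of the flushed segment, which Directory implementations may inspect to tune their I/O. A minimal sketch, with placeholder numbers, of constructing and reading back such a context:

// Sketch only (not from the Lucene source); the doc count and byte estimate are
// arbitrary placeholders.
import org.apache.lucene.store.FlushInfo;
import org.apache.lucene.store.IOContext;

public class FlushContextSketch {
    public static void main(String[] args) {
        int maxDoc = 1_000;              // docs in the flushed segment (placeholder)
        long estimatedBytes = 4 << 20;   // estimated segment size in bytes (placeholder)

        IOContext context = new IOContext(new FlushInfo(maxDoc, estimatedBytes));

        // A Directory implementation can branch on the context type, e.g. to pick
        // buffer sizes or rate limits for flushes vs. merges vs. ordinary reads.
        System.out.println(context.context);                        // FLUSH
        System.out.println(context.flushInfo.numDocs);              // 1000
        System.out.println(context.flushInfo.estimatedSegmentSize); // 4194304
    }
}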
Use of org.apache.lucene.store.IOContext in project lucene-solr by apache.
Class DocumentsWriterPerThread, method flush.
/** Flush all pending docs to a new segment */
FlushedSegment flush() throws IOException, AbortingException {
    assert numDocsInRAM > 0;
    assert deleteSlice.isEmpty() : "all deletes must be applied in prepareFlush";
    segmentInfo.setMaxDoc(numDocsInRAM);
    final SegmentWriteState flushState = new SegmentWriteState(infoStream, directory, segmentInfo, fieldInfos.finish(), pendingUpdates, new IOContext(new FlushInfo(numDocsInRAM, bytesUsed())));
    final double startMBUsed = bytesUsed() / 1024. / 1024.;
    // Apply delete-by-docID now (delete-by-docID only happens when an exception
    // is hit processing that doc, eg if analyzer has some problem w/ the text):
    if (pendingUpdates.docIDs.size() > 0) {
        flushState.liveDocs = codec.liveDocsFormat().newLiveDocs(numDocsInRAM);
        for (int delDocID : pendingUpdates.docIDs) {
            flushState.liveDocs.clear(delDocID);
        }
        flushState.delCountOnFlush = pendingUpdates.docIDs.size();
        pendingUpdates.bytesUsed.addAndGet(-pendingUpdates.docIDs.size() * BufferedUpdates.BYTES_PER_DEL_DOCID);
        pendingUpdates.docIDs.clear();
    }
    if (aborted) {
        if (infoStream.isEnabled("DWPT")) {
            infoStream.message("DWPT", "flush: skip because aborting is set");
        }
        return null;
    }
    long t0 = System.nanoTime();
    if (infoStream.isEnabled("DWPT")) {
        infoStream.message("DWPT", "flush postings as segment " + flushState.segmentInfo.name + " numDocs=" + numDocsInRAM);
    }
    final Sorter.DocMap sortMap;
    try {
        sortMap = consumer.flush(flushState);
        pendingUpdates.terms.clear();
        segmentInfo.setFiles(new HashSet<>(directory.getCreatedFiles()));
        final SegmentCommitInfo segmentInfoPerCommit = new SegmentCommitInfo(segmentInfo, 0, -1L, -1L, -1L);
        if (infoStream.isEnabled("DWPT")) {
            infoStream.message("DWPT", "new segment has " + (flushState.liveDocs == null ? 0 : flushState.delCountOnFlush) + " deleted docs");
            infoStream.message("DWPT", "new segment has " + (flushState.fieldInfos.hasVectors() ? "vectors" : "no vectors") + "; " + (flushState.fieldInfos.hasNorms() ? "norms" : "no norms") + "; " + (flushState.fieldInfos.hasDocValues() ? "docValues" : "no docValues") + "; " + (flushState.fieldInfos.hasProx() ? "prox" : "no prox") + "; " + (flushState.fieldInfos.hasFreq() ? "freqs" : "no freqs"));
            infoStream.message("DWPT", "flushedFiles=" + segmentInfoPerCommit.files());
            infoStream.message("DWPT", "flushed codec=" + codec);
        }
        final BufferedUpdates segmentDeletes;
        if (pendingUpdates.queries.isEmpty() && pendingUpdates.numericUpdates.isEmpty() && pendingUpdates.binaryUpdates.isEmpty()) {
            pendingUpdates.clear();
            segmentDeletes = null;
        } else {
            segmentDeletes = pendingUpdates;
        }
        if (infoStream.isEnabled("DWPT")) {
            final double newSegmentSize = segmentInfoPerCommit.sizeInBytes() / 1024. / 1024.;
            infoStream.message("DWPT", "flushed: segment=" + segmentInfo.name + " ramUsed=" + nf.format(startMBUsed) + " MB" + " newFlushedSize=" + nf.format(newSegmentSize) + " MB" + " docs/MB=" + nf.format(flushState.segmentInfo.maxDoc() / newSegmentSize));
        }
        assert segmentInfo != null;
        FlushedSegment fs = new FlushedSegment(segmentInfoPerCommit, flushState.fieldInfos, segmentDeletes, flushState.liveDocs, flushState.delCountOnFlush);
        sealFlushedSegment(fs, sortMap);
        if (infoStream.isEnabled("DWPT")) {
            infoStream.message("DWPT", "flush time " + ((System.nanoTime() - t0) / 1000000.0) + " msec");
        }
        return fs;
    } catch (Throwable th) {
        abort();
        throw AbortingException.wrap(th);
    }
}
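The delete-by-docID branch near the top of flush() allocates a live-docs bitset and clears one bit per buffered deleted doc ID. The sketch below mimics that bookkeeping with a plain FixedBitSet instead of the codec-provided MutableBits; the doc IDs and counts are made up for illustration.

// Illustration only: start with every doc live, then clear the bits of docs that
// were deleted by doc ID before the flush.
import org.apache.lucene.util.FixedBitSet;

public class LiveDocsSketch {
    public static void main(String[] args) {
        int numDocsInRAM = 10;
        int[] deletedDocIDs = { 2, 7 };   // placeholder doc IDs

        FixedBitSet liveDocs = new FixedBitSet(numDocsInRAM);
        liveDocs.set(0, numDocsInRAM);    // mark every doc live
        for (int delDocID : deletedDocIDs) {
            liveDocs.clear(delDocID);     // mark deleted docs as not live
        }

        System.out.println("live docs = " + liveDocs.cardinality()); // 8
        System.out.println("doc 2 live? " + liveDocs.get(2));        // false
    }
}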
Use of org.apache.lucene.store.IOContext in project lucene-solr by apache.
Class TestSegmentMerger, method testMerge.
public void testMerge() throws IOException {
    final Codec codec = Codec.getDefault();
    final SegmentInfo si = new SegmentInfo(mergedDir, Version.LATEST, null, mergedSegment, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
    SegmentMerger merger = new SegmentMerger(Arrays.<CodecReader>asList(reader1, reader2), si, InfoStream.getDefault(), mergedDir, new FieldInfos.FieldNumbers(), newIOContext(random(), new IOContext(new MergeInfo(-1, -1, false, -1))));
    MergeState mergeState = merger.merge();
    int docsMerged = mergeState.segmentInfo.maxDoc();
    assertTrue(docsMerged == 2);
    //Should be able to open a new SegmentReader against the new directory
    SegmentReader mergedReader = new SegmentReader(new SegmentCommitInfo(mergeState.segmentInfo, 0, -1L, -1L, -1L), Version.LATEST.major, newIOContext(random()));
    assertTrue(mergedReader != null);
    assertTrue(mergedReader.numDocs() == 2);
    Document newDoc1 = mergedReader.document(0);
    assertTrue(newDoc1 != null);
    //There are 2 unstored fields on the document
    assertTrue(DocHelper.numFields(newDoc1) == DocHelper.numFields(doc1) - DocHelper.unstored.size());
    Document newDoc2 = mergedReader.document(1);
    assertTrue(newDoc2 != null);
    assertTrue(DocHelper.numFields(newDoc2) == DocHelper.numFields(doc2) - DocHelper.unstored.size());
    PostingsEnum termDocs = TestUtil.docs(random(), mergedReader, DocHelper.TEXT_FIELD_2_KEY, new BytesRef("field"), null, 0);
    assertTrue(termDocs != null);
    assertTrue(termDocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    int tvCount = 0;
    for (FieldInfo fieldInfo : mergedReader.getFieldInfos()) {
        if (fieldInfo.hasVectors()) {
            tvCount++;
        }
    }
    //System.out.println("stored size: " + stored.size());
    assertEquals("We do not have 3 fields that were indexed with term vector", 3, tvCount);
    Terms vector = mergedReader.getTermVectors(0).terms(DocHelper.TEXT_FIELD_2_KEY);
    assertNotNull(vector);
    assertEquals(3, vector.size());
    TermsEnum termsEnum = vector.iterator();
    int i = 0;
    while (termsEnum.next() != null) {
        String term = termsEnum.term().utf8ToString();
        int freq = (int) termsEnum.totalTermFreq();
        //System.out.println("Term: " + term + " Freq: " + freq);
        assertTrue(DocHelper.FIELD_2_TEXT.indexOf(term) != -1);
        assertTrue(DocHelper.FIELD_2_FREQS[i] == freq);
        i++;
    }
    TestSegmentReader.checkNorms(mergedReader);
    mergedReader.close();
}
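The merger above is handed an IOContext wrapping a MergeInfo whose fields are all left at -1 (unknown). A small sketch, with the constructor arguments spelled out, of how such a merge-time context is assembled:

// Sketch with placeholder values mirroring the test above: MergeInfo carries the
// total doc count, the estimated merge size in bytes, whether the merge comes from
// addIndexes, and the requested maximum segment count (-1 means "unknown").
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.MergeInfo;

public class MergeContextSketch {
    public static void main(String[] args) {
        int totalMaxDoc = -1;          // unknown, as in the test
        long estimatedMergeBytes = -1; // unknown
        boolean isExternal = false;    // not an addIndexes(Directory...) merge
        int mergeMaxNumSegments = -1;  // no forceMerge target

        IOContext mergeContext = new IOContext(new MergeInfo(totalMaxDoc, estimatedMergeBytes, isExternal, mergeMaxNumSegments));
        System.out.println(mergeContext.context); // MERGE
    }
}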
Use of org.apache.lucene.store.IOContext in project lucene-solr by apache.
Class SimpleTextCompoundFormat, method getCompoundReader.
@Override
public Directory getCompoundReader(Directory dir, SegmentInfo si, IOContext context) throws IOException {
    String dataFile = IndexFileNames.segmentFileName(si.name, "", DATA_EXTENSION);
    final IndexInput in = dir.openInput(dataFile, context);
    BytesRefBuilder scratch = new BytesRefBuilder();
    // first get to TOC:
    DecimalFormat df = new DecimalFormat(OFFSETPATTERN, DecimalFormatSymbols.getInstance(Locale.ROOT));
    long pos = in.length() - TABLEPOS.length - OFFSETPATTERN.length() - 1;
    in.seek(pos);
    SimpleTextUtil.readLine(in, scratch);
    assert StringHelper.startsWith(scratch.get(), TABLEPOS);
    long tablePos = -1;
    try {
        tablePos = df.parse(stripPrefix(scratch, TABLEPOS)).longValue();
    } catch (ParseException e) {
        throw new CorruptIndexException("can't parse CFS trailer, got: " + scratch.get().utf8ToString(), in);
    }
    // seek to TOC and read it
    in.seek(tablePos);
    SimpleTextUtil.readLine(in, scratch);
    assert StringHelper.startsWith(scratch.get(), TABLE);
    int numEntries = Integer.parseInt(stripPrefix(scratch, TABLE));
    final String[] fileNames = new String[numEntries];
    final long[] startOffsets = new long[numEntries];
    final long[] endOffsets = new long[numEntries];
    for (int i = 0; i < numEntries; i++) {
        SimpleTextUtil.readLine(in, scratch);
        assert StringHelper.startsWith(scratch.get(), TABLENAME);
        fileNames[i] = si.name + IndexFileNames.stripSegmentName(stripPrefix(scratch, TABLENAME));
        if (i > 0) {
            // files must be unique and in sorted order
            assert fileNames[i].compareTo(fileNames[i - 1]) > 0;
        }
        SimpleTextUtil.readLine(in, scratch);
        assert StringHelper.startsWith(scratch.get(), TABLESTART);
        startOffsets[i] = Long.parseLong(stripPrefix(scratch, TABLESTART));
        SimpleTextUtil.readLine(in, scratch);
        assert StringHelper.startsWith(scratch.get(), TABLEEND);
        endOffsets[i] = Long.parseLong(stripPrefix(scratch, TABLEEND));
    }
    return new Directory() {
        private int getIndex(String name) throws IOException {
            int index = Arrays.binarySearch(fileNames, name);
            if (index < 0) {
                throw new FileNotFoundException("No sub-file found (fileName=" + name + " files: " + Arrays.toString(fileNames) + ")");
            }
            return index;
        }

        @Override
        public String[] listAll() throws IOException {
            ensureOpen();
            return fileNames.clone();
        }

        @Override
        public long fileLength(String name) throws IOException {
            ensureOpen();
            int index = getIndex(name);
            return endOffsets[index] - startOffsets[index];
        }

        @Override
        public IndexInput openInput(String name, IOContext context) throws IOException {
            ensureOpen();
            int index = getIndex(name);
            return in.slice(name, startOffsets[index], endOffsets[index] - startOffsets[index]);
        }

        @Override
        public void close() throws IOException {
            in.close();
        }

        // write methods: disabled
        @Override
        public IndexOutput createOutput(String name, IOContext context) {
            throw new UnsupportedOperationException();
        }

        @Override
        public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) {
            throw new UnsupportedOperationException();
        }

        @Override
        public void sync(Collection<String> names) {
            throw new UnsupportedOperationException();
        }

        @Override
        public void deleteFile(String name) {
            throw new UnsupportedOperationException();
        }

        @Override
        public void rename(String source, String dest) {
            throw new UnsupportedOperationException();
        }

        @Override
        public void syncMetaData() {
            throw new UnsupportedOperationException();
        }

        @Override
        public Lock obtainLock(String name) {
            throw new UnsupportedOperationException();
        }
    };
}
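The anonymous read-only Directory returned above never reopens the compound file: it serves each sub-file as a slice of the single open IndexInput. A minimal sketch of that slice pattern, using a made-up file name and offsets:

// Illustration only: write a small container file, then expose a byte range of it
// as its own IndexInput via IndexInput.slice().
import java.io.IOException;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;

public class SliceSketch {
    public static void main(String[] args) throws IOException {
        RAMDirectory dir = new RAMDirectory();
        try (IndexOutput out = dir.createOutput("container.dat", IOContext.DEFAULT)) {
            out.writeBytes(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8 }, 8);
        }

        try (IndexInput whole = dir.openInput("container.dat", IOContext.READONCE)) {
            // View bytes [2, 5) of the container as if they were their own file.
            IndexInput sub = whole.slice("sub-file", 2, 3);
            System.out.println(sub.length());   // 3
            System.out.println(sub.readByte()); // 3
        }
        dir.close();
    }
}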