
Example 16 with SegmentInfos

Use of org.apache.lucene.index.SegmentInfos in project lucene-solr by apache.

The class ReplicaNode, method commit.

@Override
public void commit() throws IOException {
    synchronized (commitLock) {
        SegmentInfos infos;
        Collection<String> indexFiles;
        synchronized (this) {
            infos = ((SegmentInfosSearcherManager) mgr).getCurrentInfos();
            indexFiles = infos.files(false);
            deleter.incRef(indexFiles);
        }
        message("top: commit primaryGen=" + lastPrimaryGen + " infos=" + infos.toString() + " files=" + indexFiles);
        // fsync all index files we are now referencing
        dir.sync(indexFiles);
        Map<String, String> commitData = new HashMap<>();
        commitData.put(PRIMARY_GEN_KEY, Long.toString(lastPrimaryGen));
        commitData.put(VERSION_KEY, Long.toString(getCurrentSearchingVersion()));
        infos.setUserData(commitData, false);
        // write and fsync a new segments_N
        infos.commit(dir);
        // Notify the current infos (which may have changed while we were doing dir.sync above) what generation we are up to; this way
        // future commits are guaranteed to go to the next (unwritten) generation:
        if (mgr != null) {
            ((SegmentInfosSearcherManager) mgr).getCurrentInfos().updateGeneration(infos);
        }
        String segmentsFileName = infos.getSegmentsFileName();
        message("top: commit wrote segments file " + segmentsFileName + " version=" + infos.getVersion() + " sis=" + infos.toString() + " commitData=" + commitData);
        deleter.incRef(Collections.singletonList(segmentsFileName));
        message("top: commit decRef lastCommitFiles=" + lastCommitFiles);
        deleter.decRef(lastCommitFiles);
        lastCommitFiles.clear();
        lastCommitFiles.addAll(indexFiles);
        lastCommitFiles.add(segmentsFileName);
        message("top: commit version=" + infos.getVersion() + " files now " + lastCommitFiles);
    }
}
Also used: SegmentInfos(org.apache.lucene.index.SegmentInfos) HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)
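
The commitData map written above is what a restarted replica reads back in start() (Example 17) via PRIMARY_GEN_KEY. As a minimal sketch of reading that user data back from disk, assuming a hypothetical index path; SegmentInfos.readLatestCommit and getUserData are standard Lucene APIs:

import java.io.IOException;
import java.nio.file.Paths;
import java.util.Map;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class ReadCommitUserData {
    public static void main(String[] args) throws IOException {
        // Hypothetical path; point this at a real replica index directory:
        try (Directory dir = FSDirectory.open(Paths.get("/path/to/replica/index"))) {
            // Read the most recent segments_N in the directory:
            SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
            // This map carries the primary-gen and version keys written by commit():
            Map<String, String> userData = infos.getUserData();
            System.out.println("segments file: " + infos.getSegmentsFileName());
            System.out.println("user data: " + userData);
        }
    }
}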

Example 17 with SegmentInfos

Use of org.apache.lucene.index.SegmentInfos in project lucene-solr by apache.

The class ReplicaNode, method start.

/** Start up this replica, which possibly requires heavy copying of files from the primary node, if we were down for a long time */
protected synchronized void start(long curPrimaryGen) throws IOException {
    if (state.equals("init") == false) {
        throw new IllegalStateException("already started");
    }
    message("top: now start");
    try {
        // Figure out what state our local index is in now:
        String segmentsFileName = SegmentInfos.getLastCommitSegmentsFileName(dir);
        // Also look for any pending_segments_N, in case we crashed mid-commit.  We must "inflate" our infos gen to at least this, since
        // otherwise we may wind up re-using the pending_segments_N file name on commit, and then our deleter can get angry because it still
        // wants to delete this file:
        long maxPendingGen = -1;
        for (String fileName : dir.listAll()) {
            if (fileName.startsWith(IndexFileNames.PENDING_SEGMENTS)) {
                long gen = Long.parseLong(fileName.substring(IndexFileNames.PENDING_SEGMENTS.length() + 1), Character.MAX_RADIX);
                if (gen > maxPendingGen) {
                    maxPendingGen = gen;
                }
            }
        }
        SegmentInfos infos;
        if (segmentsFileName == null) {
            // No index here yet:
            infos = new SegmentInfos(Version.LATEST.major);
            message("top: init: no segments in index");
        } else {
            message("top: init: read existing segments commit " + segmentsFileName);
            infos = SegmentInfos.readCommit(dir, segmentsFileName);
            message("top: init: segments: " + infos.toString() + " version=" + infos.getVersion());
            Collection<String> indexFiles = infos.files(false);
            lastCommitFiles.add(segmentsFileName);
            lastCommitFiles.addAll(indexFiles);
            // Always protect the last commit:
            deleter.incRef(lastCommitFiles);
            lastNRTFiles.addAll(indexFiles);
            deleter.incRef(lastNRTFiles);
            message("top: commitFiles=" + lastCommitFiles);
            message("top: nrtFiles=" + lastNRTFiles);
        }
        message("top: delete unknown files on init: all files=" + Arrays.toString(dir.listAll()));
        deleter.deleteUnknownFiles(segmentsFileName);
        message("top: done delete unknown files on init: all files=" + Arrays.toString(dir.listAll()));
        String s = infos.getUserData().get(PRIMARY_GEN_KEY);
        long myPrimaryGen;
        if (s == null) {
            assert infos.size() == 0;
            myPrimaryGen = -1;
        } else {
            myPrimaryGen = Long.parseLong(s);
        }
        message("top: myPrimaryGen=" + myPrimaryGen);
        boolean doCommit;
        if (infos.size() > 0 && myPrimaryGen != -1 && myPrimaryGen != curPrimaryGen) {
            assert myPrimaryGen < curPrimaryGen;
            // Primary changed while we were down.  In this case, we must sync from primary before opening a reader, because it's possible current
            // files we have will need to be overwritten with different ones (if index rolled back and "forked"), and we can't overwrite open
            // files on Windows:
            final long initSyncStartNS = System.nanoTime();
            message("top: init: primary changed while we were down myPrimaryGen=" + myPrimaryGen + " vs curPrimaryGen=" + curPrimaryGen + "; sync now before mgr init");
            // Try until we succeed in copying over the latest NRT point:
            CopyJob job = null;
            // We may need to overwrite files referenced by our latest commit, either right now on initial sync, or on a later sync.  To make
            // sure the index is never even in an "apparently" corrupt state (where an old segments_N references invalid files) we forcefully
            // remove the commit now, and refuse to start the replica if this delete fails:
            message("top: now delete starting commit point " + segmentsFileName);
            // If this throws an exception (e.g. due to a virus checker), we cannot start this replica:
            assert deleter.getRefCount(segmentsFileName) == 1;
            deleter.decRef(Collections.singleton(segmentsFileName));
            if (dir instanceof FSDirectory && ((FSDirectory) dir).checkPendingDeletions()) {
                // If e.g. a virus checker blocks the file delete, we absolutely cannot start this node, else there is a window during
                // which, if we crash, we cause corruption:
                throw new RuntimeException("replica cannot start: existing segments file=" + segmentsFileName + " must be removed in order to start, but the file delete failed");
            }
            // So we don't later try to decRef it (illegally) again:
            boolean didRemove = lastCommitFiles.remove(segmentsFileName);
            assert didRemove;
            while (true) {
                job = newCopyJob("sync on startup replica=" + name() + " myVersion=" + infos.getVersion(), null, null, true, null);
                job.start();
                message("top: init: sync sis.version=" + job.getCopyState().version);
                // NOTE: newNRTPoint detects we are still in init (mgr is null) and does not cancel our copy if a flush happens
                try {
                    job.runBlocking();
                    job.finish();
                    // Success!
                    break;
                } catch (IOException ioe) {
                    job.cancel("startup failed", ioe);
                    if (ioe.getMessage().contains("checksum mismatch after file copy")) {
                        // OK-ish
                        message("top: failed to copy: " + ioe + "; retrying");
                    } else {
                        throw ioe;
                    }
                }
            }
            lastPrimaryGen = job.getCopyState().primaryGen;
            byte[] infosBytes = job.getCopyState().infosBytes;
            SegmentInfos syncInfos = SegmentInfos.readCommit(dir, new BufferedChecksumIndexInput(new ByteArrayIndexInput("SegmentInfos", infosBytes)), job.getCopyState().gen);
            // Must always commit to a larger generation than what's currently in the index:
            syncInfos.updateGeneration(infos);
            infos = syncInfos;
            assert infos.getVersion() == job.getCopyState().version;
            message("  version=" + infos.getVersion() + " segments=" + infos.toString());
            message("top: init: incRef nrtFiles=" + job.getFileNames());
            deleter.incRef(job.getFileNames());
            message("top: init: decRef lastNRTFiles=" + lastNRTFiles);
            deleter.decRef(lastNRTFiles);
            lastNRTFiles.clear();
            lastNRTFiles.addAll(job.getFileNames());
            message("top: init: set lastNRTFiles=" + lastNRTFiles);
            lastFileMetaData = job.getCopyState().files;
            message(String.format(Locale.ROOT, "top: %d: start: done sync: took %.3fs for %s, opened NRT reader version=%d", id, (System.nanoTime() - initSyncStartNS) / 1000000000.0, bytesToString(job.getTotalBytesCopied()), job.getCopyState().version));
            doCommit = true;
        } else {
            doCommit = false;
            lastPrimaryGen = curPrimaryGen;
            message("top: same primary as before");
        }
        if (infos.getGeneration() < maxPendingGen) {
            message("top: move infos generation from " + infos.getGeneration() + " to " + maxPendingGen);
            infos.setNextWriteGeneration(maxPendingGen);
        }
        // Notify primary we started, to give it a chance to send any warming merges our way to reduce NRT latency of first sync:
        sendNewReplica();
        // Finally, we are open for business, since our index now "agrees" with the primary:
        mgr = new SegmentInfosSearcherManager(dir, this, infos, searcherFactory);
        IndexSearcher searcher = mgr.acquire();
        try {
            // TODO: this is test specific:
            int hitCount = searcher.count(new TermQuery(new Term("marker", "marker")));
            message("top: marker count=" + hitCount + " version=" + ((DirectoryReader) searcher.getIndexReader()).getVersion());
        } finally {
            mgr.release(searcher);
        }
        // Must commit after init mgr:
        if (doCommit) {
            // Very important to commit what we just sync'd over, because we removed the pre-existing commit point above if we had to
            // overwrite any files it referenced:
            commit();
        }
        message("top: done start");
        state = "idle";
    } catch (Throwable t) {
        if (t.getMessage().startsWith("replica cannot start") == false) {
            message("exc on start:");
            t.printStackTrace(printStream);
        } else {
            dir.close();
        }
        throw IOUtils.rethrowAlways(t);
    }
}
Also used: IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) SegmentInfos(org.apache.lucene.index.SegmentInfos) DirectoryReader(org.apache.lucene.index.DirectoryReader) BufferedChecksumIndexInput(org.apache.lucene.store.BufferedChecksumIndexInput) FSDirectory(org.apache.lucene.store.FSDirectory) IOException(java.io.IOException) ByteArrayIndexInput(org.apache.lucene.store.ByteArrayIndexInput) Term(org.apache.lucene.index.Term)
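
The pending_segments_N scan at the top of start() is self-contained enough to lift out. A minimal sketch of the same generation-inflation logic, assuming a hypothetical index path (generations are encoded base-36 after the trailing underscore, hence Character.MAX_RADIX):

import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class MaxPendingGen {
    /** Returns the highest pending_segments_N generation in dir, or -1 if there is none. */
    static long maxPendingGen(Directory dir) throws IOException {
        long max = -1;
        for (String fileName : dir.listAll()) {
            if (fileName.startsWith(IndexFileNames.PENDING_SEGMENTS)) {
                // Skip "pending_segments_" and parse the base-36 generation:
                long gen = Long.parseLong(fileName.substring(IndexFileNames.PENDING_SEGMENTS.length() + 1), Character.MAX_RADIX);
                max = Math.max(max, gen);
            }
        }
        return max;
    }

    public static void main(String[] args) throws IOException {
        // Hypothetical path:
        try (Directory dir = FSDirectory.open(Paths.get("/path/to/index"))) {
            System.out.println("max pending generation: " + maxPendingGen(dir));
        }
    }
}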

Example 18 with SegmentInfos

Use of org.apache.lucene.index.SegmentInfos in project lucene-solr by apache.

The class PrimaryNode, method setCurrentInfos.

/** Steals incoming infos refCount; returns true if there were changes. */
private synchronized boolean setCurrentInfos(Set<String> completedMergeFiles) throws IOException {
    IndexSearcher searcher = null;
    SegmentInfos infos;
    try {
        searcher = mgr.acquire();
        infos = ((StandardDirectoryReader) searcher.getIndexReader()).getSegmentInfos();
        // TODO: this is test code specific!!
        message("setCurrentInfos: marker count: " + searcher.count(new TermQuery(new Term("marker", "marker"))) + " version=" + infos.getVersion() + " searcher=" + searcher);
    } finally {
        if (searcher != null) {
            mgr.release(searcher);
        }
    }
    if (curInfos != null && infos.getVersion() == curInfos.getVersion()) {
        // no change
        message("top: skip switch to infos: version=" + infos.getVersion() + " is unchanged: " + infos.toString());
        return false;
    }
    SegmentInfos oldInfos = curInfos;
    writer.incRefDeleter(infos);
    curInfos = infos;
    if (oldInfos != null) {
        writer.decRefDeleter(oldInfos);
    }
    message("top: switch to infos=" + infos.toString() + " version=" + infos.getVersion());
    // Serialize the SegmentInfos:
    RAMOutputStream out = new RAMOutputStream(new RAMFile(), true);
    infos.write(dir, out);
    byte[] infosBytes = new byte[(int) out.getFilePointer()];
    out.writeTo(infosBytes, 0);
    Map<String, FileMetaData> filesMetaData = new HashMap<String, FileMetaData>();
    for (SegmentCommitInfo info : infos) {
        for (String fileName : info.files()) {
            FileMetaData metaData = readLocalFileMetaData(fileName);
            // NOTE: we hold a refCount on this infos, so this file better exist:
            assert metaData != null;
            assert filesMetaData.containsKey(fileName) == false;
            filesMetaData.put(fileName, metaData);
        }
    }
    lastFileMetaData = Collections.unmodifiableMap(filesMetaData);
    message("top: set copyState primaryGen=" + primaryGen + " version=" + infos.getVersion() + " files=" + filesMetaData.keySet());
    copyState = new CopyState(lastFileMetaData, infos.getVersion(), infos.getGeneration(), infosBytes, completedMergeFiles, primaryGen, curInfos);
    return true;
}
Also used: IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) SegmentInfos(org.apache.lucene.index.SegmentInfos) SegmentCommitInfo(org.apache.lucene.index.SegmentCommitInfo) HashMap(java.util.HashMap) Term(org.apache.lucene.index.Term) RAMFile(org.apache.lucene.store.RAMFile) RAMOutputStream(org.apache.lucene.store.RAMOutputStream)
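
The byte[] produced here is the same infosBytes a replica deserializes in Example 17. The round trip can be factored into a small helper; a sketch reusing only the calls already shown in these two examples:

import java.io.IOException;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.BufferedChecksumIndexInput;
import org.apache.lucene.store.ByteArrayIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMFile;
import org.apache.lucene.store.RAMOutputStream;

public class InfosBytes {
    /** Serializes infos to a byte[], exactly as setCurrentInfos does above. */
    static byte[] toBytes(Directory dir, SegmentInfos infos) throws IOException {
        RAMOutputStream out = new RAMOutputStream(new RAMFile(), true);
        infos.write(dir, out);
        byte[] bytes = new byte[(int) out.getFilePointer()];
        out.writeTo(bytes, 0);
        return bytes;
    }

    /** Reads the bytes back, as ReplicaNode.start does with the copy state's gen. */
    static SegmentInfos fromBytes(Directory dir, byte[] bytes, long gen) throws IOException {
        return SegmentInfos.readCommit(dir, new BufferedChecksumIndexInput(new ByteArrayIndexInput("SegmentInfos", bytes)), gen);
    }
}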

Example 19 with SegmentInfos

Use of org.apache.lucene.index.SegmentInfos in project lucene-solr by apache.

The class MockDirectoryWrapper, method corruptUnknownFiles.

public synchronized void corruptUnknownFiles() throws IOException {
    System.out.println("MDW: corrupt unknown files");
    Set<String> knownFiles = new HashSet<>();
    for (String fileName : listAll()) {
        if (fileName.startsWith(IndexFileNames.SEGMENTS)) {
            System.out.println("MDW: read " + fileName + " to gather files it references");
            SegmentInfos infos;
            try {
                infos = SegmentInfos.readCommit(this, fileName);
            } catch (IOException ioe) {
                System.out.println("MDW: exception reading segment infos " + fileName + "; files: " + Arrays.toString(listAll()));
                throw ioe;
            }
            knownFiles.addAll(infos.files(true));
        }
    }
    Set<String> toCorrupt = new HashSet<>();
    Matcher m = IndexFileNames.CODEC_FILE_PATTERN.matcher("");
    for (String fileName : listAll()) {
        m.reset(fileName);
        if (knownFiles.contains(fileName) == false && fileName.endsWith("write.lock") == false && (m.matches() || fileName.startsWith(IndexFileNames.PENDING_SEGMENTS))) {
            toCorrupt.add(fileName);
        }
    }
    corruptFiles(toCorrupt);
}
Also used: SegmentInfos(org.apache.lucene.index.SegmentInfos) Matcher(java.util.regex.Matcher) IOException(java.io.IOException) HashSet(java.util.HashSet)
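
The loop above rebuilds the known-files set by reading every segments_N directly. A higher-level sketch of the same idea uses DirectoryReader.listCommits, which enumerates all commit points in a directory (and throws IndexNotFoundException when there is no commit at all):

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.store.Directory;

public class KnownFiles {
    /** Collects every file referenced by any commit point in dir. */
    static Set<String> knownFiles(Directory dir) throws IOException {
        Set<String> known = new HashSet<>();
        for (IndexCommit commit : DirectoryReader.listCommits(dir)) {
            // getFileNames returns all index files this commit references:
            known.add(commit.getSegmentsFileName());
            known.addAll(commit.getFileNames());
        }
        return known;
    }
}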

Example 20 with SegmentInfos

Use of org.apache.lucene.index.SegmentInfos in project lucene-solr by apache.

The class ReplicationHandler, method getFileList.

@SuppressWarnings("unchecked")
private void getFileList(SolrParams solrParams, SolrQueryResponse rsp) {
    String v = solrParams.get(GENERATION);
    if (v == null) {
        rsp.add("status", "no index generation specified");
        return;
    }
    long gen = Long.parseLong(v);
    IndexCommit commit = core.getDeletionPolicy().getCommitPoint(gen);
    //System.out.println("ask for files for gen:" + commit.getGeneration() + core.getCoreDescriptor().getCoreContainer().getZkController().getNodeName());
    if (commit == null) {
        rsp.add("status", "invalid index generation");
        return;
    }
    // reserve the index commit for some time
    core.getDeletionPolicy().setReserveDuration(gen, reserveCommitDuration);
    List<Map<String, Object>> result = new ArrayList<>();
    Directory dir = null;
    try {
        dir = core.getDirectoryFactory().get(core.getNewIndexDir(), DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
        SegmentInfos infos = SegmentInfos.readCommit(dir, commit.getSegmentsFileName());
        for (SegmentCommitInfo commitInfo : infos) {
            for (String file : commitInfo.files()) {
                Map<String, Object> fileMeta = new HashMap<>();
                fileMeta.put(NAME, file);
                fileMeta.put(SIZE, dir.fileLength(file));
                try (final IndexInput in = dir.openInput(file, IOContext.READONCE)) {
                    try {
                        long checksum = CodecUtil.retrieveChecksum(in);
                        fileMeta.put(CHECKSUM, checksum);
                    } catch (Exception e) {
                        LOG.warn("Could not read checksum from index file: " + file, e);
                    }
                }
                result.add(fileMeta);
            }
        }
        // add the segments_N file
        Map<String, Object> fileMeta = new HashMap<>();
        fileMeta.put(NAME, infos.getSegmentsFileName());
        fileMeta.put(SIZE, dir.fileLength(infos.getSegmentsFileName()));
        if (infos.getId() != null) {
            try (final IndexInput in = dir.openInput(infos.getSegmentsFileName(), IOContext.READONCE)) {
                try {
                    fileMeta.put(CHECKSUM, CodecUtil.retrieveChecksum(in));
                } catch (Exception e) {
                    LOG.warn("Could not read checksum from index file: " + infos.getSegmentsFileName(), e);
                }
            }
        }
        result.add(fileMeta);
    } catch (IOException e) {
        rsp.add("status", "unable to get file names for given index generation");
        rsp.add(EXCEPTION, e);
        LOG.error("Unable to get file names for indexCommit generation: " + gen, e);
    } finally {
        if (dir != null) {
            try {
                core.getDirectoryFactory().release(dir);
            } catch (IOException e) {
                SolrException.log(LOG, "Could not release directory after fetching file list", e);
            }
        }
    }
    rsp.add(CMD_GET_FILE_LIST, result);
    // fetch list of tlog files only if cdcr is activated
    if (solrParams.getBool(TLOG_FILES, true) && core.getUpdateHandler().getUpdateLog() != null && core.getUpdateHandler().getUpdateLog() instanceof CdcrUpdateLog) {
        try {
            List<Map<String, Object>> tlogfiles = getTlogFileList(commit);
            LOG.info("Adding tlog files to list: " + tlogfiles);
            rsp.add(TLOG_FILES, tlogfiles);
        } catch (IOException e) {
            rsp.add("status", "unable to get tlog file names for given index generation");
            rsp.add(EXCEPTION, e);
            LOG.error("Unable to get tlog file names for indexCommit generation: " + gen, e);
        }
    }
    if (confFileNameAlias.size() < 1 || core.getCoreContainer().isZooKeeperAware())
        return;
    LOG.debug("Adding config files to list: " + includeConfFiles);
    // if configuration files need to be included, get their details
    rsp.add(CONF_FILES, getConfFileInfoFromCache(confFileNameAlias, confFileInfoCache));
}
Also used: SegmentInfos(org.apache.lucene.index.SegmentInfos) SegmentCommitInfo(org.apache.lucene.index.SegmentCommitInfo) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) IOException(java.io.IOException) IndexCommit(org.apache.lucene.index.IndexCommit) NoSuchFileException(java.nio.file.NoSuchFileException) SolrException(org.apache.solr.common.SolrException) FileNotFoundException(java.io.FileNotFoundException) CdcrUpdateLog(org.apache.solr.update.CdcrUpdateLog) IndexInput(org.apache.lucene.store.IndexInput) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) Map(java.util.Map) MetricsMap(org.apache.solr.metrics.MetricsMap) Directory(org.apache.lucene.store.Directory)
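
getFileList only reads the checksum already stored in each file's footer; CodecUtil can also recompute the checksum over the whole file and verify it against that footer. A small sketch of both, with hypothetical helper names:

import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;

public class FileChecksums {
    /** Reads the checksum stored in the file's footer, as getFileList does. */
    static long storedChecksum(Directory dir, String fileName) throws IOException {
        try (IndexInput in = dir.openInput(fileName, IOContext.READONCE)) {
            return CodecUtil.retrieveChecksum(in);
        }
    }

    /** Recomputes the checksum over the entire file and compares it to the footer. */
    static long verifiedChecksum(Directory dir, String fileName) throws IOException {
        try (IndexInput in = dir.openInput(fileName, IOContext.READONCE)) {
            // Throws CorruptIndexException if the recomputed value does not match:
            return CodecUtil.checksumEntireFile(in);
        }
    }
}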

Aggregations

SegmentInfos (org.apache.lucene.index.SegmentInfos): 23 uses
IOException (java.io.IOException): 9 uses
IndexWriter (org.apache.lucene.index.IndexWriter): 9 uses
Term (org.apache.lucene.index.Term): 8 uses
IndexSearcher (org.apache.lucene.search.IndexSearcher): 8 uses
TermQuery (org.apache.lucene.search.TermQuery): 7 uses
Document (org.apache.lucene.document.Document): 6 uses
TextField (org.apache.lucene.document.TextField): 6 uses
DirectoryReader (org.apache.lucene.index.DirectoryReader): 6 uses
Directory (org.apache.lucene.store.Directory): 6 uses
IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 5 uses
SegmentCommitInfo (org.apache.lucene.index.SegmentCommitInfo): 5 uses
ArrayList (java.util.ArrayList): 4 uses
HashMap (java.util.HashMap): 4 uses
IndexReader (org.apache.lucene.index.IndexReader): 4 uses
Collectors (java.util.stream.Collectors): 3 uses
IntStream (java.util.stream.IntStream): 3 uses
Store (org.apache.lucene.document.Field.Store): 3 uses
StringField (org.apache.lucene.document.StringField): 3 uses
RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 3 uses