Search in sources :

Example 1 with MonitoredTask

use of org.apache.hadoop.hbase.monitoring.MonitoredTask in project hbase by apache.

the class HMaster method startActiveMasterManager.

/**
 * Registers this master under the backup-master znode directory and then starts a daemon
 * thread that tries to become the active master, so the caller is not blocked.
 *
 * @param infoPort info port written into the backup znode and handed to the
 *                 active master manager
 * @throws KeeperException propagated from the ZooKeeper-backed calls made in this method
 */
private void startActiveMasterManager(int infoPort) throws KeeperException {
    String backupZNode = ZKUtil.joinZNode(zooKeeper.znodePaths.backupMasterAddressesZNode, serverName.toString());
    /*
    * Add a ZNode for ourselves in the backup master directory since we
    * may not become the active master. If so, we want the actual active
    * master to know we are backup masters, so that it won't assign
    * regions to us if so configured.
    *
    * If we become the active master later, ActiveMasterManager will delete
    * this node explicitly.  If we crash before then, ZooKeeper will delete
    * this node for us since it is ephemeral.
    */
    LOG.info("Adding backup master ZNode " + backupZNode);
    if (!MasterAddressTracker.setMasterAddress(zooKeeper, backupZNode, serverName, infoPort)) {
        LOG.warn("Failed create of " + backupZNode + " by " + serverName);
    }
    activeMasterManager.setInfoPort(infoPort);
    // Start a thread to try to become the active master, so we won't block here
    Threads.setDaemonThreadRunning(new Thread(new Runnable() {

        @Override
        public void run() {
            int timeout = conf.getInt(HConstants.ZK_SESSION_TIMEOUT, HConstants.DEFAULT_ZK_SESSION_TIMEOUT);
            // If we're a backup master, stall until a primary writes its address
            if (conf.getBoolean(HConstants.MASTER_TYPE_BACKUP, HConstants.DEFAULT_MASTER_TYPE_BACKUP)) {
                LOG.debug("HMaster started in backup mode. " + "Stalling until master znode is written.");
                // Plain polling with a sleep between checks; no watch is set on the parent znode
                while (!activeMasterManager.hasActiveMaster()) {
                    LOG.debug("Waiting for master address ZNode to be written " + "(Also watching cluster state node)");
                    Threads.sleep(timeout);
                }
            }
            MonitoredTask status = TaskMonitor.get().createStatus("Master startup");
            status.setDescription("Master startup");
            try {
                if (activeMasterManager.blockUntilBecomingActiveMaster(timeout, status)) {
                    finishActiveMasterInitialization(status);
                }
            } catch (Throwable t) {
                // getMessage() may legitimately be null; read it once and guard before inspecting it
                String failureMessage = t.getMessage();
                status.setStatus("Failed to become active: " + failureMessage);
                LOG.fatal("Failed to become active master", t);
                // HBASE-5680: Likely hadoop23 vs hadoop 20.x/1.x incompatibility
                if (t instanceof NoClassDefFoundError && failureMessage != null && failureMessage.contains("org/apache/hadoop/hdfs/protocol/HdfsConstants$SafeModeAction")) {
                    // improved error message for this special case
                    abort("HBase is having a problem with its Hadoop jars.  You may need to " + "recompile HBase against Hadoop version " + org.apache.hadoop.util.VersionInfo.getVersion() + " or change your hadoop jars to start properly", t);
                } else {
                    abort("Unhandled exception. Starting shutdown.", t);
                }
            } finally {
                // always release the monitored task, whether startup succeeded or failed
                status.cleanup();
            }
        }
    }, getServerName().toShortString() + ".activeMasterManager"));
}
Also used : HasThread(org.apache.hadoop.hbase.util.HasThread) MonitoredTask(org.apache.hadoop.hbase.monitoring.MonitoredTask)

Example 2 with MonitoredTask

use of org.apache.hadoop.hbase.monitoring.MonitoredTask in project hbase by apache.

the class RestoreSnapshotHelper method copySnapshotForScanner.

/**
   * Copy the snapshot files for a snapshot scanner, discards meta changes.
   * <p>
   * The restore directory must be on the same filesystem as the HBase root directory, and must
   * be neither the root directory itself nor located below it.
   * @param conf configuration used to resolve the filesystems of both directories
   * @param fs filesystem used to read the snapshot and to restore the regions
   * @param rootDir HBase root directory that holds the completed snapshot
   * @param restoreDir directory the snapshot files are restored into
   * @param snapshotName name of the completed snapshot to restore
   * @return the meta changes reported by the restore helper
   * @throws IOException if reading the snapshot or restoring the regions fails
   * @throws IllegalArgumentException if the two directories are on different filesystems, or
   *           if the restore directory is the root directory or lies below it
   */
public static RestoreMetaChanges copySnapshotForScanner(Configuration conf, FileSystem fs, Path rootDir, Path restoreDir, String snapshotName) throws IOException {
    // both directories have to be on the same filesystem
    if (!restoreDir.getFileSystem(conf).getUri().equals(rootDir.getFileSystem(conf).getUri())) {
        throw new IllegalArgumentException("Filesystems for restore directory and HBase root " + "directory should be the same");
    }
    // ensure that restore dir is not under root dir. Compare on a path-segment boundary so that
    // a sibling such as "/hbase-restore" is not mistaken for a child of "/hbase".
    String rootPath = rootDir.toUri().getPath();
    String restorePath = restoreDir.toUri().getPath();
    String rootPrefix = rootPath.endsWith("/") ? rootPath : rootPath + "/";
    if (restorePath.equals(rootPath) || restorePath.startsWith(rootPrefix)) {
        throw new IllegalArgumentException("Restore directory cannot be a sub directory of HBase " + "root directory. RootDir: " + rootDir + ", restoreDir: " + restoreDir);
    }
    Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
    SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
    SnapshotManifest manifest = SnapshotManifest.open(conf, fs, snapshotDir, snapshotDesc);
    MonitoredTask status = TaskMonitor.get().createStatus("Restoring  snapshot '" + snapshotName + "' to directory " + restoreDir);
    ForeignExceptionDispatcher monitor = new ForeignExceptionDispatcher();
    // we send createBackRefs=false so that restored hfiles do not create back reference links
    // in the base hbase root dir.
    RestoreSnapshotHelper helper = new RestoreSnapshotHelper(conf, fs, manifest, manifest.getTableDescriptor(), restoreDir, monitor, status, false);
    // TODO: parallelize.
    RestoreMetaChanges metaChanges = helper.restoreHdfsRegions();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Restored table dir:" + restoreDir);
        FSUtils.logFileSystemState(fs, restoreDir, LOG);
    }
    return metaChanges;
}
Also used : Path(org.apache.hadoop.fs.Path) SnapshotDescription(org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.SnapshotDescription) ForeignExceptionDispatcher(org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher) MonitoredTask(org.apache.hadoop.hbase.monitoring.MonitoredTask)

Example 3 with MonitoredTask

use of org.apache.hadoop.hbase.monitoring.MonitoredTask in project hbase by apache.

the class SplitLogManager method splitLogDistributed.

/**
   * The caller will block until all the log files selected by {@code filter} in the given log
   * directories have been processed - successfully split or an error is encountered - by an
   * available worker region server. This method must only be called after the region servers
   * have been brought online.
   * @param serverNames servers named in the status/log messages and passed on to
   *          removeRecoveringRegions once splitting has finished
   * @param logDirs List of log dirs to split
   * @param filter the Path filter to select specific files for considering
   * @throws IOException If there was an error while splitting any log file
   * @return cumulative size of the logfiles split
   */
public long splitLogDistributed(final Set<ServerName> serverNames, final List<Path> logDirs, PathFilter filter) throws IOException {
    MonitoredTask status = TaskMonitor.get().createStatus("Doing distributed log split in " + logDirs + " for serverName=" + serverNames);
    FileStatus[] logfiles = getFileList(logDirs, filter);
    status.setStatus("Checking directory contents...");
    SplitLogCounters.tot_mgr_log_split_batch_start.incrementAndGet();
    LOG.info("Started splitting " + logfiles.length + " logs in " + logDirs + " for " + serverNames);
    long t = EnvironmentEdgeManager.currentTime();
    long totalSize = 0;
    TaskBatch batch = new TaskBatch();
    Boolean isMetaRecovery = (filter == null) ? null : false;
    for (FileStatus lf : logfiles) {
        // TODO If the log file is still being written to - which is most likely
        // the case for the last log file - then its length will show up here
        // as zero. The size of such a file can only be retrieved after
        // recover-lease is done. totalSize will be under in most cases and the
        // metrics that it drives will also be under-reported.
        totalSize += lf.getLen();
        String pathToLog = FSUtils.removeWALRootPath(lf.getPath(), conf);
        if (!enqueueSplitTask(pathToLog, batch)) {
            // mark the monitored task as aborted before failing, as the later error path does
            String duplicateMsg = "duplicate log split scheduled for " + lf.getPath();
            status.abort(duplicateMsg);
            throw new IOException(duplicateMsg);
        }
    }
    waitForSplittingCompletion(batch, status);
    // remove recovering regions
    if (filter == MasterWalManager.META_FILTER) /* reference comparison */
    {
        // we split meta regions and user regions separately therefore logfiles are either all for
        // meta or user regions but won't for both( we could have mixed situations in tests)
        isMetaRecovery = true;
    }
    removeRecoveringRegions(serverNames, isMetaRecovery);
    if (batch.done != batch.installed) {
        batch.isDead = true;
        SplitLogCounters.tot_mgr_log_split_batch_err.incrementAndGet();
        LOG.warn("error while splitting logs in " + logDirs + " installed = " + batch.installed + " but only " + batch.done + " done");
        String msg = "error or interrupted while splitting logs in " + logDirs + " Task = " + batch;
        status.abort(msg);
        throw new IOException(msg);
    }
    for (Path logDir : logDirs) {
        status.setStatus("Cleaning up log directory...");
        final FileSystem fs = logDir.getFileSystem(conf);
        try {
            // non-recursive delete: a directory that still has files is reported, not removed
            if (fs.exists(logDir) && !fs.delete(logDir, false)) {
                LOG.warn("Unable to delete log src dir. Ignoring. " + logDir);
            }
        } catch (IOException ioe) {
            // cleanup is best-effort; log what is left behind and carry on
            FileStatus[] files = fs.listStatus(logDir);
            if (files != null && files.length > 0) {
                LOG.warn("Returning success without actually splitting and " + "deleting all the log files in path " + logDir + ": " + Arrays.toString(files), ioe);
            } else {
                LOG.warn("Unable to delete log src dir. Ignoring. " + logDir, ioe);
            }
        }
        SplitLogCounters.tot_mgr_log_split_batch_success.incrementAndGet();
    }
    String msg = "finished splitting (more than or equal to) " + totalSize + " bytes in " + batch.installed + " log files in " + logDirs + " in " + (EnvironmentEdgeManager.currentTime() - t) + "ms";
    status.markComplete(msg);
    LOG.info(msg);
    return totalSize;
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) FileSystem(org.apache.hadoop.fs.FileSystem) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) MonitoredTask(org.apache.hadoop.hbase.monitoring.MonitoredTask)

Example 4 with MonitoredTask

use of org.apache.hadoop.hbase.monitoring.MonitoredTask in project hbase by apache.

the class HRegion method replayWALFlushStartMarker.

/** Replay the flush marker from primary region by creating a corresponding snapshot of
   * the store memstores, only if the memstores do not have a higher seqId from an earlier wal
   * edit (because the events may be coming out of order).
   * <p>
   * Store descriptors whose family is not found in this region are logged and skipped.
   * @param flush the flush start descriptor being replayed
   * @return the result of preparing the flush when no flush was already in progress;
   *         {@code null} when the marker is skipped because its sequence id is below
   *         lastReplayedOpenRegionSeqId, or when a prepared flush is already active
   * @throws IOException declared by this method; presumably raised while preparing the flush
   *         (internalPrepareFlushCache) - confirm against that method
   */
@VisibleForTesting
PrepareFlushResult replayWALFlushStartMarker(FlushDescriptor flush) throws IOException {
    long flushSeqId = flush.getFlushSequenceNumber();
    // collect the stores named by the marker that exist in this region
    HashSet<Store> storesToFlush = new HashSet<>();
    for (StoreFlushDescriptor storeFlush : flush.getStoreFlushesList()) {
        byte[] family = storeFlush.getFamilyName().toByteArray();
        Store store = getStore(family);
        if (store == null) {
            LOG.warn(getRegionInfo().getEncodedName() + " : " + "Received a flush start marker from primary, but the family is not found. Ignoring" + " StoreFlushDescriptor:" + TextFormat.shortDebugString(storeFlush));
            continue;
        }
        storesToFlush.add(store);
    }
    MonitoredTask status = TaskMonitor.get().createStatus("Preparing flush " + this);
    // Hold the writestate monitor while deciding how to handle this marker; waiters on
    // writestate are notified in the finally block below.
    synchronized (writestate) {
        try {
            if (flush.getFlushSequenceNumber() < lastReplayedOpenRegionSeqId) {
                LOG.warn(getRegionInfo().getEncodedName() + " : " + "Skipping replaying flush event :" + TextFormat.shortDebugString(flush) + " because its sequence id is smaller than this regions lastReplayedOpenRegionSeqId " + " of " + lastReplayedOpenRegionSeqId);
                return null;
            }
            // reset the without-WAL counters if any such mutations were counted
            if (numMutationsWithoutWAL.sum() > 0) {
                numMutationsWithoutWAL.reset();
                dataInMemoryWithoutWAL.reset();
            }
            if (!writestate.flushing) {
                // we do not have an active snapshot and corresponding this.prepareResult. This means
                // we can just snapshot our memstores and continue as normal.
                // invoke prepareFlushCache. Send null as wal since we do not want the flush events in wal
                PrepareFlushResult prepareResult = internalPrepareFlushCache(null, flushSeqId, storesToFlush, status, false);
                if (prepareResult.result == null) {
                    // save the PrepareFlushResult so that we can use it later from commit flush
                    this.writestate.flushing = true;
                    this.prepareFlushResult = prepareResult;
                    status.markComplete("Flush prepare successful");
                    if (LOG.isDebugEnabled()) {
                        LOG.debug(getRegionInfo().getEncodedName() + " : " + " Prepared flush with seqId:" + flush.getFlushSequenceNumber());
                    }
                } else {
                    // our memstore is empty, but the primary is still flushing
                    if (prepareResult.getResult().getResult() == FlushResult.Result.CANNOT_FLUSH_MEMSTORE_EMPTY) {
                        this.writestate.flushing = true;
                        this.prepareFlushResult = prepareResult;
                        if (LOG.isDebugEnabled()) {
                            LOG.debug(getRegionInfo().getEncodedName() + " : " + " Prepared empty flush with seqId:" + flush.getFlushSequenceNumber());
                        }
                    }
                    // the status is marked aborted for every non-null result, including the
                    // empty-memstore case handled just above
                    status.abort("Flush prepare failed with " + prepareResult.result);
                // nothing much to do. prepare flush failed because of some reason.
                }
                return prepareResult;
            } else {
                // we already have an active snapshot.
                if (flush.getFlushSequenceNumber() == this.prepareFlushResult.flushOpSeqId) {
                    // They define the same flush. Log and continue.
                    LOG.warn(getRegionInfo().getEncodedName() + " : " + "Received a flush prepare marker with the same seqId: " + +flush.getFlushSequenceNumber() + " before clearing the previous one with seqId: " + prepareFlushResult.flushOpSeqId + ". Ignoring");
                // ignore
                } else if (flush.getFlushSequenceNumber() < this.prepareFlushResult.flushOpSeqId) {
                    // We received a flush with a smaller seqNum than what we have prepared. We can only
                    // ignore this prepare flush request.
                    LOG.warn(getRegionInfo().getEncodedName() + " : " + "Received a flush prepare marker with a smaller seqId: " + +flush.getFlushSequenceNumber() + " before clearing the previous one with seqId: " + prepareFlushResult.flushOpSeqId + ". Ignoring");
                // ignore
                } else {
                    // We received a flush with a larger seqNum than what we have prepared
                    LOG.warn(getRegionInfo().getEncodedName() + " : " + "Received a flush prepare marker with a larger seqId: " + +flush.getFlushSequenceNumber() + " before clearing the previous one with seqId: " + prepareFlushResult.flushOpSeqId + ". Ignoring");
                // We do not have multiple active snapshots in the memstore or a way to merge current
                // memstore snapshot with the contents and resnapshot for now. We cannot take
                // another snapshot and drop the previous one because that will cause temporary
                // data loss in the secondary. So we ignore this for now, deferring the resolution
                // to happen when we see the corresponding flush commit marker. If we have a memstore
                // snapshot with x, and later received another prepare snapshot with y (where x < y),
                // when we see flush commit for y, we will drop snapshot for x, and can also drop all
                // the memstore edits if everything in memstore is < y. This is the usual case for
                // RS crash + recovery where we might see consecutive prepare flush wal markers.
                // Otherwise, this will cause more memory to be used in secondary replica until a
                // further prepare + commit flush is seen and replayed.
                }
            }
        } finally {
            // runs on every exit path, including the early returns above
            status.cleanup();
            writestate.notifyAll();
        }
    }
    // reached only when a prepared flush was already active and this marker was ignored
    return null;
}
Also used : HashSet(java.util.HashSet) StoreFlushDescriptor(org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.FlushDescriptor.StoreFlushDescriptor) MonitoredTask(org.apache.hadoop.hbase.monitoring.MonitoredTask) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 5 with MonitoredTask

use of org.apache.hadoop.hbase.monitoring.MonitoredTask in project hbase by apache.

the class HRegion method flushcache.

/**
   * Flushes the memstore cache of this region.
   *
   * The flush is skipped, and a CANNOT_FLUSH result returned, when:
   * <ol>
   *   <li>the region is closing or already closed</li>
   *   <li>a flush is already in progress</li>
   *   <li>writes are disabled</li>
   * </ol>
   *
   * <p>This method takes the region read lock and may block for some time, so it should not
   * be called from a time-sensitive thread.
   * @param forceFlushAllStores when true every store is flushed, otherwise the flush policy
   *          selects the stores
   * @param writeFlushRequestWalMarker whether to write the flush request marker to WAL
   * @return whether the flush is success and whether the region needs compacting
   *
   * @throws IOException general io exceptions
   * @throws DroppedSnapshotException Thrown when replay of wal is required
   * because a Snapshot was not properly persisted. The region is put in closing mode, and the
   * caller MUST abort after this.
   */
public FlushResult flushcache(boolean forceFlushAllStores, boolean writeFlushRequestWalMarker) throws IOException {
    // bail out right away instead of queueing on the lock when the region is closing
    if (this.closing.get()) {
        String closingReason = "Skipping flush on " + this + " because closing";
        LOG.debug(closingReason);
        return new FlushResultImpl(FlushResult.Result.CANNOT_FLUSH, closingReason, false);
    }
    MonitoredTask status = TaskMonitor.get().createStatus("Flushing " + this);
    status.setStatus("Acquiring readlock on region");
    // may block here until the read lock becomes available
    lock.readLock().lock();
    try {
        if (this.closed.get()) {
            String closedReason = "Skipping flush on " + this + " because closed";
            LOG.debug(closedReason);
            status.abort(closedReason);
            return new FlushResultImpl(FlushResult.Result.CANNOT_FLUSH, closedReason, false);
        }
        if (coprocessorHost != null) {
            status.setStatus("Running coprocessor pre-flush hooks");
            coprocessorHost.preFlush();
        }
        // clear the without-WAL counters when any such mutations were recorded
        if (numMutationsWithoutWAL.sum() > 0) {
            numMutationsWithoutWAL.reset();
            dataInMemoryWithoutWAL.reset();
        }
        synchronized (writestate) {
            // refuse when another flush is running or writes are switched off
            if (writestate.flushing || !writestate.writesEnabled) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("NOT flushing memstore for region " + this + ", flushing=" + writestate.flushing + ", writesEnabled=" + writestate.writesEnabled);
                }
                String refusal = "Not flushing since " + (writestate.flushing ? "already flushing" : "writes not enabled");
                status.abort(refusal);
                return new FlushResultImpl(FlushResult.Result.CANNOT_FLUSH, refusal, false);
            }
            // claim the flush; released again in the finally block further down
            this.writestate.flushing = true;
        }
        try {
            Collection<Store> targetStores = forceFlushAllStores ? stores.values() : flushPolicy.selectStoresToFlush();
            FlushResult outcome = internalFlushcache(targetStores, status, writeFlushRequestWalMarker);
            if (coprocessorHost != null) {
                status.setStatus("Running post-flush coprocessor hooks");
                coprocessorHost.postFlush();
            }
            status.markComplete("Flush successful");
            return outcome;
        } finally {
            // whatever happened, clear the flushing flags and wake up waiters on writestate
            synchronized (writestate) {
                writestate.flushing = false;
                this.writestate.flushRequested = false;
                writestate.notifyAll();
            }
        }
    } finally {
        lock.readLock().unlock();
        status.cleanup();
    }
}
Also used : MonitoredTask(org.apache.hadoop.hbase.monitoring.MonitoredTask)

Aggregations

MonitoredTask (org.apache.hadoop.hbase.monitoring.MonitoredTask)20 Path (org.apache.hadoop.fs.Path)8 FileSystem (org.apache.hadoop.fs.FileSystem)7 Test (org.junit.Test)7 IOException (java.io.IOException)6 InterruptedIOException (java.io.InterruptedIOException)5 WAL (org.apache.hadoop.hbase.wal.WAL)5 WALKey (org.apache.hadoop.hbase.wal.WALKey)5 TreeMap (java.util.TreeMap)4 FaultyFileSystem (org.apache.hadoop.hbase.regionserver.TestStore.FaultyFileSystem)4 WALEdit (org.apache.hadoop.hbase.regionserver.wal.WALEdit)4 Cell (org.apache.hadoop.hbase.Cell)3 DoNotRetryIOException (org.apache.hadoop.hbase.DoNotRetryIOException)3 KeyValue (org.apache.hadoop.hbase.KeyValue)3 Get (org.apache.hadoop.hbase.client.Get)3 Result (org.apache.hadoop.hbase.client.Result)3 VisibleForTesting (com.google.common.annotations.VisibleForTesting)2 ArrayList (java.util.ArrayList)2 HashSet (java.util.HashSet)2 Configuration (org.apache.hadoop.conf.Configuration)2