Search in sources :

Example 1 with StopWatch

use of org.apache.hadoop.util.StopWatch in project hadoop by apache.

the class SecurityUtil method getByName.

/**
   * Resolves a host subject to the security requirements determined by
   * hadoop.security.token.service.use_ip. Optionally logs slow resolutions.
   * 
   * @param hostname host or ip to resolve
   * @return a resolved host
   * @throws UnknownHostException if the host doesn't exist
   */
@InterfaceAudience.Private
public static InetAddress getByName(String hostname) throws UnknownHostException {
    if (logSlowLookups || LOG.isTraceEnabled()) {
        StopWatch lookupTimer = new StopWatch().start();
        InetAddress result = hostResolver.getByName(hostname);
        long elapsedMs = lookupTimer.stop().now(TimeUnit.MILLISECONDS);
        if (elapsedMs >= slowLookupThresholdMs) {
            LOG.warn("Slow name lookup for " + hostname + ". Took " + elapsedMs + " ms.");
        } else if (LOG.isTraceEnabled()) {
            LOG.trace("Name lookup for " + hostname + " took " + elapsedMs + " ms.");
        }
        return result;
    } else {
        return hostResolver.getByName(hostname);
    }
}
Also used : InetAddress(java.net.InetAddress) StopWatch(org.apache.hadoop.util.StopWatch)
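
Example 1 relies on the fluent StopWatch API: start() and stop() both return the StopWatch instance, so the whole measurement chains into one expression. A minimal standalone sketch of the same pattern follows; the threshold constant, class name, and the Thread.sleep stand-in for the resolver call are illustrative placeholders, not part of SecurityUtil.

import java.util.concurrent.TimeUnit;

import org.apache.hadoop.util.StopWatch;

public class LookupTimingSketch {
    // Hypothetical threshold; SecurityUtil reads the real one from configuration.
    private static final long SLOW_THRESHOLD_MS = 1000;

    public static void main(String[] args) throws Exception {
        // start() returns the StopWatch itself, so construction and start can be chained.
        StopWatch timer = new StopWatch().start();
        // Stand-in for the resolver call being timed.
        Thread.sleep(50);
        // stop() also returns the instance, so the elapsed time reads as one expression.
        long elapsedMs = timer.stop().now(TimeUnit.MILLISECONDS);
        if (elapsedMs >= SLOW_THRESHOLD_MS) {
            System.out.println("Slow operation: took " + elapsedMs + " ms.");
        } else {
            System.out.println("Operation took " + elapsedMs + " ms.");
        }
    }
}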

Example 2 with StopWatch

use of org.apache.hadoop.util.StopWatch in project hadoop by apache.

the class DataXceiver method checkAndWaitForBP.

/**
   * Wait until the BP is registered, up to the configured amount of time.
   * Throws an exception if it times out, which should fail the client request.
   * Wait until the BP is registered, up to the configured amount of time.
   * Throws an exception if it times out, which should fail the client request.
   * @param block requested block
   */
void checkAndWaitForBP(final ExtendedBlock block) throws IOException {
    String bpId = block.getBlockPoolId();
    // Optimistically perform this first.
    try {
        datanode.getDNRegistrationForBP(bpId);
        return;
    } catch (IOException ioe) {
    // not registered
    }
    // retry
    long bpReadyTimeout = dnConf.getBpReadyTimeout();
    StopWatch sw = new StopWatch();
    sw.start();
    while (sw.now(TimeUnit.SECONDS) <= bpReadyTimeout) {
        try {
            datanode.getDNRegistrationForBP(bpId);
            return;
        } catch (IOException ioe) {
        // not registered
        }
        // sleep before trying again
        try {
            Thread.sleep(1000);
        } catch (InterruptedException ie) {
            throw new IOException("Interrupted while serving request. Aborting.");
        }
    }
    // failed to obtain registration.
    throw new IOException("Not ready to serve the block pool, " + bpId + ".");
}
Also used : ByteString(com.google.protobuf.ByteString) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) StopWatch(org.apache.hadoop.util.StopWatch)
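
The core of Example 2 is a generic poll-until-timeout loop: start a StopWatch, re-check the condition while now(TimeUnit.SECONDS) is within the budget, and sleep between attempts. A sketch of that loop in isolation is below; the waitFor helper and its BooleanSupplier parameter are illustrative and not part of DataXceiver.

import java.io.IOException;
import java.util.concurrent.TimeUnit;
import java.util.function.BooleanSupplier;

import org.apache.hadoop.util.StopWatch;

public class PollUntilReady {
    // Illustrative helper, not part of DataXceiver: poll a condition until it holds
    // or the timeout (in seconds) elapses, sleeping between attempts.
    static void waitFor(BooleanSupplier ready, long timeoutSec) throws IOException {
        StopWatch sw = new StopWatch().start();
        while (sw.now(TimeUnit.SECONDS) <= timeoutSec) {
            if (ready.getAsBoolean()) {
                // Condition satisfied within the budget.
                return;
            }
            try {
                // Back off before trying again.
                Thread.sleep(1000);
            } catch (InterruptedException ie) {
                Thread.currentThread().interrupt();
                throw new IOException("Interrupted while waiting. Aborting.");
            }
        }
        throw new IOException("Timed out after " + timeoutSec + " s.");
    }
}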

Example 3 with StopWatch

use of org.apache.hadoop.util.StopWatch in project hadoop by apache.

the class ErasureCodeBenchmarkThroughput method benchmark.

private void benchmark(OpType type, int dataSizeMB, int numClients, boolean isEc, boolean statefulRead) throws Exception {
    List<Long> sizes = null;
    StopWatch sw = new StopWatch().start();
    switch(type) {
        case READ:
            sizes = doBenchmark(true, dataSizeMB, numClients, isEc, statefulRead, false);
            break;
        case WRITE:
            sizes = doBenchmark(false, dataSizeMB, numClients, isEc, statefulRead, false);
            break;
        case GEN:
            sizes = doBenchmark(false, dataSizeMB, numClients, isEc, statefulRead, true);
    }
    long elapsedSec = sw.now(TimeUnit.SECONDS);
    double totalDataSizeMB = 0;
    for (Long size : sizes) {
        if (size >= 0) {
            totalDataSizeMB += size.doubleValue() / 1024 / 1024;
        }
    }
    double throughput = totalDataSizeMB / elapsedSec;
    DecimalFormat df = getDecimalFormat();
    System.out.println(type + " " + df.format(totalDataSizeMB) + " MB data takes: " + elapsedSec + " s.\nTotal throughput: " + df.format(throughput) + " MB/s.");
}
Also used : DecimalFormat(java.text.DecimalFormat) StopWatch(org.apache.hadoop.util.StopWatch)
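
Note that Example 3 never calls stop(): now() can be read while the StopWatch is still running, which is enough when the elapsed time is only consumed once. A small sketch of the throughput arithmetic under the same unit convention follows; the sizes and elapsed time are made-up values standing in for doBenchmark results and sw.now(TimeUnit.SECONDS).

import java.text.DecimalFormat;
import java.util.Arrays;
import java.util.List;

public class ThroughputSketch {
    public static void main(String[] args) {
        // Made-up sizes in bytes; negative entries are skipped, mirroring the
        // size >= 0 guard in the benchmark above.
        List<Long> sizes = Arrays.asList(2L * 1024 * 1024 * 1024, -1L, 1024L * 1024 * 1024);
        // Stand-in for sw.now(TimeUnit.SECONDS).
        long elapsedSec = 24;
        double totalMB = 0;
        for (Long size : sizes) {
            if (size >= 0) {
                totalMB += size.doubleValue() / 1024 / 1024;
            }
        }
        // 3072 MB over 24 s works out to 128 MB/s.
        double throughput = totalMB / elapsedSec;
        DecimalFormat df = new DecimalFormat("#.##");
        System.out.println(df.format(totalMB) + " MB in " + elapsedSec + " s = "
            + df.format(throughput) + " MB/s");
    }
}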

Example 4 with StopWatch

use of org.apache.hadoop.util.StopWatch in project hadoop by apache.

the class DFSInputStream method readBlockLength.

/** Read the block length from one of the datanodes. */
private long readBlockLength(LocatedBlock locatedblock) throws IOException {
    assert locatedblock != null : "LocatedBlock cannot be null";
    int replicaNotFoundCount = locatedblock.getLocations().length;
    final DfsClientConf conf = dfsClient.getConf();
    final int timeout = conf.getSocketTimeout();
    LinkedList<DatanodeInfo> nodeList = new LinkedList<DatanodeInfo>(Arrays.asList(locatedblock.getLocations()));
    LinkedList<DatanodeInfo> retryList = new LinkedList<DatanodeInfo>();
    boolean isRetry = false;
    StopWatch sw = new StopWatch();
    while (nodeList.size() > 0) {
        DatanodeInfo datanode = nodeList.pop();
        ClientDatanodeProtocol cdp = null;
        try {
            cdp = DFSUtilClient.createClientDatanodeProtocolProxy(datanode, dfsClient.getConfiguration(), timeout, conf.isConnectToDnViaHostname(), locatedblock);
            final long n = cdp.getReplicaVisibleLength(locatedblock.getBlock());
            if (n >= 0) {
                return n;
            }
        } catch (IOException ioe) {
            checkInterrupted(ioe);
            if (ioe instanceof RemoteException) {
                if (((RemoteException) ioe).unwrapRemoteException() instanceof ReplicaNotFoundException) {
                    // replica is not on the DN. We will treat it as 0 length
                    // if no one actually has a replica.
                    replicaNotFoundCount--;
                } else if (((RemoteException) ioe).unwrapRemoteException() instanceof RetriableException) {
                    // add to the list to be retried if necessary.
                    retryList.add(datanode);
                }
            }
            DFSClient.LOG.debug("Failed to getReplicaVisibleLength from datanode {}" + " for block {}", datanode, locatedblock.getBlock(), ioe);
        } finally {
            if (cdp != null) {
                RPC.stopProxy(cdp);
            }
        }
        // Ran out of nodes, but there are retriable nodes.
        if (nodeList.size() == 0 && retryList.size() > 0) {
            nodeList.addAll(retryList);
            retryList.clear();
            isRetry = true;
        }
        if (isRetry) {
            // start the stop watch if not already running.
            if (!sw.isRunning()) {
                sw.start();
            }
            try {
                // delay between retries.
                Thread.sleep(500);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new InterruptedIOException("Interrupted while getting the length.");
            }
        }
        // see if we ran out of retry time
        if (sw.isRunning() && sw.now(TimeUnit.MILLISECONDS) > timeout) {
            break;
        }
    }
    // Only treat the block as zero-length if every datanode reported that it
    // does not have the replica; any other failure may have happened on a DN
    // that has it, and we want to report that error.
    if (replicaNotFoundCount == 0) {
        return 0;
    }
    throw new IOException("Cannot obtain block length for " + locatedblock);
}
Also used : InterruptedIOException(java.io.InterruptedIOException) DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) ReplicaNotFoundException(org.apache.hadoop.hdfs.server.datanode.ReplicaNotFoundException) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) ClientDatanodeProtocol(org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol) LinkedList(java.util.LinkedList) StopWatch(org.apache.hadoop.util.StopWatch) DfsClientConf(org.apache.hadoop.hdfs.client.impl.DfsClientConf) RemoteException(org.apache.hadoop.ipc.RemoteException) RetriableException(org.apache.hadoop.ipc.RetriableException)
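
Example 4 starts the StopWatch lazily: the clock only begins on the first retry pass, and isRunning() both prevents a second start() and gates the timeout check. A stripped-down sketch of that lazy retry budget is below; the attempt() placeholder, the class name, and the timeout parameter are illustrative, and the surrounding node-list handling of readBlockLength is omitted.

import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.concurrent.TimeUnit;

import org.apache.hadoop.util.StopWatch;

public class LazyRetryBudget {
    // Placeholder for a call that may need to be retried.
    static boolean attempt() {
        return false;
    }

    static void runWithRetryBudget(long timeoutMs) throws IOException {
        // Created but not started: the retry clock should not tick during the first attempt.
        StopWatch sw = new StopWatch();
        while (true) {
            if (attempt()) {
                return;
            }
            if (!sw.isRunning()) {
                // Start the clock only once we are actually retrying.
                sw.start();
            }
            try {
                // Delay between retries.
                Thread.sleep(500);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new InterruptedIOException("Interrupted while retrying.");
            }
            if (sw.now(TimeUnit.MILLISECONDS) > timeoutMs) {
                throw new IOException("Retry budget of " + timeoutMs + " ms exhausted.");
            }
        }
    }
}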

Example 5 with StopWatch

use of org.apache.hadoop.util.StopWatch in project hadoop by apache.

the class Journal method journal.

/**
   * Write a batch of edits to the journal.
   * {@see QJournalProtocol#journal(RequestInfo, long, long, int, byte[])}
   */
synchronized void journal(RequestInfo reqInfo, long segmentTxId, long firstTxnId, int numTxns, byte[] records) throws IOException {
    checkFormatted();
    checkWriteRequest(reqInfo);
    // A request with numTxns == 0 only carries an updated committedTxId,
    // so we can return early.
    if (numTxns == 0) {
        return;
    }
    checkSync(curSegment != null, "Can't write, no segment open");
    if (curSegmentTxId != segmentTxId) {
        // Sanity check: it is possible that the writer will fail IPCs
        // on both the finalize() and then the start() of the next segment.
        // This could cause us to continue writing to an old segment
        // instead of rolling to a new one, which breaks one of the
        // invariants in the design. If it happens, abort the segment
        // and throw an exception.
        JournalOutOfSyncException e = new JournalOutOfSyncException("Writer out of sync: it thinks it is writing segment " + segmentTxId + " but current segment is " + curSegmentTxId);
        abortCurSegment();
        throw e;
    }
    checkSync(nextTxId == firstTxnId, "Can't write txid " + firstTxnId + " expecting nextTxId=" + nextTxId);
    long lastTxnId = firstTxnId + numTxns - 1;
    if (LOG.isTraceEnabled()) {
        LOG.trace("Writing txid " + firstTxnId + "-" + lastTxnId);
    }
    // If the edit has already been marked as committed, we know
    // it has been fsynced on a quorum of other nodes, and we are
    // "catching up" with the rest. Hence we do not need to fsync.
    boolean isLagging = lastTxnId <= committedTxnId.get();
    boolean shouldFsync = !isLagging;
    curSegment.writeRaw(records, 0, records.length);
    curSegment.setReadyToFlush();
    StopWatch sw = new StopWatch();
    sw.start();
    curSegment.flush(shouldFsync);
    sw.stop();
    long nanoSeconds = sw.now();
    metrics.addSync(TimeUnit.MICROSECONDS.convert(nanoSeconds, TimeUnit.NANOSECONDS));
    long milliSeconds = TimeUnit.MILLISECONDS.convert(nanoSeconds, TimeUnit.NANOSECONDS);
    if (milliSeconds > WARN_SYNC_MILLIS_THRESHOLD) {
        LOG.warn("Sync of transaction range " + firstTxnId + "-" + lastTxnId + " took " + milliSeconds + "ms");
    }
    if (isLagging) {
        // This batch of edits has already been committed on a quorum of other
        // nodes. So, we are in "catch up" mode. This gets its own metric.
        metrics.batchesWrittenWhileLagging.incr(1);
    }
    metrics.batchesWritten.incr(1);
    metrics.bytesWritten.incr(records.length);
    metrics.txnsWritten.incr(numTxns);
    updateHighestWrittenTxId(lastTxnId);
    nextTxId = lastTxnId + 1;
    lastJournalTimestamp = Time.now();
}
Also used : JournalOutOfSyncException(org.apache.hadoop.hdfs.qjournal.protocol.JournalOutOfSyncException) StopWatch(org.apache.hadoop.util.StopWatch)
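
Unlike the earlier examples, Example 5 calls now() with no argument, which reports elapsed nanoseconds; the raw value is then converted once to microseconds for the metric and once to milliseconds for the slow-sync warning. A minimal sketch of that conversion step follows; the class name, sleep, and threshold are placeholders standing in for the flush and WARN_SYNC_MILLIS_THRESHOLD.

import java.util.concurrent.TimeUnit;

import org.apache.hadoop.util.StopWatch;

public class SyncTimingSketch {
    // Placeholder threshold standing in for WARN_SYNC_MILLIS_THRESHOLD.
    private static final long WARN_MILLIS = 1000;

    public static void main(String[] args) throws Exception {
        StopWatch sw = new StopWatch().start();
        // Stand-in for the flush being timed.
        Thread.sleep(20);
        sw.stop();
        // The no-arg now() reports nanoseconds.
        long nanos = sw.now();
        long micros = TimeUnit.MICROSECONDS.convert(nanos, TimeUnit.NANOSECONDS);
        long millis = TimeUnit.MILLISECONDS.convert(nanos, TimeUnit.NANOSECONDS);
        System.out.println("sync took " + micros + " us (" + millis + " ms)");
        if (millis > WARN_MILLIS) {
            System.out.println("WARN: sync exceeded " + WARN_MILLIS + " ms");
        }
    }
}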

Aggregations

StopWatch (org.apache.hadoop.util.StopWatch) 12
IOException (java.io.IOException) 5
ArrayList (java.util.ArrayList) 5
FileStatus (org.apache.hadoop.fs.FileStatus) 4
LocatedFileStatus (org.apache.hadoop.fs.LocatedFileStatus) 4
Path (org.apache.hadoop.fs.Path) 4
InterruptedIOException (java.io.InterruptedIOException) 2
DecimalFormat (java.text.DecimalFormat) 2
BlockLocation (org.apache.hadoop.fs.BlockLocation) 2
FileSystem (org.apache.hadoop.fs.FileSystem) 2
PathFilter (org.apache.hadoop.fs.PathFilter) 2
ByteString (com.google.protobuf.ByteString) 1
InetAddress (java.net.InetAddress) 1
ByteBuffer (java.nio.ByteBuffer) 1
LinkedList (java.util.LinkedList) 1
ExecutorService (java.util.concurrent.ExecutorService) 1
Future (java.util.concurrent.Future) 1
DfsClientConf (org.apache.hadoop.hdfs.client.impl.DfsClientConf) 1
ClientDatanodeProtocol (org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol) 1
DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo) 1