Search in sources :

Example 6 with StopWatch

use of org.apache.hadoop.util.StopWatch in project hadoop by apache.

the class Journal method journal.

  /**
   * Write a batch of edits to the journal.
   *
   * @see QJournalProtocol#journal(RequestInfo, long, long, int, byte[])
   */
synchronized void journal(RequestInfo reqInfo, long segmentTxId, long firstTxnId, int numTxns, byte[] records) throws IOException {
    // committedTxId only. So we can return early.
    if (numTxns == 0) {
    checkSync(curSegment != null, "Can't write, no segment open");
    if (curSegmentTxId != segmentTxId) {
        // Sanity check: it is possible that the writer will fail IPCs
        // on both the finalize() and then the start() of the next segment.
        // This could cause us to continue writing to an old segment
        // instead of rolling to a new one, which breaks one of the
        // invariants in the design. If it happens, abort the segment
        // and throw an exception.
        JournalOutOfSyncException e = new JournalOutOfSyncException("Writer out of sync: it thinks it is writing segment " + segmentTxId + " but current segment is " + curSegmentTxId);
        throw e;
    checkSync(nextTxId == firstTxnId, "Can't write txid " + firstTxnId + " expecting nextTxId=" + nextTxId);
    long lastTxnId = firstTxnId + numTxns - 1;
    if (LOG.isTraceEnabled()) {
        LOG.trace("Writing txid " + firstTxnId + "-" + lastTxnId);
    // If the edit has already been marked as committed, we know
    // it has been fsynced on a quorum of other nodes, and we are
    // "catching up" with the rest. Hence we do not need to fsync.
    boolean isLagging = lastTxnId <= committedTxnId.get();
    boolean shouldFsync = !isLagging;
    curSegment.writeRaw(records, 0, records.length);
    StopWatch sw = new StopWatch();
    long nanoSeconds =;
    metrics.addSync(TimeUnit.MICROSECONDS.convert(nanoSeconds, TimeUnit.NANOSECONDS));
    long milliSeconds = TimeUnit.MILLISECONDS.convert(nanoSeconds, TimeUnit.NANOSECONDS);
    if (milliSeconds > WARN_SYNC_MILLIS_THRESHOLD) {
        LOG.warn("Sync of transaction range " + firstTxnId + "-" + lastTxnId + " took " + milliSeconds + "ms");
    if (isLagging) {
        // This batch of edits has already been committed on a quorum of other
        // nodes. So, we are in "catch up" mode. This gets its own metric.
    nextTxId = lastTxnId + 1;
    lastJournalTimestamp =;
Also used : JournalOutOfSyncException(org.apache.hadoop.hdfs.qjournal.protocol.JournalOutOfSyncException) StopWatch(org.apache.hadoop.util.StopWatch)

Example 7 with StopWatch

use of org.apache.hadoop.util.StopWatch in project hadoop by apache.

the class TestJournalNode method doPerfTest.

private void doPerfTest(int editsSize, int numEdits) throws Exception {
    byte[] data = new byte[editsSize];
    ch.startLogSegment(1, NameNodeLayoutVersion.CURRENT_LAYOUT_VERSION).get();
    StopWatch sw = new StopWatch().start();
    for (int i = 1; i < numEdits; i++) {
        ch.sendEdits(1L, i, 1, data).get();
    long time =;
    System.err.println("Wrote " + numEdits + " batches of " + editsSize + " bytes in " + time + "ms");
    float avgRtt = (float) time / (float) numEdits;
    long throughput = ((long) numEdits * editsSize * 1000L) / time;
    System.err.println("Time per batch: " + avgRtt + "ms");
    System.err.println("Throughput: " + throughput + " bytes/sec");
Also used : StopWatch(org.apache.hadoop.util.StopWatch)

Example 8 with StopWatch

use of org.apache.hadoop.util.StopWatch in project hadoop by apache.

the class FileInputFormat method listStatus.

/** List input directories.
   * Subclasses may override to, e.g., select only files matching a regular
   * expression. 
   * @param job the job to list input paths for
   * @return array of FileStatus objects
   * @throws IOException if zero items.
protected FileStatus[] listStatus(JobConf job) throws IOException {
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job);
    // Whether we need to recursive look into the directory structure
    boolean recursive = job.getBoolean(INPUT_DIR_RECURSIVE, false);
    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
    PathFilter inputFilter = new MultiPathFilter(filters);
    FileStatus[] result;
    int numThreads = job.getInt(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.LIST_STATUS_NUM_THREADS, org.apache.hadoop.mapreduce.lib.input.FileInputFormat.DEFAULT_LIST_STATUS_NUM_THREADS);
    StopWatch sw = new StopWatch().start();
    if (numThreads == 1) {
        List<FileStatus> locatedFiles = singleThreadedListStatus(job, dirs, inputFilter, recursive);
        result = locatedFiles.toArray(new FileStatus[locatedFiles.size()]);
    } else {
        Iterable<FileStatus> locatedFiles = null;
        try {
            LocatedFileStatusFetcher locatedFileStatusFetcher = new LocatedFileStatusFetcher(job, dirs, recursive, inputFilter, false);
            locatedFiles = locatedFileStatusFetcher.getFileStatuses();
        } catch (InterruptedException e) {
            throw new IOException("Interrupted while getting file statuses");
        result = Iterables.toArray(locatedFiles, FileStatus.class);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Time taken to get FileStatuses: " +;
    }"Total input files to process : " + result.length);
    return result;
Also used : Path(org.apache.hadoop.fs.Path) PathFilter(org.apache.hadoop.fs.PathFilter) FileStatus(org.apache.hadoop.fs.FileStatus) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) ArrayList(java.util.ArrayList) IOException(java.io.IOException) StopWatch(org.apache.hadoop.util.StopWatch)

Example 9 with StopWatch

use of org.apache.hadoop.util.StopWatch in project hadoop by apache.

the class FileInputFormat method getSplits.

/** Splits files returned by {@link #listStatus(JobConf)} when
   * they're too big.*/
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    StopWatch sw = new StopWatch().start();
    FileStatus[] files = listStatus(job);
    // Save the number of input files for metrics/loadgen
    job.setLong(NUM_INPUT_FILES, files.length);
    // compute total size
    long totalSize = 0;
    for (FileStatus file : files) {
        // check we have valid files
        if (file.isDirectory()) {
            throw new IOException("Not a file: " + file.getPath());
        totalSize += file.getLen();
    long goalSize = totalSize / (numSplits == 0 ? 1 : numSplits);
    long minSize = Math.max(job.getLong(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.SPLIT_MINSIZE, 1), minSplitSize);
    // generate splits
    ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
    NetworkTopology clusterMap = new NetworkTopology();
    for (FileStatus file : files) {
        Path path = file.getPath();
        long length = file.getLen();
        if (length != 0) {
            FileSystem fs = path.getFileSystem(job);
            BlockLocation[] blkLocations;
            if (file instanceof LocatedFileStatus) {
                blkLocations = ((LocatedFileStatus) file).getBlockLocations();
            } else {
                blkLocations = fs.getFileBlockLocations(file, 0, length);
            if (isSplitable(fs, path)) {
                long blockSize = file.getBlockSize();
                long splitSize = computeSplitSize(goalSize, minSize, blockSize);
                long bytesRemaining = length;
                while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                    String[][] splitHosts = getSplitHostsAndCachedHosts(blkLocations, length - bytesRemaining, splitSize, clusterMap);
                    splits.add(makeSplit(path, length - bytesRemaining, splitSize, splitHosts[0], splitHosts[1]));
                    bytesRemaining -= splitSize;
                if (bytesRemaining != 0) {
                    String[][] splitHosts = getSplitHostsAndCachedHosts(blkLocations, length - bytesRemaining, bytesRemaining, clusterMap);
                    splits.add(makeSplit(path, length - bytesRemaining, bytesRemaining, splitHosts[0], splitHosts[1]));
            } else {
                if (LOG.isDebugEnabled()) {
                    // Log only if the file is big enough to be splitted
                    if (length > Math.min(file.getBlockSize(), minSize)) {
                        LOG.debug("File is not splittable so no parallelization " + "is possible: " + file.getPath());
                String[][] splitHosts = getSplitHostsAndCachedHosts(blkLocations, 0, length, clusterMap);
                splits.add(makeSplit(path, 0, length, splitHosts[0], splitHosts[1]));
        } else {
            //Create empty hosts array for zero length files
            splits.add(makeSplit(path, 0, length, new String[0]));
    if (LOG.isDebugEnabled()) {
        LOG.debug("Total # of splits generated by getSplits: " + splits.size() + ", TimeTaken: " +;
    return splits.toArray(new FileSplit[splits.size()]);
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) ArrayList(java.util.ArrayList) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) IOException(java.io.IOException) BlockLocation(org.apache.hadoop.fs.BlockLocation) StopWatch(org.apache.hadoop.util.StopWatch) NetworkTopology(org.apache.hadoop.net.NetworkTopology) FileSystem(org.apache.hadoop.fs.FileSystem)

Example 10 with StopWatch

use of org.apache.hadoop.util.StopWatch in project hadoop by apache.

the class FileInputFormat method getSplits.

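  /**
   * Generate the list of files and make them into FileSplits.
   *
   * @param job the job context
   * @return the list of generated splits
   * @throws IOException if listing the input files or looking up their block locations fails
   */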
public List<InputSplit> getSplits(JobContext job) throws IOException {
    StopWatch sw = new StopWatch().start();
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);
    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    for (FileStatus file : files) {
        Path path = file.getPath();
        long length = file.getLen();
        if (length != 0) {
            BlockLocation[] blkLocations;
            if (file instanceof LocatedFileStatus) {
                blkLocations = ((LocatedFileStatus) file).getBlockLocations();
            } else {
                FileSystem fs = path.getFileSystem(job.getConfiguration());
                blkLocations = fs.getFileBlockLocations(file, 0, length);
            if (isSplitable(job, path)) {
                long blockSize = file.getBlockSize();
                long splitSize = computeSplitSize(blockSize, minSize, maxSize);
                long bytesRemaining = length;
                while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                    int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                    splits.add(makeSplit(path, length - bytesRemaining, splitSize, blkLocations[blkIndex].getHosts(), blkLocations[blkIndex].getCachedHosts()));
                    bytesRemaining -= splitSize;
                if (bytesRemaining != 0) {
                    int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                    splits.add(makeSplit(path, length - bytesRemaining, bytesRemaining, blkLocations[blkIndex].getHosts(), blkLocations[blkIndex].getCachedHosts()));
            } else {
                // not splitable
                if (LOG.isDebugEnabled()) {
                    // Log only if the file is big enough to be splitted
                    if (length > Math.min(file.getBlockSize(), minSize)) {
                        LOG.debug("File is not splittable so no parallelization " + "is possible: " + file.getPath());
                splits.add(makeSplit(path, 0, length, blkLocations[0].getHosts(), blkLocations[0].getCachedHosts()));
        } else {
            //Create empty hosts array for zero length files
            splits.add(makeSplit(path, 0, length, new String[0]));
    // Save the number of input files for metrics/loadgen
    job.getConfiguration().setLong(NUM_INPUT_FILES, files.size());
    if (LOG.isDebugEnabled()) {
        LOG.debug("Total # of splits generated by getSplits: " + splits.size() + ", TimeTaken: " +;
    return splits;
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) ArrayList(java.util.ArrayList) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) BlockLocation(org.apache.hadoop.fs.BlockLocation) StopWatch(org.apache.hadoop.util.StopWatch) FileSystem(org.apache.hadoop.fs.FileSystem) InputSplit(org.apache.hadoop.mapreduce.InputSplit)


StopWatch (org.apache.hadoop.util.StopWatch)12 IOException ( ArrayList (java.util.ArrayList)5 FileStatus (org.apache.hadoop.fs.FileStatus)4 LocatedFileStatus (org.apache.hadoop.fs.LocatedFileStatus)4 Path (org.apache.hadoop.fs.Path)4 InterruptedIOException ( DecimalFormat (java.text.DecimalFormat)2 BlockLocation (org.apache.hadoop.fs.BlockLocation)2 FileSystem (org.apache.hadoop.fs.FileSystem)2 PathFilter (org.apache.hadoop.fs.PathFilter)2 ByteString ( InetAddress ( ByteBuffer (java.nio.ByteBuffer)1 LinkedList (java.util.LinkedList)1 ExecutorService (java.util.concurrent.ExecutorService)1 Future (java.util.concurrent.Future)1 DfsClientConf (org.apache.hadoop.hdfs.client.impl.DfsClientConf)1 ClientDatanodeProtocol (org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol)1 DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo)1