
Example 6 with StopWatch

Use of org.apache.hadoop.util.StopWatch in project hadoop by apache.

From the class Journal, method journal.

/**
   * Write a batch of edits to the journal.
   * {@see QJournalProtocol#journal(RequestInfo, long, long, int, byte[])}
   */
synchronized void journal(RequestInfo reqInfo, long segmentTxId, long firstTxnId, int numTxns, byte[] records) throws IOException {
    checkFormatted();
    checkWriteRequest(reqInfo);
    // If numTxns is 0, it's actually a fake send which aims at updating
    // committedTxId only. So we can return early.
    if (numTxns == 0) {
        return;
    }
    checkSync(curSegment != null, "Can't write, no segment open");
    if (curSegmentTxId != segmentTxId) {
        // Sanity check: it is possible that the writer will fail IPCs
        // on both the finalize() and then the start() of the next segment.
        // This could cause us to continue writing to an old segment
        // instead of rolling to a new one, which breaks one of the
        // invariants in the design. If it happens, abort the segment
        // and throw an exception.
        JournalOutOfSyncException e = new JournalOutOfSyncException("Writer out of sync: it thinks it is writing segment " + segmentTxId + " but current segment is " + curSegmentTxId);
        abortCurSegment();
        throw e;
    }
    checkSync(nextTxId == firstTxnId, "Can't write txid " + firstTxnId + " expecting nextTxId=" + nextTxId);
    long lastTxnId = firstTxnId + numTxns - 1;
    if (LOG.isTraceEnabled()) {
        LOG.trace("Writing txid " + firstTxnId + "-" + lastTxnId);
    }
    // If the edit has already been marked as committed, we know
    // it has been fsynced on a quorum of other nodes, and we are
    // "catching up" with the rest. Hence we do not need to fsync.
    boolean isLagging = lastTxnId <= committedTxnId.get();
    boolean shouldFsync = !isLagging;
    curSegment.writeRaw(records, 0, records.length);
    curSegment.setReadyToFlush();
    StopWatch sw = new StopWatch();
    sw.start();
    curSegment.flush(shouldFsync);
    sw.stop();
    long nanoSeconds = sw.now();
    metrics.addSync(TimeUnit.MICROSECONDS.convert(nanoSeconds, TimeUnit.NANOSECONDS));
    long milliSeconds = TimeUnit.MILLISECONDS.convert(nanoSeconds, TimeUnit.NANOSECONDS);
    if (milliSeconds > WARN_SYNC_MILLIS_THRESHOLD) {
        LOG.warn("Sync of transaction range " + firstTxnId + "-" + lastTxnId + " took " + milliSeconds + "ms");
    }
    if (isLagging) {
        // This batch of edits has already been committed on a quorum of other
        // nodes. So, we are in "catch up" mode. This gets its own metric.
        metrics.batchesWrittenWhileLagging.incr(1);
    }
    metrics.batchesWritten.incr(1);
    metrics.bytesWritten.incr(records.length);
    metrics.txnsWritten.incr(numTxns);
    updateHighestWrittenTxId(lastTxnId);
    nextTxId = lastTxnId + 1;
    lastJournalTimestamp = Time.now();
}
Also used : JournalOutOfSyncException(org.apache.hadoop.hdfs.qjournal.protocol.JournalOutOfSyncException) StopWatch(org.apache.hadoop.util.StopWatch)
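
The StopWatch usage here is the core pattern: start, do the timed work, stop, then read the elapsed nanoseconds once and convert them for each consumer. Below is a minimal, self-contained sketch of that pattern, with Thread.sleep standing in for curSegment.flush(shouldFsync) and an illustrative warning threshold (not the value Journal actually defines):

import java.util.concurrent.TimeUnit;
import org.apache.hadoop.util.StopWatch;

public class SyncTimingSketch {

    // Illustrative threshold only; the real WARN_SYNC_MILLIS_THRESHOLD is defined in Journal.
    private static final long WARN_SYNC_MILLIS_THRESHOLD = 1000;

    public static void main(String[] args) throws Exception {
        StopWatch sw = new StopWatch();
        sw.start();
        // Stand-in for the timed work, i.e. curSegment.flush(shouldFsync).
        Thread.sleep(5);
        sw.stop();
        // now() without an argument reports elapsed nanoseconds.
        long nanos = sw.now();
        long micros = TimeUnit.MICROSECONDS.convert(nanos, TimeUnit.NANOSECONDS);
        long millis = TimeUnit.MILLISECONDS.convert(nanos, TimeUnit.NANOSECONDS);
        System.out.println("Sync took " + micros + " us");
        if (millis > WARN_SYNC_MILLIS_THRESHOLD) {
            System.out.println("WARN: slow sync: " + millis + " ms");
        }
    }
}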

Example 7 with StopWatch

Use of org.apache.hadoop.util.StopWatch in project hadoop by apache.

From the class TestJournalNode, method doPerfTest.

private void doPerfTest(int editsSize, int numEdits) throws Exception {
    byte[] data = new byte[editsSize];
    ch.newEpoch(1).get();
    ch.setEpoch(1);
    ch.startLogSegment(1, NameNodeLayoutVersion.CURRENT_LAYOUT_VERSION).get();
    StopWatch sw = new StopWatch().start();
    for (int i = 1; i < numEdits; i++) {
        ch.sendEdits(1L, i, 1, data).get();
    }
    long time = sw.now(TimeUnit.MILLISECONDS);
    System.err.println("Wrote " + numEdits + " batches of " + editsSize + " bytes in " + time + "ms");
    float avgRtt = (float) time / (float) numEdits;
    long throughput = ((long) numEdits * editsSize * 1000L) / time;
    System.err.println("Time per batch: " + avgRtt + "ms");
    System.err.println("Throughput: " + throughput + " bytes/sec");
}
Also used : StopWatch(org.apache.hadoop.util.StopWatch)
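
The last four lines pack the interesting arithmetic. Here is the same computation with assumed numbers (batch size, batch count, and elapsed time are invented for illustration):

public class ThroughputSketch {
    public static void main(String[] args) {
        // All three inputs are assumed values.
        int editsSize = 8192;   // bytes per batch
        int numEdits = 10000;   // number of batches
        long timeMs = 2500;     // elapsed time, as sw.now(TimeUnit.MILLISECONDS) would report it

        // Same formulas as doPerfTest above.
        float avgRtt = (float) timeMs / (float) numEdits;                 // 0.25 ms per batch
        long throughput = ((long) numEdits * editsSize * 1000L) / timeMs; // 32,768,000 bytes/sec

        System.out.println("Time per batch: " + avgRtt + "ms");
        System.out.println("Throughput: " + throughput + " bytes/sec");
    }
}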

Example 8 with StopWatch

Use of org.apache.hadoop.util.StopWatch in project hadoop by apache.

From the class FileInputFormat, method listStatus.

/** List input directories.
   * Subclasses may override to, e.g., select only files matching a regular
   * expression. 
   * 
   * @param job the job to list input paths for
   * @return array of FileStatus objects
   * @throws IOException if zero items.
   */
protected FileStatus[] listStatus(JobConf job) throws IOException {
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }
    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job);
    // Whether we need to recursive look into the directory structure
    boolean recursive = job.getBoolean(INPUT_DIR_RECURSIVE, false);
    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);
    FileStatus[] result;
    int numThreads = job.getInt(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.LIST_STATUS_NUM_THREADS, org.apache.hadoop.mapreduce.lib.input.FileInputFormat.DEFAULT_LIST_STATUS_NUM_THREADS);
    StopWatch sw = new StopWatch().start();
    if (numThreads == 1) {
        List<FileStatus> locatedFiles = singleThreadedListStatus(job, dirs, inputFilter, recursive);
        result = locatedFiles.toArray(new FileStatus[locatedFiles.size()]);
    } else {
        Iterable<FileStatus> locatedFiles = null;
        try {
            LocatedFileStatusFetcher locatedFileStatusFetcher = new LocatedFileStatusFetcher(job, dirs, recursive, inputFilter, false);
            locatedFiles = locatedFileStatusFetcher.getFileStatuses();
        } catch (InterruptedException e) {
            throw new IOException("Interrupted while getting file statuses");
        }
        result = Iterables.toArray(locatedFiles, FileStatus.class);
    }
    sw.stop();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Time taken to get FileStatuses: " + sw.now(TimeUnit.MILLISECONDS));
    }
    LOG.info("Total input files to process : " + result.length);
    return result;
}
Also used : Path(org.apache.hadoop.fs.Path) PathFilter(org.apache.hadoop.fs.PathFilter) FileStatus(org.apache.hadoop.fs.FileStatus) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) ArrayList(java.util.ArrayList) IOException(java.io.IOException) StopWatch(org.apache.hadoop.util.StopWatch)
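
Which branch runs is decided entirely by the LIST_STATUS_NUM_THREADS setting (default DEFAULT_LIST_STATUS_NUM_THREADS, i.e. 1): with a value above 1, listStatus() delegates to LocatedFileStatusFetcher instead of singleThreadedListStatus(). A hedged sketch of a driver opting into the threaded listing; the thread count is only an example value:

import org.apache.hadoop.mapred.JobConf;

public class ParallelListingSketch {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        // Any value greater than 1 sends listStatus() through LocatedFileStatusFetcher
        // instead of singleThreadedListStatus(); 4 is just an example.
        job.setInt(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.LIST_STATUS_NUM_THREADS, 4);
    }
}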

Example 9 with StopWatch

Use of org.apache.hadoop.util.StopWatch in project hadoop by apache.

From the class FileInputFormat, method getSplits.

/** Splits files returned by {@link #listStatus(JobConf)} when
   * they're too big.*/
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    StopWatch sw = new StopWatch().start();
    FileStatus[] files = listStatus(job);
    // Save the number of input files for metrics/loadgen
    job.setLong(NUM_INPUT_FILES, files.length);
    // compute total size
    long totalSize = 0;
    for (FileStatus file : files) {
        // check we have valid files
        if (file.isDirectory()) {
            throw new IOException("Not a file: " + file.getPath());
        }
        totalSize += file.getLen();
    }
    long goalSize = totalSize / (numSplits == 0 ? 1 : numSplits);
    long minSize = Math.max(job.getLong(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.SPLIT_MINSIZE, 1), minSplitSize);
    // generate splits
    ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
    NetworkTopology clusterMap = new NetworkTopology();
    for (FileStatus file : files) {
        Path path = file.getPath();
        long length = file.getLen();
        if (length != 0) {
            FileSystem fs = path.getFileSystem(job);
            BlockLocation[] blkLocations;
            if (file instanceof LocatedFileStatus) {
                blkLocations = ((LocatedFileStatus) file).getBlockLocations();
            } else {
                blkLocations = fs.getFileBlockLocations(file, 0, length);
            }
            if (isSplitable(fs, path)) {
                long blockSize = file.getBlockSize();
                long splitSize = computeSplitSize(goalSize, minSize, blockSize);
                long bytesRemaining = length;
                while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                    String[][] splitHosts = getSplitHostsAndCachedHosts(blkLocations, length - bytesRemaining, splitSize, clusterMap);
                    splits.add(makeSplit(path, length - bytesRemaining, splitSize, splitHosts[0], splitHosts[1]));
                    bytesRemaining -= splitSize;
                }
                if (bytesRemaining != 0) {
                    String[][] splitHosts = getSplitHostsAndCachedHosts(blkLocations, length - bytesRemaining, bytesRemaining, clusterMap);
                    splits.add(makeSplit(path, length - bytesRemaining, bytesRemaining, splitHosts[0], splitHosts[1]));
                }
            } else {
                if (LOG.isDebugEnabled()) {
                    // Log only if the file is big enough to be split
                    if (length > Math.min(file.getBlockSize(), minSize)) {
                        LOG.debug("File is not splittable so no parallelization " + "is possible: " + file.getPath());
                    }
                }
                String[][] splitHosts = getSplitHostsAndCachedHosts(blkLocations, 0, length, clusterMap);
                splits.add(makeSplit(path, 0, length, splitHosts[0], splitHosts[1]));
            }
        } else {
            //Create empty hosts array for zero length files
            splits.add(makeSplit(path, 0, length, new String[0]));
        }
    }
    sw.stop();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Total # of splits generated by getSplits: " + splits.size() + ", TimeTaken: " + sw.now(TimeUnit.MILLISECONDS));
    }
    return splits.toArray(new FileSplit[splits.size()]);
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) ArrayList(java.util.ArrayList) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) IOException(java.io.IOException) BlockLocation(org.apache.hadoop.fs.BlockLocation) StopWatch(org.apache.hadoop.util.StopWatch) NetworkTopology(org.apache.hadoop.net.NetworkTopology) FileSystem(org.apache.hadoop.fs.FileSystem)
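
Two helpers referenced above are not shown: computeSplitSize(goalSize, minSize, blockSize), which in this FileInputFormat reduces to Math.max(minSize, Math.min(goalSize, blockSize)), and SPLIT_SLOP, which is 1.1, so the loop keeps cutting fixed-size splits while more than 1.1 splits' worth of data remains and the leftover becomes one final split. A small sketch with assumed sizes that replays that logic for a single 300 MB file:

public class SplitSizeSketch {
    public static void main(String[] args) {
        // All sizes are assumed for illustration.
        long totalSize = 10L * 1024 * 1024 * 1024;  // total bytes across all input files
        int numSplits = 40;                         // split-count hint passed to getSplits()
        long goalSize = totalSize / numSplits;      // 256 MB
        long minSize = 1;                           // SPLIT_MINSIZE default
        long blockSize = 128L * 1024 * 1024;        // HDFS block size

        // Assumed to mirror computeSplitSize(goalSize, minSize, blockSize).
        long splitSize = Math.max(minSize, Math.min(goalSize, blockSize)); // 128 MB

        // Replay the split loop above for one 300 MB file (SPLIT_SLOP = 1.1).
        long bytesRemaining = 300L * 1024 * 1024;
        while (((double) bytesRemaining) / splitSize > 1.1) {
            System.out.println("split of " + splitSize + " bytes");
            bytesRemaining -= splitSize;
        }
        if (bytesRemaining != 0) {
            System.out.println("tail split of " + bytesRemaining + " bytes");
        }
        // Prints two 128 MB splits and one 44 MB tail split.
    }
}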

Example 10 with StopWatch

Use of org.apache.hadoop.util.StopWatch in project hadoop by apache.

From the class FileInputFormat, method getSplits (new API).

/** 
   * Generate the list of files and make them into FileSplits.
   * @param job the job context
   * @throws IOException
   */
public List<InputSplit> getSplits(JobContext job) throws IOException {
    StopWatch sw = new StopWatch().start();
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);
    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    for (FileStatus file : files) {
        Path path = file.getPath();
        long length = file.getLen();
        if (length != 0) {
            BlockLocation[] blkLocations;
            if (file instanceof LocatedFileStatus) {
                blkLocations = ((LocatedFileStatus) file).getBlockLocations();
            } else {
                FileSystem fs = path.getFileSystem(job.getConfiguration());
                blkLocations = fs.getFileBlockLocations(file, 0, length);
            }
            if (isSplitable(job, path)) {
                long blockSize = file.getBlockSize();
                long splitSize = computeSplitSize(blockSize, minSize, maxSize);
                long bytesRemaining = length;
                while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                    int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                    splits.add(makeSplit(path, length - bytesRemaining, splitSize, blkLocations[blkIndex].getHosts(), blkLocations[blkIndex].getCachedHosts()));
                    bytesRemaining -= splitSize;
                }
                if (bytesRemaining != 0) {
                    int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                    splits.add(makeSplit(path, length - bytesRemaining, bytesRemaining, blkLocations[blkIndex].getHosts(), blkLocations[blkIndex].getCachedHosts()));
                }
            } else {
                // not splitable
                if (LOG.isDebugEnabled()) {
                    // Log only if the file is big enough to be split
                    if (length > Math.min(file.getBlockSize(), minSize)) {
                        LOG.debug("File is not splittable so no parallelization " + "is possible: " + file.getPath());
                    }
                }
                splits.add(makeSplit(path, 0, length, blkLocations[0].getHosts(), blkLocations[0].getCachedHosts()));
            }
        } else {
            //Create empty hosts array for zero length files
            splits.add(makeSplit(path, 0, length, new String[0]));
        }
    }
    // Save the number of input files for metrics/loadgen
    job.getConfiguration().setLong(NUM_INPUT_FILES, files.size());
    sw.stop();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Total # of splits generated by getSplits: " + splits.size() + ", TimeTaken: " + sw.now(TimeUnit.MILLISECONDS));
    }
    return splits;
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) ArrayList(java.util.ArrayList) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) BlockLocation(org.apache.hadoop.fs.BlockLocation) StopWatch(org.apache.hadoop.util.StopWatch) FileSystem(org.apache.hadoop.fs.FileSystem) InputSplit(org.apache.hadoop.mapreduce.InputSplit)
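
Compared with Example 9, the target size here is not derived from a split-count hint: computeSplitSize(blockSize, minSize, maxSize) is bounded by the configured minimum and maximum split sizes (effectively max(minSize, min(maxSize, blockSize))). A hedged sketch of steering those bounds from a job driver; the byte values are illustrative:

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class SplitTuningSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance();
        // Raise the floor: with a 128 MB block size this yields 256 MB splits,
        // i.e. fewer, larger map tasks.
        FileInputFormat.setMinInputSplitSize(job, 256L * 1024 * 1024);
        // Cap the ceiling: no split will exceed 512 MB regardless of block size.
        FileInputFormat.setMaxInputSplitSize(job, 512L * 1024 * 1024);
    }
}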

Aggregations

StopWatch (org.apache.hadoop.util.StopWatch) 12
IOException (java.io.IOException) 5
ArrayList (java.util.ArrayList) 5
FileStatus (org.apache.hadoop.fs.FileStatus) 4
LocatedFileStatus (org.apache.hadoop.fs.LocatedFileStatus) 4
Path (org.apache.hadoop.fs.Path) 4
InterruptedIOException (java.io.InterruptedIOException) 2
DecimalFormat (java.text.DecimalFormat) 2
BlockLocation (org.apache.hadoop.fs.BlockLocation) 2
FileSystem (org.apache.hadoop.fs.FileSystem) 2
PathFilter (org.apache.hadoop.fs.PathFilter) 2
ByteString (com.google.protobuf.ByteString) 1
InetAddress (java.net.InetAddress) 1
ByteBuffer (java.nio.ByteBuffer) 1
LinkedList (java.util.LinkedList) 1
ExecutorService (java.util.concurrent.ExecutorService) 1
Future (java.util.concurrent.Future) 1
DfsClientConf (org.apache.hadoop.hdfs.client.impl.DfsClientConf) 1
ClientDatanodeProtocol (org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol) 1
DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo) 1