
Example 41 with DataInputStream

Use of java.io.DataInputStream in project hive by apache.

From the class TokenStoreDelegationTokenSecretManager, method decodeWritable:

public static void decodeWritable(Writable w, String idStr) throws IOException {
    DataInputStream in = new DataInputStream(new ByteArrayInputStream(Base64.decodeBase64(idStr)));
    w.readFields(in);
}
Also used: ByteArrayInputStream (java.io.ByteArrayInputStream), DataInputStream (java.io.DataInputStream)
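
A minimal round-trip sketch of the same pattern, not taken from the Hive source: the encode side writes the Writable into an in-memory buffer through a DataOutputStream and Base64-encodes the bytes, so a decodeWritable like the one above can rebuild the object. The class name here is illustrative only.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class WritableBase64RoundTrip {

    // Serialize a Writable to a Base64 string (the inverse of decodeWritable above).
    static String encodeWritable(Writable w) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(bos);
        w.write(out);
        out.flush();
        return Base64.encodeBase64String(bos.toByteArray());
    }

    // Rebuild the Writable from the Base64 string, as in the example above.
    static void decodeWritable(Writable w, String idStr) throws IOException {
        DataInputStream in = new DataInputStream(new ByteArrayInputStream(Base64.decodeBase64(idStr)));
        w.readFields(in);
    }

    public static void main(String[] args) throws IOException {
        Text original = new Text("token-identifier");
        Text copy = new Text();
        decodeWritable(copy, encodeWritable(original));
        // Prints: token-identifier
        System.out.println(copy);
    }
}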

Example 42 with DataInputStream

Use of java.io.DataInputStream in project hadoop by apache.

From the class Fetcher, method copyMapOutput:

private TaskAttemptID[] copyMapOutput(MapHost host, DataInputStream input, Set<TaskAttemptID> remaining, boolean canRetry) throws IOException {
    MapOutput<K, V> mapOutput = null;
    TaskAttemptID mapId = null;
    long decompressedLength = -1;
    long compressedLength = -1;
    try {
        long startTime = Time.monotonicNow();
        int forReduce = -1;
        //Read the shuffle header
        try {
            ShuffleHeader header = new ShuffleHeader();
            header.readFields(input);
            mapId = TaskAttemptID.forName(header.mapId);
            compressedLength = header.compressedLength;
            decompressedLength = header.uncompressedLength;
            forReduce = header.forReduce;
        } catch (IllegalArgumentException e) {
            badIdErrs.increment(1);
            LOG.warn("Invalid map id ", e);
            //Don't know which one was bad, so consider all of them as bad
            return remaining.toArray(new TaskAttemptID[remaining.size()]);
        }
        InputStream is = input;
        is = CryptoUtils.wrapIfNecessary(jobConf, is, compressedLength);
        compressedLength -= CryptoUtils.cryptoPadding(jobConf);
        decompressedLength -= CryptoUtils.cryptoPadding(jobConf);
        // Do some basic sanity verification
        if (!verifySanity(compressedLength, decompressedLength, forReduce, remaining, mapId)) {
            return new TaskAttemptID[] { mapId };
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("header: " + mapId + ", len: " + compressedLength + ", decomp len: " + decompressedLength);
        }
        // Get the location for the map output - either in-memory or on-disk
        try {
            mapOutput = merger.reserve(mapId, decompressedLength, id);
        } catch (IOException ioe) {
            // kill this reduce attempt
            ioErrs.increment(1);
            scheduler.reportLocalError(ioe);
            return EMPTY_ATTEMPT_ID_ARRAY;
        }
        // Check if we can shuffle *now* ...
        if (mapOutput == null) {
            LOG.info("fetcher#" + id + " - MergeManager returned status WAIT ...");
            //Not an error but wait to process data.
            return EMPTY_ATTEMPT_ID_ARRAY;
        }
        // Decompression codecs may throw java.lang.InternalError; it is caught
        // below and rethrown as IOException to allow fetch failure logic to be processed
        try {
            // Go!
            LOG.info("fetcher#" + id + " about to shuffle output of map " + mapOutput.getMapId() + " decomp: " + decompressedLength + " len: " + compressedLength + " to " + mapOutput.getDescription());
            mapOutput.shuffle(host, is, compressedLength, decompressedLength, metrics, reporter);
        } catch (java.lang.InternalError | Exception e) {
            LOG.warn("Failed to shuffle for fetcher#" + id, e);
            throw new IOException(e);
        }
        // Inform the shuffle scheduler
        long endTime = Time.monotonicNow();
        // Reset retryStartTime since the map task made progress, in case it was retried before.
        retryStartTime = 0;
        scheduler.copySucceeded(mapId, host, compressedLength, startTime, endTime, mapOutput);
        // Note successful shuffle
        remaining.remove(mapId);
        metrics.successFetch();
        return null;
    } catch (IOException ioe) {
        if (mapOutput != null) {
            mapOutput.abort();
        }
        if (canRetry) {
            checkTimeoutOrRetry(host, ioe);
        }
        ioErrs.increment(1);
        if (mapId == null || mapOutput == null) {
            LOG.warn("fetcher#" + id + " failed to read map header" + mapId + " decomp: " + decompressedLength + ", " + compressedLength, ioe);
            if (mapId == null) {
                return remaining.toArray(new TaskAttemptID[remaining.size()]);
            } else {
                return new TaskAttemptID[] { mapId };
            }
        }
        LOG.warn("Failed to shuffle output of " + mapId + " from " + host.getHostName(), ioe);
        // Inform the shuffle-scheduler
        metrics.failedFetch();
        return new TaskAttemptID[] { mapId };
    }
}
Also used: TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), DataInputStream (java.io.DataInputStream), InputStream (java.io.InputStream), IOException (java.io.IOException), GeneralSecurityException (java.security.GeneralSecurityException), ConnectException (java.net.ConnectException), MalformedURLException (java.net.MalformedURLException)
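
The shuffle header above is Hadoop's ShuffleHeader, whose actual wire format is defined in the Hadoop source. As a self-contained illustration of the same DataInputStream header-reading pattern, the hypothetical Header below writes a few fields with DataOutputStream and reads them back; the field names mirror the example, but the fixed layout (writeUTF/writeLong/writeInt) is an assumption, not Hadoop's format.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class HeaderRoundTrip {

    // Hypothetical fixed-layout header, written and read back with Data*Stream.
    static class Header {
        String mapId;
        long compressedLength;
        long uncompressedLength;
        int forReduce;

        void write(DataOutputStream out) throws IOException {
            out.writeUTF(mapId);
            out.writeLong(compressedLength);
            out.writeLong(uncompressedLength);
            out.writeInt(forReduce);
        }

        void readFields(DataInputStream in) throws IOException {
            mapId = in.readUTF();
            compressedLength = in.readLong();
            uncompressedLength = in.readLong();
            forReduce = in.readInt();
        }
    }

    public static void main(String[] args) throws IOException {
        Header h = new Header();
        h.mapId = "attempt_1_0001_m_000000_0";
        h.compressedLength = 1024;
        h.uncompressedLength = 4096;
        h.forReduce = 3;

        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        h.write(new DataOutputStream(bos));

        Header copy = new Header();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));
        // Prints the map id and the compressed length that were written above
        System.out.println(copy.mapId + " " + copy.compressedLength);
    }
}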

Example 43 with DataInputStream

Use of java.io.DataInputStream in project hadoop by apache.

From the class Fetcher, method copyFromHost:

/**
   * The crux of the matter...
   * 
   * @param host {@link MapHost} from which we need to  
   *              shuffle available map-outputs.
   */
@VisibleForTesting
protected void copyFromHost(MapHost host) throws IOException {
    // reset retryStartTime for a new host
    retryStartTime = 0;
    // Get completed maps on 'host'
    List<TaskAttemptID> maps = scheduler.getMapsForHost(host);
    // Nothing to fetch from this host; this is common especially at the tail of large jobs
    if (maps.size() == 0) {
        return;
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Fetcher " + id + " going to fetch from " + host + " for: " + maps);
    }
    // List of maps to be fetched yet
    Set<TaskAttemptID> remaining = new HashSet<TaskAttemptID>(maps);
    // Construct the url and connect
    URL url = getMapOutputURL(host, maps);
    DataInputStream input = openShuffleUrl(host, remaining, url);
    if (input == null) {
        return;
    }
    try {
        // Loop through available map-outputs and fetch them
        // On any error, failedTasks is not null and we exit
        // after putting back the remaining maps to the 
        // yet_to_be_fetched list and marking the failed tasks.
        TaskAttemptID[] failedTasks = null;
        while (!remaining.isEmpty() && failedTasks == null) {
            try {
                failedTasks = copyMapOutput(host, input, remaining, fetchRetryEnabled);
            } catch (IOException e) {
                IOUtils.cleanup(LOG, input);
                //
                // Setup connection again if disconnected by NM
                connection.disconnect();
                // Get map output from remaining tasks only.
                url = getMapOutputURL(host, remaining);
                input = openShuffleUrl(host, remaining, url);
                if (input == null) {
                    return;
                }
            }
        }
        if (failedTasks != null && failedTasks.length > 0) {
            LOG.warn("copyMapOutput failed for tasks " + Arrays.toString(failedTasks));
            scheduler.hostFailed(host.getHostName());
            for (TaskAttemptID left : failedTasks) {
                scheduler.copyFailed(left, host, true, false);
            }
        }
        // Sanity check
        if (failedTasks == null && !remaining.isEmpty()) {
            throw new IOException("server didn't return all expected map outputs: " + remaining.size() + " left.");
        }
        input.close();
        input = null;
    } finally {
        if (input != null) {
            IOUtils.cleanup(LOG, input);
            input = null;
        }
        for (TaskAttemptID left : remaining) {
            scheduler.putBackKnownMapOutput(host, left);
        }
    }
}
Also used: TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), IOException (java.io.IOException), DataInputStream (java.io.DataInputStream), URL (java.net.URL), HashSet (java.util.HashSet), VisibleForTesting (com.google.common.annotations.VisibleForTesting)
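
openShuffleUrl is a Fetcher helper that is not shown in this example. A rough standalone sketch of the idea, with a placeholder URL and a hypothetical method name: open an HttpURLConnection, check the response code, and wrap the response body in a DataInputStream.

import java.io.DataInputStream;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;

public class ShuffleUrlSketch {

    // Open an HTTP connection and wrap its response body in a DataInputStream.
    static DataInputStream openUrl(URL url, int timeoutMs) throws IOException {
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setConnectTimeout(timeoutMs);
        conn.setReadTimeout(timeoutMs);
        if (conn.getResponseCode() != HttpURLConnection.HTTP_OK) {
            conn.disconnect();
            throw new IOException("unexpected response: " + conn.getResponseCode());
        }
        return new DataInputStream(conn.getInputStream());
    }

    public static void main(String[] args) throws IOException {
        // Placeholder URL; the real fetcher builds it from the MapHost and the map IDs,
        // so this call only succeeds if something is actually listening on that port.
        URL url = new URL("http://localhost:13562/mapOutput?job=job_1&reduce=0");
        try (DataInputStream in = openUrl(url, 30_000)) {
            System.out.println("first byte: " + in.read());
        }
    }
}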

Example 44 with DataInputStream

Use of java.io.DataInputStream in project hadoop by apache.

From the class StreamBackedIterator, method reset:

public void reset() {
    if (null != outfbuf) {
        inbuf = new ReplayableByteInputStream(outbuf.toByteArray());
        infbuf = new DataInputStream(inbuf);
        outfbuf = null;
    }
    inbuf.resetStream();
}
Also used: DataInputStream (java.io.DataInputStream)
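
The buffer-and-replay idea behind StreamBackedIterator can be sketched with plain JDK streams: write records through a DataOutputStream into a ByteArrayOutputStream, then wrap the bytes in a ByteArrayInputStream/DataInputStream and call reset() to replay them. This is an illustration of the pattern only, not Hadoop's ReplayableByteInputStream.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class ReplayableBufferSketch {

    public static void main(String[] args) throws IOException {
        // Write phase: buffer a few records in memory.
        ByteArrayOutputStream outbuf = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(outbuf);
        for (int i = 0; i < 3; i++) {
            out.writeInt(i);
            out.writeUTF("value-" + i);
        }
        out.flush();

        // Read phase: wrap the same bytes and replay them twice.
        ByteArrayInputStream inbuf = new ByteArrayInputStream(outbuf.toByteArray());
        DataInputStream in = new DataInputStream(inbuf);
        for (int pass = 0; pass < 2; pass++) {
            // ByteArrayInputStream.reset() returns to the start (or the last mark)
            inbuf.reset();
            while (in.available() > 0) {
                System.out.println(in.readInt() + " -> " + in.readUTF());
            }
        }
    }
}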

Example 45 with DataInputStream

Use of java.io.DataInputStream in project hadoop by apache.

From the class NNBench, method analyzeResults:

/**
   * Analyze the results
   * @throws IOException on error
   */
private int analyzeResults() throws IOException {
    final FileSystem fs = FileSystem.get(getConf());
    Path reduceDir = new Path(baseDir, OUTPUT_DIR_NAME);
    long totalTimeAL1 = 0l;
    long totalTimeAL2 = 0l;
    long totalTimeTPmS = 0l;
    long lateMaps = 0l;
    long numOfExceptions = 0l;
    long successfulFileOps = 0l;
    long mapStartTimeTPmS = 0l;
    long mapEndTimeTPmS = 0l;
    FileStatus[] fss = fs.listStatus(reduceDir);
    for (FileStatus status : fss) {
        Path reduceFile = status.getPath();
        try (DataInputStream in = new DataInputStream(fs.open(reduceFile));
            BufferedReader lines = new BufferedReader(new InputStreamReader(in))) {
            String line;
            while ((line = lines.readLine()) != null) {
                StringTokenizer tokens = new StringTokenizer(line, " \t\n\r\f%;");
                String attr = tokens.nextToken();
                if (attr.endsWith(":totalTimeAL1")) {
                    totalTimeAL1 = Long.parseLong(tokens.nextToken());
                } else if (attr.endsWith(":totalTimeAL2")) {
                    totalTimeAL2 = Long.parseLong(tokens.nextToken());
                } else if (attr.endsWith(":totalTimeTPmS")) {
                    totalTimeTPmS = Long.parseLong(tokens.nextToken());
                } else if (attr.endsWith(":latemaps")) {
                    lateMaps = Long.parseLong(tokens.nextToken());
                } else if (attr.endsWith(":numOfExceptions")) {
                    numOfExceptions = Long.parseLong(tokens.nextToken());
                } else if (attr.endsWith(":successfulFileOps")) {
                    successfulFileOps = Long.parseLong(tokens.nextToken());
                } else if (attr.endsWith(":mapStartTimeTPmS")) {
                    mapStartTimeTPmS = Long.parseLong(tokens.nextToken());
                } else if (attr.endsWith(":mapEndTimeTPmS")) {
                    mapEndTimeTPmS = Long.parseLong(tokens.nextToken());
                }
            }
        }
    }
    // Average latency is the average time to perform n operations,
    // n being the number of files
    double avgLatency1 = (double) totalTimeAL1 / successfulFileOps;
    double avgLatency2 = (double) totalTimeAL2 / successfulFileOps;
    // The time it takes for the longest running map is measured. Using that,
    // cluster transactions per second is calculated. It includes time to 
    // retry any of the failed operations
    double longestMapTimeTPmS = (double) (mapEndTimeTPmS - mapStartTimeTPmS);
    double totalTimeTPS = (longestMapTimeTPmS == 0) ? (1000 * successfulFileOps) : (double) (1000 * successfulFileOps) / longestMapTimeTPmS;
    // The time it takes to perform 'n' operations is calculated (in ms),
    // n being the number of files. Using that time, the average execution 
    // time is calculated. It includes time to retry any of the
    // failed operations
    double AverageExecutionTime = (totalTimeTPmS == 0) ? (double) successfulFileOps : (double) totalTimeTPmS / successfulFileOps;
    String resultTPSLine1 = null;
    String resultTPSLine2 = null;
    String resultALLine1 = null;
    String resultALLine2 = null;
    if (operation.equals(OP_CREATE_WRITE)) {
        // For create/write/close, it is treated as two transactions,
        // since a file create from a client perspective involves create and close
        resultTPSLine1 = "               TPS: Create/Write/Close: " + (int) (totalTimeTPS * 2);
        resultTPSLine2 = "Avg exec time (ms): Create/Write/Close: " + AverageExecutionTime;
        resultALLine1 = "            Avg Lat (ms): Create/Write: " + avgLatency1;
        resultALLine2 = "                   Avg Lat (ms): Close: " + avgLatency2;
    } else if (operation.equals(OP_OPEN_READ)) {
        resultTPSLine1 = "                        TPS: Open/Read: " + (int) totalTimeTPS;
        resultTPSLine2 = "         Avg Exec time (ms): Open/Read: " + AverageExecutionTime;
        resultALLine1 = "                    Avg Lat (ms): Open: " + avgLatency1;
        if (readFileAfterOpen) {
            resultALLine2 = "                  Avg Lat (ms): Read: " + avgLatency2;
        }
    } else if (operation.equals(OP_RENAME)) {
        resultTPSLine1 = "                           TPS: Rename: " + (int) totalTimeTPS;
        resultTPSLine2 = "            Avg Exec time (ms): Rename: " + AverageExecutionTime;
        resultALLine1 = "                  Avg Lat (ms): Rename: " + avgLatency1;
    } else if (operation.equals(OP_DELETE)) {
        resultTPSLine1 = "                           TPS: Delete: " + (int) totalTimeTPS;
        resultTPSLine2 = "            Avg Exec time (ms): Delete: " + AverageExecutionTime;
        resultALLine1 = "                  Avg Lat (ms): Delete: " + avgLatency1;
    }
    String[] resultLines = {
        "-------------- NNBench -------------- : ",
        "                               Version: " + NNBENCH_VERSION,
        "                           Date & time: " + sdf.format(new Date(System.currentTimeMillis())),
        "",
        "                        Test Operation: " + operation,
        "                            Start time: " + sdf.format(new Date(startTime)),
        "                           Maps to run: " + numberOfMaps,
        "                        Reduces to run: " + numberOfReduces,
        "                    Block Size (bytes): " + blockSize,
        "                        Bytes to write: " + bytesToWrite,
        "                    Bytes per checksum: " + bytesPerChecksum,
        "                       Number of files: " + numberOfFiles,
        "                    Replication factor: " + replicationFactorPerFile,
        "            Successful file operations: " + successfulFileOps,
        "",
        "        # maps that missed the barrier: " + lateMaps,
        "                          # exceptions: " + numOfExceptions,
        "",
        resultTPSLine1,
        resultTPSLine2,
        resultALLine1,
        resultALLine2,
        "",
        "                 RAW DATA: AL Total #1: " + totalTimeAL1,
        "                 RAW DATA: AL Total #2: " + totalTimeAL2,
        "              RAW DATA: TPS Total (ms): " + totalTimeTPmS,
        "       RAW DATA: Longest Map Time (ms): " + longestMapTimeTPmS,
        "                   RAW DATA: Late maps: " + lateMaps,
        "             RAW DATA: # of exceptions: " + numOfExceptions,
        "" };
    try (PrintStream res = new PrintStream(new FileOutputStream(new File(DEFAULT_RES_FILE_NAME), true))) {
        // Write to a file and also dump to log
        for (String resultLine : resultLines) {
            LOG.info(resultLine);
            res.println(resultLine);
        }
    }
    if (numOfExceptions >= MAX_OPERATION_EXCEPTIONS) {
        return -1;
    }
    return 0;
}
Also used: Path (org.apache.hadoop.fs.Path), PrintStream (java.io.PrintStream), FileStatus (org.apache.hadoop.fs.FileStatus), InputStreamReader (java.io.InputStreamReader), DataInputStream (java.io.DataInputStream), FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream), Date (java.util.Date), StringTokenizer (java.util.StringTokenizer), FileSystem (org.apache.hadoop.fs.FileSystem), FileOutputStream (java.io.FileOutputStream), BufferedReader (java.io.BufferedReader), SequenceFile (org.apache.hadoop.io.SequenceFile), File (java.io.File)
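
The parsing loop above needs a live FileSystem; a self-contained sketch of the same DataInputStream + BufferedReader + StringTokenizer pattern is shown below, reading from an in-memory byte array instead of fs.open(reduceFile). The attribute names and values are made up for illustration.

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.StringTokenizer;

public class ReduceOutputParseSketch {

    public static void main(String[] args) throws IOException {
        // In-memory stand-in for fs.open(reduceFile); the real code reads from HDFS.
        String data = "l:totalTimeAL1\t1200\n"
                    + "l:successfulFileOps\t400\n";
        long totalTimeAL1 = 0;
        long successfulFileOps = 0;

        try (DataInputStream in = new DataInputStream(
                 new ByteArrayInputStream(data.getBytes(StandardCharsets.UTF_8)));
             BufferedReader lines = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String line;
            while ((line = lines.readLine()) != null) {
                StringTokenizer tokens = new StringTokenizer(line, " \t\n\r\f%;");
                String attr = tokens.nextToken();
                if (attr.endsWith(":totalTimeAL1")) {
                    totalTimeAL1 = Long.parseLong(tokens.nextToken());
                } else if (attr.endsWith(":successfulFileOps")) {
                    successfulFileOps = Long.parseLong(tokens.nextToken());
                }
            }
        }
        // With the sample data this prints 3.0
        System.out.println("avg latency (ms): " + (double) totalTimeAL1 / successfulFileOps);
    }
}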

Aggregations

DataInputStream (java.io.DataInputStream): 2761
ByteArrayInputStream (java.io.ByteArrayInputStream): 1139
IOException (java.io.IOException): 1043
DataOutputStream (java.io.DataOutputStream): 606
FileInputStream (java.io.FileInputStream): 542
Test (org.junit.Test): 533
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 368
File (java.io.File): 274
BufferedInputStream (java.io.BufferedInputStream): 253
InputStream (java.io.InputStream): 245
ArrayList (java.util.ArrayList): 200
EOFException (java.io.EOFException): 154
DataInput (java.io.DataInput): 141
FileNotFoundException (java.io.FileNotFoundException): 131
ByteBuffer (java.nio.ByteBuffer): 119
FileOutputStream (java.io.FileOutputStream): 105
HashMap (java.util.HashMap): 101
BufferedReader (java.io.BufferedReader): 90
InputStreamReader (java.io.InputStreamReader): 89
Socket (java.net.Socket): 75