Search in sources :

Example 1 with ThrottledInputStream

Use of org.apache.gobblin.util.io.ThrottledInputStream in the project incubator-gobblin by Apache.

From the class FileAwareInputStreamDataWriter, method writeImpl.

/**
 * Write the contents of input stream into staging path.
 *
 * <p>
 *   WriteAt indicates the path where the contents of the input stream should be written. When this method is called,
 *   the path writeAt.getParent() will exist already, but the path writeAt will not exist. When this method returns,
 *   the path writeAt must exist. Any data written to any location other than writeAt or a descendant of writeAt
 *   will be ignored.
 * </p>
 *
 * <p>
 *   Three cases are handled, in order: a previously persisted file whose replication and block size match the
 *   target values is renamed to writeAt; a directory is created with mkdirs; otherwise the input stream is copied
 *   to a newly created file at writeAt (through the encryption provider when an encryption config is set, and
 *   through a throttled input stream obtained from the task broker).
 * </p>
 *
 * @param inputStream {@link InputStream} whose contents should be written to staging path. It is closed by this
 *                    method once the output file has been created; it is not touched on the recovery and
 *                    directory paths.
 * @param writeAt {@link Path} at which contents should be written.
 * @param copyableFile {@link org.apache.gobblin.data.management.copy.CopyEntity} that generated this copy operation.
 * @throws IOException if the persisted file cannot be renamed to writeAt, if the number of bytes copied does not
 *                     match the source file length while size checking is enabled, or on any underlying I/O failure.
 */
protected void writeImpl(InputStream inputStream, Path writeAt, CopyableFile copyableFile) throws IOException {
    final short replication = copyableFile.getPreserve().preserve(PreserveAttributes.Option.REPLICATION) ? copyableFile.getOrigin().getReplication() : this.fs.getDefaultReplication(writeAt);
    final long blockSize = copyableFile.getPreserve().preserve(PreserveAttributes.Option.BLOCK_SIZE) ? copyableFile.getOrigin().getBlockSize() : this.fs.getDefaultBlockSize(writeAt);
    // Only a persisted file created with the same replication and block size can be reused as-is.
    Predicate<FileStatus> fileStatusAttributesFilter = new Predicate<FileStatus>() {

        @Override
        public boolean apply(FileStatus input) {
            return input.getReplication() == replication && input.getBlockSize() == blockSize;
        }
    };
    Optional<FileStatus> persistedFile = this.recoveryHelper.findPersistedFile(this.state, copyableFile, fileStatusAttributesFilter);
    if (persistedFile.isPresent()) {
        Path persistedPath = persistedFile.get().getPath();
        log.info("Recovering persisted file {} to {}.", persistedPath, writeAt);
        // rename reports failure through its return value; surface it instead of returning with writeAt missing.
        if (!this.fs.rename(persistedPath, writeAt)) {
            throw new IOException(String.format("Failed to recover persisted file %s to %s.", persistedPath, writeAt));
        }
        return;
    }
    // Copy empty directories
    if (copyableFile.getFileStatus().isDirectory()) {
        this.fs.mkdirs(writeAt);
        return;
    }
    OutputStream os = this.fs.create(writeAt, true, this.fs.getConf().getInt("io.file.buffer.size", 4096), replication, blockSize);
    try {
        // Wrapping happens inside the guarded region so the raw stream is still closed if the wrap fails.
        if (encryptionConfig != null) {
            os = EncryptionFactory.buildStreamCryptoProvider(encryptionConfig).encodeOutputStream(os);
        }
        try {
            FileSystem defaultFS = FileSystem.get(new Configuration());
            StreamThrottler<GobblinScopeTypes> throttler = this.taskBroker.getSharedResource(new StreamThrottler.Factory<GobblinScopeTypes>(), new EmptyKey());
            ThrottledInputStream throttledInputStream = throttler.throttleInputStream().inputStream(inputStream).sourceURI(copyableFile.getOrigin().getPath().makeQualified(defaultFS.getUri(), defaultFS.getWorkingDirectory()).toUri()).targetURI(this.fs.makeQualified(writeAt).toUri()).build();
            StreamCopier copier = new StreamCopier(throttledInputStream, os).withBufferSize(this.bufferSize);
            log.info("File {}: Starting copy", copyableFile.getOrigin().getPath());
            if (isInstrumentationEnabled()) {
                copier.withCopySpeedMeter(this.copySpeedMeter);
            }
            long numBytes = copier.copy();
            long fileSize = copyableFile.getFileStatus().getLen();
            if (this.checkFileSize && numBytes != fileSize) {
                throw new IOException(String.format("Number of bytes copied doesn't match filesize for file %s.", copyableFile.getOrigin().getPath()));
            }
            this.bytesWritten.addAndGet(numBytes);
            if (isInstrumentationEnabled()) {
                log.info("File {}: copied {} bytes, average rate: {} B/s", copyableFile.getOrigin().getPath(), this.copySpeedMeter.getCount(), this.copySpeedMeter.getMeanRate());
            } else {
                log.info("File {} copied.", copyableFile.getOrigin().getPath());
            }
        } catch (NotConfiguredException nce) {
            // NOTE(review): this path only logs; if the exception is raised before copier.copy() runs, writeAt is
            // left as created with no data copied. Confirm that is the intended fallback.
            log.warn("Broker error. Some features of stream copier may not be available.", nce);
        }
    } finally {
        // Close the input stream even when closing the output stream throws.
        try {
            os.close();
        } finally {
            inputStream.close();
        }
    }
}
Also used : NotConfiguredException(org.apache.gobblin.broker.iface.NotConfiguredException) FileStatus(org.apache.hadoop.fs.FileStatus) EmptyKey(org.apache.gobblin.broker.EmptyKey) Configuration(org.apache.hadoop.conf.Configuration) CopyConfiguration(org.apache.gobblin.data.management.copy.CopyConfiguration) OutputStream(java.io.OutputStream) IOException(java.io.IOException) Predicate(com.google.common.base.Predicate) GobblinScopeTypes(org.apache.gobblin.broker.gobblin_scopes.GobblinScopeTypes) FileSystem(org.apache.hadoop.fs.FileSystem) ThrottledInputStream(org.apache.gobblin.util.io.ThrottledInputStream) StreamThrottler(org.apache.gobblin.util.io.StreamThrottler) StreamCopier(org.apache.gobblin.util.io.StreamCopier)

Aggregations

Predicate (com.google.common.base.Predicate)1 IOException (java.io.IOException)1 OutputStream (java.io.OutputStream)1 EmptyKey (org.apache.gobblin.broker.EmptyKey)1 GobblinScopeTypes (org.apache.gobblin.broker.gobblin_scopes.GobblinScopeTypes)1 NotConfiguredException (org.apache.gobblin.broker.iface.NotConfiguredException)1 CopyConfiguration (org.apache.gobblin.data.management.copy.CopyConfiguration)1 StreamCopier (org.apache.gobblin.util.io.StreamCopier)1 StreamThrottler (org.apache.gobblin.util.io.StreamThrottler)1 ThrottledInputStream (org.apache.gobblin.util.io.ThrottledInputStream)1 Configuration (org.apache.hadoop.conf.Configuration)1 FileStatus (org.apache.hadoop.fs.FileStatus)1 FileSystem (org.apache.hadoop.fs.FileSystem)1