Use of org.apache.gobblin.util.io.ThrottledInputStream in project incubator-gobblin by Apache.
The class FileAwareInputStreamDataWriter, method writeImpl.
/**
 * Write the contents of the input stream into the staging path.
 *
 * <p>
 * {@code writeAt} indicates the path where the contents of the input stream should be written. When this method is
 * called, the path writeAt.getParent() will exist already, but the path writeAt will not exist. When this method
 * returns, the path writeAt must exist. Any data written to any location other than writeAt or a descendant of
 * writeAt will be ignored.
 * </p>
 *
 * <p>
 * The method handles three cases, in this order:
 * </p>
 * <ol>
 * <li>If the recovery helper returns a persisted file whose replication and block size match the target values, that
 * file is renamed to {@code writeAt} and the input stream is not read.</li>
 * <li>If the copyable file's status is a directory, a directory is created at {@code writeAt} and the input stream is
 * not read.</li>
 * <li>Otherwise a new file is created at {@code writeAt} (optionally wrapped for encryption) and the input stream is
 * copied into it through a throttled stream. Both the output stream and {@code inputStream} are closed before this
 * method returns from that path.</li>
 * </ol>
 *
 * @param inputStream {@link InputStream} whose contents should be written to staging path.
 * @param writeAt {@link Path} at which contents should be written.
 * @param copyableFile {@link org.apache.gobblin.data.management.copy.CopyEntity} that generated this copy operation.
 * @throws IOException if the persisted file cannot be renamed to {@code writeAt}, if the file size check is enabled
 *         and the number of bytes copied differs from the length in the copyable file's status, or if an underlying
 *         file system or stream operation fails.
 */
protected void writeImpl(InputStream inputStream, Path writeAt, CopyableFile copyableFile) throws IOException {
  // Target attributes: taken from the origin file when preservation is requested, file system defaults otherwise.
  final short replication = copyableFile.getPreserve().preserve(PreserveAttributes.Option.REPLICATION)
      ? copyableFile.getOrigin().getReplication() : this.fs.getDefaultReplication(writeAt);
  final long blockSize = copyableFile.getPreserve().preserve(PreserveAttributes.Option.BLOCK_SIZE)
      ? copyableFile.getOrigin().getBlockSize() : this.fs.getDefaultBlockSize(writeAt);

  // Only accept a persisted file whose attributes already match what a fresh copy would be created with.
  Predicate<FileStatus> fileStatusAttributesFilter = new Predicate<FileStatus>() {
    @Override
    public boolean apply(FileStatus input) {
      return input.getReplication() == replication && input.getBlockSize() == blockSize;
    }
  };
  Optional<FileStatus> persistedFile =
      this.recoveryHelper.findPersistedFile(this.state, copyableFile, fileStatusAttributesFilter);

  if (persistedFile.isPresent()) {
    Path persistedPath = persistedFile.get().getPath();
    log.info(String.format("Recovering persisted file %s to %s.", persistedPath, writeAt));
    // rename reports failure through its return value; ignoring it would let this method return
    // without writeAt existing, which violates the documented contract.
    if (!this.fs.rename(persistedPath, writeAt)) {
      throw new IOException(String.format("Failed to recover persisted file %s to %s.", persistedPath, writeAt));
    }
    return;
  }

  // Copy empty directories
  if (copyableFile.getFileStatus().isDirectory()) {
    this.fs.mkdirs(writeAt);
    return;
  }

  OutputStream os = this.fs.create(writeAt, true, this.fs.getConf().getInt("io.file.buffer.size", 4096),
      replication, blockSize);
  try {
    // Wrapping happens inside the try so that the freshly created stream is closed even if wrapping fails.
    if (encryptionConfig != null) {
      os = EncryptionFactory.buildStreamCryptoProvider(encryptionConfig).encodeOutputStream(os);
    }

    // The source URI is qualified against the default file system; the target URI against this writer's file system.
    FileSystem defaultFS = FileSystem.get(new Configuration());
    StreamThrottler<GobblinScopeTypes> throttler =
        this.taskBroker.getSharedResource(new StreamThrottler.Factory<GobblinScopeTypes>(), new EmptyKey());
    ThrottledInputStream throttledInputStream = throttler.throttleInputStream()
        .inputStream(inputStream)
        .sourceURI(copyableFile.getOrigin().getPath()
            .makeQualified(defaultFS.getUri(), defaultFS.getWorkingDirectory()).toUri())
        .targetURI(this.fs.makeQualified(writeAt).toUri())
        .build();
    StreamCopier copier = new StreamCopier(throttledInputStream, os).withBufferSize(this.bufferSize);

    log.info("File {}: Starting copy", copyableFile.getOrigin().getPath());

    if (isInstrumentationEnabled()) {
      copier.withCopySpeedMeter(this.copySpeedMeter);
    }

    long numBytes = copier.copy();
    long fileSize = copyableFile.getFileStatus().getLen();
    if (this.checkFileSize && numBytes != fileSize) {
      throw new IOException(String.format("Number of bytes copied doesn't match filesize for file %s.",
          copyableFile.getOrigin().getPath()));
    }
    this.bytesWritten.addAndGet(numBytes);

    if (isInstrumentationEnabled()) {
      log.info("File {}: copied {} bytes, average rate: {} B/s", copyableFile.getOrigin().getPath(),
          this.copySpeedMeter.getCount(), this.copySpeedMeter.getMeanRate());
    } else {
      log.info("File {} copied.", copyableFile.getOrigin().getPath());
    }
  } catch (NotConfiguredException nce) {
    // NOTE(review): if this is thrown before copier.copy() runs, no bytes are copied and the size check is
    // skipped, yet the method returns normally with the file created above left at writeAt. Confirm that this
    // best-effort behavior is intended rather than failing the copy.
    log.warn("Broker error. Some features of stream copier may not be available.", nce);
  } finally {
    // Close the input stream even when closing the output stream throws.
    try {
      os.close();
    } finally {
      inputStream.close();
    }
  }
}
Aggregations