use of org.apache.gobblin.util.io.StreamCopier in project incubator-gobblin by apache.
the class FileAwareInputStreamDataWriter method writeImpl.
/**
 * Write the contents of input stream into staging path.
 *
 * <p>
 * WriteAt indicates the path where the contents of the input stream should be written. When this method is called,
 * the path writeAt.getParent() will exist already, but the path writeAt will not exist. When this method is returned,
 * the path writeAt must exist. Any data written to any location other than writeAt or a descendant of writeAt
 * will be ignored.
 * </p>
 *
 * <p>
 * Three cases are handled, in this order:
 * </p>
 * <ol>
 * <li>A previously persisted file with matching replication and block size is found by the recovery helper: it is
 * renamed to writeAt and nothing is read from the input stream.</li>
 * <li>The {@link CopyableFile} describes a directory: writeAt is created with mkdirs.</li>
 * <li>Otherwise the input stream is copied (optionally encrypted and throttled) into a new file at writeAt, and both
 * the output stream and the input stream are closed afterwards.</li>
 * </ol>
 *
 * @param inputStream {@link InputStream} whose contents should be written to staging path. Closed by this method
 *        once the copy branch has created the output file; not closed in the recovery and directory branches.
 * @param writeAt {@link Path} at which contents should be written.
 * @param copyableFile {@link org.apache.gobblin.data.management.copy.CopyEntity} that generated this copy operation.
 * @throws IOException if the persisted file cannot be renamed to writeAt, if the copy fails, or if file size
 *         checking is enabled and the number of bytes copied differs from the length recorded in copyableFile.
 */
protected void writeImpl(InputStream inputStream, Path writeAt, CopyableFile copyableFile) throws IOException {
  // Honor the origin's replication / block size only when the preserve options ask for it.
  final short replication = copyableFile.getPreserve().preserve(PreserveAttributes.Option.REPLICATION)
      ? copyableFile.getOrigin().getReplication() : this.fs.getDefaultReplication(writeAt);
  final long blockSize = copyableFile.getPreserve().preserve(PreserveAttributes.Option.BLOCK_SIZE)
      ? copyableFile.getOrigin().getBlockSize() : this.fs.getDefaultBlockSize(writeAt);

  // A persisted file is only reusable if it was written with the same replication and block size.
  Predicate<FileStatus> fileStatusAttributesFilter = new Predicate<FileStatus>() {
    @Override
    public boolean apply(FileStatus input) {
      return input.getReplication() == replication && input.getBlockSize() == blockSize;
    }
  };

  Optional<FileStatus> persistedFile =
      this.recoveryHelper.findPersistedFile(this.state, copyableFile, fileStatusAttributesFilter);
  if (persistedFile.isPresent()) {
    Path persistedPath = persistedFile.get().getPath();
    log.info(String.format("Recovering persisted file %s to %s.", persistedPath, writeAt));
    // rename() reports failure through its return value; without this check a failed recovery would return
    // normally even though writeAt does not exist.
    if (!this.fs.rename(persistedPath, writeAt)) {
      throw new IOException(String.format("Failed to recover persisted file %s to %s.", persistedPath, writeAt));
    }
    return;
  }

  // Copy empty directories
  if (copyableFile.getFileStatus().isDirectory()) {
    this.fs.mkdirs(writeAt);
    return;
  }

  OutputStream os = this.fs.create(writeAt, true, this.fs.getConf().getInt("io.file.buffer.size", 4096),
      replication, blockSize);
  try {
    // Wrapping happens inside the try so the raw stream is still closed if building the crypto provider fails.
    if (encryptionConfig != null) {
      os = EncryptionFactory.buildStreamCryptoProvider(encryptionConfig).encodeOutputStream(os);
    }

    // Throttling is optional: when the broker cannot supply a throttler, copy from the raw stream instead of
    // skipping the copy (which would leave an empty file at writeAt).
    InputStream copySource = inputStream;
    try {
      FileSystem defaultFS = FileSystem.get(new Configuration());
      StreamThrottler<GobblinScopeTypes> throttler =
          this.taskBroker.getSharedResource(new StreamThrottler.Factory<GobblinScopeTypes>(), new EmptyKey());
      copySource = throttler.throttleInputStream()
          .inputStream(inputStream)
          .sourceURI(copyableFile.getOrigin().getPath()
              .makeQualified(defaultFS.getUri(), defaultFS.getWorkingDirectory()).toUri())
          .targetURI(this.fs.makeQualified(writeAt).toUri())
          .build();
    } catch (NotConfiguredException nce) {
      log.warn("Broker error. Some features of stream copier may not be available.", nce);
    }

    StreamCopier copier = new StreamCopier(copySource, os).withBufferSize(this.bufferSize);
    log.info("File {}: Starting copy", copyableFile.getOrigin().getPath());
    if (isInstrumentationEnabled()) {
      copier.withCopySpeedMeter(this.copySpeedMeter);
    }

    long numBytes = copier.copy();
    long fileSize = copyableFile.getFileStatus().getLen();
    if (this.checkFileSize && numBytes != fileSize) {
      throw new IOException(String.format("Number of bytes copied doesn't match filesize for file %s.",
          copyableFile.getOrigin().getPath()));
    }
    this.bytesWritten.addAndGet(numBytes);

    if (isInstrumentationEnabled()) {
      log.info("File {}: copied {} bytes, average rate: {} B/s", copyableFile.getOrigin().getPath(),
          this.copySpeedMeter.getCount(), this.copySpeedMeter.getMeanRate());
    } else {
      log.info("File {} copied.", copyableFile.getOrigin().getPath());
    }
  } finally {
    // Nested so the input stream is closed even when closing the output stream throws.
    try {
      os.close();
    } finally {
      inputStream.close();
    }
  }
}
use of org.apache.gobblin.util.io.StreamCopier in project incubator-gobblin by apache.
the class TarArchiveInputStreamDataWriter method writeImpl.
/**
 * Untars the passed in {@link InputStream} under the parent directory of writeAt. The first {@link TarArchiveEntry}
 * in the stream is treated as the root directory of the archive; in every following entry path the first occurrence
 * of that root name is replaced with the name of writeAt, so the archive contents end up at writeAt.
 *
 * <p>
 * Directory entries are created with mkdirs when they do not exist yet; every other entry is written as a file
 * (overwriting an existing one) and its size is added to the bytes-written counter. The tar stream, its channel
 * and the underlying input stream are closed before this method returns.
 * </p>
 *
 * <p>
 * NOTE(review): entry names are not validated against ".." path segments before being resolved under
 * writeAt.getParent() - confirm archives are trusted or add a containment check.
 * </p>
 *
 * @param inputStream tar-formatted stream to unpack.
 * @param writeAt {@link Path} whose name replaces the archive's root directory name.
 * @param copyableFile {@link CopyableFile} that generated this copy operation; used here for log messages.
 * @throws IOException if the archive contains no entries, or if reading the archive or writing an entry fails.
 * @see org.apache.gobblin.data.management.copy.writer.FileAwareInputStreamDataWriter#write(org.apache.gobblin.data.management.copy.FileAwareInputStream)
 */
@Override
public void writeImpl(InputStream inputStream, Path writeAt, CopyableFile copyableFile) throws IOException {
  this.closer.register(inputStream);
  TarArchiveInputStream tarIn = new TarArchiveInputStream(inputStream);
  final ReadableByteChannel inputChannel = Channels.newChannel(tarIn);
  try {
    // flush the first entry in the tar, which is just the root directory
    TarArchiveEntry tarEntry = tarIn.getNextTarEntry();
    if (tarEntry == null) {
      // getNextTarEntry() returns null at end of archive; fail clearly instead of with a NullPointerException.
      throw new IOException(String.format("Tar archive for file %s is empty, expected a root directory entry.",
          copyableFile.getOrigin().getPath()));
    }
    String tarEntryRootName = StringUtils.remove(tarEntry.getName(), Path.SEPARATOR);

    log.info("Unarchiving at " + writeAt);

    while ((tarEntry = tarIn.getNextTarEntry()) != null) {
      // the API tarEntry.getName() is misleading, it is actually the path of the tarEntry in the tar file.
      // Only the first occurrence of the root name is rewritten so that later path components which happen to
      // contain the root name are left intact.
      String newTarEntryPath = StringUtils.replaceOnce(tarEntry.getName(), tarEntryRootName, writeAt.getName());
      Path tarEntryStagingPath = new Path(writeAt.getParent(), newTarEntryPath);

      if (tarEntry.isDirectory()) {
        if (!this.fs.exists(tarEntryStagingPath)) {
          this.fs.mkdirs(tarEntryStagingPath);
        }
        continue;
      }

      FSDataOutputStream out = this.fs.create(tarEntryStagingPath, true);
      final WritableByteChannel outputChannel = Channels.newChannel(out);
      try {
        // The tar stream reports end-of-stream at the end of the current entry, so each copy stops at the entry
        // boundary. A new copier is created per entry.
        StreamCopier copier = new StreamCopier(inputChannel, outputChannel);
        if (isInstrumentationEnabled()) {
          copier.withCopySpeedMeter(this.copySpeedMeter);
        }
        this.bytesWritten.addAndGet(copier.copy());

        if (isInstrumentationEnabled()) {
          log.info("File {}: copied {} bytes, average rate: {} B/s", copyableFile.getOrigin().getPath(),
              this.copySpeedMeter.getCount(), this.copySpeedMeter.getMeanRate());
        } else {
          log.info("File {} copied.", copyableFile.getOrigin().getPath());
        }
      } finally {
        // Nested so the channel is still closed when closing the stream throws.
        try {
          out.close();
        } finally {
          outputChannel.close();
        }
      }
    }
  } finally {
    // Nested so that a failure in one close() does not skip the remaining ones.
    try {
      tarIn.close();
    } finally {
      try {
        inputChannel.close();
      } finally {
        inputStream.close();
      }
    }
  }
}
Aggregations