use of org.apache.gobblin.data.management.copy.OwnerAndPermission in project incubator-gobblin by apache.
the class FileAwareInputStreamDataWriter method commit.
/**
 * Moves the file from task staging to task output. Each task has its own staging directory but all the tasks share
 * the same task output directory.
 *
 * <p>Does nothing when no copyable file has been processed by this writer. Otherwise file permissions are applied,
 * missing parent directories of the output path are created, and the staged file is renamed to the output path.
 * If any of these steps fails with an {@link IOException}, the staged file is handed to the recovery helper before
 * the exception is rethrown. The staging directory is deleted afterwards whether or not the commit succeeded; a
 * failure of that cleanup is only logged.
 *
 * {@inheritDoc}
 *
 * @throws IOException if permissions cannot be set, directories cannot be created, or the rename fails
 * @see org.apache.gobblin.writer.DataWriter#commit()
 */
@Override
public void commit() throws IOException {
  if (!this.actualProcessedCopyableFile.isPresent()) {
    return;
  }
  CopyableFile copyableFile = this.actualProcessedCopyableFile.get();
  Path stagingFilePath = getStagingFilePath(copyableFile);
  Path outputFilePath = getOutputFilePath(copyableFile, this.outputDir,
      copyableFile.getDatasetAndPartition(this.copyableDatasetMetadata));
  log.info(String.format("Committing data from %s to %s", stagingFilePath, outputFilePath));
  try {
    setFilePermissions(copyableFile);
    // A file without recorded ancestor permissions is treated as having none to apply.
    Iterator<OwnerAndPermission> ancestorOwnerAndPermissionIt =
        copyableFile.getAncestorsOwnerAndPermission() == null
            ? Iterators.<OwnerAndPermission>emptyIterator()
            : copyableFile.getAncestorsOwnerAndPermission().iterator();
    ensureDirectoryExists(this.fs, outputFilePath.getParent(), ancestorOwnerAndPermissionIt);
    if (!this.fs.rename(stagingFilePath, outputFilePath)) {
      // rename signals failure through its return value, e.g. when the target already exists
      throw new IOException(String.format("Could not commit file %s.", outputFilePath));
    }
  } catch (IOException ioe) {
    // persist file so the staged data survives the staging directory cleanup below
    this.recoveryHelper.persistFile(this.state, copyableFile, stagingFilePath);
    throw ioe;
  } finally {
    try {
      this.fs.delete(this.stagingDir, true);
    } catch (IOException ioe) {
      // Best-effort cleanup: keep the commit outcome, but record why the delete failed.
      log.warn("Failed to delete staging path at " + this.stagingDir, ioe);
    }
  }
}
use of org.apache.gobblin.data.management.copy.OwnerAndPermission in project incubator-gobblin by apache.
the class FileAwareInputStreamDataWriter method ensureDirectoryExists.
/**
 * Creates {@code path} and any missing ancestors, applying owner, group and permission to each directory that this
 * call actually creates.
 *
 * <p>One entry is consumed from {@code ownerAndPermissionIterator} per missing directory level, starting with
 * {@code path} itself and then moving up through its parents. Directories that already exist are left untouched.
 * Once the iterator is exhausted, the remaining missing directories are created with a plain
 * {@code fs.mkdirs(path)} and no explicit owner or permission.
 *
 * @param fs file system on which the directories are created
 * @param path directory that must exist when this method returns
 * @param ownerAndPermissionIterator owner/permission entries, ordered from {@code path} upwards
 * @throws IOException if the file system fails to check, create or update a directory
 */
private void ensureDirectoryExists(FileSystem fs, Path path, Iterator<OwnerAndPermission> ownerAndPermissionIterator) throws IOException {
  if (fs.exists(path)) {
    return;
  }
  if (ownerAndPermissionIterator.hasNext()) {
    // Take this level's entry before recursing so the parent receives the next one.
    OwnerAndPermission ownerAndPermission = ownerAndPermissionIterator.next();
    if (path.getParent() != null) {
      ensureDirectoryExists(fs, path.getParent(), ownerAndPermissionIterator);
    }
    if (!fs.mkdirs(path)) {
      // fs.mkdirs returns false if path already existed. Do not overwrite permissions
      return;
    }
    if (ownerAndPermission.getFsPermission() != null) {
      // The logger substitutes "{}" placeholders; "%s" would be printed literally.
      log.debug("Applying permissions {} to path {}.", ownerAndPermission.getFsPermission(), path);
      fs.setPermission(path, addExecutePermissionToOwner(ownerAndPermission.getFsPermission()));
    }
    String group = ownerAndPermission.getGroup();
    String owner = ownerAndPermission.getOwner();
    if (group != null || owner != null) {
      log.debug("Applying owner {} and group {} to path {}.", owner, group, path);
      fs.setOwner(path, owner, group);
    }
  } else {
    fs.mkdirs(path);
  }
}
use of org.apache.gobblin.data.management.copy.OwnerAndPermission in project incubator-gobblin by apache.
the class FileAwareInputStreamDataWriterTest method testWrite.
/**
 * Writes a small in-memory stream through {@link FileAwareInputStreamDataWriter}, commits it, and verifies that
 * the file found under the writer output directory contains exactly the written string.
 */
@Test
public void testWrite() throws Exception {
  String streamString = "testContents";
  FileStatus status = fs.getFileStatus(testTempPath);
  OwnerAndPermission ownerAndPermission = new OwnerAndPermission(status.getOwner(), status.getGroup(),
      new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
  CopyableFile cf = CopyableFileUtils.getTestCopyableFile(ownerAndPermission);
  CopyableDatasetMetadata metadata = new CopyableDatasetMetadata(new TestCopyableDataset(new Path("/source")));

  WorkUnitState state = TestUtils.createTestWorkUnitState();
  state.setProp(ConfigurationKeys.WRITER_STAGING_DIR, new Path(testTempPath, "staging").toString());
  state.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, new Path(testTempPath, "output").toString());
  state.setProp(ConfigurationKeys.WRITER_FILE_PATH, RandomStringUtils.randomAlphabetic(5));
  CopySource.serializeCopyEntity(state, cf);
  CopySource.serializeCopyableDataset(state, metadata);

  FileAwareInputStreamDataWriter dataWriter = new FileAwareInputStreamDataWriter(state, 1, 0);
  FileAwareInputStream fileAwareInputStream =
      new FileAwareInputStream(cf, StreamUtils.convertStream(IOUtils.toInputStream(streamString)));
  dataWriter.write(fileAwareInputStream);
  dataWriter.commit();

  Path writtenFilePath = new Path(
      new Path(state.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR), cf.getDatasetAndPartition(metadata).identifier()),
      cf.getDestination());
  // Close the verification stream so the test does not leak a file handle.
  try (FileInputStream writtenStream = new FileInputStream(writtenFilePath.toString())) {
    Assert.assertEquals(IOUtils.toString(writtenStream), streamString);
  }
}
use of org.apache.gobblin.data.management.copy.OwnerAndPermission in project incubator-gobblin by apache.
the class TarArchiveInputStreamDataWriterTest method getCompressedInputStream.
/**
 * Finds the compressed test file {@code filePath} on the classpath and reads it as a {@link FileAwareInputStream}.
 *
 * <p>The file is opened from the local file system, wrapped together with a test {@link CopyableFile} whose
 * destination is {@code newFileName} under the test temp path, and passed through {@link UnGzipConverter}.
 *
 * @param filePath classpath-relative location of the compressed test resource
 * @param newFileName file name used for the destination of the generated copyable file
 * @return the first record produced by the converter, or {@code null} if it produced none
 * @throws IllegalArgumentException if {@code filePath} cannot be found on the classpath
 * @throws Exception if the resource cannot be opened or converted
 */
private FileAwareInputStream getCompressedInputStream(final String filePath, final String newFileName) throws Exception {
  UnGzipConverter converter = new UnGzipConverter();
  FileSystem fs = FileSystem.getLocal(new Configuration());
  // getResource returns null for a missing resource; fail with a clear message instead of an NPE.
  java.net.URL resource = getClass().getClassLoader().getResource(filePath);
  if (resource == null) {
    throw new IllegalArgumentException("Test resource not found on classpath: " + filePath);
  }
  String fullPath = resource.getFile();
  FileStatus status = fs.getFileStatus(testTempPath);
  OwnerAndPermission ownerAndPermission = new OwnerAndPermission(status.getOwner(), status.getGroup(),
      new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
  CopyableFile cf = CopyableFileUtils.getTestCopyableFile(filePath, new Path(testTempPath, newFileName).toString(),
      newFileName, ownerAndPermission);
  FileAwareInputStream fileAwareInputStream = new FileAwareInputStream(cf, fs.open(new Path(fullPath)));
  Iterable<FileAwareInputStream> iterable =
      converter.convertRecord("outputSchema", fileAwareInputStream, new WorkUnitState());
  return Iterables.getFirst(iterable, null);
}
use of org.apache.gobblin.data.management.copy.OwnerAndPermission in project incubator-gobblin by apache.
the class FileAwareInputStreamDataWriterTest method testCommit.
/**
 * Commits a staged file into an output tree in which only part of the destination path already exists, then
 * checks the owner, group and permissions of the committed file, the permissions of its newly created parent,
 * that the pre-existing directory kept its permissions, and that the staging directory is gone.
 */
@Test
public void testCommit() throws IOException {
  final String existingToken = "destination";
  final String additionalTokens = "path";
  final String fileName = "file";

  // Destination in absolute form and the same path without the leading separator
  Path absoluteParent = new Path(new Path("/", existingToken), additionalTokens);
  Path destination = new Path(absoluteParent, fileName);
  Path relativeParent = new Path(existingToken, additionalTokens);
  Path relativeDestination = new Path(relativeParent, fileName);

  // Scratch area holding the origin file plus the writer staging and output directories
  File tempDir = Files.createTempDir();
  tempDir.deleteOnExit();
  Path tempRoot = new Path(tempDir.getAbsolutePath());

  Path originFile = new Path(tempRoot, fileName);
  this.fs.createNewFile(originFile);

  Path stagingDir = new Path(tempRoot, "staging");
  this.fs.mkdirs(stagingDir);

  Path outputDir = new Path(tempRoot, "output");
  this.fs.mkdirs(outputDir);

  // Copyable file describing origin -> destination with explicit owner and permissions
  FileStatus originStatus = this.fs.getFileStatus(originFile);
  FsPermission filePermission = new FsPermission(FsAction.READ_WRITE, FsAction.READ_WRITE, FsAction.READ_WRITE);
  FsPermission expectedDirPermission = new FsPermission(FsAction.ALL, FsAction.READ_WRITE, FsAction.READ_WRITE);
  OwnerAndPermission ownerAndPermission =
      new OwnerAndPermission(originStatus.getOwner(), originStatus.getGroup(), filePermission);

  List<OwnerAndPermission> ancestors = Lists.newArrayList();
  for (int level = 0; level < 4; level++) {
    ancestors.add(ownerAndPermission);
  }

  Properties properties = new Properties();
  properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/publisher");
  CopyConfiguration copyConfiguration =
      CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties)
          .publishDir(new Path("/target"))
          .preserve(PreserveAttributes.fromMnemonicString(""))
          .build();
  CopyableFile cf = CopyableFile.fromOriginAndDestination(this.fs, originStatus, destination, copyConfiguration)
      .destinationOwnerAndPermission(ownerAndPermission)
      .ancestorsOwnerAndPermission(ancestors)
      .build();

  // Work unit state that points the writer at the staging and output directories
  WorkUnitState state = TestUtils.createTestWorkUnitState();
  state.setProp(ConfigurationKeys.WRITER_STAGING_DIR, stagingDir.toUri().getPath());
  state.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, outputDir.toUri().getPath());
  state.setProp(ConfigurationKeys.WRITER_FILE_PATH, RandomStringUtils.randomAlphabetic(5));
  CopyableDatasetMetadata metadata = new CopyableDatasetMetadata(new TestCopyableDataset(new Path("/source")));
  CopySource.serializeCopyEntity(state, cf);
  CopySource.serializeCopyableDataset(state, metadata);

  FileAwareInputStreamDataWriter writer = new FileAwareInputStreamDataWriter(state, 1, 0);

  // Stand in for writer.write by creating the staged file directly
  Path stagedFile = writer.getStagingFilePath(cf);
  this.fs.mkdirs(stagedFile.getParent());
  this.fs.createNewFile(stagedFile);

  // Pre-create the first destination directory inside the writer output
  Path outputRoot =
      FileAwareInputStreamDataWriter.getPartitionOutputRoot(outputDir, cf.getDatasetAndPartition(metadata));
  Path existingOutputPath = new Path(outputRoot, existingToken);
  this.fs.mkdirs(existingOutputPath);
  FsPermission permissionBeforeCommit = this.fs.getFileStatus(existingOutputPath).getPermission();

  // Sanity check: the pre-existing directory is present and empty
  Assert.assertTrue(this.fs.exists(existingOutputPath));
  Assert.assertEquals(this.fs.listStatus(existingOutputPath).length, 0);

  writer.actualProcessedCopyableFile = Optional.of(cf);
  writer.commit();

  // The committed file exists with the requested owner, group and permissions
  Path expectedOutputPath = new Path(outputRoot, relativeDestination);
  Assert.assertTrue(this.fs.exists(expectedOutputPath));
  FileStatus committedStatus = this.fs.getFileStatus(expectedOutputPath);
  Assert.assertEquals(committedStatus.getOwner(), ownerAndPermission.getOwner());
  Assert.assertEquals(committedStatus.getGroup(), ownerAndPermission.getGroup());
  Assert.assertEquals(committedStatus.getPermission(), filePermission);

  // The newly created parent carries the expected directory permissions
  FileStatus parentStatus = this.fs.getFileStatus(expectedOutputPath.getParent());
  Assert.assertEquals(parentStatus.getPermission(), expectedDirPermission);

  // The directory that existed before the commit keeps its permissions
  FileStatus existingStatus = this.fs.getFileStatus(existingOutputPath);
  Assert.assertEquals(existingStatus.getPermission(), permissionBeforeCommit);

  Assert.assertFalse(this.fs.exists(writer.stagingDir));
}
Aggregations