Search in sources :

Example 1 with OwnerAndPermission

use of org.apache.gobblin.data.management.copy.OwnerAndPermission in project incubator-gobblin by apache.

the class FileAwareInputStreamDataWriter method commit.

/**
 * Moves the file from task staging to task output. Each task has its own staging directory but all the tasks share
 * the same task output directory.
 *
 * <p>Does nothing when no copyable file has been processed. Otherwise it calls {@code setFilePermissions} for the
 * file, makes sure the parent directory of the output path exists (using the file's ancestor owner/permission
 * entries, if any), and renames the staged file to its output path. If any of those steps throws an
 * {@link IOException}, the staged file is handed to the recovery helper before the exception is rethrown. Once a
 * commit has been attempted, the task staging directory is always deleted; a failure of that cleanup is only logged.
 *
 * {@inheritDoc}
 *
 * @throws IOException if preparing the output location fails or the rename reports failure
 * @see org.apache.gobblin.writer.DataWriter#commit()
 */
@Override
public void commit() throws IOException {
    if (!this.actualProcessedCopyableFile.isPresent()) {
        return;
    }
    CopyableFile copyableFile = this.actualProcessedCopyableFile.get();
    Path stagingFilePath = getStagingFilePath(copyableFile);
    Path outputFilePath = getOutputFilePath(copyableFile, this.outputDir, copyableFile.getDatasetAndPartition(this.copyableDatasetMetadata));
    log.info(String.format("Committing data from %s to %s", stagingFilePath, outputFilePath));
    try {
        setFilePermissions(copyableFile);
        Iterator<OwnerAndPermission> ancestorOwnerAndPermissionIt = copyableFile.getAncestorsOwnerAndPermission() == null ? Iterators.<OwnerAndPermission>emptyIterator() : copyableFile.getAncestorsOwnerAndPermission().iterator();
        ensureDirectoryExists(this.fs, outputFilePath.getParent(), ancestorOwnerAndPermissionIt);
        if (!this.fs.rename(stagingFilePath, outputFilePath)) {
            // rename signalled failure without throwing (presumably because the target already exists)
            throw new IOException(String.format("Could not commit file %s.", outputFilePath));
        }
    } catch (IOException ioe) {
        // keep the staged file around via the recovery helper before the staging directory is wiped below
        this.recoveryHelper.persistFile(this.state, copyableFile, stagingFilePath);
        throw ioe;
    } finally {
        try {
            this.fs.delete(this.stagingDir, true);
        } catch (IOException ioe) {
            // best-effort cleanup: do not fail the commit, but keep the cause in the log
            log.warn("Failed to delete staging path at " + this.stagingDir, ioe);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) OwnerAndPermission(org.apache.gobblin.data.management.copy.OwnerAndPermission) CopyableFile(org.apache.gobblin.data.management.copy.CopyableFile) IOException(java.io.IOException)

Example 2 with OwnerAndPermission

use of org.apache.gobblin.data.management.copy.OwnerAndPermission in project incubator-gobblin by apache.

the class FileAwareInputStreamDataWriter method ensureDirectoryExists.

/**
 * Creates {@code path} if it does not exist yet, recursing into missing parents first.
 *
 * <p>For every directory that has to be created, one entry is taken from {@code ownerAndPermissionIterator}: the
 * first entry is used for {@code path} itself, the next one for its parent, and so on. Directories that already
 * exist consume no entry and are left untouched. Once the iterator is exhausted, the remaining path is created with
 * a plain {@code fs.mkdirs(path)} and no owner or permission is applied.
 *
 * @param fs file system on which the directory is created
 * @param path directory that must exist when this method returns
 * @param ownerAndPermissionIterator owner/group/permission entries, ordered from {@code path} upwards
 * @throws IOException if a file system operation fails
 */
private void ensureDirectoryExists(FileSystem fs, Path path, Iterator<OwnerAndPermission> ownerAndPermissionIterator) throws IOException {
    if (fs.exists(path)) {
        return;
    }
    if (ownerAndPermissionIterator.hasNext()) {
        // Take this level's entry before recursing so that parents receive the following entries.
        OwnerAndPermission ownerAndPermission = ownerAndPermissionIterator.next();
        if (path.getParent() != null) {
            ensureDirectoryExists(fs, path.getParent(), ownerAndPermissionIterator);
        }
        if (!fs.mkdirs(path)) {
            // mkdirs reported that it did not create the directory. Do not overwrite permissions
            return;
        }
        if (ownerAndPermission.getFsPermission() != null) {
            log.debug("Applying permissions {} to path {}.", ownerAndPermission.getFsPermission(), path);
            // The permission goes through addExecutePermissionToOwner before it is applied to the directory.
            fs.setPermission(path, addExecutePermissionToOwner(ownerAndPermission.getFsPermission()));
        }
        String group = ownerAndPermission.getGroup();
        String owner = ownerAndPermission.getOwner();
        if (group != null || owner != null) {
            log.debug("Applying owner {} and group {} to path {}.", owner, group, path);
            fs.setOwner(path, owner, group);
        }
    } else {
        fs.mkdirs(path);
    }
}
Also used : OwnerAndPermission(org.apache.gobblin.data.management.copy.OwnerAndPermission)

Example 3 with OwnerAndPermission

use of org.apache.gobblin.data.management.copy.OwnerAndPermission in project incubator-gobblin by apache.

the class FileAwareInputStreamDataWriterTest method testWrite.

/**
 * Writes a small in-memory stream through a {@link FileAwareInputStreamDataWriter}, commits it, and checks that the
 * file found at the expected output location contains exactly the written string.
 */
@Test
public void testWrite() throws Exception {
    String streamString = "testContents";
    FileStatus status = fs.getFileStatus(testTempPath);
    OwnerAndPermission ownerAndPermission = new OwnerAndPermission(status.getOwner(), status.getGroup(), new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
    CopyableFile cf = CopyableFileUtils.getTestCopyableFile(ownerAndPermission);
    CopyableDatasetMetadata metadata = new CopyableDatasetMetadata(new TestCopyableDataset(new Path("/source")));
    WorkUnitState state = TestUtils.createTestWorkUnitState();
    state.setProp(ConfigurationKeys.WRITER_STAGING_DIR, new Path(testTempPath, "staging").toString());
    state.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, new Path(testTempPath, "output").toString());
    state.setProp(ConfigurationKeys.WRITER_FILE_PATH, RandomStringUtils.randomAlphabetic(5));
    CopySource.serializeCopyEntity(state, cf);
    CopySource.serializeCopyableDataset(state, metadata);
    FileAwareInputStreamDataWriter dataWriter = new FileAwareInputStreamDataWriter(state, 1, 0);
    FileAwareInputStream fileAwareInputStream = new FileAwareInputStream(cf, StreamUtils.convertStream(IOUtils.toInputStream(streamString)));
    dataWriter.write(fileAwareInputStream);
    dataWriter.commit();
    // Expected location: <writer output dir>/<dataset-and-partition identifier>/<file destination>
    Path writtenFilePath = new Path(new Path(state.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR), cf.getDatasetAndPartition(metadata).identifier()), cf.getDestination());
    // Close the verification stream even when the assertion fails.
    try (FileInputStream writtenContents = new FileInputStream(writtenFilePath.toString())) {
        Assert.assertEquals(IOUtils.toString(writtenContents), streamString);
    }
}
Also used : TestCopyableDataset(org.apache.gobblin.data.management.copy.TestCopyableDataset) Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) CopyableDatasetMetadata(org.apache.gobblin.data.management.copy.CopyableDatasetMetadata) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) OwnerAndPermission(org.apache.gobblin.data.management.copy.OwnerAndPermission) FileAwareInputStream(org.apache.gobblin.data.management.copy.FileAwareInputStream) CopyableFile(org.apache.gobblin.data.management.copy.CopyableFile) FsPermission(org.apache.hadoop.fs.permission.FsPermission) FileInputStream(java.io.FileInputStream) Test(org.testng.annotations.Test)

Example 4 with OwnerAndPermission

use of org.apache.gobblin.data.management.copy.OwnerAndPermission in project incubator-gobblin by apache.

the class TarArchiveInputStreamDataWriterTest method getCompressedInputStream.

/**
 * Find the test compressed file {@code filePath} in classpath and read it as a {@link FileAwareInputStream}.
 *
 * <p>The resource is opened on the local file system, wrapped together with a test {@link CopyableFile} whose
 * destination is {@code newFileName} under {@code testTempPath}, and passed through an {@link UnGzipConverter}.
 *
 * @param filePath classpath-relative location of the compressed test file
 * @param newFileName file name used when building the test {@link CopyableFile}
 * @return the first stream produced by the converter, or {@code null} if it produced none
 * @throws IllegalArgumentException if {@code filePath} cannot be found on the classpath
 * @throws Exception if the file cannot be opened or converted
 */
private FileAwareInputStream getCompressedInputStream(final String filePath, final String newFileName) throws Exception {
    UnGzipConverter converter = new UnGzipConverter();
    FileSystem fs = FileSystem.getLocal(new Configuration());
    java.net.URL resource = getClass().getClassLoader().getResource(filePath);
    if (resource == null) {
        // Fail with a readable message instead of a bare NullPointerException.
        throw new IllegalArgumentException("Test resource not found on classpath: " + filePath);
    }
    String fullPath = resource.getFile();
    FileStatus status = fs.getFileStatus(testTempPath);
    OwnerAndPermission ownerAndPermission = new OwnerAndPermission(status.getOwner(), status.getGroup(), new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
    CopyableFile cf = CopyableFileUtils.getTestCopyableFile(filePath, new Path(testTempPath, newFileName).toString(), newFileName, ownerAndPermission);
    FileAwareInputStream fileAwareInputStream = new FileAwareInputStream(cf, fs.open(new Path(fullPath)));
    Iterable<FileAwareInputStream> iterable = converter.convertRecord("outputSchema", fileAwareInputStream, new WorkUnitState());
    return Iterables.getFirst(iterable, null);
}
Also used : Path(org.apache.hadoop.fs.Path) UnGzipConverter(org.apache.gobblin.data.management.copy.converter.UnGzipConverter) FileStatus(org.apache.hadoop.fs.FileStatus) Configuration(org.apache.hadoop.conf.Configuration) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) FileSystem(org.apache.hadoop.fs.FileSystem) OwnerAndPermission(org.apache.gobblin.data.management.copy.OwnerAndPermission) FileAwareInputStream(org.apache.gobblin.data.management.copy.FileAwareInputStream) CopyableFile(org.apache.gobblin.data.management.copy.CopyableFile) FsPermission(org.apache.hadoop.fs.permission.FsPermission)

Example 5 with OwnerAndPermission

use of org.apache.gobblin.data.management.copy.OwnerAndPermission in project incubator-gobblin by apache.

the class FileAwareInputStreamDataWriterTest method testCommit.

/**
 * Exercises {@code FileAwareInputStreamDataWriter#commit()} against a temporary directory tree.
 *
 * <p>The test fakes the result of a write by creating the staged file by hand, pre-creates the first destination
 * directory under the writer output, and then commits. Afterwards it asserts that the file exists at the expected
 * output path with the requested owner, group and permission, that the newly created parent directory has the
 * expected directory permission, that the pre-existing directory kept its original permission, and that the
 * writer's staging directory is gone.
 */
@Test
public void testCommit() throws IOException {
    String destinationExistingToken = "destination";
    String destinationAdditionalTokens = "path";
    String fileName = "file";
    // Assemble destination paths
    Path destination = new Path(new Path(new Path("/", destinationExistingToken), destinationAdditionalTokens), fileName);
    Path destinationWithoutLeadingSeparator = new Path(new Path(destinationExistingToken, destinationAdditionalTokens), fileName);
    // Create temp directory
    File tmpFile = Files.createTempDir();
    tmpFile.deleteOnExit();
    Path tmpPath = new Path(tmpFile.getAbsolutePath());
    // create origin file
    Path originFile = new Path(tmpPath, fileName);
    this.fs.createNewFile(originFile);
    // create staging dir
    Path stagingDir = new Path(tmpPath, "staging");
    this.fs.mkdirs(stagingDir);
    // create output dir
    Path outputDir = new Path(tmpPath, "output");
    this.fs.mkdirs(outputDir);
    // create copyable file
    FileStatus status = this.fs.getFileStatus(originFile);
    FsPermission readWrite = new FsPermission(FsAction.READ_WRITE, FsAction.READ_WRITE, FsAction.READ_WRITE);
    // same as readWrite except that the owner action is ALL; this is what the created parent directory is expected to have
    FsPermission dirReadWrite = new FsPermission(FsAction.ALL, FsAction.READ_WRITE, FsAction.READ_WRITE);
    OwnerAndPermission ownerAndPermission = new OwnerAndPermission(status.getOwner(), status.getGroup(), readWrite);
    // the same owner/permission entry is reused for every ancestor level
    List<OwnerAndPermission> ancestorOwnerAndPermissions = Lists.newArrayList();
    ancestorOwnerAndPermissions.add(ownerAndPermission);
    ancestorOwnerAndPermissions.add(ownerAndPermission);
    ancestorOwnerAndPermissions.add(ownerAndPermission);
    ancestorOwnerAndPermissions.add(ownerAndPermission);
    Properties properties = new Properties();
    properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/publisher");
    CopyableFile cf = CopyableFile.fromOriginAndDestination(this.fs, status, destination, CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties).publishDir(new Path("/target")).preserve(PreserveAttributes.fromMnemonicString("")).build()).destinationOwnerAndPermission(ownerAndPermission).ancestorsOwnerAndPermission(ancestorOwnerAndPermissions).build();
    // create work unit state
    WorkUnitState state = TestUtils.createTestWorkUnitState();
    state.setProp(ConfigurationKeys.WRITER_STAGING_DIR, stagingDir.toUri().getPath());
    state.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, outputDir.toUri().getPath());
    state.setProp(ConfigurationKeys.WRITER_FILE_PATH, RandomStringUtils.randomAlphabetic(5));
    CopyableDatasetMetadata metadata = new CopyableDatasetMetadata(new TestCopyableDataset(new Path("/source")));
    CopySource.serializeCopyEntity(state, cf);
    CopySource.serializeCopyableDataset(state, metadata);
    // create writer
    FileAwareInputStreamDataWriter writer = new FileAwareInputStreamDataWriter(state, 1, 0);
    // create output of writer.write (an empty staged file stands in for a real write)
    Path writtenFile = writer.getStagingFilePath(cf);
    this.fs.mkdirs(writtenFile.getParent());
    this.fs.createNewFile(writtenFile);
    // create existing directories in writer output
    Path outputRoot = FileAwareInputStreamDataWriter.getPartitionOutputRoot(outputDir, cf.getDatasetAndPartition(metadata));
    Path existingOutputPath = new Path(outputRoot, destinationExistingToken);
    this.fs.mkdirs(existingOutputPath);
    FileStatus fileStatus = this.fs.getFileStatus(existingOutputPath);
    // remember the permission of the pre-existing directory so it can be compared after the commit
    FsPermission existingPathPermission = fileStatus.getPermission();
    // check initial state of the relevant directories
    Assert.assertTrue(this.fs.exists(existingOutputPath));
    Assert.assertEquals(this.fs.listStatus(existingOutputPath).length, 0);
    // mark the file as processed; commit() returns early when this is absent
    writer.actualProcessedCopyableFile = Optional.of(cf);
    // commit
    writer.commit();
    // check state of relevant paths after commit
    Path expectedOutputPath = new Path(outputRoot, destinationWithoutLeadingSeparator);
    Assert.assertTrue(this.fs.exists(expectedOutputPath));
    fileStatus = this.fs.getFileStatus(expectedOutputPath);
    Assert.assertEquals(fileStatus.getOwner(), ownerAndPermission.getOwner());
    Assert.assertEquals(fileStatus.getGroup(), ownerAndPermission.getGroup());
    Assert.assertEquals(fileStatus.getPermission(), readWrite);
    // parent should have permissions set correctly
    fileStatus = this.fs.getFileStatus(expectedOutputPath.getParent());
    Assert.assertEquals(fileStatus.getPermission(), dirReadWrite);
    // previously existing paths should not have permissions changed
    fileStatus = this.fs.getFileStatus(existingOutputPath);
    Assert.assertEquals(fileStatus.getPermission(), existingPathPermission);
    // the staging directory must have been cleaned up by commit()
    Assert.assertFalse(this.fs.exists(writer.stagingDir));
}
Also used : Path(org.apache.hadoop.fs.Path) TestCopyableDataset(org.apache.gobblin.data.management.copy.TestCopyableDataset) FileStatus(org.apache.hadoop.fs.FileStatus) Configuration(org.apache.hadoop.conf.Configuration) CopyConfiguration(org.apache.gobblin.data.management.copy.CopyConfiguration) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) CopyableDatasetMetadata(org.apache.gobblin.data.management.copy.CopyableDatasetMetadata) Properties(java.util.Properties) OwnerAndPermission(org.apache.gobblin.data.management.copy.OwnerAndPermission) CopyableFile(org.apache.gobblin.data.management.copy.CopyableFile) FsPermission(org.apache.hadoop.fs.permission.FsPermission) File(java.io.File) CopyableFile(org.apache.gobblin.data.management.copy.CopyableFile) Test(org.testng.annotations.Test)

Aggregations

OwnerAndPermission (org.apache.gobblin.data.management.copy.OwnerAndPermission)7 CopyableFile (org.apache.gobblin.data.management.copy.CopyableFile)5 FileStatus (org.apache.hadoop.fs.FileStatus)5 Path (org.apache.hadoop.fs.Path)5 WorkUnitState (org.apache.gobblin.configuration.WorkUnitState)4 FsPermission (org.apache.hadoop.fs.permission.FsPermission)4 CopyableDatasetMetadata (org.apache.gobblin.data.management.copy.CopyableDatasetMetadata)3 FileAwareInputStream (org.apache.gobblin.data.management.copy.FileAwareInputStream)3 TestCopyableDataset (org.apache.gobblin.data.management.copy.TestCopyableDataset)3 Configuration (org.apache.hadoop.conf.Configuration)3 Test (org.testng.annotations.Test)3 FileInputStream (java.io.FileInputStream)2 IOException (java.io.IOException)2 CopyConfiguration (org.apache.gobblin.data.management.copy.CopyConfiguration)2 FileSystem (org.apache.hadoop.fs.FileSystem)2 ByteArrayInputStream (java.io.ByteArrayInputStream)1 File (java.io.File)1 URI (java.net.URI)1 Properties (java.util.Properties)1 Builder (lombok.Builder)1